## Get data from Meetup.com

We are going to get data from Meetup's API. You can build custom requests in their API console (linked below), then make the same requests from Python by copying the secure link.

https://secure.meetup.com/meetup_api/console/?path=/2/members

In [1]:
import pandas as pd
import numpy as np
import requests
import os
import json
from time import sleep

# Meetup API key used by every request builder below. Prefer supplying it via
# the MEETUP_API_KEY environment variable; the literal is kept only as a
# backward-compatible fallback.
# NOTE(review): a key committed to source should be treated as leaked and rotated.
api_key = os.environ.get('MEETUP_API_KEY', '646928682c4b5d6f5f6c782a6b351b29')

#### Get PyNash Members

In [2]:
def get_all_groups(location_str, radius=25, write_path=None, max_pages=10):
    """Fetch Meetup groups near a location and return them as a DataFrame.

    Queries the ``find/groups`` endpoint page by page (200 results per page),
    authenticating with the module-level ``api_key``.

    Parameters
    ----------
    location_str : str
        Value sent as the ``location`` query parameter.
    radius : int, optional
        Value sent as the ``radius`` query parameter.
    write_path : str, optional
        If given, the resulting frame is also written to this path as CSV.
    max_pages : int, optional
        Maximum number of pages to request (default 10).

    Returns
    -------
    pandas.DataFrame
        Indexed by ``group_id`` with columns ``group_name``, ``num_members``,
        ``category_id``, ``category_name``, ``organizer_id``, ``group_urlname``.
        Groups missing any of the required fields are reported and skipped.
    """
    cols = ('group_id', 'group_name', 'num_members', 'category_id',
            'category_name', 'organizer_id', 'group_urlname')
    rows = []

    for page in range(max_pages):
        q = 'https://api.meetup.com/find/groups?&sign=true&location={}&radius={}&page=200&offset={}'.format(location_str, radius, page)
        q += '&key={}'.format(api_key)
        response = requests.get(q).json()

        # A successful call yields a list of group dicts; anything else
        # (e.g. an error object) cannot be iterated as groups.
        if not isinstance(response, list):
            print('Unexpected response:', response)
            break
        # An empty page means there is nothing further to fetch.
        if not response:
            break

        for g in response:
            # Handle a missing field per group so that one incomplete record
            # does not discard the remainder of the page.
            try:
                rows.append((g['id'], g['name'], g['members'], g['category']['id'],
                             g['category']['name'], g['organizer']['id'], g['urlname']))
            except KeyError as exc:
                print(g.get('name'), exc)

        # Sleep briefly so that API doesn't get overwhelmed
        sleep(0.2)

    # Build the frame once instead of appending row by row.
    all_groups = pd.DataFrame(rows, columns=list(cols))
    all_groups = all_groups.astype({'group_id': int, 'organizer_id': int, 'category_id': int, 'num_members': int})
    all_groups = all_groups.set_index('group_id')

    # Write to computer
    if write_path:
        all_groups.to_csv(write_path, encoding='utf-8')

    return all_groups


def get_group_members(group_id, api_key):
    """Return a DataFrame with all members of a Meetup group.

    Requests the ``/2/members`` endpoint page by page (200 results per page)
    until a page comes back with no results.

    Parameters
    ----------
    group_id : int
        Meetup group ID, sent as the ``group_id`` query parameter.
    api_key : str
        Meetup API key, sent as the ``key`` query parameter.

    Returns
    -------
    pandas.DataFrame
        The concatenated ``results`` of every page, plus a ``group_id`` column
        holding the requested ID. Empty (apart from ``group_id``) if nothing
        was returned.
    """
    member_fields = 'name,id,city,state,hometown,joined,visited,lat,lon'
    pages = []
    page = 0
    bad_iters = 0

    # Keep querying until there are no more results
    while True:
        q = 'https://api.meetup.com/2/members?'
        q += '&sign=true&group_id={}&only={}&page=200&offset={}'.format(group_id, member_fields, page)
        q += '&key={}'.format(api_key)

        try:
            response = requests.get(q).json()
        except ValueError:
            # JSON decode errors are ValueError subclasses; retry the same
            # page a limited number of times before giving up.
            bad_iters += 1
            if bad_iters > 5:
                break
            continue

        # A payload without a non-empty 'results' list (last page or an error
        # object) ends pagination; re-requesting it would loop forever.
        results = response.get('results') if isinstance(response, dict) else None
        if not results:
            break

        pages.append(pd.DataFrame.from_dict(results))
        page += 1

    # Concatenate once; per-page row labels are kept, as with repeated append.
    members = pd.concat(pages) if pages else pd.DataFrame()
    members['group_id'] = group_id

    return members



def agg_group_members(list_of_group_ids, api_key, write_path=None, intermediate_path=None):
    """Collect the members of several Meetup groups into one DataFrame.

    Parameters
    ----------
    list_of_group_ids : iterable
        Group IDs passed one at a time to ``get_group_members``.
    api_key : str
        Meetup API key forwarded to ``get_group_members``.
    write_path : str, optional
        If given, the combined frame is written to this path as CSV.
    intermediate_path : str, optional
        If given, each group's frame is also written to
        ``<intermediate_path>/<group_id>.csv``.

    Returns
    -------
    pandas.DataFrame
        All per-group frames concatenated; empty if no group produced data.
    """
    frames = []

    for g in list_of_group_ids:
        # Pause between groups to stay gentle on the API.
        sleep(1)
        try:
            tdf = get_group_members(g, api_key)
        except KeyError as exc:
            # Report the failing group ID and move on to the next one.
            print(g, exc)
            continue

        if intermediate_path:
            tdf.to_csv('{}/{}.csv'.format(intermediate_path, g), encoding='utf-8')
        frames.append(tdf)

    # Concatenate once instead of growing a frame inside the loop.
    all_members = pd.concat(frames) if frames else pd.DataFrame()

    # Write to computer
    if write_path:
        # Disabled date conversion kept from the original for reference:
        #for date_col in ['joined', 'visited']:
        #    members[date_col] = pd.to_datetime(members[date_col], unit='ms')
        all_members.to_csv(write_path, encoding='utf-8')

    return all_members

#### Get memberships for individual members

In [71]:
# Display the group urlname currently bound in the session.
urlname

'meetup-group-kpVyqswI'

In [92]:
# Display the raw body of the most recent response ('[]' in the recorded output).
response.text

'[]'

In [89]:
#urlname = 'meetup-group-kpVyqswI'
# Scratch version of the past-events request for the group in `urlname`.
q = 'https://api.meetup.com/{}/events?'.format(urlname)
q += '&sign=true&page=200&status=past&only=id,name,status,time,yes_rsvp_count&desc=True'
# NOTE(review): the key is appended bare, without the `key=` name used by the
# `&key={}` request builders earlier in this file - confirm this is intended.
q += '&{}'.format(api_key)
response = requests.get(q)
# NOTE: `response` is the object returned by requests.get, not its parsed
# JSON; the "DataFrame constructor not properly called!" ValueError recorded
# for this cell comes from passing it to the constructor below.
events_df = pd.DataFrame.from_dict(response)
# Convert the millisecond `time` values to datetimes and tag rows with the group.
events_df.time = pd.to_datetime(events_df.time, unit='ms')
events_df['group_urlname'] = urlname

ValueError: DataFrame constructor not properly called!

In [98]:
def get_events(urlname, api_key, date_filter_str=None):
    """Return a DataFrame of past events for a Meetup group.

    Parameters
    ----------
    urlname : str
        Group urlname used in the request path.
    api_key : str
        Meetup API key, sent as the ``key`` query parameter.
    date_filter_str : str, optional
        If given, only events whose ``time`` is later than this date
        (parsed with ``pd.to_datetime``) are kept.

    Returns
    -------
    pandas.DataFrame
        One row per event with the requested fields (``id``, ``name``,
        ``status``, ``time``, ``yes_rsvp_count``), ``time`` converted from
        milliseconds to datetimes, plus a ``group_urlname`` column.

    Raises
    ------
    ValueError
        If the API answers with status 410, with an empty result, or with a
        payload that is not a list of events.
    """
    q = 'https://api.meetup.com/{}/events?'.format(urlname)
    q += '&sign=true&page=200&status=past&only=id,name,status,time,yes_rsvp_count&desc=True'
    # Send the key under its parameter name, matching the other request builders.
    q += '&key={}'.format(api_key)
    response = requests.get(q)
    if response.status_code == 410:
        raise ValueError('Group not accessible.')

    # Parse the body once and reuse it.
    payload = response.json()
    if len(payload) == 0:
        raise ValueError('No event results.')
    if not isinstance(payload, list):
        # An error object would otherwise fail later on the missing 'time' column.
        raise ValueError('Unexpected response: {}'.format(payload))

    events_df = pd.DataFrame.from_dict(payload)
    events_df['time'] = pd.to_datetime(events_df['time'], unit='ms')
    events_df['group_urlname'] = urlname

    if date_filter_str:
        events_df = events_df.loc[events_df['time'] > pd.to_datetime(date_filter_str)]

    return events_df
    
def get_event_rsvps(urlname, event_id, api_key):
    """Return the members who RSVPed "yes" to one event.

    Parameters
    ----------
    urlname : str
        Group urlname used in the request path.
    event_id : str or int
        Event ID used in the request path.
    api_key : str
        Meetup API key, sent as the ``key`` query parameter.

    Returns
    -------
    pandas.DataFrame
        Columns ``group_urlname``, ``event_id``, ``member_id``; one row per
        RSVP entry in the response.

    Raises
    ------
    ValueError
        If the response payload is not a list of RSVP entries.
    """
    q = 'https://api.meetup.com/{}/events/{}/rsvps?'.format(urlname, event_id)
    q += '&sign=true&photo-host=public&response=yes&only=member'
    # Send the key under its parameter name, matching the other request builders.
    q += '&key={}'.format(api_key)
    payload = requests.get(q).json()
    if not isinstance(payload, list):
        # Iterating an error object would yield its keys, not RSVP entries.
        raise ValueError('Unexpected response: {}'.format(payload))

    member_list = [(urlname, event_id, mem['member']['id']) for mem in payload]
    rsvp_df = pd.DataFrame(member_list, columns=['group_urlname', 'event_id', 'member_id'])
    return rsvp_df

def get_all_event_rsvps(urlname, list_of_event_ids, api_key):
    """Return the "yes" RSVPs for several events of one group.

    Calls ``get_event_rsvps`` once per event ID and stacks the results.

    Parameters
    ----------
    urlname : str
        Group urlname forwarded to ``get_event_rsvps``.
    list_of_event_ids : iterable
        Event IDs to fetch.
    api_key : str
        Meetup API key forwarded to ``get_event_rsvps``.

    Returns
    -------
    pandas.DataFrame
        Columns ``group_urlname``, ``event_id``, ``member_id`` with a fresh
        0..n-1 index; empty (with those columns) when no event IDs are given.
    """
    rsvp_cols = ['group_urlname', 'event_id', 'member_id']
    frames = [get_event_rsvps(urlname, eid, api_key) for eid in list_of_event_ids]

    # pd.concat rejects an empty list, so return the empty schema directly.
    if not frames:
        return pd.DataFrame(columns=rsvp_cols)

    # Concatenate once instead of growing a frame inside the loop.
    return pd.concat(frames, ignore_index=True)

## Run Data

In [4]:
# read in membership data and trim to "recent" visits
# os.path.join keeps the path portable; a literal backslash is only a
# separator on Windows.
edges = pd.read_csv(os.path.join('data', 'memberships.csv'), parse_dates=['joined', 'visited'])
# edges = edges.loc[edges.visited > pd.to_datetime('2016-01-01')]

# create a "members" dataset: first recorded attributes per member, plus the
# number of membership rows each member has
members = edges[['member_id', 'name', 'hometown', 'city', 'state', 'lat', 'lon']].groupby('member_id').first()
members['num_groups'] = edges[['member_id']].groupby('member_id').size()

# read in group information and trim down to only groups with edges
groups = pd.read_csv(os.path.join('data', 'groups.csv'), index_col='group_id')
# reindex (rather than .loc with a list) tolerates group IDs that are missing
# from groups.csv: they become all-NaN rows instead of raising KeyError.
groups = groups.reindex(edges.group_id.unique())

In [61]:
# Delete the events CSV. `all_events_filename` is assigned in a later cell of
# this file, so that cell must already have been run in the session.
os.remove(all_events_filename)

In [113]:
all_events_filename = './data/events.csv'
all_rsvps_filename = './data/rsvps.csv'

# Column layouts of the two accumulating CSV files.
event_cols = ['event_id', 'name', 'status', 'time', 'yes_rsvp_count', 'group_urlname']
rsvp_cols = ['group_urlname', 'event_id', 'member_id']

# Create each output file with just a header row the first time through.
if not os.path.exists(all_events_filename):
    pd.DataFrame(columns=event_cols).to_csv(all_events_filename, header=True, index=None)
if not os.path.exists(all_rsvps_filename):
    pd.DataFrame(columns=rsvp_cols).to_csv(all_rsvps_filename, header=True, index=None)

# Start at position 712 of the NaN-free groups, skipping the earlier rows.
for i, g in groups.dropna().iloc[712:].iterrows():
    try:
        print(g.group_name)
        events = get_events(g.group_urlname, api_key, date_filter_str='2015-11-01')
        rsvps = get_all_event_rsvps(g.group_urlname, events.id.tolist(), api_key)

        # Rows are appended without a header, so force both frames into the
        # header's column names and order before writing.
        (events.rename(columns={'id': 'event_id'})
               .reindex(columns=event_cols)
               .to_csv(all_events_filename, header=False, mode='a', index=None))
        (rsvps.reindex(columns=rsvp_cols)
              .to_csv(all_rsvps_filename, header=False, mode='a', index=None))
        sleep(1)
    except ValueError as exc:
        # Raised by get_events for inaccessible groups or empty results.
        print(exc)
    except (ConnectionError, requests.exceptions.ConnectionError) as exc:
        # requests raises its own ConnectionError, which is not a subclass of
        # the builtin one, so both are listed.
        print(exc)
    finally:
        # Drop per-group frames so nothing stale survives into the next pass.
        if 'events' in dir():
            del events
        if 'rsvps' in dir():
            del rsvps

Nashville Hiking Meetup


In [100]:
Nashville Improv & Comedy Meetup
Nasville Slow Ride


Int64Index([18964683], dtype='int64', name='group_id')

In [111]:
# Find where 'Nashville Hiking Meetup' sits among the NaN-free groups: take
# the group_id of the first matching row, then look up its position in the
# filtered index.
hiking_mask = groups.group_name == 'Nashville Hiking Meetup'
i = groups.index[hiking_mask].values[0]
complete_groups = groups.dropna()
complete_groups.index.get_loc(i)

712

In [95]:
# Display the rows of `groups` whose group_name equals this exact string.
groups.loc[groups.group_name == 'Nashville Infusing & Connecting the New Technology Explosi']

Unnamed: 0_level_0,group_name,num_members,category_id,category_name,organizer_id,group_urlname
group_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
25347137,Nashville Infusing & Connecting the New Techno...,22.0,34.0,Tech,232889529.0,meetup-group-aldFOwAx


In [59]:
# Display the rows of `groups` that have no group_name.
# NOTE(review): presumably group IDs present in `edges` but absent from
# groups.csv - TODO confirm.
groups.loc[groups.group_name.isnull()]

Unnamed: 0_level_0,group_name,num_members,category_id,category_name,organizer_id,group_urlname
group_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
26351328,,,,,,
21778376,,,,,,
25241971,,,,,,
21823300,,,,,,
21821118,,,,,,


In [27]:
# from time import sleep
# import json

# edges = pd.DataFrame()
# err_ids = []
# for pid in members.index:
#     r = requests.get('https://api.meetup.com/2/groups?&sign=true&member_id={}&page=200&key=1eb16676d664fa48314391ae5b6c'.format(pid))
#     try:
#         r = r.json()
#         for membership in r['results']:
#             edge = pd.Series({'member_id': pid, 
#                               'group_id': membership['id'], 
#                               'group_name': membership['name']})
#             edges = edges.append(edge, ignore_index=True)
#     except json.decoder.JSONDecodeError:
#         print(pid)
#         err_ids.append(pid)
    
#     # Sleep briefly so that API doesn't get overwhelmed
#     sleep(0.2)
        

# # Write to computer
# write_data = True
# if write_data == True:
#     edges.to_csv('data_edges.csv') 
