## Get data from Meetup.com

We are going to get data from Meetup's API. You can build a custom request in their API console (linked below), then make the same request from Python by copying the secure link that the console provides.

https://secure.meetup.com/meetup_api/console/?path=/2/members

In [1]:
import pandas as pd
import numpy as np
import requests
import os
import json
from time import sleep

# Meetup API key used by every request below. Prefer supplying it through the
# MEETUP_API_KEY environment variable; the literal fallback only keeps existing
# runs working and should be revoked and removed, since a key committed to
# source is exposed to anyone who can read the file.
api_key = os.environ.get('MEETUP_API_KEY', '646928682c4b5d6f5f6c782a6b351b29')

#### Get PyNash Members

In [2]:
def get_all_groups(location_str, radius=25, write_path=None):
    """Fetch Meetup groups near a location and return them as a DataFrame.

    Queries the ``find/groups`` endpoint for up to 10 pages of 200 groups each,
    authenticating with the module-level ``api_key``. Paging stops early once
    a page comes back empty.

    Args:
        location_str: Free-text location sent as the API's ``location`` parameter.
        radius: Value sent as the API's ``radius`` parameter.
        write_path: Optional CSV path; when given, the result is also written
            there (UTF-8).

    Returns:
        DataFrame indexed by ``group_id`` with columns ``group_name``,
        ``num_members``, ``category_id``, ``category_name``, ``organizer_id``
        and ``group_urlname``. Groups missing any of the source fields are
        reported on stdout and skipped.
    """
    cols = ('group_id', 'group_name', 'num_members', 'category_id',
            'category_name', 'organizer_id', 'group_urlname')
    rows = []

    for page in range(10):
        # Pass the query through `params` so requests URL-encodes the
        # free-text location (spaces, commas, '&', ...).
        params = {
            'sign': 'true',
            'location': location_str,
            'radius': radius,
            'page': 200,
            'offset': page,
            'key': api_key,
        }
        response = requests.get('https://api.meetup.com/find/groups', params=params).json()
        if not response:
            # An empty page means there is nothing further to fetch.
            break

        for g in response:
            # Guard each group separately so that one incomplete record does
            # not discard the remaining groups on the page.
            try:
                rows.append((g['id'], g['name'], g['members'], g['category']['id'],
                             g['category']['name'], g['organizer']['id'], g['urlname']))
            except KeyError as exc:
                # .get() because 'name' may be the very field that is missing.
                print(g.get('name'), exc)

        # Sleep briefly so that API doesn't get overwhelmed
        sleep(0.2)

    # Build the frame once instead of appending row by row.
    all_groups = pd.DataFrame(rows, columns=list(cols))
    all_groups = all_groups.astype({'group_id': int, 'organizer_id': int,
                                    'category_id': int, 'num_members': int})
    all_groups = all_groups.set_index('group_id')

    # Write to computer
    if write_path:
        all_groups.to_csv(write_path, encoding='utf-8')

    return all_groups


def get_group_members(group_id, api_key):
    """Return a DataFrame with every member of a Meetup group.

    Pages through the ``/2/members`` endpoint 200 records at a time until the
    API returns an empty ``results`` list. A response that cannot be decoded
    as JSON, or that carries no ``results`` key, counts as a failed attempt;
    the same page is retried after a short pause, and the function gives up
    after more than 5 failed attempts in total.

    Args:
        group_id: Meetup group ID number.
        api_key: Meetup API key appended to each request.

    Returns:
        DataFrame of the member records returned by the API plus a
        ``group_id`` column. Pages are concatenated without re-indexing, so
        the row index restarts on every page. If nothing was retrieved the
        frame has no rows.
    """
    max_bad_iters = 5
    pages = []
    page = 0
    bad_iters = 0

    # Keep querying until there are no more results (or too many failures)
    while bad_iters <= max_bad_iters:
        q = 'https://api.meetup.com/2/members?'
        q += '&sign=true&group_id={}&only=name,id,city,state,hometown,joined,visited,lat,lon&page=200&offset={}'.format(group_id, page)
        q += '&key={}'.format(api_key)
        try:
            # JSON decode errors are ValueError subclasses; a missing
            # 'results' key raises KeyError.
            results = requests.get(q).json()['results']
        except (KeyError, ValueError):
            bad_iters += 1
            # Pause before retrying rather than hammering the API.
            sleep(1)
            continue

        if not results:
            break

        pages.append(pd.DataFrame.from_dict(results))
        page += 1

    # Concatenate once at the end instead of growing a frame page by page.
    members = pd.concat(pages) if pages else pd.DataFrame()
    members['group_id'] = group_id

    return members



def agg_group_members(list_of_group_ids, api_key, write_path=None, intermediate_path=None):
    """Collect the members of several Meetup groups into one DataFrame.

    Calls ``get_group_members`` for each group id, waiting one second before
    each call. A group whose retrieval raises ``KeyError`` is reported on
    stdout and skipped.

    Args:
        list_of_group_ids: Iterable of Meetup group ids.
        api_key: Meetup API key forwarded to ``get_group_members``.
        write_path: Optional CSV path for the combined result (UTF-8).
        intermediate_path: Optional directory; when given, each group's
            members are also written to ``<intermediate_path>/<group id>.csv``.

    Returns:
        DataFrame with the per-group frames stacked in input order, without
        re-indexing. Empty when no group produced data.
    """
    frames = []

    for g in list_of_group_ids:
        # Throttle requests between groups.
        sleep(1)
        try:
            tdf = get_group_members(g, api_key)
            if intermediate_path:
                tdf.to_csv('{}/{}.csv'.format(intermediate_path, g), encoding='utf-8')
            frames.append(tdf)
        except KeyError as exc:
            print(g, exc)
            continue

    # Concatenate once instead of appending inside the loop.
    all_members = pd.concat(frames) if frames else pd.DataFrame()

    # Write to computer
    if write_path:
        # Date conversion is intentionally left disabled here:
        # for date_col in ['joined', 'visited']:
        #     all_members[date_col] = pd.to_datetime(all_members[date_col], unit='ms')
        all_members.to_csv(write_path, encoding='utf-8')

    return all_members

#### Get memberships for individual members

In [3]:
def get_events(urlname, api_key, date_filter_str=None):
    """Return a DataFrame of a Meetup group's past events.

    Issues a single request (``page=200``, ``status=past``) to the group's
    ``events`` endpoint.

    Args:
        urlname: Meetup group urlname used in the request path.
        api_key: Meetup API key sent as the ``key`` query parameter.
        date_filter_str: Optional date string; when given, only events whose
            time is strictly later than this date are kept.

    Returns:
        DataFrame indexed by event ``id``, with the ``time`` column converted
        from epoch milliseconds to datetimes.

    Raises:
        KeyError: If the response yields no ``id`` column to index on.
    """
    q = 'https://api.meetup.com/{}/events?'.format(urlname)
    q += '&sign=true&page=200&status=past&only=id,name,status,time,yes_rsvp_count'
    # The key must be sent as a named parameter, as the other request helpers do.
    q += '&key={}'.format(api_key)
    response = requests.get(q).json()

    events_df = pd.DataFrame.from_dict(response).set_index('id')
    events_df['time'] = pd.to_datetime(events_df['time'], unit='ms')

    if date_filter_str:
        cutoff = pd.to_datetime(date_filter_str)
        events_df = events_df.loc[events_df['time'] > cutoff]

    return events_df
    
def get_event_rsvps(urlname, event_id, api_key):
    """Return the members who RSVP'd "yes" to one event.

    Args:
        urlname: Meetup group urlname used in the request path.
        event_id: Id of the event whose RSVPs are requested.
        api_key: Meetup API key sent as the ``key`` query parameter.

    Returns:
        DataFrame with one row per RSVP and columns ``group_urlname``,
        ``event_id`` and ``member_id``.
    """
    q = 'https://api.meetup.com/{}/events/{}/rsvps?'.format(urlname, event_id)
    q += '&sign=true&photo-host=public&response=yes&only=member'
    # The key must be sent as a named parameter, as the other request helpers do.
    q += '&key={}'.format(api_key)
    response = requests.get(q).json()

    member_list = [(urlname, event_id, mem['member']['id']) for mem in response]
    rsvp_df = pd.DataFrame(member_list, columns=['group_urlname', 'event_id', 'member_id'])
    return rsvp_df

def get_all_event_rsvps(urlname, list_of_event_ids, api_key):
    """Return the "yes" RSVPs for several events of one group.

    Args:
        urlname: Meetup group urlname.
        list_of_event_ids: Iterable of event ids to look up.
        api_key: Meetup API key forwarded to ``get_event_rsvps``.

    Returns:
        DataFrame with columns ``group_urlname``, ``event_id`` and
        ``member_id``, re-indexed from 0. Has no rows when no event ids are
        given.
    """
    columns = ['group_urlname', 'event_id', 'member_id']
    frames = [get_event_rsvps(urlname, eid, api_key) for eid in list_of_event_ids]

    # pd.concat rejects an empty list, so return the empty schema directly.
    if not frames:
        return pd.DataFrame(columns=columns)

    # One concat instead of repeated DataFrame.append calls.
    return pd.concat(frames, ignore_index=True)[columns]

In [27]:
# from time import sleep
# import json

# edges = pd.DataFrame()
# err_ids = []
# for pid in members.index:
#     r = requests.get('https://api.meetup.com/2/groups?&sign=true&member_id={}&page=200&key=1eb16676d664fa48314391ae5b6c'.format(pid))
#     try:
#         r = r.json()
#         for membership in r['results']:
#             edge = pd.Series({'member_id': pid, 
#                               'group_id': membership['id'], 
#                               'group_name': membership['name']})
#             edges = edges.append(edge, ignore_index=True)
#     except json.decoder.JSONDecodeError:
#         print(pid)
#         err_ids.append(pid)
    
#     # Sleep briefly so that API doesn't get overwhelmed
#     sleep(0.2)
        

# # Write to computer
# write_data = True
# if write_data == True:
#     edges.to_csv('data_edges.csv') 


## Run Data

In [4]:
# read in membership data (the "recent visits" filter below is currently disabled)
# Paths are built with os.path.join so the cell is not tied to Windows separators.
edges = pd.read_csv(os.path.join('data', 'memberships.csv'), parse_dates=['joined', 'visited'])
# edges = edges.loc[edges.visited > pd.to_datetime('2016-01-01')]

# create a "members" dataset: one row per member_id, taking the first value
# seen for each descriptive column
members = edges[['member_id', 'name', 'hometown', 'city', 'state', 'lat', 'lon']].groupby('member_id').first()
# number of membership rows per member
members['num_groups'] = edges[['member_id']].groupby('member_id').size()

# read in group information and trim down to only groups with edges
groups = pd.read_csv(os.path.join('data', 'groups.csv'), index_col='group_id')
groups = groups.loc[edges.group_id.unique()]

In [143]:
# Pick the urlname of a single group for ad-hoc inspection.
# NOTE: `g` is not defined in this cell; it is the loop variable left over from
# the `for i, g in groups.iterrows()` cell, so that cell must have run first.
u = g.group_urlname

In [130]:
# Gather events since 2015-11-01 for every group into one DataFrame.
event_frames = []
for i, g in groups.iterrows():
    try:
        events = get_events(g.group_urlname, api_key, date_filter_str='2015-11-01')
    except KeyError as exc:
        # No usable events came back for this group; report and move on.
        print(exc)
        continue
    event_frames.append(events)

# Concatenate once; fall back to an empty frame when nothing was collected.
all_events_df = pd.concat(event_frames) if event_frames else pd.DataFrame()
    

In [148]:
# Show the past events of the group stored in `u`, with no date filter applied.
get_events(u, api_key)

Unnamed: 0_level_0,name,status,time,yes_rsvp_count
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
5179345,Brainstorming on Trails to Hike,past,2006-10-08 15:00:00,2
5186029,Spring State Natural Area Hike,past,2006-10-15 19:00:00,8
5186136,Old Stone Fort Loop,past,2006-10-22 18:00:00,7
5216416,Randor Lake Hike,past,2006-10-23 01:00:00,1
5216408,Old Stone Fort Loop,past,2006-11-12 19:00:00,14
5297288,Bearwaller Gap Hike,past,2006-12-03 15:00:00,2
5295610,Mammoth Cave Adventure Hike,past,2007-01-20 12:00:00,12
5566374,Easy Hike at Radnor Lake,past,2007-03-24 19:00:00,24
5577205,Long Day Hike in the Smoky Mountains,past,2007-03-31 12:15:00,9
5566297,Moderate difficulty hike - Volunteer Trail Day...,past,2007-04-07 15:00:00,22


In [134]:
# Count the distinct index values (event ids) in the combined events table.
all_events_df.index.unique().shape

(83,)

In [93]:
# (rows, columns) of the RSVP table.
# NOTE(review): `rsvp_df` is not assigned in any cell shown here -- confirm which
# cell creates it before re-running.
rsvp_df.shape


(2400, 3)