## Get data from Meetup.com

We are going to pull data from Meetup's API. You can build custom requests in their API console (linked below), then issue the same requests from Python by copying the secure link that the console produces.

https://secure.meetup.com/meetup_api/console/?path=/2/members

In [1]:
import pandas as pd
import numpy as np
import requests
import os
import json
from time import sleep

#### Get PyNash Members

In [6]:
def get_all_groups(location_str, radius=25, write_path=None):
    """Fetch Meetup groups near a location and return them as a DataFrame.

    Requests up to 10 pages of 200 groups each from the ``find/groups``
    endpoint, pausing briefly between requests, and stops early once a page
    comes back empty.

    Parameters
    ----------
    location_str : str
        Free-text location sent as the ``location`` query parameter.
    radius : int, optional
        Value sent as the ``radius`` query parameter (default 25).
    write_path : str, optional
        If given, the resulting table is also written to this path as CSV.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``group_id`` with columns ``group_name``, ``num_members``,
        ``category_id``, ``category_name``, ``organizer_id`` and
        ``group_urlname``. A group missing any of these fields is reported
        on stdout and left out; the other groups on its page are kept.
    """
    cols = ('group_id', 'group_name', 'num_members', 'category_id',
            'category_name', 'organizer_id', 'group_urlname')
    url = 'https://api.meetup.com/find/groups'
    records = []

    for page in range(10):
        # Let requests build the query string so that spaces and other
        # special characters in location_str are escaped correctly.
        # NOTE(review): the API key is hard-coded here; consider loading it
        # from configuration instead of keeping it in the notebook.
        params = {
            'sign': 'true',
            'photo-host': 'public',
            'location': location_str,
            'radius': radius,
            'page': 200,
            'offset': page,
            'key': '1eb16676d664fa48314391ae5b6c',
        }
        response = requests.get(url, params=params).json()
        if not response:
            # An empty page means there is nothing left to fetch.
            break

        for g in response:
            # Handle missing fields per group, so that one incomplete group
            # does not discard the remainder of the page.
            try:
                records.append((g['id'], g['name'], g['members'],
                                g['category']['id'], g['category']['name'],
                                g['organizer']['id'], g['urlname']))
            except KeyError as exc:
                print(g.get('name'), exc)

        # Sleep briefly so that API doesn't get overwhelmed
        sleep(0.2)

    # Build the frame once instead of appending row by row.
    all_groups = pd.DataFrame(records, columns=list(cols))
    all_groups = all_groups.astype({'group_id': int, 'organizer_id': int,
                                    'category_id': int, 'num_members': int})
    all_groups = all_groups.set_index('group_id')

    # Write to computer
    if write_path:
        all_groups.to_csv(write_path, encoding='utf-8')

    return all_groups


def get_group_members(group_id):
    """Download the member list of one Meetup group.

    Pages through the ``2/members`` endpoint 200 members at a time until a
    page with no results is returned.

    Parameters
    ----------
    group_id : int
        Meetup id of the group, sent as the ``group_id`` query parameter.

    Returns
    -------
    pandas.DataFrame or None
        One row per member with the fields the API returned for the
        ``only=`` list in the request, plus a ``group_id`` column holding
        the id that was passed in. Row labels restart on every page because
        pages are concatenated as-is. ``None`` is returned after more than
        5 consecutive responses that were not JSON or had no ``results``
        key.
    """
    # NOTE(review): the API key is hard-coded here; consider loading it
    # from configuration instead of keeping it in the notebook.
    url_template = 'https://api.meetup.com/2/members?&sign=true&photo-host=public&group_id={}&only=name,id,city,state,hometown,joined,visited,lat,lon&page=200&offset={}&key=1eb16676d664fa48314391ae5b6c'

    pages = []
    page = 0
    bad_iters = 0

    while True:
        q = url_template.format(group_id, page)
        try:
            # Decoding happens inside the try so that a non-JSON body is
            # counted as a failed attempt instead of propagating.
            response = requests.get(q).json()
        except ValueError:
            response = None

        if not isinstance(response, dict) or 'results' not in response:
            # Unusable response: wait and retry the same page, but give up
            # eventually rather than looping forever.
            bad_iters += 1
            if bad_iters > 5:
                return None
            sleep(2)
            continue

        bad_iters = 0
        results = response['results']
        if not results:
            # An empty page marks the end of the member list.
            break

        pages.append(pd.DataFrame.from_dict(results))
        page += 1

    # Concatenate once at the end instead of growing a frame page by page.
    members = pd.concat(pages) if pages else pd.DataFrame()
    members['group_id'] = group_id

    return members



def agg_group_members(list_of_group_ids, write_path=None, intermediate_path=None):
    """Download and combine the member lists of several groups.

    Calls ``get_group_members`` for each id, waiting one second before each
    call.

    Parameters
    ----------
    list_of_group_ids : iterable
        Group ids to fetch.
    write_path : str, optional
        If given, the combined table is written to this path as CSV.
    intermediate_path : str, optional
        If given, each group's table is also written to
        ``<intermediate_path>/<group id>.csv`` as soon as it is fetched.

    Returns
    -------
    pandas.DataFrame
        All fetched member tables concatenated; empty when nothing was
        fetched. Groups for which no table could be obtained are reported
        on stdout and skipped.
    """
    frames = []

    for g in list_of_group_ids:
        sleep(1)
        try:
            tdf = get_group_members(g)
        except KeyError as exc:
            print(g, exc)
            continue

        if tdf is None:
            # get_group_members gives up with None after repeated failures.
            print(g, 'no member data returned')
            continue

        if intermediate_path:
            tdf.to_csv('{}/{}.csv'.format(intermediate_path, g), encoding='utf-8')
        frames.append(tdf)

    # Concatenate once at the end; pd.concat rejects an empty list.
    all_members = pd.concat(frames) if frames else pd.DataFrame()

    # Write to computer
    if write_path:
        all_members.to_csv(write_path, encoding='utf-8')

    return all_members

In [4]:
# Hard-coded list of Meetup group ids; it is passed to agg_group_members in a
# later cell to download those groups' members.
# NOTE(review): the name suggests these are groups whose earlier member
# download came back empty -- confirm where the list came from.
empty_inds = [22982126, 25514523, 26148182, 25747812, 25842377, 21289207, 24471828, 20326297, 19072581, 18833001, 22824474, 19934899, 26258970, 25026657, 23234440, 25661378, 24985315, 19408839, 26254038, 7129942, 20229452, 26074382, 25442126, 24504847, 1819474, 24762391, 25122337, 22501996, 19764562, 22275880, 18194233, 24351103, 21771089, 23029297, 1512578, 24637264, 21846276, 23523548, 23709488, 23318990, 23181015, 4037642, 23505371, 19992350, 19215770, 4507792, 19549621, 1299250, 26042538, 19370256, 20488570, 20267150, 22632117, 19479699, 26080969, 18505473, 18737159, 19306451, 20486490, 19440811, 18487182, 24890978, 16731932, 21801924, 18916508, 21064154, 14194422, 18476335, 11138782, 25756605, 19042913, 20216370, 691322, 10580242, 23593297, 18630936, 20571755, 18642322, 1586635, 17732042, 19503486, 1680888, 1585263, 11574842, 25066933, 2749292, 19856615, 21148651, 18602694, 22782911, 20285980, 1768040, 19740004, 21506105, 21710544, 3718972, 23598515, 19591270, 8741902, 1049061, 23273659, 22916872, 17213452, 18412046, 23018525, 351080, 19459402, 19959627, 6859622, 1776410, 13993792, 4341532, 19115644, 24440003, 18502048, 2527902, 18483128, 1819536, 530494, 1438993, 510297, 1660079, 19035334, 18579141, 2662782, 1150309, 1682521, 4642582, 1770788, 20040517, 1788274, 9540322, 1694517, 17404882, 211512, 3260132, 16291092, 377617, 1334499, 3217182, 1226140, 1808866, 9452822, 1877261, 1583865, 968626]

In [5]:
# Fetch the groups around Nashville and keep a CSV copy of the table on disk.
groups = get_all_groups(
    location_str='nashville tn',
    write_path='data/groups.csv',
)

Community Experience Testers - Nashville 'category'


In [38]:
# Spot-check the member download on the group at position 5 when the groups
# are ordered by ascending member count.
get_group_members(
    groups.sort_values('num_members').index.tolist()[5]
)

Unnamed: 0_level_0,city,hometown,joined,lat,lon,name,state,visited,group_id
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
213837877,Nashville,,1507142265000,36.18,-86.74,Christian Hooser,TN,1507105911000,26140018
6770985,Nashville,Nashville,1507051818000,36.16,-86.79,Joe,TN,1508024880000,26140018
226920452,Nashville,Nashville,1507180412000,36.08,-86.72,Rachel,TN,1508082160000,26140018


In [None]:
# Download members for every id in empty_inds, saving one CSV per group under
# data/ and the combined table to data/memberships2.csv.
all_members = agg_group_members(
    list_of_group_ids=empty_inds,
    intermediate_path='data',
    write_path='data/memberships2.csv',
)

#### Get memberships for each member

In [27]:
from time import sleep
import json


# Build a member -> group edge list by asking the API which groups each
# member belongs to.
# NOTE(review): `members` is not defined in the cells shown here; it must
# already exist in the notebook session, indexed by Meetup member id.
# NOTE(review): the API key is hard-coded in the URL; consider loading it
# from configuration instead of keeping it in the notebook.
edge_rows = []
err_ids = []
for pid in members.index:
    r = requests.get('https://api.meetup.com/2/groups?&sign=true&member_id={}&page=200&key=1eb16676d664fa48314391ae5b6c'.format(pid))
    try:
        memberships = r.json()['results']
    except (ValueError, KeyError):
        # Non-JSON body, or a payload without 'results': record the member
        # id for a later retry instead of losing the edges gathered so far.
        print(pid)
        err_ids.append(pid)
    else:
        edge_rows.extend(
            {'member_id': pid,
             'group_id': membership['id'],
             'group_name': membership['name']}
            for membership in memberships
        )

    # Sleep briefly so that API doesn't get overwhelmed
    sleep(0.2)

# Build the frame once; appending row by row is quadratic.
edges = pd.DataFrame(edge_rows, columns=['member_id', 'group_id', 'group_name'])

# Write to computer
write_data = True
if write_data:
    edges.to_csv('data_edges.csv')
