# Scraping Data from Meetup.com

<b>This notebook scrapes group, event, RSVP and member data from meetup.com using its REST API.</b>

**Importing prerequisites**

In [1]:
import os
from json import loads
from time import sleep

import pandas as pd
import requests

**Defining the API key and base URL**

In [2]:
# The Meetup API key is read from the environment so that it is never stored
# in the notebook. Export MEETUP_API_KEY before starting the kernel. Any key
# that was previously committed in this file should be treated as leaked and
# revoked.
api_token = os.environ.get('MEETUP_API_KEY', '')
if not api_token:
    print('Warning: MEETUP_API_KEY is not set; requests that need a key will likely fail.')

# Root of the Meetup REST API. Some cells append a path directly to this
# value, so the trailing slash is kept.
api_url_base = 'https://api.meetup.com/'

In [3]:
# Group-discovery endpoint. The base URL already ends with '/', so it is
# stripped before joining to avoid a double slash in the request URL.
url = '{0}/find/groups'.format(api_url_base.rstrip('/'))

# Query-string parameters for the request. They are passed to requests via
# `params`, so despite the name these are not HTTP headers.
headers = {
    'sign' : 'true',
    'key' : api_token,
    'zip' : '411030'    # postal code used as the location filter
}

In [4]:
def get_meetup_group_data(url, headers, filename = None):
    '''Fetch Meetup groups from ``url`` and return them as a DataFrame.

    Parameters
    ----------
    url : str
        Endpoint to query.
    headers : dict
        Sent as the query-string parameters of the GET request (they are
        passed through ``params``, not as HTTP headers).
    filename : str, optional
        When given, the result is also written as CSV to
        ``../dataFiles/<filename>``.

    Returns
    -------
    pandas.DataFrame
        One row per group with the columns ``group_id``, ``group_name``,
        ``num_members``, ``category_id``, ``category_name``,
        ``organizer_id`` and ``group_urlname``. The id and member-count
        columns are cast to int. Groups missing any of the required keys
        are reported and skipped.

    Raises
    ------
    RuntimeError
        If the API answers with any status code other than 200.
    '''

    # Making a get request for data
    resp = requests.get(url, params = headers)
    status = resp.status_code

    # Printing status
    print('Response Code : {}'.format(status))

    # Only a plain 200 is treated as success. Raise instead of calling exit()
    # so that a failed request does not try to shut down the notebook kernel.
    if status != 200:
        print('Unsuccessful')
        raise RuntimeError('Meetup API request failed with status {}'.format(status))
    print('Successful.. ')

    cols = ('group_id', 'group_name', 'num_members', 'category_id',
            'category_name', 'organizer_id', 'group_urlname')

    groups = resp.json()

    # Collect plain tuples and build the frame once; growing a DataFrame row
    # by row is quadratic and DataFrame.append no longer exists in pandas 2.x.
    records = []
    for g in groups:
        # The try is per group so that one incomplete record does not drop
        # every group that follows it.
        try:
            records.append((g['id'], g['name'], g['members'], g['category']['id'],
                            g['category']['name'], g['organizer']['id'], g['urlname']))
        except KeyError as exc:
            print(g.get('name'), exc)

    if len(groups) > 0:
        # Sleep briefly so that API doesn't get overwhelmed
        sleep(0.2)

    meetup_group = pd.DataFrame(records, columns = list(cols))
    meetup_group = meetup_group.astype({'group_id': int, 'organizer_id': int, 'category_id': int, 'num_members': int})

    # Write to computer
    if filename:
        writepath = '../dataFiles/{}'.format(filename)
        meetup_group.to_csv(writepath, encoding='utf-8', index = False)

    return meetup_group
    

In [5]:
# Fetch the groups for the configured search and keep a CSV copy on disk.
meetup_group = get_meetup_group_data(
    url,
    headers,
    filename='meetup_group.csv',
)

Response Code : 200
Successful.. 


In [6]:
# Preview the first rows of the scraped group table.
meetup_group.head()

Unnamed: 0,group_id,group_name,num_members,category_id,category_name,organizer_id,group_urlname
0,19394496,Pune Developer's Community,4340,34,Tech,182849219,Pune-Developers-Community
1,4387912,Google Developer Group Pune : GDG Pune,6163,34,Tech,25480862,Pune-GDG
2,3730702,ExpertTalks Pune,6064,34,Tech,14196782,expert-talks-Pune
3,18446215,"Internet Of Things, Pune (IoTPune)",6804,6,Education & Learning,111363232,Internet-Of-Things-Pune-IoTPune
4,25835616,ThoughtWorks Pune Events,1261,34,Tech,235949519,ThoughtWorks-GeekNight-Pune


In [7]:
def get_events(url, headers, date_filter_str = None, urlname = None):
    '''Fetch the events of one group and return them as a DataFrame.

    Parameters
    ----------
    url : str
        Events endpoint of the group.
    headers : dict
        Sent as the query-string parameters of the GET request.
    date_filter_str : str, optional
        When given, only events whose ``time`` is strictly later than this
        date are kept.
    urlname : str, optional
        Stored in the ``group_urlname`` column of every returned row.

    Returns
    -------
    pandas.DataFrame or None
        The fields returned by the API plus ``group_urlname`` and
        ``event_id`` (a copy of ``id``), with ``time`` converted from epoch
        milliseconds to timestamps. ``None`` when the API returns no events.

    Raises
    ------
    ValueError
        If the API answers 410 (group not accessible) or any other status
        code that is not 200.
    '''

    response = requests.get(url, params = headers)
    if response.status_code == 410:
        raise ValueError('Group not accessible.')
    # Any other failure carries an error payload rather than an event list;
    # report it here instead of failing later on a missing 'time' column.
    if response.status_code != 200:
        raise ValueError('Request to {} failed with status {}.'.format(url, response.status_code))

    # Parse the body once and reuse it.
    payload = response.json()
    if len(payload) == 0:
        print('No event for this till now')
        return

    events_df = pd.DataFrame.from_dict(payload)
    events_df['time'] = pd.to_datetime(events_df['time'], unit='ms')
    events_df['group_urlname'] = urlname
    events_df['event_id'] = events_df['id']

    if date_filter_str:
        events_df = events_df.loc[events_df['time'] > pd.to_datetime(date_filter_str)]

    return events_df

In [8]:
# Query parameters for the per-group events requests: past events only,
# with 'page' set to 200 (presumably the page size; confirm against the API docs).
headers = dict(
    sign = 'true',
    key = api_token,
    page = '200',
    status = 'past',
)

In [9]:
# Columns kept in the final events table and in the CSV.
cols = ['event_id', 'name', 'status', 'time', 'yes_rsvp_count', 'group_urlname']

# Collect one frame per group and concatenate once at the end; concatenating
# inside the loop copies the accumulated data on every iteration.
event_frames = []

for gp_url in meetup_group.group_urlname:

    print('Getting Data for {}'.format(gp_url))
    # The base URL already ends with '/', so strip it to avoid a double slash.
    url = '{}/{}/events'.format(api_url_base.rstrip('/'), gp_url)

    # A single inaccessible group should not throw away everything that was
    # already fetched, so report it and move on.
    try:
        val = get_events(url = url, headers = headers, urlname = gp_url, date_filter_str='2017-01-01')
    except ValueError as exc:
        print('Skipping {}: {}'.format(gp_url, exc))
        continue

    # get_events returns None when a group has no events; the date filter can
    # also leave an empty frame. Neither contributes rows.
    if val is not None and not val.empty:
        event_frames.append(val)

if event_frames:
    events = pd.concat(event_frames, ignore_index=True).reindex(columns = cols)
else:
    events = pd.DataFrame(columns = cols)

events.to_csv('../dataFiles/events.csv', index=False)

Getting Data for Pune-Developers-Community
Getting Data for Pune-GDG
Getting Data for expert-talks-Pune
Getting Data for Internet-Of-Things-Pune-IoTPune
Getting Data for ThoughtWorks-GeekNight-Pune
Getting Data for Pune-Mobile-Developers
Getting Data for urclubpune
Getting Data for NASSCOM-10000-Startups-Pune
Getting Data for discuss-agile-network-pune
Getting Data for Machine-Learning-Pune
Getting Data for Data-Science-and-Machine-Learning
Getting Data for Pune-Travel-Club
Getting Data for Pune-Artificial-Intelligence-Deep-Learning
Getting Data for 91springboard-Pune
Getting Data for sspune
Getting Data for Ansible-Pune
Getting Data for SkewCode2
Getting Data for Pune-WordPress-Knowledge-Exchange
Getting Data for BlueRidge-IT-meetup
Getting Data for Pune-Footballers-Club
Getting Data for meetup-group-mBrnrixQ
Getting Data for Pune-Stock-Investors-Meetup
Getting Data for Pune-Spring-Meetup
Getting Data for TechnoWise
Getting Data for meetup-group-ScPJDxDX
Getting Data for ReactJS-and-F

Getting Data for Intro-of-New-people-in-town
Getting Data for Couples-Meetup-pune
Getting Data for Lean-Coffee-Pune
Getting Data for Pune-Dance-learners
No event for this till now
Getting Data for Pune-Drinking-Meetup-at-cheap-rate-but-in-lavish-club
Getting Data for PuneDigitalMarketingConnection
Getting Data for Pune-Microsoft-Azure-Meetup
No event for this till now
Getting Data for stumble-on-success-executive-coaching-prog-for-entrepreneurs
No event for this till now
Getting Data for Pune-Amazon-RedShift-Group
No event for this till now
Getting Data for Pune-Social-Media-Management-Meetup
No event for this till now
Getting Data for Pune-Heroku-Meetup
Getting Data for Pune-Animation-Meetup
Getting Data for Investment-Business-Opportunities-in-India


In [10]:
# Preview the first rows of the combined events table.
events.head()

Unnamed: 0,event_id,name,status,time,yes_rsvp_count,group_urlname
0,236723955,Develop your very own Alexa skill in an hour,past,2017-01-14 04:30:00,101,Pune-Developers-Community
1,236770319,Clean code,past,2017-02-11 04:30:00,172,Pune-Developers-Community
2,237491427,Google Cloud Next'17 Extended - Pune by PDC,past,2017-03-18 04:30:00,127,Pune-Developers-Community
3,238211889,Insights of NLP,past,2017-03-25 04:30:00,91,Pune-Developers-Community
4,238212161,Scaling ElasticSearch,past,2017-04-22 04:30:00,74,Pune-Developers-Community


In [11]:
def get_event_rsvps(url, headers, group_urlname, event_id):

    '''Accepts a group urlname and event id, and returns a dataframe of RSVPs.

    Parameters
    ----------
    url : str
        RSVP endpoint of the event.
    headers : dict
        Sent as the query-string parameters of the GET request.
    group_urlname, event_id
        Copied into every returned row to identify the event.

    Returns
    -------
    pandas.DataFrame or None
        Columns ``group_urlname``, ``event_id`` and ``member_id``, one row
        per RSVP. ``None`` (after printing the group and event) when the
        response is not a list of RSVP records with a ``member.id`` field.
    '''

    response = requests.get(url, params = headers).json()

    # Only the errors caused by an unexpected payload shape are handled here
    # (for example an error object instead of a list of RSVPs). Anything
    # else, including KeyboardInterrupt, is allowed to propagate.
    try:
        member_list = [(group_urlname, event_id, mem['member']['id']) for mem in response]
    except (KeyError, TypeError):
        print(group_urlname, event_id)
        return None

    return pd.DataFrame(member_list, columns=['group_urlname', 'event_id', 'member_id'])
    

In [12]:
# URL template: first slot is the group urlname, second the event id.
url = 'https://api.meetup.com/{}/events/{}/rsvps'

# Query-string parameters sent with every RSVP request.
headers = {
    'sign' : 'true',
    'key' : api_token
}

rsvp_cols = ['group_urlname', 'event_id', 'member_id']

# Collect one frame per event and concatenate once at the end; concatenating
# inside the loop copies the accumulated data on every iteration.
rsvp_frames = []

print("Fetching number of RSVP's For the events")
for group_urlname, event_id in zip(events['group_urlname'], events['event_id']):

    val = get_event_rsvps(url.format(group_urlname, event_id), headers, group_urlname, event_id)

    # get_event_rsvps returns None when the response could not be parsed;
    # an empty frame means the event has no RSVPs. Neither contributes rows.
    if val is not None and not val.empty:
        rsvp_frames.append(val)

if rsvp_frames:
    rsvp_df = pd.concat(rsvp_frames, ignore_index=True)
else:
    rsvp_df = pd.DataFrame(columns=rsvp_cols)

rsvp_df.to_csv('../dataFiles/rsvps.csv', index=False)

Fetching number of RSVP's For the events
Agile-Practitioners-Group-of-India-APGI 236693424
Agile-Practitioners-Group-of-India-APGI 240908380
Agile-Practitioners-Group-of-India-APGI 243577244
Agile-Practitioners-Group-of-India-APGI 243848931
Agile-Practitioners-Group-of-India-APGI 245609189
Agile-Practitioners-Group-of-India-APGI 246646599
Agile-Practitioners-Group-of-India-APGI 246868175
Agile-Practitioners-Group-of-India-APGI 247809675
Agile-Practitioners-Group-of-India-APGI 249735935
The-Pune-Book-Readers-Club 236637190
The-Pune-Book-Readers-Club 237729215
The-Pune-Book-Readers-Club 240342897
The-Pune-Book-Readers-Club 240557913
The-Pune-Book-Readers-Club 241304703
The-Pune-Book-Readers-Club 243145687
The-Pune-Book-Readers-Club 243626473
The-Pune-Book-Readers-Club 243626523
The-Pune-Book-Readers-Club 241741589
The-Pune-Book-Readers-Club 245633159
The-Pune-Book-Readers-Club 246513494
The-Pune-Book-Readers-Club 247447716
The-Pune-Book-Readers-Club 248504105
The-Pune-Book-Readers-Club 2

In [13]:
# Distinct member ids across all RSVPs; the member-details loop iterates over these.
unique_members = rsvp_df.member_id.unique()

In [14]:
def get_member_details(url, header, mem_id):
    '''Accepts a member id, and returns a dict of metadata information.

    Parameters
    ----------
    url : str
        Member endpoint for ``mem_id``.
    header : dict
        Sent as the query-string parameters of the GET request.
    mem_id
        Stored under ``member_id`` in the result.

    Returns
    -------
    dict or None
        Keys ``member_id``, ``name``, ``city``, ``visited``, ``lon`` and
        ``lat``. ``None`` (after printing a message) when the response does
        not contain all of these fields.
    '''

    # Use the `header` argument that was passed in; the previous code read the
    # notebook-level `headers` variable and ignored its own parameter.
    response = requests.get(url, params = header).json()

    # Only a payload with an unexpected shape is handled here; other errors,
    # including KeyboardInterrupt, are allowed to propagate.
    try:
        member_detail = {'member_id':mem_id, 'name':response['name'], 'city' : response['city'],
                       'visited' : response['visited'], 'lon' : response['lon'], 'lat' : response['lat']}
    except (KeyError, TypeError):
        print('Problem in getting user details from API')
        return None

    return member_detail

In [15]:
# URL template for a single member; the slot is the member id. Normalising
# the base first keeps this correct whether or not it ends with '/'.
url = api_url_base.rstrip('/') + '/2/member/{}'

# Query-string parameters sent with every member request.
header = {
    'sign' : 'true',
    'key' : api_token,
}

member_cols = ['member_id', 'name', 'city', 'visited', 'lon', 'lat']

# Collect the per-member dicts and build the frame once. Members whose
# details could not be fetched come back as None and are skipped, so they no
# longer end up as junk rows in the CSV.
member_records = []
for mem_id in unique_members:
    detail = get_member_details(url.format(mem_id), header, mem_id)
    if detail is not None:
        member_records.append(detail)

mem_df = pd.DataFrame(member_records, columns=member_cols)

mem_df.to_csv('../dataFiles/members.csv', index=False)