#  Scraping of Data 

<b>Here we scrape group and event data from meetup.com using its REST API.</b>

**Importing requisites**

In [1]:
import os
from json import loads
from time import sleep

import pandas as pd
import requests

**Defining the API key and base URL**

In [2]:
# Meetup API key. Prefer supplying it through the MEETUP_API_KEY environment
# variable so the credential does not have to live in the notebook.
# NOTE(review): the fallback literal below is already committed in source;
# it should be rotated and this fallback removed once the variable is set.
api_token = os.environ.get('MEETUP_API_KEY', '5d1e1736057567d21d18111b3e363c')

# Root of the Meetup REST API; endpoint paths are appended to this.
api_url_base = 'https://api.meetup.com/'

In [3]:
# Endpoint used to search for groups. The base URL ends with '/', so strip it
# before joining; otherwise the path becomes '//find/groups'.
url = '{0}/find/groups'.format(api_url_base.rstrip('/'))


# Query-string parameters for the group search. Despite the variable name
# these are passed as `params=` to requests.get, not as HTTP headers.
headers = {
    'sign' : 'true',
    'key' : api_token,
    'zip' : '411030'  # postal code to search around
}

In [4]:
def get_meetup_group_data(url, headers, filename = None):
    """Fetch Meetup groups from `url` and return them as a DataFrame.

    Parameters
    ----------
    url : str
        Endpoint to send the GET request to.
    headers : dict
        Sent as the query-string parameters (``params=``) of the request;
        despite the name these are not HTTP headers.
    filename : str, optional
        When given, the result is also written as CSV to
        ``../dataFiles/<filename>``.

    Returns
    -------
    pandas.DataFrame
        One row per group with the columns ``group_id``, ``group_name``,
        ``num_members``, ``category_id``, ``category_name``,
        ``organizer_id`` and ``group_urlname``. The id and member-count
        columns are cast to int. Groups that lack one of the expected keys
        are reported on stdout and skipped.

    Raises
    ------
    RuntimeError
        If the response status code is anything other than 200.
    """
    # Making a get request for data
    resp = requests.get(url, params = headers)

    status = resp.status_code

    # Printing status
    print('Response Code : {}'.format(status))

    # Only a 200 response is treated as success. Raise instead of calling
    # exit(): inside a notebook exit() acts on the kernel rather than
    # reporting a failed cell.
    if status != 200:
        print('Unsuccessful')
        raise RuntimeError('Meetup API request failed with status {}'.format(status))
    print('Successful.. ')

    response = resp.json()

    cols = ['group_id', 'group_name', 'num_members', 'category_id',
            'category_name', 'organizer_id', 'group_urlname']

    # Collect plain records and build the frame once; DataFrame.append was
    # removed in pandas 2.0 and re-copied the frame on every row.
    records = []
    for g in response:
        # Guard each group separately so that one group with a missing key
        # does not discard every group that follows it.
        try:
            records.append((g['id'], g['name'], g['members'], g['category']['id'],
                            g['category']['name'], g['organizer']['id'], g['urlname']))
        except KeyError as exc:
            # .get() so that a missing 'name' cannot raise inside the handler
            print(g.get('name'), exc)

    if len(response) > 0:
        # Sleep briefly so that API doesn't get overwhelmed
        sleep(0.2)

    meetup_group = pd.DataFrame(records, columns = cols)
    meetup_group = meetup_group.astype({'group_id': int, 'organizer_id': int,
                                        'category_id': int, 'num_members': int})

    # Write to computer
    if filename:
        writepath = '../dataFiles/{}'.format(filename)
        meetup_group.to_csv(writepath, encoding='utf-8', index = False)

    return meetup_group
    

In [5]:
# Fetch the groups for the query parameters defined above and also save them
# as CSV under ../dataFiles/meetup_group.csv.
meetup_group = get_meetup_group_data(url, headers, filename = 'meetup_group.csv')

Response Code : 200
Successful.. 


In [6]:
# Preview the first rows of the fetched groups.
meetup_group.head()

Unnamed: 0,group_id,group_name,num_members,category_id,category_name,organizer_id,group_urlname
0,19394496,Pune Developer's Community,4266,34,Tech,182849219,Pune-Developers-Community
1,4387912,Google Developer Group Pune : GDG Pune,6139,34,Tech,25480862,Pune-GDG
2,3730702,ExpertTalks Pune,6027,34,Tech,14196782,expert-talks-Pune
3,25835616,ThoughtWorks Pune Events,1227,34,Tech,235949519,ThoughtWorks-GeekNight-Pune
4,18446215,"Internet Of Things, Pune (IoTPune)",6764,6,Education & Learning,111363232,Internet-Of-Things-Pune-IoTPune


In [7]:
def get_events(url, headers, date_filter_str = None, urlname = None):
    """Fetch the events listed at `url` and return them as a DataFrame.

    Parameters
    ----------
    url : str
        Events endpoint to send the GET request to.
    headers : dict
        Sent as the query-string parameters (``params=``) of the request;
        despite the name these are not HTTP headers.
    date_filter_str : str, optional
        When given, only events whose ``time`` is strictly later than this
        date (parsed with ``pd.to_datetime``) are kept.
    urlname : str, optional
        Value stored in the ``group_urlname`` column of every returned row.

    Returns
    -------
    pandas.DataFrame or None
        The events with ``time`` converted from epoch milliseconds to
        datetimes, plus the added ``group_urlname`` and ``event_id``
        columns. ``None`` when the response contains no events.

    Raises
    ------
    ValueError
        If the response status is 410.
    requests.HTTPError
        For any other 4xx/5xx response.
    """
    response = requests.get(url, params = headers)
    if response.status_code == 410:
        raise ValueError('Group not accessible.')
    # Surface every other HTTP error here; otherwise its error payload would
    # be parsed as if it were a list of events and fail later on.
    response.raise_for_status()

    payload = response.json()  # parse the body once
    if len(payload) == 0:
        print('No event for this till now')
        return None

    events_df = pd.DataFrame.from_dict(payload)
    events_df['time'] = pd.to_datetime(events_df['time'], unit='ms')
    events_df['group_urlname'] = urlname
    events_df['event_id'] = events_df['id']

    if date_filter_str:
        events_df = events_df.loc[events_df['time'] > pd.to_datetime(date_filter_str)]

    return events_df

In [8]:
# Query-string parameters for the events requests: signed, authenticated with
# the API key, restricted to past events.
# NOTE(review): 'page' is presumably the page size of the API - confirm.
headers = dict(sign='true', key=api_token, page='200', status='past')

In [9]:
cols = ['event_id', 'name', 'status', 'time', 'yes_rsvp_count', 'group_urlname']

# Collect one frame per group and concatenate once at the end, instead of
# re-copying the accumulated frame on every iteration.
event_frames = []

for gp_url in meetup_group.group_urlname:

    print('Getting Data for {}'.format(gp_url))
    # The base URL ends with '/', so strip it to avoid '//<group>/events'.
    url = '{}/{}/events'.format(api_url_base.rstrip('/'), gp_url)

    val = get_events(url = url, headers = headers, urlname = gp_url, date_filter_str='2017-01-01')

    # get_events returns None for a group without events; skip it explicitly
    # rather than relying on pd.concat to drop None entries.
    if val is not None:
        event_frames.append(val)

if event_frames:
    # reindex keeps exactly `cols`, filling any column absent from the
    # fetched data with NaN.
    events = pd.concat(event_frames, ignore_index=True).reindex(columns=cols)
else:
    events = pd.DataFrame(columns = cols)

events.to_csv('../dataFiles/events.csv', index=False)

Getting Data for Pune-Developers-Community
Getting Data for Pune-GDG
Getting Data for expert-talks-Pune
Getting Data for ThoughtWorks-GeekNight-Pune
Getting Data for Internet-Of-Things-Pune-IoTPune
Getting Data for Pune-Mobile-Developers
Getting Data for NASSCOM-10000-Startups-Pune
Getting Data for discuss-agile-network-pune
Getting Data for Machine-Learning-Pune
Getting Data for Data-Science-and-Machine-Learning
Getting Data for urclubpune
Getting Data for Pune-Travel-Club
Getting Data for Ansible-Pune
Getting Data for Pune-Spring-Meetup
Getting Data for Pune-Stock-Investors-Meetup
Getting Data for meetup-group-mBrnrixQ
Getting Data for 91springboard-Pune
No event for this till now
Getting Data for sspune
Getting Data for PyData-Pune
Getting Data for BlueRidge-IT-meetup
Getting Data for Pune-Footballers-Club
Getting Data for Pune-WordPress-Knowledge-Exchange
Getting Data for Pune-AI-Geeks
Getting Data for TechnoWise
Getting Data for SkewCode2
Getting Data for Online-Business-Marketing

Getting Data for Cloud-computing-aws-and-scalability
Getting Data for Pune-DAILY-MORNING-SPORTS-AND-EXERCISE-6-AM-TO-8-AM
No event for this till now
Getting Data for AmazonAWS-India
Getting Data for College-Buddies-Ready-for-Travel-Together
Getting Data for Natural-Building-and-Village-Tour
Getting Data for meetup-group-MeNKhMim
Getting Data for Pune-Law-Of-Attraction-Meetup
Getting Data for Pune-Poker-Stars-Meetup
Getting Data for SelfGrowthAndLifeSuccess
Getting Data for Unlimited-SKYE-Meetup
No event for this till now
Getting Data for Home-UnSchoolers
Getting Data for Couples-Meetup-pune


In [10]:
# Preview the first rows of the collected events.
events.head()

Unnamed: 0,event_id,name,status,time,yes_rsvp_count,group_urlname
0,236723955,Develop your very own Alexa skill in an hour,past,2017-01-14 04:30:00,101,Pune-Developers-Community
1,236770319,Clean code,past,2017-02-11 04:30:00,172,Pune-Developers-Community
2,237491427,Google Cloud Next'17 Extended - Pune by PDC,past,2017-03-18 04:30:00,127,Pune-Developers-Community
3,238211889,Insights of NLP,past,2017-03-25 04:30:00,91,Pune-Developers-Community
4,238212161,Scaling ElasticSearch,past,2017-04-22 04:30:00,74,Pune-Developers-Community
