# Monitoring the dynamism of Python and R Meetup groups in Ireland I

In [54]:
#import libraries
import json
import math
import os
import time
import urllib2

import dateutil
import dateutil.parser
import pandas as pd
import plotly.graph_objs as go
import plotly.plotly as py 
import plotly.tools as tls
from pandas import Series, DataFrame
from plotly.graph_objs import * #Figure, Data, Layout 

# Data collection using the Meetup API 

In [55]:
#load the raw (parsed JSON) events payload of every group through the Meetup API
# NOTE(review): the API key used to be committed in clear text only. It can now
# be supplied through the MEETUP_API_KEY environment variable; the literal is
# kept as a fallback so existing runs keep working, but it should be rotated
# and removed from the notebook.
MEETUP_API_KEY = os.environ.get("MEETUP_API_KEY", "95c3a7577a477a7d105a70197b5755")

eventsL = []
for name in ["DublinR", "Cork-Ireland-R-Users-Group", "pythonireland", "PyLadiesDublin"]:
    URL = "https://api.meetup.com/2/events.json/?group_urlname=" + name + "&key=" + MEETUP_API_KEY
    response = urllib2.urlopen(URL)
    try:
        group = json.load(response)
    finally:
        # release the HTTP connection even when the body is not valid JSON
        response.close()
    eventsL.append(group)

# Data Processing

In [56]:
#function used to return a dataframe giving information about each event: group name, date, number of yes_rsvp
def get_event_details(group_urlname, URL):
    """Download one Meetup events listing and return it as a DataFrame.

    Parameters
    ----------
    group_urlname : str
        Not read by the function: the "Group Name" column is taken from each
        event's own ``["group"]["urlname"]`` field. Kept so existing calls
        keep working.
    URL : str
        Full request URL; the response must be a JSON object with a
        ``"results"`` list.

    Returns
    -------
    DataFrame
        One row per entry of ``"results"`` with the columns "Event Name",
        "Group Name", "Date", "Number of yes_rsvp", "Rsvp Limit" and "Status".
        "Date" is a ``'%Y-%m-%d %H:%M:%S'`` string expressed in the local
        timezone of the machine running the notebook. "Rsvp Limit" is 0 for
        events that carry no ``"rsvp_limit"`` field.

    Raises
    ------
    KeyError
        If an event lacks one of the mandatory fields read below.
    """
    response = urllib2.urlopen(URL)
    try:
        results = json.load(response)["results"]
    finally:
        # always release the HTTP connection
        response.close()

    columns = ["Event Name", "Group Name", "Date",
               "Number of yes_rsvp", "Rsvp Limit", "Status"]
    rows = []
    for event in results:
        # "time" is an epoch timestamp in milliseconds; time.localtime wants seconds
        seconds = event["time"] // 1000
        rows.append((
            event["name"],
            event["group"]["urlname"],
            time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(seconds)),
            event["yes_rsvp_count"],
            # events without an attendance cap have no "rsvp_limit" key
            event.get("rsvp_limit", 0),
            event["status"],
        ))
    # "waitlist_count" is deliberately not collected (see the original note:
    # only 14 events had a non-zero waiting list).
    return DataFrame(rows, columns=columns)

def get_members(group_urlname, URL):
    """Download a Meetup group description and return its member count.

    Parameters
    ----------
    group_urlname : str
        Label stored in the first column of the returned frame.
    URL : str
        Full request URL; the response must be a JSON object with a
        ``"results"`` list whose entries have a ``"members"`` field.

    Returns
    -------
    DataFrame
        Exactly one row with the columns "Event Name" (which holds
        ``group_urlname``; the column name is kept for compatibility) and
        "Number of members" (the ``"members"`` value of the first result, or
        NaN when ``"results"`` is empty).
    """
    response = urllib2.urlopen(URL)
    try:
        results = json.load(response)["results"]
    finally:
        # always release the HTTP connection
        response.close()

    # The previous implementation assigned a one-element "Event Name" column
    # first, so pandas index alignment silently kept only the first member
    # count (or NaN for an empty listing). That one-row result is now explicit.
    member_count = results[0]["members"] if results else float("nan")
    return DataFrame(
        {"Event Name": [group_urlname], "Number of members": [member_count]},
        columns=["Event Name", "Number of members"],
    )
# url names of the four Meetup groups being compared
MEETUP_GROUPS = ["DublinR", "Cork-Ireland-R-Users-Group", "pythonireland", "PyLadiesDublin"]
# NOTE(review): prefer supplying the key through the MEETUP_API_KEY environment
# variable; the committed literal is only kept as a fallback and should be rotated.
MEETUP_API_KEY = os.environ.get("MEETUP_API_KEY", "95c3a7577a477a7d105a70197b5755")

#get number of members for each group
members = []
for group in MEETUP_GROUPS:
    url = "https://api.meetup.com/2/groups.json/?group_urlname=" + group + "&key=" + MEETUP_API_KEY
    members.append(get_members(group, url))

#get the events of each group: the default listing first, then the past events thanks to &status=past
# (the accumulator used to be called `list`, which shadowed the builtin for the
#  rest of the session)
event_frames = []
for group in MEETUP_GROUPS:
    events_url = "https://api.meetup.com/2/events.json/?group_urlname=" + group
    for url in [events_url + "&key=" + MEETUP_API_KEY,
                events_url + "&status=past&key=" + MEETUP_API_KEY]:
        event_frames.append(get_event_details(group, url))

#concatenate all DataFrames into one (ignore_index gives both frames labels 0..n-1)
all_events = pd.concat(event_frames, ignore_index=True)
members_number = pd.concat(members, ignore_index=True)
all_events.head()


Unnamed: 0,Event Name,Group Name,Date,Number of yes_rsvp,Rsvp Limit,Status
0,"Rated R, Rated G - Lightning Talks",DublinR,2015-12-09 18:30:00,23,0,upcoming
1,Meeting,DublinR,2011-11-17 18:30:00,10,0,past
2,Meeting,DublinR,2011-12-15 18:30:00,10,0,past
3,Meeting,DublinR,2012-01-19 18:30:00,8,0,past
4,Informal Meetup,DublinR,2012-03-29 19:00:00,8,0,past


# Bar chart of the number of members per group

In [57]:
# Member count of every group, looked up by group name. members_number stores
# the group url name in its "Event Name" column, so nothing here depends on
# the row order of the frame (the deprecated positional `.ix` lookups did).
members_by_group = dict(zip(members_number["Event Name"],
                            members_number["Number of members"]))

# left-to-right order of the bars: the two Python groups and the two R groups, interleaved
bar_groups = ["pythonireland", "DublinR", "PyLadiesDublin", "Cork-Ireland-R-Users-Group"]
bar_heights = [int(members_by_group[group_name]) for group_name in bar_groups]

trace1 = go.Bar(
    x=bar_groups,
    y=bar_heights,
    marker=dict(
        # orange for the Python groups, blue for the R groups (same order as bar_groups)
        color=['#FFA300', '#4099FF', '#FFA300', '#4099FF'],
        line=dict(
            color='black',
            width=1.5,
        )
    ),
    opacity=0.9,
)

data = [trace1]
layout = go.Layout(
    # one "<n> members" label sitting on top of each bar
    annotations=[
        dict(
            x=group_name,
            y=height,
            text=str(height) + " members",
            xanchor='center',
            yanchor='bottom',
            showarrow=False,
        ) for group_name, height in zip(bar_groups, bar_heights)],
    title='Number of members per group',
)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='stacked-bar')

# Data cleansing 

In [58]:
#we rename this group: its name is later used as a keyword when the bubble-graph
#colours are declared, and a name containing hyphens cannot be a keyword
all_events.loc[all_events["Group Name"] == "Cork-Ireland-R-Users-Group", "Group Name"] = "Cork_Ireland_R_Users_Group"

#events without an rsvp limit (stored as 0) get the group's mean number of yes_rsvp, rounded up
# the per-group mean does not change inside the loop, so it is computed once,
# and only over the column that is actually needed
mean_yes_rsvp = all_events.groupby("Group Name")["Number of yes_rsvp"].mean()
for group in ["DublinR", "Cork_Ireland_R_Users_Group", "pythonireland", "PyLadiesDublin"]:
    without_limit = (all_events["Rsvp Limit"] == 0) & (all_events["Group Name"] == group)
    all_events.loc[without_limit, "Rsvp Limit"] = math.ceil(mean_yes_rsvp[group])

#some cleaning of the data to match the right cell formats
# "Date" holds '%Y-%m-%d %H:%M:%S' strings at this point; parse them once
event_year = pd.to_datetime(all_events["Date"], format='%Y-%m-%d %H:%M:%S').dt.year
# running counter (1, 2, 3, ...) of the events of each group within each year
all_events["Number Of Events"] = all_events.groupby(["Group Name", event_year]).cumcount() + 1
# from here on "Date" only keeps the year
all_events["Date"] = event_year


In [97]:
#Computing the number of events for each group and year: the running counter
#"Number Of Events" peaks at the number of events of that group in that year
# NOTE(review): groupby appears to match this key to rows by index label, so
# the pre-sort looks redundant - confirm before removing it
years_sorted = all_events['Date'].sort_values()
events_counter = all_events.groupby(['Group Name', years_sorted], sort=True)["Number Of Events"]
events_per_group = events_counter.max()
events_per_group

Group Name                  Date
Cork_Ireland_R_Users_Group  2015     3
DublinR                     2011     2
                            2012    20
                            2013    42
                            2014    32
                            2015    26
PyLadiesDublin              2013     2
                            2014    12
                            2015    12
pythonireland               2013    12
                            2014    14
                            2015    14
                            2016    11
Name: Number Of Events, dtype: int64

In [93]:
# Total number of events per language: the yearly counts of the two groups of
# each language, added together (same group order as before).
R_events = sum(events_per_group[group_name].sum()
               for group_name in ("DublinR", "Cork_Ireland_R_Users_Group"))
Python_events = sum(events_per_group[group_name].sum()
                    for group_name in ("pythonireland", "PyLadiesDublin"))

In [95]:
# report the total for the R groups (parenthesised form prints the same text)
print(str(R_events) + " were organised for the R language in Ireland from 2011 to 2016.")

125 were organised for the R language in Ireland from 2011 to 2016.


In [98]:
# report the total for the Python groups (parenthesised form prints the same text)
print(str(Python_events) + " were organised for the Python language in Ireland from 2013 to 2016.")

77 were organised for the Python language in Ireland from 2013 to 2016.


# Data Visualisation with the Plotly API

In [111]:
#plot a graph of meetups in 2015
the_year = 2015
# boolean mask built with one vectorised comparison; it carries the frame's own
# index, so it no longer assumes that the row labels are 0..n-1
i_year = all_events["Date"] == the_year
all_events_year = all_events[i_year]

#set a color for each group (blue shades for the R groups, green shades for the Python groups)
colors = dict(
    DublinR='#0000cc',
    Cork_Ireland_R_Users_Group='#1919ff',
    pythonireland='#00cc00',
    PyLadiesDublin='#00e500',
)

# marker sizes are interpreted as bubble areas
sizemode = 'area'

# (!) Set a reference for 'size' values (i.e. a yes_rsvp-to-pixel scaling):
#     the largest number of yes_rsvp of the year divided by 1e2**1.75 (~3162).
#     NOTE(review): the original comment promised a max bubble area "on the
#     order of 100 pixels" - confirm against the rendered plot.
sizeref = all_events_year['Number of yes_rsvp'].max() / 1e2**1.75

# Define a trace-generating function (returns a Scatter object)
def make_trace(X, group, sizes, color):
    """Build the bubble trace of one Meetup group.

    X     -- events of the group; its 'Rsvp Limit' column goes on the x-axis
             and its 'Number Of Events' column on the y-axis
    group -- trace name (shown in the legend and on hover)
    sizes -- one marker size per event
    color -- marker colour

    The marker scaling comes from the notebook-level `sizeref` and `sizemode`.
    """
    bubbles = Marker(
        color=color,
        size=sizes,
        sizeref=sizeref,        # shared size scaling
        sizemode=sizemode,      # shared size interpretation
        opacity=0.6,            # (!) partly transparent markers
        line=Line(width=0.0),   # no marker borders
    )
    return Scatter(
        x=X['Rsvp Limit'],
        y=X['Number Of Events'],
        name=group,
        mode='markers',         # (!) point markers only on this plot
        marker=bubbles,
    )

In [112]:
# One bubble trace per group, sized by the number of yes_rsvp of each event
data = Data()
for group, X in all_events_year.groupby('Group Name'):
    data.append(make_trace(X, group, X['Number of yes_rsvp'], colors[group]))

# Set plot and axis titles
# (the title used to call .format(the_year) without any placeholder, so the
#  year never showed up in the plot)
title = "Meetups in Ireland for Python and R in {}".format(the_year)
x_title = "Rsvp Limit"
y_title = "Number of Events"

# Define a dictionary of axis style options, shared by both axes
axis_style = dict(
    zeroline=False,       # remove thick zero line
    gridcolor='#FFFFFF',  # white grid lines
    ticks='outside',      # draw ticks outside axes
    ticklen=8,            # tick length
    tickwidth=1.5         #   and width
)

# Make layout object
layout = Layout(
    title=title,             # set plot title
    plot_bgcolor='#EFECEA',  # set plot color to grey
    xaxis=XAxis(axis_style, title=x_title),
    yaxis=YAxis(axis_style, title=y_title),
)

fig = Figure(data=data, layout=layout)

#a log x-axis spreads the rsvp limits out => differences between groups are more visible
fig['layout']['xaxis'].update(
    type='log',
    exponentformat='power',
    showexponent='all',
)
#update layout object
fig['layout'].update(
    hovermode='closest',
    showlegend=True,
    autosize=True,
)

#add hover text information to each trace point 
def hover_text(X):
    """Return the hover label (HTML with <br> line breaks) of one event.

    X must support X[key] for "Date", "Group Name" and "Number of yes_rsvp".
    """
    template = 'Year: %d <br> Group: %s     <br> Number of yes_rsvp: %d'
    fields = (X["Date"], X['Group Name'], X['Number of yes_rsvp'])
    return template % fields
# The traces were appended in groupby order, so walking the same groupby again
# pairs the i-th trace with the events of its own group.
events_by_group = all_events_year.groupby("Group Name")
for i, (group, X) in enumerate(events_by_group):
    text = X.apply(hover_text, axis=1).tolist()
    fig['data'][i].update(text=text)


py.iplot(fig)

In [118]:
# embed the published Plotly figure: user "kevllino", plot id "85"
tls.embed("kevllino", "85")