## Marketing Analytics Project
### Meetup.com Recommendation Systems
#### Data Pre-processing

In [1]:
import numpy as np
import pandas as pd
import random
from tqdm import tqdm
import math

#### Load the Datasets

In [2]:
## Read the four raw Meetup exports. All files are decoded as latin-1.
## NOTE(review): the saved output under this cell looks like truncated pandas
## warnings (possibly mixed-dtype columns) -- confirm, and consider passing
## explicit dtype= for the id columns used as merge keys later on.
CSV_ENCODING = 'latin-1'

members = pd.read_csv('members_new.csv', encoding=CSV_ENCODING)
rsvp = pd.read_csv('rsvps_all_new.csv', encoding=CSV_ENCODING)
events = pd.read_csv('events_all_new.csv', encoding=CSV_ENCODING)
groups = pd.read_csv('groups_austin.csv', encoding=CSV_ENCODING)

  interactivity=interactivity, compiler=compiler, result=result)
  interactivity=interactivity, compiler=compiler, result=result)
  interactivity=interactivity, compiler=compiler, result=result)


In [3]:
## Raise the notebook display limits so that up to 500 rows and 500 columns
## are rendered before pandas truncates the output.
pd.options.display.max_rows = 500
pd.options.display.max_columns = 500

#### Members, Groups, Events and RSVP
Get all the required columns from the dataframes

In [5]:
## Keep only the member columns used downstream and cast the member id to int.
## assign() builds a new frame, so the cast is not written into a column
## subset of the loaded frame (which triggers pandas' SettingWithCopyWarning).
## astype(int) raises if any id is missing or non-numeric.
members = (
    members[['group_id', 'id', 'name', 'joined', 'visited', 'topics']]
    .assign(id=lambda frame: frame['id'].astype(int))
)

In [7]:
## Keep only the group columns that are needed downstream.
groups = groups[[
    'category.id',
    'description',
    'id',
    'members',
    'name',
    'rating',
    'topics',
    'urlname',
    'who',
]]

In [10]:
## Keep the event id together with the identifying columns of its group.
events = events[[
    'group.id',
    'group.urlname',
    'group.name',
    'group.who',
    'id',
]]

In [12]:
## Keep the RSVP keys (event, group, member), the response and the RSVP id.
rsvp = rsvp[[
    'event.id',
    'group.id',
    'member.member_id',
    'response',
    'rsvp_id',
]]

#### Number of events organized by each group

In [13]:
## Count the distinct event ids per group; one row per group.id with the
## count stored in `count_of_events`.
group_events = (
    events.groupby('group.id')['id']
    .nunique()
    .reset_index()
    .rename(columns={'id': 'count_of_events'})
)

#### Number of RSVPs for each member per group
This is the number of events of a particular group that a member has RSVP'd for.

In [14]:
## Count the non-null rsvp_id values per (group, member) pair and expose the
## count as `count_of_rsvps`.
## NOTE(review): every RSVP row is counted regardless of its `response`
## value -- confirm that this is the intended engagement measure.
rsvp_data = (
    rsvp.groupby(['group.id', 'member.member_id'])['rsvp_id']
    .count()
    .reset_index()
    .rename(columns={'rsvp_id': 'count_of_rsvps'})
)

#### Merge the event count and RSVP information with the members dataframe

In [15]:
## Attach each member's RSVP count for the group they belong to.
## Left join: members without any RSVP row for the group are kept and get
## NaN in count_of_rsvps. Only the columns needed afterwards are retained.
members_rsvp = members.merge(
    rsvp_data,
    how='left',
    left_on=['group_id', 'id'],
    right_on=['group.id', 'member.member_id'],
)[['name', 'group_id', 'id', 'count_of_rsvps']]

In [17]:
## Add the number of events organized by each member's group.
## This is an inner join (the pandas default, spelled out here): member rows
## whose group_id has no entry in group_events are dropped from the result.
members_rsvp = pd.merge(
    members_rsvp,
    group_events,
    how='inner',
    left_on='group_id',
    right_on='group.id',
)

In [18]:
## Members without a matching RSVP row have NaN in count_of_rsvps after the
## left merge; treat them as 0 RSVPs. count_of_events is filled the same way
## as a safeguard.
## DataFrame.fillna with a per-column dict returns a new frame. The previous
## `members_rsvp[col].fillna(0, inplace=True)` form mutates a temporary column
## object, which is deprecated and leaves the frame unchanged when pandas
## Copy-on-Write is active.
members_rsvp = members_rsvp.fillna({'count_of_rsvps': 0, 'count_of_events': 0})

#### Divide the number of a group's events that a member has RSVP'd for by the total number of events organized by that group

In [19]:
## Share of a group's events that the member RSVP'd for:
## count_of_rsvps / count_of_events (NaN where the division is 0/0).
members_rsvp = members_rsvp.assign(
    rsvp_for_events=members_rsvp['count_of_rsvps'] / members_rsvp['count_of_events']
)
## Keep only the columns needed from here on. The explicit copy() makes the
## result an independent frame, so later cells that add or fill columns on it
## do not raise SettingWithCopyWarning.
members_rsvp = members_rsvp[['group_id', 'id', 'rsvp_for_events', 'name']].copy()

In [20]:
## Replace undefined ratios (NaN) with 0.
## DataFrame.fillna returns a new frame; the previous in-place fillna on the
## selected column is deprecated chained mutation and does not update the
## frame when pandas Copy-on-Write is active.
members_rsvp = members_rsvp.fillna({'rsvp_for_events': 0})

In [22]:
## Min-max scale the RSVP ratio onto the rating range [1, 10]:
## the smallest ratio maps to 1 and the largest to 10.
scale_min, scale_max = 1, 10
ratio = members_rsvp['rsvp_for_events']
ratio_min, ratio_max = ratio.min(), ratio.max()
ratio_span = ratio_max - ratio_min

if ratio_span > 0:
    rsvp_total = scale_min + (ratio - ratio_min) * (scale_max - scale_min) / ratio_span
else:
    ## Degenerate input (all ratios equal, or an empty frame): the formula
    ## above would divide 0 by 0 and yield NaN for every member, so assign
    ## the lowest rating instead.
    rsvp_total = pd.Series(float(scale_min), index=ratio.index)

## assign() returns a new frame instead of writing into a column subset.
members_rsvp = members_rsvp.assign(rsvp_total=rsvp_total)

#### Save this into a .csv file for further processing

In [24]:
## Persist the result for the next notebook. index=True is the pandas default:
## the row index is written as the first, unnamed column of the file.
members_rsvp.to_csv("members_rsvp.csv", index=True)