# Making an Activity DataFrame

business_id, stars, review_count, is_open, postal_code, checkins, tipcount

### check = a list of businesses and their check in count

In [250]:
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

#Full Check in data, seperate hours
df = pd.read_csv("../../data/raw/yelp_checkin.csv") 

#group into businesses and add to make total number of checkins for each business
check = df.groupby("business_id")['checkins'].sum().sort_values(ascending=False)

### Save this to a CSV file (business id, check ins)

In [236]:
check.to_csv("checkins.csv", index='business_id')
#csv file: (business id, total checkins)

 ### Adding check in totals
 - Read in CSV file
 - Read in filtered business file (only useful info)
 - Find the businesses that have no check ins
 - Set their check in total to 0
 - Combine the business info and check in totals to one dataframe

In [238]:
checkins = pd.read_csv("checkins.csv") #read in csv file
checkins.columns = ['business_id', 'checkins'] #assign headings
checkins.set_index('business_id') #set index to business_id

biz = pd.read_csv("../../data/raw/yelp_business.csv") #read in FULL business file
rating = biz.filter(['business_id', 'stars', 'review_count', 'is_open', 'postal_code']) #filter relevant columns
rating.set_index('business_id') #set index to business_id

#These two lines make a dataframe of all the business id's that have NO check ins
common = rating.merge(checkins,on='business_id')
notin = rating[(~rating.business_id.isin(common.business_id))].copy()

#making a list of 0's to combine with id's above
notinCounts = []
for i in range(len(notin)):
    notinCounts.append("0")
    
#Join businesses with no check ins with a column of 0's
notin['checkins'] = pd.Series(notinCounts, index=notin.index)

#merge ratings and checkins to create table of all info with checkins included
activity1 = pd.merge(rating, checkins, on='business_id',left_index=True, right_index=True)
activity1.set_index('business_id')

#Join the table with 0 checkins to the master table of info
activity = pd.concat([activity1, notin])

activity.head()

Unnamed: 0,business_id,stars,review_count,is_open,postal_code,checkins
0,FYWN1wneV18bWNgQjJ2GNg,4.0,22,1,85044,112590
1,He-G7vWjzVUysIKrfNbPUQ,3.0,11,1,15317,49934
2,KQPW8lFf1y5BT2MxiSZ3QA,1.5,18,1,85017,43995
3,8DShNS-LuFqpEWIp0HxijA,3.0,9,0,85282,32603
4,PfOCPjBrlQAnz__NXj9h_w,3.5,116,1,44221,32393


 ### Adding tip count totals
 - Read in CSV file
 - Read in filtered business file (only useful info)
 - Find the businesses that have no tips
 - Set their tip total to 0
 - Combine the business info and tip totals to one dataframe

In [239]:
tips = pd.read_csv("../../data/raw/yelp_tip.csv")

In [240]:
tipCount = tips.groupby("business_id").size().sort_values(ascending=False)
tipCount.to_csv("tipcounts.csv", index='business_id')

This cell uses the same method as above to add tips to the dataframe

In [251]:
tip = pd.read_csv("tipcounts.csv") #read in csv file
tip.columns = ['business_id', 'tipcount'] #assign headings
tip.set_index('business_id') #set index to business_id

activity.set_index('business_id') #set index to business_id
#These two lines make a dataframe of all the business id's that have NO check ins
hasTips = activity.merge(tip,on='business_id')
noTips = activity[(~activity.business_id.isin(hasTips.business_id))].copy()

#making a list of 0's to combine with id's above
noTipCount = []
for i in range(len(noTips)):
    noTipCount.append("0")
    
#Join businesses with no check ins with a column of 0's
noTips['tipcount'] = pd.Series(noTipCount, index=noTips.index)

# merge ratings and checkins to create table of all info with checkins included
activity2 = pd.merge(activity, tip, on='business_id',left_index=True, right_index=True)
activity2.set_index('business_id')

#Join the table with 0 checkins to the master table of info
activityTips = pd.concat([activity2, noTips])

activityTips.head()

Unnamed: 0,business_id,stars,review_count,is_open,postal_code,checkins,tipcount
0,FYWN1wneV18bWNgQjJ2GNg,4.0,22,1,85044,112590,2382
1,He-G7vWjzVUysIKrfNbPUQ,3.0,11,1,15317,49934,1474
2,KQPW8lFf1y5BT2MxiSZ3QA,1.5,18,1,85017,43995,1436
3,8DShNS-LuFqpEWIp0HxijA,3.0,9,0,85282,32603,1346
4,PfOCPjBrlQAnz__NXj9h_w,3.5,116,1,44221,32393,1287


In [252]:
activityTips.tail()

Unnamed: 0,business_id,stars,review_count,is_open,postal_code,checkins,tipcount
174525,Fbn3idu4uKvWVx3jGWDpnw,5.0,3,1,85205,0,0
174528,PCj1wDpVyb6y_h0RulVaHg,4.5,11,1,85282,0,0
174533,6AGvafGH6q6x5yQnHIQwHQ,4.5,23,1,85225,0,0
174561,kLFm_kehXNZkUc1oa2-Eaw,3.0,4,1,M6K 1T5,0,0
174563,gRGalHVu6BcaUDIAGVW_xQ,5.0,3,1,44286,0,0


### Export to CSV for use in other notebooks

In [247]:
activityTips.to_csv("activity.csv", index='business_id')