# Exercises

Download the dataset from http://yelp.com/dataset_challenge

Use the business table to answer questions 1-4.

In [129]:
import json as simplejson
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

file = 'Data/yelp_academic_dataset_business.json'
business_data = []

with open(file) as f:
    for line in f:
        business_data.append(simplejson.loads(line))
                             
df = pd.DataFrame(business_data[:3000])
df.columns

Index(['attributes', 'business_id', 'categories', 'city', 'full_address',
       'hours', 'latitude', 'longitude', 'name', 'neighborhoods', 'open',
       'review_count', 'stars', 'state', 'type'],
      dtype='object')

## 1. Create a new column that contains only the zipcode.

In [130]:
df['full_address']

0            4734 Lebanon Church Rd\nDravosburg, PA 15034
1                    202 McClure St\nDravosburg, PA 15034
2                 1530 Hamilton Rd\nBethel Park, PA 15234
3                     414 Hawkins Ave\nBraddock, PA 15104
4                   1000 Clubhouse Dr\nBraddock, PA 15104
5         141 Hawthorne St\nGreentree\nCarnegie, PA 15106
6                 718A Hope Hollow Rd\nCarnegie, PA 15106
7           920 Forsythe Rd\nCarnegie\nCarnegie, PA 15106
8                8 Logan St\nCarnegie\nCarnegie, PA 15106
9                 2080 Greentree Rd\nPittsburgh, PA 15220
10        300 Beechwood Ave\nCarnegie\nCarnegie, PA 15106
11                1011 Washington Ave\nCarnegie, PA 15106
12               2100 Washington Pike\nCarnegie, PA 15106
13               2100 Washington Pike\nCarnegie, PA 15106
14            341 E Main St\nCarnegie\nCarnegie, PA 15106
15        Bower Hill Rd & Vanadium Rd\nCarnegie, PA 15106
16                 1927 E Railroad St\nCarnegie, PA 15106
17            

In [131]:
def extract_zipcode(string):
    address = string.split(" ")
    zipcode = address[len(address)-1]
    return zipcode

df['zip_code'] = df['full_address'].apply(extract_zipcode, 1) #only selecting rows with zip codes in the 5 digit format
df[:20]

Unnamed: 0,attributes,business_id,categories,city,full_address,hours,latitude,longitude,name,neighborhoods,open,review_count,stars,state,type,zip_code
0,"{'Delivery': False, 'Caters': False, 'Takes Re...",5UmKMjUEUNdYWqANhGckJw,"[Fast Food, Restaurants]",Dravosburg,"4734 Lebanon Church Rd\nDravosburg, PA 15034","{'Thursday': {'close': '21:00', 'open': '11:00...",40.354327,-79.900706,Mr Hoagie,[],True,7,3.5,PA,business,15034
1,"{'Accepts Credit Cards': True, 'Good For Group...",UsFtqoBl7naz8AVUBZMjQQ,[Nightlife],Dravosburg,"202 McClure St\nDravosburg, PA 15034",{},40.350553,-79.886814,Clancy's Pub,[],True,5,3.0,PA,business,15034
2,{'Good for Kids': True},cE27W9VPgO88Qxe4ol6y_g,"[Active Life, Mini Golf, Golf]",Bethel Park,"1530 Hamilton Rd\nBethel Park, PA 15234",{},40.354115,-80.01466,Cool Springs Golf Center,[],False,5,2.5,PA,business,15234
3,"{'Attire': 'casual', 'Happy Hour': False, 'Wai...",mVHrayjG3uZ_RLHkLj-AMg,"[Bars, American (New), Nightlife, Lounges, Res...",Braddock,"414 Hawkins Ave\nBraddock, PA 15104","{'Thursday': {'close': '19:00', 'open': '10:00...",40.40883,-79.866211,Emil's Lounge,[],True,26,4.5,PA,business,15104
4,"{'Accepts Credit Cards': True, 'Takes Reservat...",mYSpR_SLPgUVymYOvTQd_Q,"[Active Life, Golf]",Braddock,"1000 Clubhouse Dr\nBraddock, PA 15104","{'Saturday': {'close': '20:00', 'open': '11:00...",40.403405,-79.855782,Grand View Golf Club,[],True,3,5.0,PA,business,15104
5,"{'Attire': 'casual', 'Happy Hour': True, 'Wait...",KayYbHCt-RkbGcPdGOThNg,"[Bars, American (Traditional), Nightlife, Rest...",Carnegie,"141 Hawthorne St\nGreentree\nCarnegie, PA 15106","{'Tuesday': {'close': '02:00', 'open': '11:00'...",40.415486,-80.067549,Alexion's Bar & Grill,[Greentree],True,23,4.0,PA,business,15106
6,{'Accepts Credit Cards': True},b12U9TFESStdy7CsTtcOeg,"[Auto Repair, Automotive, Tires]",Carnegie,"718A Hope Hollow Rd\nCarnegie, PA 15106","{'Tuesday': {'close': '18:00', 'open': '07:30'...",40.394588,-80.084454,Flynn's Tire & Auto Service,[],True,9,2.5,PA,business,15106
7,{'Good for Kids': True},Sktj1eHQFuVa-M4bgnEh8g,"[Active Life, Mini Golf]",Carnegie,"920 Forsythe Rd\nCarnegie\nCarnegie, PA 15106",{},40.405404,-80.076267,Forsythe Miniature Golf & Snacks,[Carnegie],False,4,4.0,PA,business,15106
8,{},3ZVKmuK2l7uXPE6lXY4Dbg,"[Roofing, Home Services, Decks & Railing, Cont...",Carnegie,"8 Logan St\nCarnegie\nCarnegie, PA 15106",{},40.406281,-80.09039,Quaker State Construction,[Carnegie],True,3,2.5,PA,business,15106
9,{},QoDa50dc7g62xciFygXB9w,"[Veterinarians, Pets]",Pittsburgh,"2080 Greentree Rd\nPittsburgh, PA 15220",{},40.392207,-80.069134,Greentree Animal Clinic,[],True,7,4.0,PA,business,15220


## 2. The table contains a column called 'categories' and each entry in this column is populated by a list. We are interested in those businesses that are restaurants. Create a new column 'restaurant_type' that contains a description of the restaurant based on the other elements of 'categories. 
That is, if we have '[Sushi Bars, Japanese, Restaurants]' in categories the 'restaurant_type will be '{'SushiBars': 1, 'Japanese': 1, 'Mexican': 0, ...}'



In [132]:
all_restaurant_categories = {}
for index, row in df.iterrows():
    if row['categories'] is not None:
        if 'Restaurants' in row['categories']:
            for item in row['categories']:
                if item != 'Restaurants':
                    all_restaurant_categories[item] = 0
list(all_restaurant_categories)[:30]

['Italian',
 'Hotels',
 'Buffets',
 'Vietnamese',
 'Tapas/Small Plates',
 'Desserts',
 'British',
 'Salvadoran',
 'Bakeries',
 'Convenience Stores',
 'Hot Dogs',
 'Meat Shops',
 'Noodles',
 'Tex-Mex',
 'American (New)',
 'Soul Food',
 'Latin American',
 'Dance Clubs',
 'Mexican',
 'Antiques',
 'Salad',
 'Polish',
 'Cafes',
 'Diners',
 'Vegetarian',
 'Shopping',
 'Caterers',
 'Food',
 'Middle Eastern',
 'Comfort Food']

In [133]:
def category(row):
    d = dict(all_restaurant_categories)
    if row['categories'] is not None:
        if 'Restaurants' in row['categories']:
            for item in row['categories']:
                if item in d.keys():
                    d[item]=1
                else:
                    d[item]=0
    else:
        d = 'Remove'
    return d

In [135]:
df['restaurant_type'] = df.apply(lambda x: category(x), 1)
df[:40]

Unnamed: 0,attributes,business_id,categories,city,full_address,hours,latitude,longitude,name,neighborhoods,open,review_count,stars,state,type,zip_code,restaurant_type
0,"{'Delivery': False, 'Caters': False, 'Takes Re...",5UmKMjUEUNdYWqANhGckJw,"[Fast Food, Restaurants]",Dravosburg,"4734 Lebanon Church Rd\nDravosburg, PA 15034","{'Thursday': {'close': '21:00', 'open': '11:00...",40.354327,-79.900706,Mr Hoagie,[],True,7,3.5,PA,business,15034,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie..."
1,"{'Accepts Credit Cards': True, 'Good For Group...",UsFtqoBl7naz8AVUBZMjQQ,[Nightlife],Dravosburg,"202 McClure St\nDravosburg, PA 15034",{},40.350553,-79.886814,Clancy's Pub,[],True,5,3.0,PA,business,15034,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie..."
2,{'Good for Kids': True},cE27W9VPgO88Qxe4ol6y_g,"[Active Life, Mini Golf, Golf]",Bethel Park,"1530 Hamilton Rd\nBethel Park, PA 15234",{},40.354115,-80.01466,Cool Springs Golf Center,[],False,5,2.5,PA,business,15234,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie..."
3,"{'Attire': 'casual', 'Happy Hour': False, 'Wai...",mVHrayjG3uZ_RLHkLj-AMg,"[Bars, American (New), Nightlife, Lounges, Res...",Braddock,"414 Hawkins Ave\nBraddock, PA 15104","{'Thursday': {'close': '19:00', 'open': '10:00...",40.40883,-79.866211,Emil's Lounge,[],True,26,4.5,PA,business,15104,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie..."
4,"{'Accepts Credit Cards': True, 'Takes Reservat...",mYSpR_SLPgUVymYOvTQd_Q,"[Active Life, Golf]",Braddock,"1000 Clubhouse Dr\nBraddock, PA 15104","{'Saturday': {'close': '20:00', 'open': '11:00...",40.403405,-79.855782,Grand View Golf Club,[],True,3,5.0,PA,business,15104,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie..."
5,"{'Attire': 'casual', 'Happy Hour': True, 'Wait...",KayYbHCt-RkbGcPdGOThNg,"[Bars, American (Traditional), Nightlife, Rest...",Carnegie,"141 Hawthorne St\nGreentree\nCarnegie, PA 15106","{'Tuesday': {'close': '02:00', 'open': '11:00'...",40.415486,-80.067549,Alexion's Bar & Grill,[Greentree],True,23,4.0,PA,business,15106,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie..."
6,{'Accepts Credit Cards': True},b12U9TFESStdy7CsTtcOeg,"[Auto Repair, Automotive, Tires]",Carnegie,"718A Hope Hollow Rd\nCarnegie, PA 15106","{'Tuesday': {'close': '18:00', 'open': '07:30'...",40.394588,-80.084454,Flynn's Tire & Auto Service,[],True,9,2.5,PA,business,15106,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie..."
7,{'Good for Kids': True},Sktj1eHQFuVa-M4bgnEh8g,"[Active Life, Mini Golf]",Carnegie,"920 Forsythe Rd\nCarnegie\nCarnegie, PA 15106",{},40.405404,-80.076267,Forsythe Miniature Golf & Snacks,[Carnegie],False,4,4.0,PA,business,15106,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie..."
8,{},3ZVKmuK2l7uXPE6lXY4Dbg,"[Roofing, Home Services, Decks & Railing, Cont...",Carnegie,"8 Logan St\nCarnegie\nCarnegie, PA 15106",{},40.406281,-80.09039,Quaker State Construction,[Carnegie],True,3,2.5,PA,business,15106,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie..."
9,{},QoDa50dc7g62xciFygXB9w,"[Veterinarians, Pets]",Pittsburgh,"2080 Greentree Rd\nPittsburgh, PA 15220",{},40.392207,-80.069134,Greentree Animal Clinic,[],True,7,4.0,PA,business,15220,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie..."


## 3. Lets clean the 'attributes' column. The entries in this column are dictionaries. We need to do two things:

 - Turn all the True or False values in the dictionary to 1 and 0.

 - There are some entries within dictionaries that are dictionaries themselves, lets turn the whole entry into just one dictionary, for example if we have

  '{'Accepts Credit Cards': True, 'Alcohol': 'none','Ambience': {'casual': False,'classy': False}}'

 then turn it into

 '{'Accepts Credit Cards':1, 'Alcohol': 0, 'Ambience_casual': 0, 'Ambience_classy': 0}'.

 There might be other entries like {'Price Range': 1} where the values are numerical so we might want to change that into {'Price_Range_1': 1}.



In [136]:
df.attributes[:15]

0     {'Delivery': False, 'Caters': False, 'Takes Re...
1     {'Accepts Credit Cards': True, 'Good For Group...
2                               {'Good for Kids': True}
3     {'Attire': 'casual', 'Happy Hour': False, 'Wai...
4     {'Accepts Credit Cards': True, 'Takes Reservat...
5     {'Attire': 'casual', 'Happy Hour': True, 'Wait...
6                        {'Accepts Credit Cards': True}
7                               {'Good for Kids': True}
8                                                    {}
9                                                    {}
10                                    {'Wi-Fi': 'free'}
11                                                   {}
12    {'Delivery': False, 'Takes Reservations': Fals...
13    {'Accepts Credit Cards': True, 'Wheelchair Acc...
14                                                   {}
Name: attributes, dtype: object

In [137]:
def flatten(row):
    d = row['attributes']
    dl = {}
    
    for item in d:
        
        # boolean 1 and 0
        if type(d[item]) == bool:
            dl[item] = int(d[item])
            
        # join strings
        elif type(d[item]) == str:
            label = '{}_{}'.format(item,d[item])
            dl[label] = 1
            
        # join integers
        elif type(d[item]) == int:
            label = '{}_{}'.format(item,str(d[item]))
            dl[label] = 1
            
        # nested dictionaries
        elif type(d[item]) == dict:
            for element in d[item].keys(): #iterate over keys in dictionary 
                label = '{}_{}'.format(item,element)
                dl[label] = int(d[item][element])
                
        #edge case check
        else:
            print(item,d[item])
    
    return dl

In [138]:
df['attributes_cleaned'] = df.apply(lambda x: flatten(x),1)
df[:20]

Unnamed: 0,attributes,business_id,categories,city,full_address,hours,latitude,longitude,name,neighborhoods,open,review_count,stars,state,type,zip_code,restaurant_type,attributes_cleaned
0,"{'Delivery': False, 'Caters': False, 'Takes Re...",5UmKMjUEUNdYWqANhGckJw,"[Fast Food, Restaurants]",Dravosburg,"4734 Lebanon Church Rd\nDravosburg, PA 15034","{'Thursday': {'close': '21:00', 'open': '11:00...",40.354327,-79.900706,Mr Hoagie,[],True,7,3.5,PA,business,15034,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie...","{'Delivery': 0, 'Caters': 0, 'Takes Reservatio..."
1,"{'Accepts Credit Cards': True, 'Good For Group...",UsFtqoBl7naz8AVUBZMjQQ,[Nightlife],Dravosburg,"202 McClure St\nDravosburg, PA 15034",{},40.350553,-79.886814,Clancy's Pub,[],True,5,3.0,PA,business,15034,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie...","{'Accepts Credit Cards': 1, 'Price Range_1': 1..."
2,{'Good for Kids': True},cE27W9VPgO88Qxe4ol6y_g,"[Active Life, Mini Golf, Golf]",Bethel Park,"1530 Hamilton Rd\nBethel Park, PA 15234",{},40.354115,-80.01466,Cool Springs Golf Center,[],False,5,2.5,PA,business,15234,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie...",{'Good for Kids': 1}
3,"{'Attire': 'casual', 'Happy Hour': False, 'Wai...",mVHrayjG3uZ_RLHkLj-AMg,"[Bars, American (New), Nightlife, Lounges, Res...",Braddock,"414 Hawkins Ave\nBraddock, PA 15104","{'Thursday': {'close': '19:00', 'open': '10:00...",40.40883,-79.866211,Emil's Lounge,[],True,26,4.5,PA,business,15104,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie...","{'Good For Groups': 1, 'Delivery': 0, 'Caters'..."
4,"{'Accepts Credit Cards': True, 'Takes Reservat...",mYSpR_SLPgUVymYOvTQd_Q,"[Active Life, Golf]",Braddock,"1000 Clubhouse Dr\nBraddock, PA 15104","{'Saturday': {'close': '20:00', 'open': '11:00...",40.403405,-79.855782,Grand View Golf Club,[],True,3,5.0,PA,business,15104,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie...","{'Parking_validated': 0, 'Parking_lot': 0, 'Ta..."
5,"{'Attire': 'casual', 'Happy Hour': True, 'Wait...",KayYbHCt-RkbGcPdGOThNg,"[Bars, American (Traditional), Nightlife, Rest...",Carnegie,"141 Hawthorne St\nGreentree\nCarnegie, PA 15106","{'Tuesday': {'close': '02:00', 'open': '11:00'...",40.415486,-80.067549,Alexion's Bar & Grill,[Greentree],True,23,4.0,PA,business,15106,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie...","{'Good For Groups': 1, 'Delivery': 0, 'Caters'..."
6,{'Accepts Credit Cards': True},b12U9TFESStdy7CsTtcOeg,"[Auto Repair, Automotive, Tires]",Carnegie,"718A Hope Hollow Rd\nCarnegie, PA 15106","{'Tuesday': {'close': '18:00', 'open': '07:30'...",40.394588,-80.084454,Flynn's Tire & Auto Service,[],True,9,2.5,PA,business,15106,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie...",{'Accepts Credit Cards': 1}
7,{'Good for Kids': True},Sktj1eHQFuVa-M4bgnEh8g,"[Active Life, Mini Golf]",Carnegie,"920 Forsythe Rd\nCarnegie\nCarnegie, PA 15106",{},40.405404,-80.076267,Forsythe Miniature Golf & Snacks,[Carnegie],False,4,4.0,PA,business,15106,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie...",{'Good for Kids': 1}
8,{},3ZVKmuK2l7uXPE6lXY4Dbg,"[Roofing, Home Services, Decks & Railing, Cont...",Carnegie,"8 Logan St\nCarnegie\nCarnegie, PA 15106",{},40.406281,-80.09039,Quaker State Construction,[Carnegie],True,3,2.5,PA,business,15106,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie...",{}
9,{},QoDa50dc7g62xciFygXB9w,"[Veterinarians, Pets]",Pittsburgh,"2080 Greentree Rd\nPittsburgh, PA 15220",{},40.392207,-80.069134,Greentree Animal Clinic,[],True,7,4.0,PA,business,15220,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie...",{}


## 4. Create a new column for every day of the week and fill it with the amount of hours the business is open that day.

In [139]:
from datetime import datetime


def calculate_hours(row, date):
    try:
        hours_for_day = row['hours'][date]
        close_time = datetime.strptime(hours_for_day['close'], '%H:%M')
        open_time = datetime.strptime(hours_for_day['open'], '%H:%M')
        hours_open = close_time - open_time
                
        if hours_open < timedelta(days=0):
            hours_open += timedelta(days=1)
    except:
        hours_open = timedelta(days=0)
    
    return hours_open

days = ['Tuesday', 'Sunday', 'Monday', 'Thursday', 'Saturday', 'Friday', 'Wednesday']
for day in days:
    df[day] = df.apply(lambda x: calculate_hours(x,day),1)   
    
df.head()

Unnamed: 0,attributes,business_id,categories,city,full_address,hours,latitude,longitude,name,neighborhoods,...,zip_code,restaurant_type,attributes_cleaned,Tuesday,Sunday,Monday,Thursday,Saturday,Friday,Wednesday
0,"{'Delivery': False, 'Caters': False, 'Takes Re...",5UmKMjUEUNdYWqANhGckJw,"[Fast Food, Restaurants]",Dravosburg,"4734 Lebanon Church Rd\nDravosburg, PA 15034","{'Thursday': {'close': '21:00', 'open': '11:00...",40.354327,-79.900706,Mr Hoagie,[],...,15034,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie...","{'Delivery': 0, 'Caters': 0, 'Takes Reservatio...",10:00:00,00:00:00,10:00:00,10:00:00,00:00:00,10:00:00,10:00:00
1,"{'Accepts Credit Cards': True, 'Good For Group...",UsFtqoBl7naz8AVUBZMjQQ,[Nightlife],Dravosburg,"202 McClure St\nDravosburg, PA 15034",{},40.350553,-79.886814,Clancy's Pub,[],...,15034,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie...","{'Accepts Credit Cards': 1, 'Price Range_1': 1...",00:00:00,00:00:00,00:00:00,00:00:00,00:00:00,00:00:00,00:00:00
2,{'Good for Kids': True},cE27W9VPgO88Qxe4ol6y_g,"[Active Life, Mini Golf, Golf]",Bethel Park,"1530 Hamilton Rd\nBethel Park, PA 15234",{},40.354115,-80.01466,Cool Springs Golf Center,[],...,15234,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie...",{'Good for Kids': 1},00:00:00,00:00:00,00:00:00,00:00:00,00:00:00,00:00:00,00:00:00
3,"{'Attire': 'casual', 'Happy Hour': False, 'Wai...",mVHrayjG3uZ_RLHkLj-AMg,"[Bars, American (New), Nightlife, Lounges, Res...",Braddock,"414 Hawkins Ave\nBraddock, PA 15104","{'Thursday': {'close': '19:00', 'open': '10:00...",40.40883,-79.866211,Emil's Lounge,[],...,15104,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie...","{'Good For Groups': 1, 'Delivery': 0, 'Caters'...",09:00:00,00:00:00,00:00:00,09:00:00,06:00:00,10:00:00,09:00:00
4,"{'Accepts Credit Cards': True, 'Takes Reservat...",mYSpR_SLPgUVymYOvTQd_Q,"[Active Life, Golf]",Braddock,"1000 Clubhouse Dr\nBraddock, PA 15104","{'Saturday': {'close': '20:00', 'open': '11:00...",40.403405,-79.855782,Grand View Golf Club,[],...,15104,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie...","{'Parking_validated': 0, 'Parking_lot': 0, 'Ta...",00:00:00,05:00:00,00:00:00,09:00:00,09:00:00,09:00:00,09:00:00


## 5. Create a table with the average star review for a business from the review table.

In [140]:
reviewfile = 'Data/yelp_academic_dataset_review.json'
review_data = []

with open(file) as f:
    for line in f:
        review_data.append(simplejson.loads(line))
                             
df2 = pd.DataFrame(review_data[:3000])
df2.columns

Index(['attributes', 'business_id', 'categories', 'city', 'full_address',
       'hours', 'latitude', 'longitude', 'name', 'neighborhoods', 'open',
       'review_count', 'stars', 'state', 'type'],
      dtype='object')

In [141]:
avgR = df2.groupby('business_id')['stars'].mean().reset_index()
avgR.columns = ['business_id','star_avg']
avgR.head()

Unnamed: 0,business_id,star_avg
0,--7PRjnsjMA6uhPK8mW13Q,2.5
1,--UE_y6auTgq3FXlvUMkbw,3.5
2,-1oj6-fkP-iO6OyW8QBqPg,3.5
3,-1t3U6osBvqFLHseoCxiIA,2.5
4,-2NCvK5807Nxs9GUb2Eo1g,3.5


In [142]:
# use pivot tables to do averages

review_pivot = pd.DataFrame(pd.pivot_table(df2, values='stars', columns=['name'], aggfunc=np.mean))
review_pivot
#review_pivot['stars']['007 Nails']

Unnamed: 0_level_0,stars
name,Unnamed: 1_level_1
007 Nails,3.00
1902 Tavern,2.50
1st Way,3.00
2001 Cleaners Inc,4.00
3rd Phaze Body Oils Inc.,5.00
5 & Diner,3.50
7-Eleven,2.25
8th Street Studio,5.00
99 Ranch Market,4.00
A & A International Food,5.00


##  6. Create a new table that only contains restaurants with the following schema:
Business_Name | Restaurant_type | Friday hours | Saturday hours | Attributes | Zipcode | Average Rating




In [143]:
dfAll = df.merge(avgR, how='left',on = 'business_id')
dfAll.head()


Unnamed: 0,attributes,business_id,categories,city,full_address,hours,latitude,longitude,name,neighborhoods,...,restaurant_type,attributes_cleaned,Tuesday,Sunday,Monday,Thursday,Saturday,Friday,Wednesday,star_avg
0,"{'Delivery': False, 'Caters': False, 'Takes Re...",5UmKMjUEUNdYWqANhGckJw,"[Fast Food, Restaurants]",Dravosburg,"4734 Lebanon Church Rd\nDravosburg, PA 15034","{'Thursday': {'close': '21:00', 'open': '11:00...",40.354327,-79.900706,Mr Hoagie,[],...,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie...","{'Delivery': 0, 'Caters': 0, 'Takes Reservatio...",10:00:00,00:00:00,10:00:00,10:00:00,00:00:00,10:00:00,10:00:00,3.5
1,"{'Accepts Credit Cards': True, 'Good For Group...",UsFtqoBl7naz8AVUBZMjQQ,[Nightlife],Dravosburg,"202 McClure St\nDravosburg, PA 15034",{},40.350553,-79.886814,Clancy's Pub,[],...,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie...","{'Accepts Credit Cards': 1, 'Price Range_1': 1...",00:00:00,00:00:00,00:00:00,00:00:00,00:00:00,00:00:00,00:00:00,3.0
2,{'Good for Kids': True},cE27W9VPgO88Qxe4ol6y_g,"[Active Life, Mini Golf, Golf]",Bethel Park,"1530 Hamilton Rd\nBethel Park, PA 15234",{},40.354115,-80.01466,Cool Springs Golf Center,[],...,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie...",{'Good for Kids': 1},00:00:00,00:00:00,00:00:00,00:00:00,00:00:00,00:00:00,00:00:00,2.5
3,"{'Attire': 'casual', 'Happy Hour': False, 'Wai...",mVHrayjG3uZ_RLHkLj-AMg,"[Bars, American (New), Nightlife, Lounges, Res...",Braddock,"414 Hawkins Ave\nBraddock, PA 15104","{'Thursday': {'close': '19:00', 'open': '10:00...",40.40883,-79.866211,Emil's Lounge,[],...,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie...","{'Good For Groups': 1, 'Delivery': 0, 'Caters'...",09:00:00,00:00:00,00:00:00,09:00:00,06:00:00,10:00:00,09:00:00,4.5
4,"{'Accepts Credit Cards': True, 'Takes Reservat...",mYSpR_SLPgUVymYOvTQd_Q,"[Active Life, Golf]",Braddock,"1000 Clubhouse Dr\nBraddock, PA 15104","{'Saturday': {'close': '20:00', 'open': '11:00...",40.403405,-79.855782,Grand View Golf Club,[],...,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie...","{'Parking_validated': 0, 'Parking_lot': 0, 'Ta...",00:00:00,05:00:00,00:00:00,09:00:00,09:00:00,09:00:00,09:00:00,5.0


In [146]:
dfAll = dfAll[dfAll['restaurant_type']!='Remove']
dfAll = dfAll[['name','restaurant_type','Friday','Saturday','attributes_cleaned','zip_code','star_avg']]
dfAll

Unnamed: 0,name,restaurant_type,Friday,Saturday,attributes_cleaned,zip_code,star_avg
0,Mr Hoagie,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie...",10:00:00,00:00:00,"{'Delivery': 0, 'Caters': 0, 'Takes Reservatio...",15034,3.5
1,Clancy's Pub,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie...",00:00:00,00:00:00,"{'Accepts Credit Cards': 1, 'Price Range_1': 1...",15034,3.0
2,Cool Springs Golf Center,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie...",00:00:00,00:00:00,{'Good for Kids': 1},15234,2.5
3,Emil's Lounge,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie...",10:00:00,06:00:00,"{'Good For Groups': 1, 'Delivery': 0, 'Caters'...",15104,4.5
4,Grand View Golf Club,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie...",09:00:00,09:00:00,"{'Parking_validated': 0, 'Parking_lot': 0, 'Ta...",15104,5.0
5,Alexion's Bar & Grill,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie...",15:00:00,14:00:00,"{'Good For Groups': 1, 'Delivery': 0, 'Caters'...",15106,4.0
6,Flynn's Tire & Auto Service,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie...",10:30:00,08:30:00,{'Accepts Credit Cards': 1},15106,2.5
7,Forsythe Miniature Golf & Snacks,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie...",00:00:00,00:00:00,{'Good for Kids': 1},15106,4.0
8,Quaker State Construction,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie...",00:00:00,00:00:00,{},15106,2.5
9,Greentree Animal Clinic,"{'Italian': 0, 'Hotels': 0, 'Buffets': 0, 'Vie...",00:00:00,00:00:00,{},15220,4.0
