# Data Pipeline

* Dataset comes from https://www.yelp.com/dataset/challenge
* This notebook will parse the relevant features from each dataset into one df to use

In [45]:
import databricks.koalas as ks
import pandas as pd
pd.set_option('display.max_columns', None)
import time
import json
import os
from collections import Counter

from sklearn import preprocessing

In [33]:
import pickle
import sys

def save_as_pickled_object(obj, filepath):
    """
    This is a defensive way to write pickle.write, allowing for very large files on all platforms
    """
    max_bytes = 2**31 - 1
    bytes_out = pickle.dumps(obj)
    n_bytes = sys.getsizeof(bytes_out)
    with open(filepath, 'wb') as f_out:
        for idx in range(0, n_bytes, max_bytes):
            f_out.write(bytes_out[idx:idx+max_bytes])

# Load Yelp Dataset into DF

To parse the yelp dataset into a dataframe

## Load Datasets

In [34]:
def load_dataset(file):
    '''
    reads in a json file as a list, then transformed into a pandas df, and outputs to a pickle file for easier access
    
    input: json filepath
    output: pandas dataframe
    '''
    #reads in the json file
    start = time.time()
    raw_data = []
    with open(file) as f:
        for line in f:
            raw_data.append(json.loads(line))
    end = time.time()
    print('loaded {} lines of data into a list in {} seconds'.format(len(raw_data), end-start))

    # stores data into a df
    start = time.time()
    order = list(raw_data[0].keys())
    df = pd.DataFrame.from_dict(raw_data, orient='columns')
    df = df[order]
    end = time.time()
    print('loaded {} lines of data into a df in {} seconds'.format(len(df), end-start))
    
    start = time.time()
    filename = os.path.basename(file)
    name = filename[:filename.find('.')] + '_df.pkl'
    if(filename == 'review.json'):
        save_as_pickled_object(df, '../data/review_df.pkl')
    else:
        df.to_pickle('../data/' + name)
    end = time.time()
    print('{} has been converted to {} in {} seconds'.format(filename, name, end-start))

    return df

In [35]:
file = '../data/yelp_dataset'

review_raw_data = load_dataset(file)

loaded in 6685900 lines of data into a list in 67.61171507835388 seconds
loaded 6685900 lines as a df in 142.59094905853271 seconds
review.json has been converted to review_df.pkl in 159.64053511619568 seconds


In [49]:
folder = '../data/yelp_dataset/'

start = time.time()
json_file_list = []
for file in os.listdir(folder):
    if file.endswith(".json"):
        json_file_list.append(folder+file)
        print('found ' + file)               

for file in json_file_list:
    print('\nloading {}...'.format(file))
    load_dataset(file)
end = time.time()
    
print('\nFinished loading all json files! This took {} seconds.'.format(end-start))

found business.json
found review_sample.json
found user.json
found checkin.json
found tip.json
found review.json
found photo.json

loading ../data/yelp_dataset/business.json...
loaded in 192609 lines of data into a list in 264.7689700126648 seconds
loaded 192609 lines as a df in 1.0468401908874512 seconds
business.json has been converted to business_df.pkl in 2.9696381092071533 seconds
loading ../data/yelp_dataset/review_sample.json...
loaded in 5000 lines of data into a list in 0.04626607894897461 seconds
loaded 5000 lines as a df in 0.03955817222595215 seconds
review_sample.json has been converted to review_sample_df.pkl in 0.015414953231811523 seconds
loading ../data/yelp_dataset/user.json...
loaded in 1637138 lines of data into a list in 105.56039309501648 seconds
loaded 1637138 lines as a df in 28.80335807800293 seconds
user.json has been converted to user_df.pkl in 19.84369993209839 seconds
loading ../data/yelp_dataset/checkin.json...
loaded in 161950 lines of data into a list in

## See Schema

In [50]:
for file in json_file_list:
    raw_data = []
    with open(file) as f:
        for line in f:
            raw_data.append(json.loads(line))
    print('printing schema for {}, {} rows'.format(file, len(raw_data)))
    schema = list(raw_data[0].keys())
    print(schema)

printing schema for ../data/yelp_dataset/business.json, 192609 rows
['business_id', 'name', 'address', 'city', 'state', 'postal_code', 'latitude', 'longitude', 'stars', 'review_count', 'is_open', 'attributes', 'categories', 'hours']
printing schema for ../data/yelp_dataset/review_sample.json, 5000 rows
['review_id', 'user_id', 'business_id', 'stars', 'useful', 'funny', 'cool', 'text', 'date']
printing schema for ../data/yelp_dataset/user.json, 1637138 rows
['user_id', 'name', 'review_count', 'yelping_since', 'useful', 'funny', 'cool', 'elite', 'friends', 'fans', 'average_stars', 'compliment_hot', 'compliment_more', 'compliment_profile', 'compliment_cute', 'compliment_list', 'compliment_note', 'compliment_plain', 'compliment_cool', 'compliment_funny', 'compliment_writer', 'compliment_photos']
printing schema for ../data/yelp_dataset/checkin.json, 161950 rows
['business_id', 'date']
printing schema for ../data/yelp_dataset/tip.json, 1223094 rows
['user_id', 'business_id', 'text', 'date',

## Peak into each DF

In [73]:
business_df = pd.read_pickle('../data/business_df.pkl')
print(business_df.shape)
business_df.head(2)

(192609, 14)


Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours
0,1SWheh84yJXfytovILXOAQ,Arizona Biltmore Golf Club,2818 E Camino Acequia Drive,Phoenix,AZ,85016,33.522143,-112.018481,3.0,5,0,{'GoodForKids': 'False'},"Golf, Active Life",
1,QXAEGFB4oINsVuTFxEYKFQ,Emerald Chinese Restaurant,30 Eglinton Avenue W,Mississauga,ON,L5R 3E7,43.605499,-79.652289,2.5,128,1,"{'RestaurantsReservations': 'True', 'GoodForMe...","Specialty Food, Restaurants, Dim Sum, Imported...","{'Monday': '9:0-0:0', 'Tuesday': '9:0-0:0', 'W..."


In [84]:
user_df = pd.read_pickle('../data/user_df.pkl')
print(user_df.shape)
user_df.head(2)

(1637138, 22)


Unnamed: 0,user_id,name,review_count,yelping_since,useful,funny,cool,elite,friends,fans,average_stars,compliment_hot,compliment_more,compliment_profile,compliment_cute,compliment_list,compliment_note,compliment_plain,compliment_cool,compliment_funny,compliment_writer,compliment_photos
0,l6BmjZMeQD3rDxWUbiAiow,Rashmi,95,2013-10-08 23:11:33,84,17,25,201520162017.0,"c78V-rj8NQcQjOI8KP3UEA, alRMgPcngYSCJ5naFRBz5g...",5,4.03,2,0,0,0,0,1,1,1,1,2,0
1,4XChL029mKr5hydo79Ljxg,Jenna,33,2013-02-21 22:29:06,48,22,16,,"kEBTgDvFX754S68FllfCaA, aB2DynOxNOJK9st2ZeGTPg...",4,3.63,1,0,0,0,0,0,0,1,1,0,0


In [8]:
checkin_df = pd.read_pickle('../data/checkin_df.pkl')
print(checkin_df.shape)
checkin_df.head(2)

(161950, 2)


Unnamed: 0,business_id,date
0,--1UhMGODdWsrMastO9DZw,"2016-04-26 19:49:16, 2016-08-30 18:36:57, 2016..."
1,--6MefnULPED_I942VcFNA,"2011-06-04 18:22:23, 2011-07-23 23:51:33, 2012..."


In [10]:
tip_df = pd.read_pickle('../data/tip_df.pkl')
print(tip_df.shape)
tip_df.head(2)

(1223094, 5)


Unnamed: 0,user_id,business_id,text,date,compliment_count
0,UPw5DWs_b-e2JRBS-t37Ag,VaKXUpmWTTWDKbpJ3aQdMw,"Great for watching games, ufc, and whatever el...",2014-03-27 03:51:24,0
1,Ocha4kZBHb4JK0lOWvE0sg,OPiPeoJiv92rENwbq76orA,Happy Hour 2-4 daily with 1/2 price drinks and...,2013-05-25 06:00:56,0


In [87]:
review_df = pd.read_pickle('../data/review_df.pkl')
print(review_df.shape)
review_df.head(2)

(6685900, 9)


Unnamed: 0,review_id,user_id,business_id,stars,useful,funny,cool,text,date
0,Q1sbwvVQXV2734tPgoKj4Q,hG7b0MtEbXx5QzbzE6C_VA,ujmEBvifdJM6h6RLv4wQIg,1.0,6,1,0,Total bill for this horrible service? Over $8G...,2013-05-07 04:34:36
1,GJXCdrto3ASJOqKeVWPi6Q,yXQM5uF2jS6es16SJzNHfg,NZnhc2sEQy3RmzKTZnqtwQ,5.0,0,0,0,I *adore* Travis at the Hard Rock's new Kelly ...,2017-01-14 21:30:33


In [12]:
photo_df = pd.read_pickle('../data/photo_df.pkl')
print(photo_df.shape)
photo_df.head(2)

(200000, 4)


Unnamed: 0,caption,photo_id,business_id,label
0,,MllA1nNpcp1kDteVg6OGUw,rcaPajgKOJC2vo_l3xa42A,inside
1,,YjxBE88Bf6CmTEF2LP1UNA,Kn23LDd740SBVJ7mum0fwg,inside


In [15]:
min(review_df['date']), max(review_df['date'])

('2004-10-12 10:13:32', '2018-11-14 18:13:26')

## Create Combined DF
add new computed columns and drop the columns used to create them

### exploring and re-creating 'user_df'

adds likes, compliments, elite columns into one column that is more readable for use in EDA/modelling

In [89]:
user_df['likes'] = user_df['useful'] + user_df['funny']+ user_df['cool']
user_df['compliments'] = user_df['compliment_hot'] + user_df['compliment_more']+ user_df['compliment_profile']+ user_df['compliment_cute']+ user_df['compliment_list']+ user_df['compliment_note']+ user_df['compliment_plain']+ user_df['compliment_cool']+ user_df['compliment_funny']+ user_df['compliment_writer']+ user_df['compliment_photos']
user_df['elite_years'] = user_df['elite'].apply(lambda x: len(x.split(',')))
user_df = user_df.drop(['useful', 'funny', 'cool', 'elite', 'compliment_hot', 'compliment_more', 'compliment_profile', 'compliment_cute', 'compliment_list', 'compliment_note', 'compliment_plain', 'compliment_cool', 'compliment_funny', 'compliment_writer', 'compliment_photos'], axis=1)
user_df.head()

Unnamed: 0,user_id,name,review_count,yelping_since,friends,fans,average_stars,likes,compliments,elite_years
0,l6BmjZMeQD3rDxWUbiAiow,Rashmi,95,2013-10-08 23:11:33,"c78V-rj8NQcQjOI8KP3UEA, alRMgPcngYSCJ5naFRBz5g...",5,4.03,126,8,3
1,4XChL029mKr5hydo79Ljxg,Jenna,33,2013-02-21 22:29:06,"kEBTgDvFX754S68FllfCaA, aB2DynOxNOJK9st2ZeGTPg...",4,3.63,86,3,1
2,bc8C_eETBWL0olvFSJJd0w,David,16,2013-10-04 00:16:10,"4N-HU_T32hLENLntsNKNBg, pSY2vwWLgWfGVAAiKQzMng...",0,3.71,46,1,1
3,dD0gZpBctWGdWo9WlGuhlA,Angela,17,2014-05-22 15:57:30,"RZ6wS38wnlXyj-OOdTzBxA, l5jxZh1KsgI8rMunm-GN6A...",5,4.85,48,4,1
4,MM4RJAeH6yuaN8oZDSt0RA,Nancy,361,2013-10-23 07:02:50,"mbwrZ-RS76V1HoJ0bF_Geg, g64lOV39xSLRZO0aQQ6DeQ...",39,4.08,2058,293,4


### exploring and re-creating 'business_df'

In [74]:
business_df.categories = business_df.categories.fillna('[]') # businesses without categories get a null category
business_df.categories = business_df['categories'].apply(lambda x: x.split(', '))
business_df.head()

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours
0,1SWheh84yJXfytovILXOAQ,Arizona Biltmore Golf Club,2818 E Camino Acequia Drive,Phoenix,AZ,85016,33.522143,-112.018481,3.0,5,0,{'GoodForKids': 'False'},"[Golf, Active Life]",
1,QXAEGFB4oINsVuTFxEYKFQ,Emerald Chinese Restaurant,30 Eglinton Avenue W,Mississauga,ON,L5R 3E7,43.605499,-79.652289,2.5,128,1,"{'RestaurantsReservations': 'True', 'GoodForMe...","[Specialty Food, Restaurants, Dim Sum, Importe...","{'Monday': '9:0-0:0', 'Tuesday': '9:0-0:0', 'W..."
2,gnKjwL_1w79qoiV3IC_xQQ,Musashi Japanese Restaurant,"10110 Johnston Rd, Ste 15",Charlotte,NC,28210,35.092564,-80.859132,4.0,170,1,"{'GoodForKids': 'True', 'NoiseLevel': 'u'avera...","[Sushi Bars, Restaurants, Japanese]","{'Monday': '17:30-21:30', 'Wednesday': '17:30-..."
3,xvX2CttrVhyG2z1dFg_0xw,Farmers Insurance - Paul Lorenz,"15655 W Roosevelt St, Ste 237",Goodyear,AZ,85338,33.455613,-112.395596,5.0,3,1,,"[Insurance, Financial Services]","{'Monday': '8:0-17:0', 'Tuesday': '8:0-17:0', ..."
4,HhyxOkGAM07SRYtlQ4wMFQ,Queen City Plumbing,"4209 Stuart Andrew Blvd, Ste F",Charlotte,NC,28217,35.190012,-80.887223,4.0,4,1,"{'BusinessAcceptsBitcoin': 'False', 'ByAppoint...","[Plumbing, Shopping, Local Services, Home Serv...","{'Monday': '7:0-23:0', 'Tuesday': '7:0-23:0', ..."


#### 788841 category listings

In [52]:
# converts categories column into one list of all categories

category_lst = []
for category in business_df['categories']:
    category_lst.extend(category)
    
len(category_lst)

788841

#### there are 1301 categories !!!!

#### this is <b>too</b> many, let's only keep 'Restaurants' category for next steps to simplify EDA/modelling

#### notes:
* briefly looking at the categories and business_df, there are restaurant subtypes (e.g. chinese, seafood, sushi bars) but they also have a 'restaurant' category appended to them
* certain 'food' category types (e.g. bakery) do not have a 'restaurant category type, these would be excluded from our final dataset for simplification

In [57]:
len(Counter(category_lst))

1301

In [58]:
Counter(category_lst).keys()

dict_keys(['Golf', 'Active Life', 'Specialty Food', 'Restaurants', 'Dim Sum', 'Imported Food', 'Food', 'Chinese', 'Ethnic Food', 'Seafood', 'Sushi Bars', 'Japanese', 'Insurance', 'Financial Services', 'Plumbing', 'Shopping', 'Local Services', 'Home Services', 'Kitchen & Bath', 'Home & Garden', 'Water Heater Installation/Repair', 'Shipping Centers', 'Couriers & Delivery Services', 'Printing Services', 'Beauty & Spas', 'Hair Salons', 'Hair Stylists', 'Barbers', "Men's Hair Salons", 'Cosmetics & Beauty Supply', 'Nail Salons', 'Day Spas', 'Massage', 'Professional Services', 'Computers', 'IT Services & Computer Repair', 'Internet Service Providers', 'Web Design', 'Breakfast & Brunch', 'Mexican', 'Tacos', 'Tex-Mex', 'Fast Food', 'Bars', 'Nightlife', 'Pubs', 'Irish Pub', 'Italian', 'Pizza', 'Chicken Wings', 'Bakeries', 'Fitness & Instruction', 'Yoga', 'Event Planning & Services', 'Photographers', 'Trainers', 'Health & Medical', 'Physical Therapy', 'Gyms', 'Arcades', 'Arts & Entertainment', 'A

In [67]:
Counter(category_lst)['Restaurants']

59371

In [75]:
business_df['restaurant?'] = business_df['categories'].apply(lambda x: True if 'Restaurants' in x else False)
business_df.head()

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours,restaurant?
0,1SWheh84yJXfytovILXOAQ,Arizona Biltmore Golf Club,2818 E Camino Acequia Drive,Phoenix,AZ,85016,33.522143,-112.018481,3.0,5,0,{'GoodForKids': 'False'},"[Golf, Active Life]",,False
1,QXAEGFB4oINsVuTFxEYKFQ,Emerald Chinese Restaurant,30 Eglinton Avenue W,Mississauga,ON,L5R 3E7,43.605499,-79.652289,2.5,128,1,"{'RestaurantsReservations': 'True', 'GoodForMe...","[Specialty Food, Restaurants, Dim Sum, Importe...","{'Monday': '9:0-0:0', 'Tuesday': '9:0-0:0', 'W...",True
2,gnKjwL_1w79qoiV3IC_xQQ,Musashi Japanese Restaurant,"10110 Johnston Rd, Ste 15",Charlotte,NC,28210,35.092564,-80.859132,4.0,170,1,"{'GoodForKids': 'True', 'NoiseLevel': 'u'avera...","[Sushi Bars, Restaurants, Japanese]","{'Monday': '17:30-21:30', 'Wednesday': '17:30-...",True
3,xvX2CttrVhyG2z1dFg_0xw,Farmers Insurance - Paul Lorenz,"15655 W Roosevelt St, Ste 237",Goodyear,AZ,85338,33.455613,-112.395596,5.0,3,1,,"[Insurance, Financial Services]","{'Monday': '8:0-17:0', 'Tuesday': '8:0-17:0', ...",False
4,HhyxOkGAM07SRYtlQ4wMFQ,Queen City Plumbing,"4209 Stuart Andrew Blvd, Ste F",Charlotte,NC,28217,35.190012,-80.887223,4.0,4,1,"{'BusinessAcceptsBitcoin': 'False', 'ByAppoint...","[Plumbing, Shopping, Local Services, Home Serv...","{'Monday': '7:0-23:0', 'Tuesday': '7:0-23:0', ...",False


In [82]:
restaurant_df = business_df[business_df['restaurant?']==True]
restaurant_df = restaurant_df.drop(['restaurant?'], axis=1)
restaurant_df = restaurant_df.reset_index(drop=True)
restaurant_df.head()

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours
0,QXAEGFB4oINsVuTFxEYKFQ,Emerald Chinese Restaurant,30 Eglinton Avenue W,Mississauga,ON,L5R 3E7,43.605499,-79.652289,2.5,128,1,"{'RestaurantsReservations': 'True', 'GoodForMe...","[Specialty Food, Restaurants, Dim Sum, Importe...","{'Monday': '9:0-0:0', 'Tuesday': '9:0-0:0', 'W..."
1,gnKjwL_1w79qoiV3IC_xQQ,Musashi Japanese Restaurant,"10110 Johnston Rd, Ste 15",Charlotte,NC,28210,35.092564,-80.859132,4.0,170,1,"{'GoodForKids': 'True', 'NoiseLevel': 'u'avera...","[Sushi Bars, Restaurants, Japanese]","{'Monday': '17:30-21:30', 'Wednesday': '17:30-..."
2,1Dfx3zM-rW4n-31KeC8sJg,Taco Bell,2450 E Indian School Rd,Phoenix,AZ,85016,33.495194,-112.028588,3.0,18,1,"{'RestaurantsTakeOut': 'True', 'BusinessParkin...","[Restaurants, Breakfast & Brunch, Mexican, Tac...","{'Monday': '7:0-0:0', 'Tuesday': '7:0-0:0', 'W..."
3,fweCYi8FmbJXHCqLnwuk8w,Marco's Pizza,5981 Andrews Rd,Mentor-on-the-Lake,OH,44060,41.70852,-81.359556,4.0,16,1,"{'RestaurantsPriceRange2': '2', 'BusinessAccep...","[Italian, Restaurants, Pizza, Chicken Wings]","{'Monday': '10:0-0:0', 'Tuesday': '10:0-0:0', ..."
4,PZ-LZzSlhSe9utkQYU8pFg,Carluccio's Tivoli Gardens,"1775 E Tropicana Ave, Ste 29",Las Vegas,NV,89119,36.100016,-115.128529,4.0,40,0,"{'OutdoorSeating': 'False', 'BusinessAcceptsCr...","[Restaurants, Italian]",


### Create Master DF

In [91]:
master_df = restaurant_df.merge(review_df, left_on='business_id', right_on='business_id').merge(user_df, left_on='user_id', right_on='user_id')

master_df.head()

Unnamed: 0,business_id,name_x,address,city,state,postal_code,latitude,longitude,stars_x,review_count_x,is_open,attributes,categories,hours,review_id,user_id,stars_y,useful,funny,cool,text,date,name_y,review_count_y,yelping_since,friends,fans,average_stars,likes,compliments,elite_years
0,QXAEGFB4oINsVuTFxEYKFQ,Emerald Chinese Restaurant,30 Eglinton Avenue W,Mississauga,ON,L5R 3E7,43.605499,-79.652289,2.5,128,1,"{'RestaurantsReservations': 'True', 'GoodForMe...","[Specialty Food, Restaurants, Dim Sum, Importe...","{'Monday': '9:0-0:0', 'Tuesday': '9:0-0:0', 'W...",6W0MQHmasK0IsaoDo4bmkw,2K62MJ4CJ19L8Tp5pRfjfQ,3.0,3,2,0,My girlfriend and I went for dinner at Emerald...,2017-01-27 21:54:30,David,57,2014-11-27 22:36:00,"ZJfwkdJMex21dGHfQ3YgTg, 603oDa3dGdJyfkq2RoH2Dw...",1,3.3,100,16,2
1,NX1281ugzs2navHAX5X9cQ,Cha Me Cha,"8333 Kennedy Road, Suite 1078",Markham,ON,L3R 1J5,43.856327,-79.303884,3.0,293,1,"{'OutdoorSeating': 'False', 'RestaurantsPriceR...","[Bubble Tea, Food, Restaurants, Taiwanese, Cof...","{'Monday': '12:0-0:0', 'Tuesday': '12:0-0:0', ...",wbRQarYt_jBHNleOr0e1ng,2K62MJ4CJ19L8Tp5pRfjfQ,4.0,1,0,0,We decided to make a stop at Cha Me Cha for a ...,2018-08-20 21:38:43,David,57,2014-11-27 22:36:00,"ZJfwkdJMex21dGHfQ3YgTg, 603oDa3dGdJyfkq2RoH2Dw...",1,3.3,100,16,2
2,5H4coiGpvG3XdnU1Nde2xQ,Keung's Restaurant,8380 Kennedy Road,Markham,ON,L3R 0W4,43.860726,-79.304713,3.0,60,1,"{'Alcohol': 'u'none'', 'BusinessParking': '{'g...","[Restaurants, Chinese]","{'Monday': '11:0-0:0', 'Tuesday': '11:0-0:0', ...",4xF9XB58CCuOcOv1lZJiiw,2K62MJ4CJ19L8Tp5pRfjfQ,3.0,0,0,0,I went to Keung's restaurant over the weekend ...,2014-12-02 23:45:52,David,57,2014-11-27 22:36:00,"ZJfwkdJMex21dGHfQ3YgTg, 603oDa3dGdJyfkq2RoH2Dw...",1,3.3,100,16,2
3,SvMBtzXfE-3H5pxoR7VD3w,Kenny's Gourmet,"1480 Major Mackenzie Drive E, Unit 7",Richmond Hill,ON,L3R,43.883345,-79.38816,3.0,15,0,"{'RestaurantsReservations': 'True', 'BikeParki...","[Restaurants, Chinese]",,naq8WG94TqTeE4E__Q3x7A,2K62MJ4CJ19L8Tp5pRfjfQ,3.0,0,0,0,I visited Kenny's with some friend to eat lunc...,2015-01-19 22:09:07,David,57,2014-11-27 22:36:00,"ZJfwkdJMex21dGHfQ3YgTg, 603oDa3dGdJyfkq2RoH2Dw...",1,3.3,100,16,2
4,PFAvETr4Vf6UY548TWvhFA,Koi Sakana Ramen House,"360 Highway 7 E, Unit 3",Richmond Hill,ON,L4B 3Y7,43.841523,-79.397015,3.5,148,1,"{'OutdoorSeating': 'False', 'BikeParking': 'Fa...","[Restaurants, Canadian (New), Japanese, Ramen]","{'Tuesday': '17:0-21:0', 'Wednesday': '17:0-21...",CeNglOZFNkOYP8Gx3CIxRw,2K62MJ4CJ19L8Tp5pRfjfQ,5.0,2,1,1,"I went to Koi Sakana last Saturday, my second ...",2016-02-19 02:08:29,David,57,2014-11-27 22:36:00,"ZJfwkdJMex21dGHfQ3YgTg, 603oDa3dGdJyfkq2RoH2Dw...",1,3.3,100,16,2


In [92]:
master_df.to_pickle('../data/master_df.pkl')

OSError: [Errno 22] Invalid argument