In [1]:
import os
import sys
# Make the parent directory importable so the project package (`credible`,
# imported in the next cell) can be resolved when the notebook is run from
# its own folder. Guarded so that re-running this cell does not keep
# appending duplicate entries to sys.path.
if os.pardir not in sys.path:
    sys.path.append(os.pardir)

### Imports

In [2]:
import json
import pathlib
import pandas as pd
from sqlalchemy.orm import sessionmaker
from credible import connectors
from credible.etl import Etl
from credible.objects import Base, Photo, Tip, Checkin, Business, User, Review

### Parameters

In [3]:
# Database handle shared by the whole notebook: it is bound to the session
# factory below and passed to pd.read_sql_table for the preview cells.
# NOTE(review): presumably a SQLAlchemy Engine pointing at a local SQLite
# file -- confirm in credible.connectors.connect_to_sqlite.
engine = connectors.connect_to_sqlite()

In [4]:
# Collect every '*_x.json' export found in ../data.
data_folderpath = os.path.join(os.pardir, 'data')
generator = pathlib.Path(data_folderpath).glob('*_x.json')

# Files are matched to tables purely by size rank, largest first:
# review > user > checkin > tip > business > photo.
# NOTE(review): this silently mis-assigns files if their relative sizes ever
# change -- consider matching on file names instead.
sorted_list = sorted(generator, key=os.path.getsize, reverse=True)

# Fail with an explicit message instead of an opaque tuple-unpacking error
# when a file is missing or an unexpected extra file matches the pattern.
if len(sorted_list) != 6:
    raise ValueError(
        f"Expected exactly 6 '*_x.json' files in {data_folderpath!r}, "
        f"found {len(sorted_list)}: {[path.name for path in sorted_list]}"
    )

file_review, file_user, file_checkin, file_tip, file_business, file_photo = sorted_list

### Session

In [5]:
# Session factory bound to the notebook's engine.
Session = sessionmaker(bind=engine)
# ETL helper used by every load cell below (create_all / bulk_insert); it is
# given the declarative Base, the session factory and the engine.
etl = Etl(Base, Session, engine)

In [7]:
# (Re)create the database schema before loading.
# NOTE(review): drop_all=True presumably drops existing tables first, which
# would discard any previously loaded rows when this cell is re-run --
# confirm in credible.etl.Etl.create_all.
etl.create_all(drop_all=True)

### Import data to Photos

In [8]:
# Parse the photo export and hand the parsed records to the ETL helper for
# the Photo model.
# The encoding is given explicitly: without it open() uses the platform
# default (e.g. cp1252 on Windows), which can fail or garble non-ASCII text.
with open(file_photo, 'r', encoding='utf-8') as json_data:
    data = json.load(json_data)
etl.bulk_insert(Photo, data)

In [9]:
# Sanity check: show one random row of the freshly loaded 'photos' table.
# (read_sql_table pulls the whole table into a DataFrame before sampling.)
pd.read_sql_table(table_name='photos', con=engine).sample(n=1)

Unnamed: 0,_id,photo_id,business_id,caption,label
69208,69209,pDXLs2GxrmuyVxwuVdqGaQ,3oTVApC-eUzpGjrOVxIr5g,Their appetizer wings are pretty good.,food


### Import data to Tips

In [10]:
# Parse the tip export. The encoding is given explicitly: without it open()
# uses the platform default, which can fail or garble non-ASCII text.
with open(file_tip, 'r', encoding='utf-8') as json_data:
    data = json.load(json_data)

# requires data transformation: each parsed record is wrapped in a Tip
# object before insertion.
tips_list = [Tip(record) for record in data]

etl.bulk_insert(Tip, tips_list)

In [11]:
# Sanity check: show one random row of the freshly loaded 'tips' table.
# (read_sql_table pulls the whole table into a DataFrame before sampling.)
pd.read_sql_table(table_name='tips', con=engine).sample(n=1)

Unnamed: 0,_id,user_id,business_id,text,date,compliment_count
677242,677243,8wfxxGCd_xmaG2wt-d6yVw,_81fRJ9m8Fn1s8ixPun46A,The killer chicken sandwich changes monthly; m...,2015-12-29 03:42:55,0


### Import data to Checkins

In [12]:
# Parse the check-in export and hand the parsed records to the ETL helper
# for the Checkin model.
# The encoding is given explicitly: without it open() uses the platform
# default, which can fail or garble non-ASCII text.
with open(file_checkin, 'r', encoding='utf-8') as json_data:
    data = json.load(json_data)
etl.bulk_insert(Checkin, data)

In [13]:
# Sanity check: show one random row of the freshly loaded 'checkins' table.
# (read_sql_table pulls the whole table into a DataFrame before sampling.)
pd.read_sql_table(table_name='checkins', con=engine).sample(n=1)

Unnamed: 0,_id,business_id,date
113971,113972,gxt2xfsr-Be1EUYZmicdFg,"2015-09-04 22:43:32, 2015-11-27 18:48:08, 2016..."


### Import data to Businesses

In [14]:
# Parse the business export. The encoding is given explicitly: without it
# open() uses the platform default, which can fail or garble non-ASCII text
# (business names and addresses are likely to contain some).
with open(file_business, 'r', encoding='utf-8') as json_data:
    data = json.load(json_data)

# requires data transformation: each parsed record is wrapped in a Business
# object before insertion.
business_list = [Business(record) for record in data]

etl.bulk_insert(Business, business_list)

In [15]:
# Sanity check: show one random row of the freshly loaded 'businesses' table.
# (read_sql_table pulls the whole table into a DataFrame before sampling.)
# NOTE(review): the recorded output below has a column named `starts` whose
# value is empty -- possibly a typo for `stars` in the Business model that
# leaves the rating unpopulated; confirm in credible.objects.
pd.read_sql_table(table_name='businesses', con=engine).sample(n=1)

Unnamed: 0,_id,business_id,name,address,city,state,postal_code,latitude,longitude,starts,review_count,is_open,attributes,categories,hours
55993,55994,bE2pOsHjCM8048ILoRVxNw,Pasqual's Cantina,100 Cross Country Rd,Verona,WI,53593,43.001934,-89.528862,,55,1,"{""WiFi"": ""u'free'"", ""Music"": ""{'dj': False, 'b...","Nightlife, Bars, Mexican, Restaurants, Tex-Mex","{""Monday"": ""0:0-0:0"", ""Tuesday"": ""11:0-21:0"", ..."


### Import data to Users

In [22]:
# Parse the user export. The encoding is given explicitly: without it open()
# uses the platform default, which can fail or garble non-ASCII text
# (user names are likely to contain some).
with open(file_user, 'r', encoding='utf-8') as json_data:
    data = json.load(json_data)

# requires data transformation: each parsed record is wrapped in a User
# object before insertion.
users_list = [User(record) for record in data]

etl.bulk_insert(User, users_list)

In [24]:
# Sanity check: show one random row of the freshly loaded 'users' table.
# (read_sql_table pulls the whole table into a DataFrame before sampling.)
pd.read_sql_table(table_name='users', con=engine).sample(n=1)

Unnamed: 0,_id,user_id,name,review_count,yelping_since,friends,useful,funny,cool,fans,...,compliment_more,compliment_profile,compliment_cute,compliment_list,compliment_note,compliment_plain,compliment_cool,compliment_funny,compliment_writer,compliment_photos
1476033,1476034,WNdNT1RASHG79REYDd4KLg,Sarah,6,2011-01-03 23:08:29,"u1G18qGUbaDXdAX6QaV_eA, A_sqjh_-fTTiDlyFWtQ08w...",8,1,2,2,...,0,0,0,0,0,0,0,0,0,0


### Import data to Reviews

In [18]:
# Parse the review export and hand the parsed records to the ETL helper for
# the Review model. This is the largest input file (first in the size-sorted
# list), and json.load reads it fully into memory.
# The encoding is given explicitly: without it open() uses the platform
# default, which can fail or garble non-ASCII review text.
with open(file_review, 'r', encoding='utf-8') as json_data:
    data = json.load(json_data)
etl.bulk_insert(Review, data)

In [19]:
# Sanity check: show one random row of the freshly loaded 'reviews' table.
# (read_sql_table pulls the whole table into a DataFrame before sampling,
# which is costly for a table this size.)
pd.read_sql_table(table_name='reviews', con=engine).sample(n=1)

Unnamed: 0,_id,review_id,business_id,user_id,stars,date,text,useful,funny,cool
3619819,3619820,VH4lbeq6Cc_SDgGRJ93KRw,yX0pOWG_Be9TjGnm0aihVA,AOtwR1sC6psijjnbkwbR9A,5,2016-04-03 18:23:32,Nice casual atmosphere! Great vodka club soda...,0,0,0
