In [1]:
import os
import sys
# Add the parent directory to the import path; the `credible` imports in the
# next cell presumably rely on this. The entry is relative (os.pardir), so it
# resolves against the kernel's current working directory.
sys.path.append(os.pardir)

### Imports

In [2]:
import json
import pathlib
import pandas as pd
from sqlalchemy.orm import sessionmaker
from credible import connectors
from credible.etl import Etl
from credible.objects import Base, Photo, Tip, Checkin, Business, User, Review

### Parameters

In [3]:
# Database handle obtained from the project's connector helper. It is bound to
# the session factory below and passed to pandas for the read-back checks.
engine = connectors.connect_to_sqlite()

In [4]:
# Locate the JSON extracts in ../data and bind each one to a name.
data_folderpath = os.path.join(os.pardir, 'data')
generator = pathlib.Path(data_folderpath).glob('*_x.json')

# NOTE(review): the files are told apart purely by their size rank (largest
# first). If the relative sizes ever change, the names below will silently
# point at the wrong files — confirm against the actual file names.
sorted_list = sorted(generator, key=os.path.getsize, reverse=True)

# Fail with a readable message instead of a bare tuple-unpacking error when the
# data folder does not contain exactly the six expected files.
if len(sorted_list) != 6:
    raise ValueError(
        f"Expected 6 files matching '*_x.json' in {data_folderpath!r}, "
        f"found {len(sorted_list)}: {[p.name for p in sorted_list]}"
    )

file_review, file_user, file_checkin, file_tip, file_business, file_photo = sorted_list

### Session

In [5]:
# Session factory bound to the engine created above.
Session = sessionmaker(bind=engine)
# Project ETL helper, constructed from the `Base` imported from
# `credible.objects`, the session factory and the engine.
etl = Etl(Base, Session, engine)

In [6]:
# NOTE(review): presumably creates the database tables for the models
# registered on `Base` — confirm in `credible.etl`.
etl.create_all()

### Import data to Photos

In [None]:
# Parse the photo file and hand the parsed records straight to the bulk insert.
# The encoding is stated explicitly so that parsing does not depend on the
# platform's default locale encoding.
with open(file_photo, 'r', encoding='utf-8') as json_data:
    data = json.load(json_data)
etl.bulk_insert(Photo, data)

In [None]:
# Spot-check the load: read the `photos` table back and show one random row.
pd.read_sql_table(table_name='photos', con=engine).sample(n=1)

### Import data to Tips

In [9]:
# Parse the tip file. The encoding is stated explicitly so that parsing does
# not depend on the platform's default locale encoding.
with open(file_tip, 'r', encoding='utf-8') as json_data:
    data = json.load(json_data)

# requires data transformation: each raw record is wrapped in a Tip object
# before insertion.
tips_list = [Tip(d) for d in data]

etl.bulk_insert(Tip, tips_list)

In [10]:
# Spot-check the load: read the `tips` table back and show one random row.
pd.read_sql_table(table_name='tips', con=engine).sample(n=1)

Unnamed: 0,_id,user_id,business_id,text,date,compliment_count
615279,615280,-TD_eG2oKsZjv3-LXnTXMw,vsFFbN71ehRCp46KeR5RdQ,Whole hog skillet. Delicious.,2015-03-28 16:55:57,0


In [11]:
# Show the first raw record for comparison with the inserted row above.
# Relies on `data` still holding the records loaded from the tips file.
data[0]

{'user_id': 'UPw5DWs_b-e2JRBS-t37Ag',
 'business_id': 'VaKXUpmWTTWDKbpJ3aQdMw',
 'text': 'Great for watching games, ufc, and whatever else tickles yer fancy',
 'date': '2014-03-27 03:51:24',
 'compliment_count': 0}

### Import data to Checkins

In [None]:
# Parse the check-in file and hand the parsed records straight to the bulk
# insert. The encoding is stated explicitly so that parsing does not depend on
# the platform's default locale encoding.
with open(file_checkin, 'r', encoding='utf-8') as json_data:
    data = json.load(json_data)
etl.bulk_insert(Checkin, data)

In [None]:
# Spot-check the load: read the `checkins` table back and show one random row.
pd.read_sql_table(table_name='checkins', con=engine).sample(n=1)

### Import data to Businesses

In [None]:
# Parse the business file. The encoding is stated explicitly so that parsing
# does not depend on the platform's default locale encoding.
with open(file_business, 'r', encoding='utf-8') as json_data:
    data = json.load(json_data)

# requires data transformation: each raw record is wrapped in a Business
# object before insertion.
business_list = [Business(d) for d in data]

etl.bulk_insert(Business, business_list)

In [None]:
# Spot-check the load: read the `businesses` table back and show one random row.
pd.read_sql_table(table_name='businesses', con=engine).sample(n=1)

### Import data to Users

In [None]:
# Parse the user file. The encoding is stated explicitly so that parsing does
# not depend on the platform's default locale encoding.
with open(file_user, 'r', encoding='utf-8') as json_data:
    data = json.load(json_data)

# requires data transformation: each raw record is wrapped in a User object
# before insertion. Every record is converted (no truncating slice), matching
# how the Tip and Business cells handle their data.
users_list = [User(d) for d in data]

etl.bulk_insert(User, users_list)

In [None]:
# Spot-check the load: read the `users` table back and show one random row.
pd.read_sql_table(table_name='users', con=engine).sample(n=1)

### Import data to Reviews

In [7]:
# Parse the review file and hand the parsed records straight to the bulk
# insert. The encoding is stated explicitly so that parsing does not depend on
# the platform's default locale encoding.
with open(file_review, 'r', encoding='utf-8') as json_data:
    data = json.load(json_data)
etl.bulk_insert(Review, data)

In [8]:
# Spot-check the load: read the `reviews` table back and show one random row.
pd.read_sql_table(table_name='reviews', con=engine).sample(n=1)

Unnamed: 0,_id,review_id,business_id,user_id,stars,date,text,useful,funny,cool
4021657,4021658,iGgfv_-Nr_ACf4OoqxxxgA,uKrTmGn1HhwxcvHxsKyTIQ,6954Mf4fGGka6SHZa9b2GQ,2,2008-08-22 17:21:24,The Deal - Specializing in curly hair and hair...,16,2,1
