In [23]:
# import dependencies
import pandas as pd
import json
import os

# Import SQL Alchemy
from sqlalchemy import create_engine

# Import and establish Base for which classes will be constructed 
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.ext.automap import automap_base
Base = declarative_base()

# Import modules to declare columns and column data types
from sqlalchemy import Column, Integer, String, Float, Boolean, DateTime, Date, func, ForeignKey
from sqlalchemy.orm import relationship, session
from sqlalchemy.exc import SQLAlchemyError

from datetime import datetime

# Value written to the source_id column of the category rows inserted by this notebook
DATASOURCE = 1 # Default data source

# Hide warning messages in notebook
#import warnings
#warnings.filterwarnings('ignore')

In [26]:
# Create classes
class RestaurantCls(Base):
    """Declarative ORM mapping for one restaurant row.

    Columns cover identity (``name``, ``business_id``), location
    (``state_id``, ``city``, ``street``, ``zip_code``), operational info
    (``price_range``, ``rating``, ``is_closed``) and bookkeeping
    (``source_id``, ``modified_date``). Every non-key column is declared
    NOT NULL.
    """
    # Lower-case to match the table name the database reflection in this
    # notebook reports ('restaurant'). SQLAlchemy quotes mixed-case names,
    # so 'Restaurant' would refer to a different table in PostgreSQL.
    __tablename__ = 'restaurant'

    id = Column(Integer, primary_key=True)
    name = Column(String(255), nullable=False)
    # Identifier assigned by the data source; must be unique per restaurant
    business_id = Column(String(255), unique=True, nullable=False)

    # location
    state_id = Column(Integer, ForeignKey('state.id'), nullable=False)
    city = Column(String(100), nullable=False)
    street = Column(String(250), nullable=False)
    zip_code = Column(String(10), nullable=False)

    # Operational info
    price_range = Column(Integer, nullable=False)
    rating = Column(Float, nullable=False)
    is_closed = Column(Boolean, nullable=False)
    source_id = Column(Integer, nullable=False)

    # func.now() is rendered as SQL and evaluated by the database on every
    # INSERT / UPDATE. A pre-computed datetime value passed to onupdate would
    # be frozen at class-definition time and reused for every update.
    modified_date = Column(Date, nullable=False, default=func.now(), onupdate=func.now())

class CategoryCls(Base):
    """Declarative ORM mapping for the ``category`` table.

    Each row pairs a category ``alias`` with its display ``title`` and
    records which data source it came from.
    """

    __tablename__ = 'category'

    # Surrogate primary key
    id = Column(Integer, primary_key=True)

    # Category alias and title; both are required
    alias = Column(String(length=255), nullable=False)
    title = Column(String(length=255), nullable=False)

    # Identifier of the data source that supplied the row
    source_id = Column(Integer, nullable=False)

    # Filled with the database's now() when no value is given on insert
    modified_date = Column(Date, default=func.now())
    

In [17]:
# read data from files created by calling Yelp API
#data_file=pd.read_json("data.txt", lines=False)
#data_file
#data = json.load(open('data.txt'))
#df = pd.DataFrame(data["businesses"])
#df

In [18]:
#data = json.load(open('YelpData.txt'))
#df = pd.DataFrame(data["businesses"])
#df

In [19]:

def read_rest_data(file, source_id=None):
    """Parse a JSON dump of businesses into ORM objects.

    Parameters
    ----------
    file : str or path-like
        Path to a JSON file whose top-level object holds a ``"businesses"``
        list (the notebook uses files created by calling the Yelp API).
    source_id : int, optional
        Value stored in ``source_id`` on every object created here.
        Defaults to the module-level ``DATASOURCE``.

    Returns
    -------
    tuple
        ``(rest_list, rest_cat, categories)`` where

        * ``rest_list`` is a list of ``RestaurantCls`` objects, one per
          business. ``state_id`` is NOT populated here, and ``price_range``
          is ``None`` for entries without a price.
        * ``rest_cat`` is a list of ``(business id, CategoryCls)`` pairs,
          one per category of each business.
        * ``categories`` is a dict mapping category alias to title.

    Raises
    ------
    KeyError
        If a required key (``businesses``, ``id``, ``name``, ``location``,
        ``categories``, ``rating``, ``is_closed``, ``city``, ``zip_code``)
        is missing from the input.
    """
    if source_id is None:
        source_id = DATASOURCE

    # JSON text is UTF-8 by specification; do not rely on the platform default.
    with open(file, "r", encoding="utf-8") as read_file:
        businesses = json.load(read_file)["businesses"]

    # Hash of categories (alias -> title)
    categories = {}

    # List of restaurants
    rest_list = []

    # List of restaurant categories
    rest_cat = []

    for restaurant in businesses:
        business_id = restaurant["id"]

        # location
        location = restaurant["location"]
        # All address entries go into street. Missing, null or
        # whitespace-only lines are skipped so a null address1 cannot
        # break the concatenation.
        address_lines = (
            location.get(key) for key in ("address1", "address2", "address3")
        )
        street = " ".join(
            line for line in address_lines if line and not line.isspace()
        )

        # operational info
        for c in restaurant["categories"]:
            # Object to list of categories
            category_obj = CategoryCls(
                alias=c['alias'], title=c['title'], source_id=source_id
            )
            rest_cat.append((business_id, category_obj))
            categories[c['alias']] = c['title']

        # Price arrives as a run of '$' characters; some entries miss price
        price_text = restaurant.get("price")
        price = price_text.count('$') if price_text else None

        rest_list.append(
            RestaurantCls(
                name=restaurant["name"],
                business_id=business_id,
                city=location["city"],
                street=street,
                zip_code=location["zip_code"],
                price_range=price,
                rating=float(restaurant["rating"]),
                is_closed=bool(restaurant["is_closed"]),
                source_id=source_id,
            )
        )

    print(len(rest_list))
    return rest_list, rest_cat, categories


### Load data from file into objects for further processing
* Read the JSON file directly with the `json` module rather than through a pandas DataFrame to get the values


### Load data from file into objects for further processing


In [20]:
# Location of the Yelp data file, relative to the notebook's working directory
fn = os.path.join(os.pardir, "Data", "YelpData.txt")

# Parse it into restaurant objects, (business id, category) pairs and the
# alias -> title dictionary
(restaurants,
 restaurant_categories,
 categories) = read_rest_data(fn)

50


In [21]:
# Connect to the project database and reflect its existing tables
import psycopg2

# The connection URL can be supplied through the ETL_DATABASE_URL environment
# variable so real credentials need not live in the notebook; the fallback is
# the local development database.
DATABASE_URL = os.environ.get(
    "ETL_DATABASE_URL",
    'postgresql+psycopg2://postgres:postgres@localhost/ETLproject',
)
engine = create_engine(DATABASE_URL)

# Reflect an existing database into a new model.
# Kept under its own name so the declarative `Base` that RestaurantCls and
# CategoryCls are built on is not rebound to the automap base.
ReflectedBase = automap_base()
# reflect the tables
ReflectedBase.prepare(engine, reflect=True)

# Names of the reflected tables
ReflectedBase.classes.keys()


['state',
 'restaurant',
 'category',
 'restaurant_category',
 'state_pce',
 'state_population']

In [22]:
# Connect to the database with psycopg2 directly (without SQLAlchemy).
# Each setting can be overridden through the matching environment variable;
# the fallbacks are the local development values.
pg_params = {
    "user": os.environ.get("PGUSER", "postgres"),
    "password": os.environ.get("PGPASSWORD", "postgres"),
    "host": os.environ.get("PGHOST", "localhost"),
    "port": os.environ.get("PGPORT", "5432"),
    "database": os.environ.get("PGDATABASE", "ETLproject"),
}

# Open a single connection; a second, unused connection would stay open
# until the kernel exits.
connection = psycopg2.connect(**pg_params)
conn = connection  # same connection under the other name this cell defined
cursor = connection.cursor()


In [28]:
# Add entries into the category table using the SQLAlchemy ORM.
# The issue with this approach is no support for upsert, so aliases already
# present in the table are skipped explicitly; this keeps the cell re-runnable.
# https://www.pythonsheets.com/notes/python-sqlalchemy.html
from sqlalchemy.orm import Session  # the class; only the `session` module is imported at the top

session = Session(engine)

# Note that adding to the session does not update the table. It queues up those queries.
# https://stackoverflow.com/questions/9911467/sqlalchemy-update-if-unique-key-exists 
try:
    # Aliases that are already stored, fetched once before the loop
    existing_aliases = {row[0] for row in session.query(CategoryCls.alias)}

    for alias, title in categories.items():
        if alias in existing_aliases:
            continue
        print(f"{alias} - {title}")
        categoryObj = CategoryCls(alias=alias, title=title, source_id=DATASOURCE)
        session.add(categoryObj)
    # commit() flushes whatever remaining changes remain to the database, and commits the transaction.
    session.commit()
except SQLAlchemyError as e:
    # Discard the failed transaction so nothing is left half-applied
    session.rollback()
    print(e)
finally:
    session.close()

icecream - Ice Cream & Frozen Yogurt
bakeries - Bakeries
french - French
soulfood - Soul Food
breakfast_brunch - Breakfast & Brunch
newamerican - American (New)
wine_bars - Wine Bars
cafes - Cafes
desserts - Desserts
seafood - Seafood
seafoodmarkets - Seafood Markets
raw_food - Live/Raw Food
customcakes - Custom Cakes
tradamerican - American (Traditional)
steak - Steakhouses
burmese - Burmese
asianfusion - Asian Fusion
greek - Greek
mediterranean - Mediterranean
chinese - Chinese
chicken_wings - Chicken Wings
noodles - Noodles
modern_european - Modern European
cakeshop - Patisserie/Cake Shop
macarons - Macarons
mexican - Mexican
bars - Bars
vietnamese - Vietnamese
sandwiches - Sandwiches
pizza - Pizza
italian - Italian
cocktailbars - Cocktail Bars
ramen - Ramen
british - British
fishnchips - Fish & Chips
donuts - Donuts
landmarks - Landmarks & Historical Buildings
parks - Parks
tapasmallplates - Tapas/Small Plates
latin - Latin American
brazilian - Brazilian
playgrounds - Playgrounds
b

In [None]:
# Load restaurants into database
# (for now this only lists the parsed restaurants; nothing is written yet)
for r in restaurants:
    # The mapped attribute is lower-case `name`; `r.Name` raises AttributeError
    print(r.name)


In [None]:
# Load restaurant categories into database
# Each entry is a (business id, CategoryCls) pair; print one line per pair
for business_id, category in restaurant_categories:
    print(f"{business_id}; {category.alias}; {category.title}")