In [30]:
# Dependencies

import pandas as pd
from sqlalchemy import create_engine
from flask import Flask, jsonify
import json
import requests

### Store CSV into DataFrame

In [51]:
# Imported NYC Restaurant Health Inspection CSV into Pandas DataFrame

csv_file = "../Resources/DOHMH_New_York_City_Restaurant_Inspection_Results.csv"
inspection_data_df = pd.read_csv(csv_file)
inspection_data_df.head()

Unnamed: 0,CAMIS,DBA,BORO,BUILDING,STREET,ZIPCODE,PHONE,CUISINE DESCRIPTION,INSPECTION DATE,ACTION,...,RECORD DATE,INSPECTION TYPE,Latitude,Longitude,Community Board,Council District,Census Tract,BIN,BBL,NTA
0,50059672,GOOD FRIENDS 1,Brooklyn,1376,NOSTRAND AVE,11226.0,7182872345,Chinese,09/06/2018,Establishment Closed by DOHMH. Violations wer...,...,10/11/2019,Cycle Inspection / Initial Inspection,40.653158,-73.949837,317.0,40.0,82000.0,3116688.0,3050850000.0,BK60
1,50034192,K'OOK,Manhattan,324,E 6TH ST,10003.0,2122540300,Korean,08/14/2017,Violations were cited in the following area(s).,...,10/11/2019,Cycle Inspection / Initial Inspection,40.727066,-73.98778,103.0,2.0,3800.0,1006234.0,1004470000.0,MN22
2,50033885,A&H DELI,Manhattan,431,7TH AVE,10001.0,2125636200,American,06/06/2016,Violations were cited in the following area(s).,...,10/11/2019,Cycle Inspection / Re-inspection,40.75071,-73.990811,105.0,3.0,10100.0,1015218.0,1008090000.0,MN17
3,41519373,BUNGALO,Queens,3203,BROADWAY,11106.0,7182047010,Armenian,01/21/2017,No violations were recorded at the time of thi...,...,10/11/2019,Inter-Agency Task Force / Initial Inspection,40.761538,-73.92445,401.0,22.0,6100.0,4008406.0,4006140000.0,QN70
4,50016112,ANTOJITOS ECUATORIANOS,Brooklyn,3398,FULTON ST,11208.0,7182770970,"Latin (Cuban, Dominican, Puerto Rican, South &...",07/11/2018,Violations were cited in the following area(s).,...,10/11/2019,Cycle Inspection / Re-inspection,40.684208,-73.870173,305.0,37.0,118400.0,3092908.0,3041490000.0,BK83


### Create new data with select columns

In [45]:
# Select relevant columns: DBA (name), Building, Street, Zipcode, Boro, Grade

new_inspection_data_df = inspection_data_df[['DBA', 'BUILDING', 'STREET', 'ZIPCODE', 'BORO', 'GRADE']].copy()
new_inspection_data_df.head()

Unnamed: 0,DBA,BUILDING,STREET,ZIPCODE,BORO,GRADE
0,GOOD FRIENDS 1,1376,NOSTRAND AVE,11226.0,Brooklyn,
1,K'OOK,324,E 6TH ST,10003.0,Manhattan,
2,A&H DELI,431,7TH AVE,10001.0,Manhattan,A
3,BUNGALO,3203,BROADWAY,11106.0,Queens,
4,ANTOJITOS ECUATORIANOS,3398,FULTON ST,11208.0,Brooklyn,A


### Store JSON data into a DataFrame

In [58]:
json_file = "../Resources/yelp_academic_dataset_business.json"
yelp_data_df = pd.read_json(json_file, lines=True)
yelp_data_df.head()

address         192609
attributes      163773
business_id     192609
categories      192127
city            192609
hours           147779
is_open         192609
latitude        192609
longitude       192609
name            192609
postal_code     192609
review_count    192609
stars           192609
state           192609
dtype: int64

### Drop Irrelevant Data (non-restaurants, businesses outside of NYC)

In [69]:
yelp_data_df_ny = yelp_data_df[yelp_data_df.state == 'NJ']
yelp_data_df_ny.head()

Unnamed: 0,address,attributes,business_id,categories,city,hours,is_open,latitude,longitude,name,postal_code,review_count,stars,state
164311,,,U1ln0q5CwPydtPyoIeheUg,"Pets, Pet Services, Dog Walkers, Pet Sitting",Union,"{'Monday': '0:0-0:0', 'Tuesday': '0:0-0:0', 'W...",1,36.016102,-115.058461,Pet Sitting by Ines,7083,8,5.0,NJ


### Clean DataFrame

In [49]:
yelp_data_df_ny = yelp_data_df_ny[["name", "address", "city", "state", "categories", "stars"]].copy()
yelp_data_df_ny.head()

Unnamed: 0,name,address,city,state,categories,stars
595,Nathan's Famous,1038 State Rt 11,Champlain,NY,"Convenience Stores, Hot Dogs, Gas Stations, Bu...",4.0
9916,Lakeside Coffee,109 Lake St,Rouses Point,NY,"Coffee & Tea, Sandwiches, Salad, Food, Restaur...",4.5
22869,China Buffet,876 State Rt 11,Champlain,NY,"Buffets, Restaurants, Szechuan, Chinese, Canto...",2.5
27152,Valero,1038 Rt 11,Champlain,NY,"Hot Dogs, Food, Convenience Stores, Gas Statio...",2.5
47513,Sandy's Deli & Catering,133 Lake St,Rouses Point,NY,"Delis, Restaurants",3.0


### Connect to local database

In [27]:
rds_connection_string = "postgres:rutgers@localhost:5432/customer_db"
engine = create_engine(f'postgresql://{rds_connection_string}')

### Check for tables

In [28]:
engine.table_names()

[]

### Use pandas to load csv converted DataFrame into database

In [29]:
new_customer_data_df.to_sql(name='customer_name', con=engine, if_exists='append', index=False)

### Use pandas to load json converted DataFrame into database

In [30]:
new_customer_location_df.to_sql(name='customer_location', con=engine, if_exists='append', index=False)

### Confirm data has been added by querying the customer_name table
* NOTE: can also check using pgAdmin

In [31]:
pd.read_sql_query('select * from customer_name', con=engine).head()

Unnamed: 0,id,first_name,last_name
0,1,Benetta,Cancott
1,2,Lilyan,Cherry
2,3,Ezekiel,Benasik
3,4,Kennedy,Atlay
4,5,Sanford,Salmen


### Confirm data has been added by querying the customer_location table

In [32]:
pd.read_sql_query('select * from customer_location', con=engine).head()

Unnamed: 0,id,address,us_state
0,1,043 Mockingbird Place,Indiana
1,2,4 Prentice Point,Indiana
2,3,46 Derek Junction,Texas
3,4,11966 Old Shore Place,Missouri
4,5,5 Evergreen Circle,New York
