In [78]:
# Dependencies

import csv
import json
import os
from pprint import pprint

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from flask import Flask, jsonify
from sqlalchemy import create_engine, inspect


### Store CSV into DataFrame

In [79]:
# Read the NYC restaurant health inspection results CSV into a pandas DataFrame
# and preview its first five rows.
csv_file = "../Resources/DOHMH_New_York_City_Restaurant_Inspection_Results.csv"
inspection_data_df = pd.read_csv(filepath_or_buffer=csv_file)
inspection_data_df.head(n=5)

Unnamed: 0,CAMIS,DBA,BORO,BUILDING,STREET,ZIPCODE,PHONE,CUISINE DESCRIPTION,INSPECTION DATE,ACTION,...,RECORD DATE,INSPECTION TYPE,Latitude,Longitude,Community Board,Council District,Census Tract,BIN,BBL,NTA
0,50059672,GOOD FRIENDS 1,Brooklyn,1376,NOSTRAND AVE,11226.0,7182872345,Chinese,09/06/2018,Establishment Closed by DOHMH. Violations wer...,...,10/11/2019,Cycle Inspection / Initial Inspection,40.653158,-73.949837,317.0,40.0,82000.0,3116688.0,3050850000.0,BK60
1,50034192,K'OOK,Manhattan,324,E 6TH ST,10003.0,2122540300,Korean,08/14/2017,Violations were cited in the following area(s).,...,10/11/2019,Cycle Inspection / Initial Inspection,40.727066,-73.98778,103.0,2.0,3800.0,1006234.0,1004470000.0,MN22
2,50033885,A&H DELI,Manhattan,431,7TH AVE,10001.0,2125636200,American,06/06/2016,Violations were cited in the following area(s).,...,10/11/2019,Cycle Inspection / Re-inspection,40.75071,-73.990811,105.0,3.0,10100.0,1015218.0,1008090000.0,MN17
3,41519373,BUNGALO,Queens,3203,BROADWAY,11106.0,7182047010,Armenian,01/21/2017,No violations were recorded at the time of thi...,...,10/11/2019,Inter-Agency Task Force / Initial Inspection,40.761538,-73.92445,401.0,22.0,6100.0,4008406.0,4006140000.0,QN70
4,50016112,ANTOJITOS ECUATORIANOS,Brooklyn,3398,FULTON ST,11208.0,7182770970,"Latin (Cuban, Dominican, Puerto Rican, South &...",07/11/2018,Violations were cited in the following area(s).,...,10/11/2019,Cycle Inspection / Re-inspection,40.684208,-73.870173,305.0,37.0,118400.0,3092908.0,3041490000.0,BK83


### Create a new DataFrame with selected columns

In [113]:
# Keep only the columns used downstream -- DBA (name), BUILDING, STREET,
# ZIPCODE, BORO and GRADE -- and discard any row that has a missing value
# in one of them.
selected_columns = ['DBA', 'BUILDING', 'STREET', 'ZIPCODE', 'BORO', 'GRADE']
new_inspection_data_df = inspection_data_df[selected_columns].dropna(axis=0, how='any')
new_inspection_data_df.head(n=5)

Unnamed: 0,DBA,BUILDING,STREET,ZIPCODE,BORO,GRADE
2,A&H DELI,431,7TH AVE,10001.0,Manhattan,A
4,ANTOJITOS ECUATORIANOS,3398,FULTON ST,11208.0,Brooklyn,A
8,I LAND FISH & GRILL,7911,FLATLANDS AVE,11236.0,Brooklyn,B
9,CAFE LAFAYETTE,80,LAFAYETTE STREET,10013.0,Manhattan,A
12,FLY BAR,4224,COLLEGE POINT BLVD,11355.0,Queens,A


### Yelp API Response

In [117]:
# Yelp Fusion API credentials and endpoint.
#
# The API key is a secret, so it is read from the YELP_API_KEY environment
# variable instead of being stored in the notebook. A key that has ever been
# committed in a notebook should be treated as leaked and rotated.
api_key = os.environ.get('YELP_API_KEY')
if not api_key:
    # Fail here with a clear message rather than later with opaque 401 responses.
    raise RuntimeError(
        'Set the YELP_API_KEY environment variable to your Yelp Fusion API key '
        'before running this notebook.'
    )

# Every request authenticates with a bearer-token Authorization header.
headers = {'Authorization': 'Bearer %s' % api_key}

# Business Search endpoint URL.
url = 'https://api.yelp.com/v3/businesses/search'

In [123]:
# Pull Yelp restaurant listings for New York, one cuisine category at a time.
# Each category is paged 50 results per request over offsets 0, 50, ..., 950,
# i.e. up to 1,000 restaurants per category and up to 5,000 in total.
# NOTE(review): Yelp documents `categories` as a list of category aliases;
# confirm that these capitalised names filter the way they are intended to.
categories = ['Italian', 'American', 'Mexican', 'Chinese', 'Cuban']

# One parsed JSON response (a dict) per successfully fetched page.
restaurants = []

for category in categories:
    for offset in range(0, 1000, 50):
        params = {
            'term': 'restaurants',
            'categories': category,
            'location': 'New York',
            'limit': 50,
            'offset': offset,
        }
        try:
            # The timeout keeps one stalled connection from hanging the whole cell.
            reply = requests.get(url, params=params, headers=headers, timeout=30)
        except requests.exceptions.RequestException as error:
            # Best effort: report the failed page and carry on with the next one.
            print(f'Skipping {category} (offset {offset}): {error}')
            continue

        if not reply.ok:
            # Error responses must not be stored as if they were result pages.
            print(f'Skipping {category} (offset {offset}): HTTP {reply.status_code}')
            continue

        restaurants.append(reply.json())

In [124]:
#pprint(restaurants[1])

In [125]:
# Flatten the raw Yelp responses into one record per business, keeping only
# the fields used downstream: restaurant name, city and rating.
data = []

for restaurant in restaurants:
    # A response without a list under 'businesses' (for example an API error
    # payload) contributes no records. The input is left unmodified so that
    # this cell gives the same result every time it is re-run.
    businesses = restaurant.get('businesses') if isinstance(restaurant, dict) else None
    if not isinstance(businesses, list):
        continue

    for item in businesses:
        try:
            # A fresh dict per business; no shared mutable record is reused.
            record = {
                'Restaurant Name': item['name'],
                'City': item['location']['city'],
                'Rating': item['rating'],
            }
        except (KeyError, TypeError):
            # Skip only the malformed business, not the rest of its page.
            continue
        data.append(record)

In [133]:
# Build a DataFrame from the extracted records (restaurant name, city, rating)
# and preview its first five rows.
restaurants_df = pd.DataFrame(data=data)
restaurants_df.head(n=5)

Unnamed: 0,City,Rating,Restaurant Name
0,New York,4.5,Upstate
1,New York,4.5,Amélie
2,New York,4.0,Clinton Street Baking Company
3,New York,4.5,LoveMama
4,New York,4.5,Barn Joo 35


In [145]:
# Drop rows in which every column is NaN, then renumber the index from 0.
# reset_index returns a new DataFrame instead of modifying the original, so
# its result has to be assigned back for the reset to take effect.
restaurants_df = (
    restaurants_df
    .dropna(axis='index', how='all')
    .reset_index(drop=True)
)

# Non-null count per column, as a sanity check on the cleaned data.
restaurants_df.count()

City               5000
Rating             5000
Restaurant Name    5000
dtype: int64

In [None]:
# Upper-case the restaurant names so they match the DBA column of the health
# inspection DataFrame; the name is intended to serve as our primary key.
uppercase_names = restaurants_df['Restaurant Name'].str.upper()
restaurants_df['Restaurant Name'] = uppercase_names
restaurants_df.head(n=5)

### Connect to local database

In [27]:
# Build the PostgreSQL connection from environment variables so that the
# database password is not stored in the notebook. Every setting except the
# password falls back to the value this notebook used before.
db_user = os.environ.get('POSTGRES_USER', 'postgres')
db_password = os.environ.get('POSTGRES_PASSWORD')
if not db_password:
    raise RuntimeError(
        'Set the POSTGRES_PASSWORD environment variable before connecting '
        'to the database.'
    )
db_host = os.environ.get('POSTGRES_HOST', 'localhost')
db_port = os.environ.get('POSTGRES_PORT', '5432')
db_name = os.environ.get('POSTGRES_DB', 'customer_db')

# NOTE: a password containing URL-reserved characters (such as '@' or '/')
# must be percent-encoded before it is placed in the connection URL.
rds_connection_string = f'{db_user}:{db_password}@{db_host}:{db_port}/{db_name}'
engine = create_engine(f'postgresql://{rds_connection_string}')

### Check for tables

In [28]:
# List the tables currently present in the database.
# Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the inspector API returns the same list of names and works on both.
inspect(engine).get_table_names()

[]

### Use pandas to load the CSV-derived DataFrame into the database

In [29]:
# Append the DataFrame's rows to the customer_name table, leaving the
# DataFrame index out of the table.
# NOTE(review): new_customer_data_df is not defined in any cell visible in this
# notebook -- confirm where it comes from, or whether the inspection DataFrame
# built above was meant to be loaded here instead.
new_customer_data_df.to_sql(
    'customer_name',
    con=engine,
    index=False,
    if_exists='append',
)

### Use pandas to load the JSON-derived DataFrame into the database

In [30]:
# Append the DataFrame's rows to the customer_location table, leaving the
# DataFrame index out of the table.
# NOTE(review): new_customer_location_df is not defined in any cell visible in
# this notebook -- confirm where it comes from, or whether the Yelp DataFrame
# built above was meant to be loaded here instead.
new_customer_location_df.to_sql(
    'customer_location',
    con=engine,
    index=False,
    if_exists='append',
)

### Confirm data has been added by querying the customer_name table
* NOTE: can also check using pgAdmin

In [31]:
# Read the customer_name table back from the database and show its first
# five rows to confirm that the load succeeded.
customer_name_rows = pd.read_sql_query(sql='select * from customer_name', con=engine)
customer_name_rows.head(n=5)

Unnamed: 0,id,first_name,last_name
0,1,Benetta,Cancott
1,2,Lilyan,Cherry
2,3,Ezekiel,Benasik
3,4,Kennedy,Atlay
4,5,Sanford,Salmen


### Confirm data has been added by querying the customer_location table

In [32]:
# Read the customer_location table back from the database and show its first
# five rows to confirm that the load succeeded.
customer_location_rows = pd.read_sql_query(sql='select * from customer_location', con=engine)
customer_location_rows.head(n=5)

Unnamed: 0,id,address,us_state
0,1,043 Mockingbird Place,Indiana
1,2,4 Prentice Point,Indiana
2,3,46 Derek Junction,Texas
3,4,11966 Old Shore Place,Missouri
4,5,5 Evergreen Circle,New York
