In [1]:
# Import Dependencies

import requests
import json
import pandas as pd
import numpy as np
import datetime
from config import api_key
from config import google_key
import time

from config import password
from config import username
import psycopg2
import sqlalchemy
import urllib
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import PrimaryKeyConstraint
from sqlalchemy.orm import Session
from sqlalchemy.orm import sessionmaker
from sqlalchemy import Column, Integer, String, Float, DateTime
from sqlalchemy.schema import Sequence

print(f'Dependencies imported...',flush=True)
print('---------------',flush=True)

Dependencies imported...
---------------


In [3]:
# Download up to 1000 restaurants from the Yelp API with Minneapolis as the search parameter.
# The search endpoint is paged, so the loop requests 50 businesses per call and
# accumulates the raw business dicts in `data`.

data = []

headers = {'Authorization': 'Bearer %s' % api_key}

url='https://api.yelp.com/v3/businesses/search'

print('Downloading Yelp Data...',flush=True)

for offset in range(0, 1000, 50):

    params = {
        'limit':50,
        'location':'Minneapolis, MN',
        'categories':'restaurants',
        'offset':offset
        }

    # A timeout keeps a stalled connection from hanging the notebook indefinitely.
    response=requests.get(url, params=params, headers=headers, timeout=30)
    if response.status_code == 200:
        data += response.json().get('businesses', [])
    elif response.status_code == 400:
        print('400 Bad Request')
        break
    else:
        # Any other failure (auth, rate limit, server error) used to be skipped
        # silently, leaving an unnoticed gap in the downloaded pages.
        print(f'Yelp request failed at offset {offset}: HTTP {response.status_code}',flush=True)
        break

print(f'Yelp data downloaded...  There are {len(data)} records...',flush=True)
print('---------------',flush=True)

Downloading Yelp Data...
Yelp data downloaded...  There are 1000 records...
---------------


In [4]:
# Flatten the raw Yelp payload into one record per business that is open and
# located in Minneapolis. Iterating the records directly removes the parallel
# index counter, and building the dict in place avoids rebinding the
# module-level `url` to a per-business value.
yelp_list=[]
for business in data:
    # Same filter as before: only businesses explicitly flagged as not closed.
    keep = business['is_closed']==False and business['location']['city']=="Minneapolis"
    if not keep:
        continue
    yelp_list.append({
        "yelpid":business['id'],
        "name":business['name'],
        "image":business['image_url'],
        "url":business['url'],
        "latitude":business['coordinates']['latitude'],
        "longitude":business['coordinates']['longitude'],
        "phone":business['display_phone'],
        # Keep only the human-readable category titles.
        "categories":[category['title'] for category in business['categories']],
        "transactions":business['transactions'],
        # display_address is a list of address lines; store it as one string.
        "address":' '.join(map(str, business['location']['display_address'])),
        "rating":business['rating'],
        "reviews":business['review_count'],
    })

print('yelp_list with needed data has been built.',flush=True)
print('---------------',flush=True)

yelp_list with needed data has been built.
---------------


In [5]:
# Build the Yelp frame in one pass: fix the column order, then keep a single
# row for each (name, address) pair.
yelp_columns = ['yelpid','name','image','url','latitude','longitude','address','phone','categories','transactions','rating','reviews']
yelp_df = (
    pd.DataFrame(yelp_list)
    .loc[:, yelp_columns]
    .drop_duplicates(subset=['name','address'])
)

print('Yelp DataFrame now stored in memory as "yelp_df"',flush=True)
print(f'Removed duplicates and restaurants outside of Minneapolis. Leaving {len(yelp_df)} restaurants.',flush=True)
print('---------------',flush=True)

Yelp DataFrame now stored in memory as "yelp_df"
Removed duplicates and restaurants outside of Minneapolis. Leaving 814 restaurants.
---------------


In [51]:

# Start the master records from copies of the Yelp records. A plain assignment
# would alias yelp_list, so enriching master_list in place would also rewrite
# the Yelp records.
# NOTE(review): yelp_list still contains the duplicates that yelp_df drops;
# confirm whether the master table should be built from the de-duplicated rows.
master_list=[dict(record) for record in yelp_list]

# Preview a couple of records instead of rendering the whole list.
master_list[:2]

[{'yelpid': 'G9KdODINirmdSuYaBfBppA',
  'name': 'Butcher & The Boar',
  'image': 'https://s3-media1.fl.yelpcdn.com/bphoto/raQA0N0eKpTtW1kNPe6OcQ/o.jpg',
  'url': 'https://www.yelp.com/biz/butcher-and-the-boar-minneapolis?adjust_creative=P1RJNSLd83EWzgeA0sG62A&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=P1RJNSLd83EWzgeA0sG62A',
  'latitude': 44.974777,
  'longitude': -93.279792,
  'phone': '(612) 238-8888',
  'categories': ['American (New)', 'Beer Gardens', 'Venues & Event Spaces'],
  'transactions': ['delivery'],
  'address': '1121 Hennepin Ave Minneapolis, MN 55403',
  'rating': 4.5,
  'reviews': 1623},
 {'yelpid': 'zv_XJAQr9D3PNR5eHYhC5w',
  'name': 'Bar La Grassa',
  'image': 'https://s3-media2.fl.yelpcdn.com/bphoto/SKbCky2Pi-5KRQ6Fh9_anQ/o.jpg',
  'url': 'https://www.yelp.com/biz/bar-la-grassa-minneapolis?adjust_creative=P1RJNSLd83EWzgeA0sG62A&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=P1RJNSLd83EWzgeA0sG62A',
  'latitude': 44.98

In [8]:
# print('Matching Yelp data list to Google API in order to create Google List...   This will take some time, as we match each record...',flush=True)

# url = 'https://maps.googleapis.com/maps/api/place/findplacefromtext/json?'
# google_data=[]

# for index,row in yelp_df.iterrows():
    
#     params = {
#         'key':google_key,
#         'input':row['name'],
#         'inputtype':'textquery',
#         'locationbias': 'point:' + str(row['latitude']) + ", " + str(row['longitude']),
#         'radius': 10,
#         'fields':'place_id,name,formatted_address,geometry,rating,user_ratings_total,price_level,photos,icon'
#         }
    
#     response = requests.get(url, params=params)

#     if len(response.json()['candidates'])>0:
#         google_data.append(response.json()['candidates'][0])
#     else:
#         google_data.append("")
        
#     print("Restaurants Remaining: {:3}".format(len(yelp_df)-index), end="\r",flush=True)

# print(f'Google match has been completed...  There are {len(google_data)} records',flush=True)


Matching Yelp data list to Google API...   This will take some time, as we match each record...
Google match has been completed...  There are 814 records


In [74]:
print('Matching Yelp data list to Google API in order to append Google columns to Master...   This will take some time, as we match each record...',flush=True)

url = 'https://maps.googleapis.com/maps/api/place/findplacefromtext/json?'

# Google candidate field -> key added to the master record (insertion order preserved).
google_fields = {
    'name':'google_name',
    'place_id':'google_id',
    'rating':'google_rating',
    'user_ratings_total':'google_reviews',
    'price_level':'google_price',
}

for i, item in enumerate(master_list):

    # NOTE(review): Find Place documents locationbias as "point:lat,lng" or
    # "circle:radius@lat,lng"; confirm the separate 'radius' key has any effect.
    params = {
        'key':google_key,
        'input':item['name'],
        'inputtype':'textquery',
        'locationbias': 'point:' + str(item['latitude']) + ", " + str(item['longitude']),
        'radius': 10,
        'fields':'place_id,name,rating,user_ratings_total,price_level'
        }

    response = requests.get(url, params=params)
    # Parse once, and keep the result out of the name `json` so the imported
    # json module is not shadowed for the rest of the notebook.
    candidates = response.json().get('candidates', [])

    if len(candidates)>0:
        # Only the first (best) candidate is used; absent fields are simply not added.
        best_match = candidates[0]
        for field, column in google_fields.items():
            if field in best_match:
                item[column] = best_match[field]

    print("Restaurants Remaining: {:3}".format(len(master_list)-i), end="\r",flush=True)

print(f'Google match has been completed...',flush=True)

Matching Yelp data list to Google API in order to append Google columns to Master...   This will take some time, as we match each record...
Google match has been completed...


In [85]:
# Preview the first enriched records rather than rendering the entire list.
master_list[:2]

[{'yelpid': 'G9KdODINirmdSuYaBfBppA',
  'name': 'Butcher & The Boar',
  'image': 'https://s3-media1.fl.yelpcdn.com/bphoto/raQA0N0eKpTtW1kNPe6OcQ/o.jpg',
  'url': 'https://www.yelp.com/biz/butcher-and-the-boar-minneapolis?adjust_creative=P1RJNSLd83EWzgeA0sG62A&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=P1RJNSLd83EWzgeA0sG62A',
  'latitude': 44.974777,
  'longitude': -93.279792,
  'phone': '(612) 238-8888',
  'categories': ['American (New)', 'Beer Gardens', 'Venues & Event Spaces'],
  'transactions': ['delivery'],
  'address': '1121 Hennepin Ave Minneapolis, MN 55403',
  'rating': 4.5,
  'reviews': 1623,
  'google_name': 'Butcher & The Boar',
  'google_id': 'ChIJN2-3wpQys1IRK9G73sv97zg',
  'google_rating': 4.6,
  'google_reviews': 2147,
  'google_price': 3},
 {'yelpid': 'zv_XJAQr9D3PNR5eHYhC5w',
  'name': 'Bar La Grassa',
  'image': 'https://s3-media2.fl.yelpcdn.com/bphoto/SKbCky2Pi-5KRQ6Fh9_anQ/o.jpg',
  'url': 'https://www.yelp.com/biz/bar-la-grassa-minneapol

In [86]:
# Materialise the enriched records as a DataFrame and preview its first rows.
master_df = pd.DataFrame(data=master_list)
master_df.head(5)

Unnamed: 0,yelpid,name,image,url,latitude,longitude,phone,categories,transactions,address,rating,reviews,google_name,google_id,google_rating,google_reviews,google_price
0,G9KdODINirmdSuYaBfBppA,Butcher & The Boar,https://s3-media1.fl.yelpcdn.com/bphoto/raQA0N...,https://www.yelp.com/biz/butcher-and-the-boar-...,44.974777,-93.279792,(612) 238-8888,"[American (New), Beer Gardens, Venues & Event ...",[delivery],"1121 Hennepin Ave Minneapolis, MN 55403",4.5,1623,Butcher & The Boar,ChIJN2-3wpQys1IRK9G73sv97zg,4.6,2147.0,3.0
1,zv_XJAQr9D3PNR5eHYhC5w,Bar La Grassa,https://s3-media2.fl.yelpcdn.com/bphoto/SKbCky...,https://www.yelp.com/biz/bar-la-grassa-minneap...,44.98954,-93.278594,(612) 333-3837,"[Italian, Desserts, Wine Bars]",[],"800 Washington Ave N Minneapolis, MN 55401",4.5,1256,Bar La Grassa,ChIJBcDbP4oys1IRg97QGrkjtH8,4.7,1457.0,3.0
2,6vaAze9Fxuco249L3BhEdQ,112 Eatery,https://s3-media2.fl.yelpcdn.com/bphoto/6oZdXE...,https://www.yelp.com/biz/112-eatery-minneapoli...,44.98267,-93.2716,(612) 343-7696,"[American (New), Wine Bars, Venues & Event Spa...",[delivery],"112 N 3rd St Minneapolis, MN 55401",4.5,1167,112 Eatery,ChIJ6ZJ3YYUys1IRihzyYe0WiGs,4.7,991.0,3.0
3,5s2sT8ux7og5XfK-180r2g,George & The Dragon,https://s3-media3.fl.yelpcdn.com/bphoto/u7Pn5-...,https://www.yelp.com/biz/george-and-the-dragon...,44.912151,-93.290436,(612) 208-1047,"[American (New), Pubs, British]","[delivery, pickup]","813 W 50th St Minneapolis, MN 55419",4.5,892,George and the Dragon,ChIJD0SgSFIm9ocRdLlVXKBGwMU,4.7,1252.0,2.0
4,5olLs-K-_k_rWrrNLjvy0w,Spoon and Stable,https://s3-media2.fl.yelpcdn.com/bphoto/QaDSlg...,https://www.yelp.com/biz/spoon-and-stable-minn...,44.985489,-93.269536,(612) 224-9850,"[American (New), Bars]",[delivery],"211 N 1st St Minneapolis, MN 55401",4.5,899,Spoon and Stable,ChIJpQmgMoQys1IRID1e5YB8eMo,4.7,1560.0,4.0


In [90]:
# Create a new Aggregate Score based on Yelp Rating and Google Rating and add Column to the DataFrame


# Function that computes the review-count-weighted rating of each restaurant
def aggregate_rating (x):
    """Combine the Yelp and Google ratings, weighting each by its review count.

    Parameters
    ----------
    x : mapping or pandas.Series
        Must provide 'reviews' and 'rating' (Yelp). 'google_reviews' and
        'google_rating' are optional and may be missing or NaN.

    Returns
    -------
    float
        The weighted mean rating. When the Google figures are missing the
        result is based on Yelp alone; when there are no reviews at all the
        result is NaN instead of a division by zero.
    """
    yelp_reviews = x['reviews']
    yelp_rating = x['rating']
    google_reviews = x.get('google_reviews')
    google_rating = x.get('google_rating')
    # A restaurant without a usable Google match contributes no Google weight.
    if pd.isna(google_reviews) or pd.isna(google_rating):
        google_reviews, google_rating = 0, 0.0
    review_count = yelp_reviews+google_reviews
    if review_count == 0:
        return float('nan')
    # Calculation
    return ((yelp_rating*yelp_reviews)+(google_rating*google_reviews))/review_count

def total_reviews (x):
    """Return the combined number of Yelp and Google reviews for one restaurant.

    `x` is a mapping or pandas.Series providing 'reviews' (Yelp) and,
    optionally, 'google_reviews'. A missing or NaN Google count is treated as
    zero so the total stays defined for restaurants without a Google match.
    """
    yelp_reviews = x['reviews']
    google_reviews = x.get('google_reviews')
    if pd.isna(google_reviews):
        return yelp_reviews
    return yelp_reviews+google_reviews

# Add the derived columns one at a time, row by row; each apply sees the frame
# including any column added before it.
derived_columns = (
    ('agg_rating', aggregate_rating),
    ('total_reviews', total_reviews),
)
for column_name, row_function in derived_columns:
    master_df[column_name] = master_df.apply(row_function, axis=1)

# Preview the first rows with the new columns.
master_df.head()

Unnamed: 0,yelpid,name,image,url,latitude,longitude,phone,categories,transactions,address,rating,reviews,google_name,google_id,google_rating,google_reviews,google_price,agg_rating,total_reviews
0,G9KdODINirmdSuYaBfBppA,Butcher & The Boar,https://s3-media1.fl.yelpcdn.com/bphoto/raQA0N...,https://www.yelp.com/biz/butcher-and-the-boar-...,44.974777,-93.279792,(612) 238-8888,"[American (New), Beer Gardens, Venues & Event ...",[delivery],"1121 Hennepin Ave Minneapolis, MN 55403",4.5,1623,Butcher & The Boar,ChIJN2-3wpQys1IRK9G73sv97zg,4.6,2147.0,3.0,4.55695,3770.0
1,zv_XJAQr9D3PNR5eHYhC5w,Bar La Grassa,https://s3-media2.fl.yelpcdn.com/bphoto/SKbCky...,https://www.yelp.com/biz/bar-la-grassa-minneap...,44.98954,-93.278594,(612) 333-3837,"[Italian, Desserts, Wine Bars]",[],"800 Washington Ave N Minneapolis, MN 55401",4.5,1256,Bar La Grassa,ChIJBcDbP4oys1IRg97QGrkjtH8,4.7,1457.0,3.0,4.607409,2713.0
2,6vaAze9Fxuco249L3BhEdQ,112 Eatery,https://s3-media2.fl.yelpcdn.com/bphoto/6oZdXE...,https://www.yelp.com/biz/112-eatery-minneapoli...,44.98267,-93.2716,(612) 343-7696,"[American (New), Wine Bars, Venues & Event Spa...",[delivery],"112 N 3rd St Minneapolis, MN 55401",4.5,1167,112 Eatery,ChIJ6ZJ3YYUys1IRihzyYe0WiGs,4.7,991.0,3.0,4.591844,2158.0
3,5s2sT8ux7og5XfK-180r2g,George & The Dragon,https://s3-media3.fl.yelpcdn.com/bphoto/u7Pn5-...,https://www.yelp.com/biz/george-and-the-dragon...,44.912151,-93.290436,(612) 208-1047,"[American (New), Pubs, British]","[delivery, pickup]","813 W 50th St Minneapolis, MN 55419",4.5,892,George and the Dragon,ChIJD0SgSFIm9ocRdLlVXKBGwMU,4.7,1252.0,2.0,4.616791,2144.0
4,5olLs-K-_k_rWrrNLjvy0w,Spoon and Stable,https://s3-media2.fl.yelpcdn.com/bphoto/QaDSlg...,https://www.yelp.com/biz/spoon-and-stable-minn...,44.985489,-93.269536,(612) 224-9850,"[American (New), Bars]",[delivery],"211 N 1st St Minneapolis, MN 55401",4.5,899,Spoon and Stable,ChIJpQmgMoQys1IRID1e5YB8eMo,4.7,1560.0,4.0,4.626881,2459.0


In [31]:
# Flatten each Google Places candidate into one record. An empty-string entry
# in google_data marks "no candidate found" and yields a blank placeholder, so
# google_list keeps the same length and order as google_data.
google_list=[]

for places in google_data:
    if places != "":
        # Read every field with .get so a key missing from this candidate
        # becomes None, instead of silently reusing the value left over from
        # the previous candidate (or raising NameError on the first one).
        location = places.get('geometry', {}).get('location', {})
        business_dict = {
            "googleplacesid":places.get('place_id'),
            "icon":places.get('icon'),
            "photos":[photo['html_attributions'] for photo in places.get('photos', [])],
            "name":places.get('name'),
            "latitude":location.get('lat'),
            "longitude":location.get('lng'),
            "address":places.get('formatted_address'),
            "rating":places.get('rating'),
            "reviews":places.get('user_ratings_total'),
            "price":places.get('price_level'),
        }

    else:
        business_dict = {"googleplacesid":"","icon":"","photos":"","name":"","latitude":"","longitude":"","address":"", "rating":"","reviews":"","price":""}

    google_list.append(business_dict)

print('google_list with needed data has been built.',flush=True)

google_list with needed data has been built.


In [32]:
# Turn the flattened Google records into a frame, discard the blank
# placeholders, keep one row per place id, and fix the column order.
google_columns = ['googleplacesid','name','latitude','longitude','address','rating','reviews','price','icon','photos']
google_frame_all = pd.DataFrame(google_list)
has_match = google_frame_all['name'] != ""
google_df = (
    google_frame_all[has_match]
    .drop_duplicates(subset=['googleplacesid'])
    .loc[:, google_columns]
)

# Persist a copy for inspection outside the notebook.
google_df.to_csv('DataFiles/googledata.csv')


print('Google DataFrame now stored in memory as "google_df".',flush=True)
print(f'Removed null entries.  {len(google_df)} restaurants remain.',flush=True)
print('---------------',flush=True)

Google DataFrame now stored in memory as "google_df".
Removed null entries.  797 restaurants remain.
---------------


In [33]:
# Pair every Yelp restaurant with the Google result fetched for it, so the
# names and addresses can be compared side by side.
# google_list holds one entry per yelp_df row, in row order (the Google lookup
# iterated yelp_df), whereas yelp_list still contains the duplicates that
# yelp_df dropped. Pairing by position against yelp_list therefore drifted out
# of alignment after the first removed duplicate.
compare_list=[]

for (_, yelp_row), google_row in zip(yelp_df.iterrows(), google_list):

    compare = {"Yelp":yelp_row['name'],"Google":google_row['name'],"GoogleAddress":google_row['address'],"Yelp Address":yelp_row['address']}
    compare_list.append(compare)

compare_df = pd.DataFrame(compare_list)
compare_df.to_csv('DataFiles/compare.csv')

print('"compare_df" has been stored in memory and csv "compare.csv" has been saved in DataFiles folder to allow easy comparison between Yelp and Google data.',flush=True)
print('---------------',flush=True)

"compare_df" has been stored in memory and csv "compare.csv" has been saved in DataFiles folder to allow easy comparison between Yelp and Google data.
---------------


In [None]:
from urllib.parse import quote

print('Matching Yelp data list to Minneapolis Health Inspection API...   This will take some time, as we match each record...',flush=True)

inspection_data=[]

# Request pieces that do not change between restaurants.
url = 'https://services.arcgis.com/afSMGVsC7QlRK1kZ/arcgis/rest/services/Food_Inspections/FeatureServer/0/query?'
outfields = "&outFields=BusinessName,HealthFacilityIDNumber,FullAddress,InspectionType,DateOfInspection,InspectionIDNumber,InspectionScore,Latitude,Longitude,FoodCodeText,ViolationPoints,InspectionResult,FoodCodeItem,InspectorComments,ViolationStatus,ViolationPriority&returnGeometry=false&outSR=4326"
# Kept out of the name `json` so the imported json module is not shadowed.
response_format = '&f=json'

# enumerate gives the true loop position; the frame's index labels have gaps
# after drop_duplicates, which made the "remaining" counter wrong.
for position, (index,row) in enumerate(yelp_df.iterrows()):

    biz = row['name']

    # Match on the first word of the name, upper-cased, without quotes/ampersands.
    biz_string = biz.split(' ',1)[0].upper()
    biz_string = biz_string.replace("'","")
    biz_string = biz_string.replace("&","")
    # Percent-encode so characters such as '#' or '%' cannot break the query string.
    biz_pattern = quote(biz_string, safe='')

    # Bounding box of +/- 0.0015 degrees around the Yelp coordinates.
    minlat=row['latitude']-.0015
    maxlat=row['latitude']+.0015
    minlon=row['longitude']-.0015
    maxlon=row['longitude']+.0015

    params = f"where=BusinessName%20like%20'%25{biz_pattern}%25'%20AND%20Latitude%20%3E%3D%20{minlat}%20AND%20Latitude%20%3C%3D%20{maxlat}%20AND%20Longitude%20%3E%3D%20{minlon}%20AND%20Longitude%20%3C%3D%20{maxlon}"

    full_url = url+params+outfields+response_format

    response = requests.get(full_url)

    # The old check compared the Response object with "" and was always true.
    # Test the HTTP status instead, and tolerate payloads without 'features'.
    if response.ok:
        inspection_data += response.json().get('features', [])

    print("Restaurants Remaining: {:3}".format(len(yelp_df)-position), end="\r",flush=True)

print(f'Inspection data match has been completed...  There are {len(inspection_data)} records',flush=True)
print('---------------',flush=True)

In [21]:

    
# Download the inspection records of every facility whose FacilityCategory is
# 'RESTAURANT' in a single request.
url = 'https://services.arcgis.com/afSMGVsC7QlRK1kZ/arcgis/rest/services/Food_Inspections/FeatureServer/0/query?'
params = "where=FacilityCategory%20%3D%20%27RESTAURANT%27"
outfields = "&outFields=BusinessName,HealthFacilityIDNumber,FullAddress,InspectionType,DateOfInspection,InspectionIDNumber,InspectionScore,Latitude,Longitude,FoodCodeText,ViolationPoints,InspectionResult,FoodCodeItem,InspectorComments,ViolationStatus,ViolationPriority&returnGeometry=false&outSR=4326"
# Kept out of the name `json` so the imported json module is not shadowed.
response_format = '&f=json'

full_url = url+params+outfields+response_format

response = requests.get(full_url)
# Fail with the HTTP error itself rather than a KeyError on 'features' below.
response.raise_for_status()

inspection_data=response.json()['features']

len(inspection_data)


42408

In [None]:
# Preview the first raw features; the complete payload is too large to render usefully.
inspection_data[:3]

In [22]:
# Extract the attribute dict of every feature and render the inspection date,
# delivered as epoch milliseconds, as a 'YYYY/MM/DD' string (UTC).
inspection_data_list = []

for records in inspection_data:
    # Work on a copy: converting the date inside the raw payload made this cell
    # fail on a second run, because the timestamp had already become a string.
    item = dict(records['attributes'])
    inspected_ms = item['DateOfInspection']
    # Leave a missing date as None instead of crashing on the division.
    if inspected_ms is not None:
        item['DateOfInspection']=time.strftime('%Y/%m/%d',time.gmtime(inspected_ms/1000))
    inspection_data_list.append(item)

print('inspection_data_list with needed data has been built.',flush=True)
print('---------------',flush=True)

inspection_data_list with needed data has been built.
---------------


In [23]:
# Preview the first flattened records rather than rendering the entire list.
inspection_data_list[:3]

[{'BusinessName': 'SOTAROL UPTOWN',
  'HealthFacilityIDNumber': 'LIC107101',
  'FullAddress': '2935 GIRARD AVE S',
  'InspectionType': 'Routine',
  'DateOfInspection': '2017/07/15',
  'InspectionIDNumber': 194952,
  'InspectionScore': 94,
  'Latitude': 44.94885,
  'Longitude': -93.29663,
  'FoodCodeText': 'Develop written procedures prior to using  time as a public health control for time/temperature control for safety food and maintain the procedures in the food establishment.',
  'ViolationPoints': 2,
  'InspectionResult': 'Complete',
  'FoodCodeItem': 'MN Rule 4626.0408A',
  'InspectorComments': 'ESTABLISHMENT IS USING TIME AS PUBLIC HEALTH CONTROL FOR THE SUSHI RICE BUT HAVE NOT NOTIFIED THE HEALTH DEPT THEY WERE USING TIME AS A CONTROL METHOD FOR THE RICE. ESTABLISHMENT MUST COMPLETE THE REQUIRED FORM WHICH ATTACHED WITH THIS INSPECTION REPORT. ',
  'ViolationStatus': 'Observed',
  'ViolationPriority': 'Priority2'},
 {'BusinessName': 'SOTAROL UPTOWN',
  'HealthFacilityIDNumber': '

In [24]:
inspections_df_base = pd.DataFrame(inspection_data_list)

# Inspection-level frame: one row per inspection id, ordered by business and
# date, with every column name lower-cased.
summary_columns = ['InspectionIDNumber','DateOfInspection','BusinessName','FullAddress','InspectionType','InspectionScore','Latitude','Longitude']
inspections_df_1 = (
    inspections_df_base[summary_columns]
    .drop_duplicates(subset='InspectionIDNumber', keep='first')
    .sort_values(by=['BusinessName','DateOfInspection'])
    .rename(columns=str.lower)
)

# Violation-level frame: every source row is kept, with the same ordering and
# lower-cased column names.
detail_columns = ['DateOfInspection','InspectionIDNumber','BusinessName','FullAddress','InspectionType','InspectionScore','InspectionResult','FoodCodeItem','FoodCodeText','InspectorComments','ViolationPriority','ViolationStatus','ViolationPoints']
inspections_df_2 = (
    inspections_df_base[detail_columns]
    .sort_values(by=['BusinessName','DateOfInspection'])
    .rename(columns=str.lower)
)

inspections_df_1

Unnamed: 0,inspectionidnumber,dateofinspection,businessname,fulladdress,inspectiontype,inspectionscore,latitude,longitude
27198,44334,2017/08/31,112 EATERY,112 3RD ST N,Routine,80,44.98266,-93.27156
27231,224449,2017/10/06,112 EATERY,112 3RD ST N,Follow-Up,96,44.98266,-93.27156
27222,224448,2018/08/31,112 EATERY,112 3RD ST N,Routine,84,44.98266,-93.27156
27241,270458,2018/10/01,112 EATERY,112 3RD ST N,Follow-Up,100,44.98266,-93.27156
27236,270457,2019/08/30,112 EATERY,112 3RD ST N,Routine,94,44.98266,-93.27156
...,...,...,...,...,...,...,...,...
20496,304622,2019/05/10,ZOE'S CAFE,821 LAKE ST W,Follow-Up,100,44.94813,-93.29046
7274,24299,2017/02/02,ZUMBRO CAFE,2803 43RD ST W,Follow-Up,100,44.92447,-93.31501
7281,24300,2017/10/25,ZUMBRO CAFE,2803 43RD ST W,Routine,80,44.92447,-93.31501
7307,236468,2018/12/07,ZUMBRO CAFE,2803 43RD ST W,Routine,96,44.92447,-93.31501


In [25]:
# Display the violation-level frame (the notebook renders a truncated view of its rows).
inspections_df_2

Unnamed: 0,dateofinspection,inspectionidnumber,businessname,fulladdress,inspectiontype,inspectionscore,inspectionresult,foodcodeitem,foodcodetext,inspectorcomments,violationpriority,violationstatus,violationpoints
27198,2017/08/31,44334,112 EATERY,112 3RD ST N,Routine,80,Complete,MN Rule 4626.0395A2,Maintain all cold TCS foods at 41 degrees F (5...,Many observations of potentially hazardous foo...,Priority1,Observed,4
27199,2017/08/31,44334,112 EATERY,112 3RD ST N,Routine,80,Complete,MN Rule 4626.0725,EXPIRED: Secure carbon dioxide and bottle gas...,Carbon dioxide tanks in lower level observed n...,Priority3,Observed,0
27200,2017/08/31,44334,112 EATERY,112 3RD ST N,Routine,80,Complete,MN Rule 4626.0735AB,All equipment and components must be in good r...,The following equipment was observed not worki...,Priority3,Observed,0
27201,2017/08/31,44334,112 EATERY,112 3RD ST N,Routine,80,Complete,MN Rule 4626.1480B,Provide lockers or other suitable facilities f...,"Shoes, backpacks and other personal items obse...",Priority3,Observed,0
27202,2017/08/31,44334,112 EATERY,112 3RD ST N,Routine,80,Complete,MN Rule 4626.0390A,Cool food by: 1. placing the food in shallow p...,"In walk-in cooler, two large plastic container...",Priority2,Observed,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
40241,2020/01/07,281116,ZUMBRO CAFE,2803 43RD ST W,Routine,96,Complete,MN Rule 4626.0680A,Provide a 3 compartment sink with integrally a...,ESTABLISHMENT IS USING 2 COMP SINK AND DISHWAS...,Priority2,Observed,2
40242,2020/01/07,281116,ZUMBRO CAFE,2803 43RD ST W,Routine,96,Complete,MN Rule 4626.1720A,Submit complete plans and specifications to th...,SUBMIT PLANS PRIOR TO INSTALLATION OF 3 COMP S...,Priority2,Observed,2
40243,2020/01/07,281116,ZUMBRO CAFE,2803 43RD ST W,Routine,96,Complete,MN Rule 4626.0450BCDE,"Remove all multi-use equipment, utensils, and ...",OBSERVED WET TOWELS UNDER CUTTING BOARDS. TOWE...,Priority3,Observed,0
40244,2020/01/07,281116,ZUMBRO CAFE,2803 43RD ST W,Routine,96,Complete,MN Rule 4626.0235A(1),"Separate raw animal foods during storage, prep...",OBSERVED RAW CHICKEN ABOVE CELERY IN COOLER. S...,Priority1,Corrected on-site,4


In [26]:
# Collapse the inspection-level rows into one row per facility (name, address,
# coordinates); each remaining column becomes a list of that facility's values.
inspect_by_biz=inspections_df_1.groupby(['businessname','fulladdress','latitude','longitude'],sort=False,as_index=False).aggregate(lambda x: list(x))

# The previous message also announced a saved "InspectionsData.csv", but this
# cell never writes a file, so the message now reports only what happens.
print('Inspections DataFrame now stored in memory as "inspect_by_biz".')
print(f'There are {len(inspections_df_1)} inspections for {len(inspect_by_biz)} facilities.')
print('---------------')

Inspections DataFrame now stored in memory as "inspect_by_biz" and csv "InspectionsData.csv" has been saved in DataFiles folder.
There are 6148 inspections for 1557 facilities.
---------------


In [27]:
# Expose the violation-level frame under the name used by the upload step.
# This is a plain assignment, so both names refer to the same DataFrame object.
inspection_detail=inspections_df_2

print('Inspection Detail DataFrame now stored in memory as "inspection_detail"',flush=True)

print('---------------',flush=True)

Inspection Detail DataFrame now stored in memory as "inspection_detail"
---------------


In [28]:
# Display the violation-level frame under its upload name.
inspection_detail

Unnamed: 0,dateofinspection,inspectionidnumber,businessname,fulladdress,inspectiontype,inspectionscore,inspectionresult,foodcodeitem,foodcodetext,inspectorcomments,violationpriority,violationstatus,violationpoints
27198,2017/08/31,44334,112 EATERY,112 3RD ST N,Routine,80,Complete,MN Rule 4626.0395A2,Maintain all cold TCS foods at 41 degrees F (5...,Many observations of potentially hazardous foo...,Priority1,Observed,4
27199,2017/08/31,44334,112 EATERY,112 3RD ST N,Routine,80,Complete,MN Rule 4626.0725,EXPIRED: Secure carbon dioxide and bottle gas...,Carbon dioxide tanks in lower level observed n...,Priority3,Observed,0
27200,2017/08/31,44334,112 EATERY,112 3RD ST N,Routine,80,Complete,MN Rule 4626.0735AB,All equipment and components must be in good r...,The following equipment was observed not worki...,Priority3,Observed,0
27201,2017/08/31,44334,112 EATERY,112 3RD ST N,Routine,80,Complete,MN Rule 4626.1480B,Provide lockers or other suitable facilities f...,"Shoes, backpacks and other personal items obse...",Priority3,Observed,0
27202,2017/08/31,44334,112 EATERY,112 3RD ST N,Routine,80,Complete,MN Rule 4626.0390A,Cool food by: 1. placing the food in shallow p...,"In walk-in cooler, two large plastic container...",Priority2,Observed,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
40241,2020/01/07,281116,ZUMBRO CAFE,2803 43RD ST W,Routine,96,Complete,MN Rule 4626.0680A,Provide a 3 compartment sink with integrally a...,ESTABLISHMENT IS USING 2 COMP SINK AND DISHWAS...,Priority2,Observed,2
40242,2020/01/07,281116,ZUMBRO CAFE,2803 43RD ST W,Routine,96,Complete,MN Rule 4626.1720A,Submit complete plans and specifications to th...,SUBMIT PLANS PRIOR TO INSTALLATION OF 3 COMP S...,Priority2,Observed,2
40243,2020/01/07,281116,ZUMBRO CAFE,2803 43RD ST W,Routine,96,Complete,MN Rule 4626.0450BCDE,"Remove all multi-use equipment, utensils, and ...",OBSERVED WET TOWELS UNDER CUTTING BOARDS. TOWE...,Priority3,Observed,0
40244,2020/01/07,281116,ZUMBRO CAFE,2803 43RD ST W,Routine,96,Complete,MN Rule 4626.0235A(1),"Separate raw animal foods during storage, prep...",OBSERVED RAW CHICKEN ABOVE CELERY IN COOLER. S...,Priority1,Corrected on-site,4


In [77]:
#Postgres username, password, and database name
from urllib.parse import quote_plus

ipaddress = 'localhost'
port = '5432'
dbname = 'Minneapolis_Restaurants'
# A long string that contains the necessary Postgres login information.
# `username` and `password` come from the config import at the top of the
# notebook; they are percent-encoded so characters such as '@', ':' or '/'
# in a credential cannot corrupt the connection URL.
postgres_str = f'postgresql://{quote_plus(str(username))}:{quote_plus(str(password))}@{ipaddress}:{port}/{dbname}'

In [None]:
# Declares the 'yelpdata' table, creates it in Postgres, and uploads yelp_df into it.
# NOTE(review): per the pandas documentation, to_sql(..., if_exists='replace') drops an
# existing table and recreates it from the DataFrame, so the schema declared below
# (primary key, column types) presumably does not survive the upload -- confirm in the DB.

Base = declarative_base()
engine = create_engine(postgres_str)

class YelpData(Base):
    """Declarative mapping for the 'yelpdata' table; columns mirror yelp_df plus an integer 'index' key."""
    __tablename__ = 'yelpdata'
    index=Column(Integer,primary_key=True,autoincrement=True)
    yelpid=Column(String,nullable=False)
    name=Column(String)
    image=Column(String)
    url=Column(String)
    latitude=Column(Float(20))
    longitude=Column(Float(20))
    address=Column(String)
    phone=Column(String)
    categories=Column(String)
    transactions=Column(String)
    rating=Column(Float(10))
    reviews=Column(Integer)
                   
# Create any declared table that does not exist yet.
Base.metadata.create_all(engine)

# index=True also writes the DataFrame index as a column.
yelp_df.to_sql('yelpdata', engine, if_exists='replace', index=True)

print(f'Table "yelpdata" uploaded to postgreSQL database "Minneapolis_Restaurants".',flush=True)
print('---------------')

In [91]:
# Declares the 'masterdata' table, creates it in Postgres, and uploads master_df into it.
# NOTE(review): per the pandas documentation, to_sql(..., if_exists='replace') drops an
# existing table and recreates it from the DataFrame, so the schema declared below
# (primary key, column types) presumably does not survive the upload -- confirm in the DB.

Base = declarative_base()
engine = create_engine(postgres_str)

class MasterData(Base):
    """Declarative mapping for the 'masterdata' table: Yelp fields, Google fields, and the two derived columns."""
    __tablename__ = 'masterdata'
    index=Column(Integer,primary_key=True,autoincrement=True)
    yelpid=Column(String,nullable=False)
    name=Column(String)
    image=Column(String)
    url=Column(String)
    latitude=Column(Float(20))
    longitude=Column(Float(20))
    address=Column(String)
    phone=Column(String)
    categories=Column(String)
    transactions=Column(String)
    rating=Column(Float(10))
    reviews=Column(Integer)
    google_name=Column(String)
    google_id=Column(String)
    google_rating=Column(Float(10))
    google_reviews=Column(Integer)
    google_price=Column(Integer)
    agg_rating=Column(Float)
    total_reviews=Column(Float)
                   
# Create any declared table that does not exist yet.
Base.metadata.create_all(engine)

# index=True also writes the DataFrame index as a column.
master_df.to_sql('masterdata', engine, if_exists='replace', index=True)

print(f'Table "masterdata" uploaded to postgreSQL database "Minneapolis_Restaurants".',flush=True)
print('---------------')

Table "masterdata" uploaded to postgreSQL database "Minneapolis_Restaurants".
---------------


In [None]:
# Declares the 'googledata' table, creates it in Postgres, and uploads google_df into it.
# NOTE(review): per the pandas documentation, to_sql(..., if_exists='replace') drops an
# existing table and recreates it from the DataFrame, so the schema declared below
# (primary key, column types) presumably does not survive the upload -- confirm in the DB.

Base = declarative_base()
engine = create_engine(postgres_str)

class GoogleData(Base):
    """Declarative mapping for the 'googledata' table, keyed by the Google place id."""
    __tablename__ = 'googledata'
    googleplacesid=Column(String,primary_key=True, nullable=False)
    name=Column(String)
    latitude=Column(Float(20))
    longitude=Column(Float(20))
    address=Column(String)
    rating=Column(Float(10))
    reviews=Column(Integer) 
    price=Column(Integer)
    icon=Column(String)
    photos=Column(String)
                   
# Create any declared table that does not exist yet.
Base.metadata.create_all(engine)

# index=True also writes the DataFrame index as a column; the class above declares no such column.
google_df.to_sql('googledata', engine, if_exists='replace', index=True)

print(f'Table "googledata" uploaded to postgreSQL database "Minneapolis_Restaurants".',flush=True)
print('---------------',flush=True)

In [92]:
# Declares the 'inspectionsdata' table, creates it in Postgres, and uploads inspect_by_biz into it.
# NOTE(review): per the pandas documentation, to_sql(..., if_exists='replace') drops an
# existing table and recreates it from the DataFrame, so the schema declared below
# (primary key, column types) presumably does not survive the upload -- confirm in the DB.

Base = declarative_base()
engine = create_engine(postgres_str)

class InspectionsData(Base):
    """Declarative mapping for the 'inspectionsdata' table (one row per facility)."""
    __tablename__ = 'inspectionsdata'
    index=Column(Integer,primary_key=True,autoincrement=True)
    businessname=Column(String,nullable=False)
    fulladdress=Column(String)
    # Declared here, but the columns selected for inspect_by_biz do not include it.
    healthfacilityidnumber=Column(String)
    latitude=Column(Float(20))
    longitude=Column(Float(20))
    # The four columns below hold the per-facility lists produced by the groupby.
    inspectionidnumber=Column(String)
    dateofinspection=Column(String)
    inspectionscore=Column(String)
    inspectiontype=Column(String)
                   
# Create any declared table that does not exist yet.
Base.metadata.create_all(engine)

# index=True also writes the DataFrame index as a column.
inspect_by_biz.to_sql('inspectionsdata', engine, if_exists='replace', index=True)

print(f'Table "inspectionsdata" uploaded to postgreSQL database "Minneapolis_Restaurants".',flush=True)
print('---------------',flush=True)

Table "inspectionsdata" uploaded to postgreSQL database "Minneapolis_Restaurants".
---------------


In [94]:
# Declares the 'inspectionsdetail' table, creates it in Postgres, and uploads inspection_detail into it.
# NOTE(review): per the pandas documentation, to_sql(..., if_exists='replace') drops an
# existing table and recreates it from the DataFrame, so the schema declared below
# presumably does not survive the upload -- hence the data-type reminder printed at the end.

Base = declarative_base()
engine = create_engine(postgres_str)

class InspectionsDetail(Base):
    """Declarative mapping for the 'inspectionsdetail' table (one row per recorded violation)."""
    # Previously 'inspectionsdata', which pointed this class at the facility-level
    # table instead of the table this cell actually uploads to.
    __tablename__ = 'inspectionsdetail'
    # NOTE(review): inspection_detail holds several rows per inspection id, so this
    # column cannot serve as a real primary key -- confirm the intended key.
    inspectionidnumber=Column(String,primary_key=True)
    dateofinspection=Column(String)
    businessname=Column(String)
    fulladdress=Column(String)
    inspectiontype=Column(String)
    inspectionscore=Column(String)
    inspectionresult=Column(String)
    foodcodeitem=Column(String)
    foodcodetext=Column(String)
    inspectorcomments=Column(String)
    violationpriority=Column(String)
    violationstatus=Column(String)
    violationpoints=Column(String)

# Create any declared table that does not exist yet.
Base.metadata.create_all(engine)

# index=True also writes the DataFrame index as a column.
inspection_detail.to_sql('inspectionsdetail', engine, if_exists='replace', index=True)

print(f'Table "inspectionsdetail" uploaded to postgreSQL database "Minneapolis_Restaurants".',flush=True)
print('---------------',flush=True)
print("DONE.  Don't forget to fix the SQL data types! Use the DataTypeChange script to fix your Minneapolis_Restaurants DB",flush=True)

Table "inspectionsdetail" uploaded to postgreSQL database "Minneapolis_Restaurants".
---------------
DONE.  Don't forget to fix the SQL data types! Use the DataTypeChange script to fix your Minneapolis_Restaurants DB
