In [1]:
# Import standard-library URL helper
from urllib.parse import quote

# Import Pandas and NumPy
import pandas as pd
import numpy as np

# Import SQL Alchemy
from sqlalchemy import create_engine
import psycopg2

# Import Password
from config import password

In [2]:
# Cuisine data set
# Relative path of the raw CSV file
cuisine = "Resources/chefmozcuisine.csv"

# Load the file into a DataFrame (decoded as ISO-8859-1) and preview the first rows
cuisine_df = pd.read_csv(filepath_or_buffer=cuisine, encoding="ISO-8859-1")
cuisine_df.head(n=5)

Unnamed: 0,placeID,Rcuisine
0,134999,Dutch-Belgian
1,132825,Seafood
2,135106,International
3,132667,Seafood
4,132613,French


Data Cleaning

In [3]:
# Display the column labels of the freshly loaded frame so the
# original names are visible before any renaming takes place
cuisine_df.columns

Index(['placeID', 'Rcuisine'], dtype='object')

In [4]:
# Switch both columns to lowercase names:
#   placeID  -> placeid
#   Rcuisine -> cuisine
cuisine_df = cuisine_df.rename(
    columns={
        "placeID": "placeid",
        "Rcuisine": "cuisine",
    }
)
cuisine_df.head(n=5)

Unnamed: 0,placeid,cuisine
0,134999,Dutch-Belgian
1,132825,Seafood
2,135106,International
3,132667,Seafood
4,132613,French


In [5]:
# Count the missing/null entries in each column (0 means the column is complete)
cuisine_df.isnull().sum()

placeid    0
cuisine    0
dtype: int64

In [6]:
# Collect every row that repeats an earlier row.
# Note: the value printed is the subset's shape, i.e. a (rows, columns) tuple,
# so the duplicate count is the first element.
duplicate_rows_df = cuisine_df.loc[cuisine_df.duplicated()]
print(f"Number of duplicate rows: {duplicate_rows_df.shape}")

Number of duplicate rows: (171, 2)


In [7]:
# Drop the duplicate rows, keeping the first occurrence of each.
# The result is reassigned rather than using inplace=True, so the frame object
# that earlier cells displayed is not mutated.
cuisine_df = cuisine_df.drop_duplicates()

In [8]:
# Repeat the duplicate check after dropping; the subset should now be empty.
# Note: the value printed is a (rows, columns) shape tuple, so a clean result
# has 0 as its first element.
duplicate_rows_df = cuisine_df.loc[cuisine_df.duplicated()]
print(f"Number of duplicate rows: {duplicate_rows_df.shape}")

Number of duplicate rows: (0, 2)


In [9]:
# Count the non-null placeid values in the de-duplicated frame.
# NOTE(review): "before" presumably means before the rows are loaded into
# the database below - confirm.
cuisine_df["placeid"].count()

745

Database

In [10]:
# Connect to Postgres.
# The password is percent-encoded before being placed in the URL so that
# characters such as "@", "/", ":" or "%" are not misread as URL syntax;
# SQLAlchemy decodes it again when it parses the URL.
escaped_password = quote(str(password), safe="")
engine = create_engine(f"postgresql://postgres:{escaped_password}@localhost/restaurant_rating_db")

# Open a connection for the cells that follow.
# NOTE(review): nothing visible in this notebook closes it - consider
# conn.close() / engine.dispose() once the last query has run.
conn = engine.connect()

In [11]:
# Append the cleaned rows to the "cuisine" table; the DataFrame index is not written.
# Because the mode is "append", running this cell again inserts the same rows a second time.
cuisine_df.to_sql("cuisine", engine, if_exists="append", index=False)

In [12]:
# Read the table back over the open connection and preview the first ten rows
CUISINE = pd.read_sql(sql="SELECT * FROM CUISINE", con=conn)
CUISINE.head(n=10)

Unnamed: 0,placeid,cuisine
0,134999,Dutch-Belgian
1,132825,Seafood
2,135106,International
3,132667,Seafood
4,132613,French
5,135040,Seafood
6,132732,Dutch-Belgian
7,132875,Mediterranean
8,132609,International
9,135082,International
