In [1]:
# Import Pandas and NumPy
import pandas as pd
import numpy as np

# Import SQL Alchemy
from sqlalchemy import create_engine
import psycopg2

# Import Password
from config import password

In [2]:
# Load the restaurant ratings data set (rating_final.csv)
# Relative path of the CSV file inside the project's Resources folder
rating = "Resources/rating_final.csv"

# Parse the CSV (decoded as ISO-8859-1) into a DataFrame and preview the first five rows
rating_df = pd.read_csv(filepath_or_buffer=rating, encoding="ISO-8859-1")
rating_df.head(n=5)

Unnamed: 0,userID,placeID,rating,food_rating,service_rating
0,U1077,135085,2,2,2
1,U1077,135038,2,2,1
2,U1077,132825,2,2,2
3,U1077,135060,1,2,2
4,U1068,135104,1,1,2


Data Cleaning

In [3]:
# Show the column labels of the ratings DataFrame so the names can be checked before cleaning
rating_df.columns

Index(['userID', 'placeID', 'rating', 'food_rating', 'service_rating'], dtype='object')

In [4]:
# Lower-case the two ID column names: userID -> userid and placeID -> placeid.
# NOTE(review): presumably this matches the column names of the Postgres "rating" table — confirm against the schema.
rating_df = rating_df.rename(
    columns={
        "userID": "userid",
        "placeID": "placeid",
    }
)
rating_df.head(n=5)

Unnamed: 0,userid,placeid,rating,food_rating,service_rating
0,U1077,135085,2,2,2
1,U1077,135038,2,2,1
2,U1077,132825,2,2,2
3,U1077,135060,1,2,2
4,U1068,135104,1,1,2


In [5]:
# Count the missing (null/NaN) values in each column; a result of all zeros means nothing needs filling or dropping
rating_df.isnull().sum()

userid            0
placeid           0
rating            0
food_rating       0
service_rating    0
dtype: int64

In [6]:
# Collect the rows that are exact duplicates of an earlier row (all columns equal)
duplicate_rows_df = rating_df[rating_df.duplicated()]

# Report the row count itself: len() gives the number of duplicate rows,
# whereas .shape would print a (rows, columns) tuple under a "Number of ..." label
print(f"Number of duplicate rows: {len(duplicate_rows_df)}")

Number of duplicate rows: (0, 5)


Database

In [7]:
# Build a SQLAlchemy engine for the local Postgres database "restaurant_rating_db"
# (user "postgres", password taken from config) and open a connection for queries.
# NOTE(review): the password is placed in the URL verbatim; characters such as "@" or "/"
# would need URL-encoding — confirm the configured value is safe.
engine = create_engine(
    f"postgresql://postgres:{password}@localhost/restaurant_rating_db"
)
# NOTE(review): this connection is not closed in the cells visible here — confirm it is closed later.
conn = engine.connect()

In [8]:
# Append the cleaned ratings to the "rating" table; the DataFrame index is not written.
# Because if_exists is "append", running this cell again inserts the same rows a second time.
rating_df.to_sql(
    name="rating",
    con=engine,
    if_exists="append",
    index=False,
)

In [9]:
# Read every row of the rating table back from Postgres over the open connection
# and display the first ten rows of the result
RATING = pd.read_sql(sql="SELECT * FROM RATING", con=conn)
RATING.head(n=10)

Unnamed: 0,userid,placeid,rating,food_rating,service_rating
0,U1077,135085,2,2,2
1,U1077,135038,2,2,1
2,U1077,132825,2,2,2
3,U1077,135060,1,2,2
4,U1068,135104,1,1,2
5,U1068,132740,0,0,0
6,U1068,132663,1,1,1
7,U1068,132732,0,0,0
8,U1068,132630,1,1,1
9,U1067,132584,2,2,2
