In [42]:
import pandas as pd
import numpy as np
import datetime as dt
from sqlalchemy import create_engine, inspect



In [43]:
csv_file = "./Resources/airbnb_host_info.csv"
review_df = pd.read_csv(csv_file)
review_df.head()

# Change the date type of the last_review column to datetime
review_df["last_review"] = pd.to_datetime(review_df["last_review"])
review_df.dropna()
review_df.head()

Unnamed: 0,listing_id,host_name,last_review
0,2595,Jennifer,2019-05-21
1,3831,LisaRoxanne,2019-07-05
2,5099,Chris,2019-06-22
3,5178,Shunichi,2019-06-24
4,5238,Ben,2019-06-09


In [44]:
csv_file2 = "./Resources/airbnb_price.csv"
price_df = pd.read_csv(csv_file2)
price_df.head()

# new data frame with split value columns
new_price_df = price_df["nbhood_full"].str.split(", ", n = 1, expand = True)
 
# making separate first name column from new data frame
price_df["borough"]= new_price_df[0]
 
# making separate last name column from new data frame
price_df["area"]= new_price_df[1]
 
# Dropping old Name columns
price_df.drop(columns =["nbhood_full"], inplace = True)
 
# Drop na value
price_df.dropna()

# Display df
price_df.head()

Unnamed: 0,listing_id,price,borough,area
0,2595,225 dollars,Manhattan,Midtown
1,3831,89 dollars,Brooklyn,Clinton Hill
2,5099,200 dollars,Manhattan,Murray Hill
3,5178,79 dollars,Manhattan,Hell's Kitchen
4,5238,150 dollars,Manhattan,Chinatown


In [45]:
csv_file3 = "./Resources/airbnb_room_type.csv"
type_df = pd.read_csv(csv_file3)
type_df.head()

Unnamed: 0,listing_id,description,room_type
0,2595,Skylit Midtown Castle,Entire home/apt
1,3831,Cozy Entire Floor of Brownstone,Entire home/apt
2,5099,Large Cozy 1 BR Apartment In Midtown East,Entire home/apt
3,5178,Large Furnished Room Near B'way,private room
4,5238,Cute & Cozy Lower East Side 1 bdrm,Entire home/apt


In [46]:
# Determine different Values in Column
type_df["room_type"].unique() 

array(['Entire home/apt', 'private room', 'Private room',
       'entire home/apt', 'PRIVATE ROOM', 'shared room',
       'ENTIRE HOME/APT', 'Shared room', 'SHARED ROOM'], dtype=object)

In [47]:
# Cleaning and Reducing Categories
# Define Dict with the key-value pair to remap.
conv = {
        'Entire home/apt':'Entire Home/Apt','entire home/apt':'Entire Home/Apt','ENTIRE HOME/APT':'Entire Home/Apt',
        'private room' : 'Private Room', 'PRIVATE ROOM' : 'Private Room', 'Private room' : 'Private Room',
        'shared room' : 'Shared Room', 'Shared room' : 'Shared Room', 'SHARED ROOM' : 'Shared Room'
       }
type_df_1=type_df.replace({"room_type": conv})

type_df["room_type"].unique()

array(['Entire home/apt', 'private room', 'Private room',
       'entire home/apt', 'PRIVATE ROOM', 'shared room',
       'ENTIRE HOME/APT', 'Shared room', 'SHARED ROOM'], dtype=object)

In [48]:
# Drop na values
type_df_1.dropna()

# Display df 
type_df_1.head()

Unnamed: 0,listing_id,description,room_type
0,2595,Skylit Midtown Castle,Entire Home/Apt
1,3831,Cozy Entire Floor of Brownstone,Entire Home/Apt
2,5099,Large Cozy 1 BR Apartment In Midtown East,Entire Home/Apt
3,5178,Large Furnished Room Near B'way,Private Room
4,5238,Cute & Cozy Lower East Side 1 bdrm,Entire Home/Apt


In [50]:
protocol = 'postgresql'
username = 'postgres'
password = 'password'
host = 'localhost'
port = 5432            
database_name = 'Airbnb_db'
rds_connection_string = f'{protocol}://{username}:{password}@{host}:{port}/{database_name}'
engine = create_engine(rds_connection_string)

In [51]:
engine.table_names()

  engine.table_names()


['airbnb_price', 'airbnb_review', 'airbnb_room_type', 'properties']