### Connecting to PostgreSQL

In [3]:
# library
import pandas as pd
from sqlalchemy import create_engine

In [None]:
# Check which pandas version is installed (shown as the cell's output).
pd.__version__

In [None]:
# Database engine for PostgreSQL. Per the URL below: user `root`, password
# `root`, host `localhost`, port 5432, database `ny_taxi`.
engine = create_engine('postgresql://root:root@localhost:5432/ny_taxi')

### Reading the CSV file in chunks and inserting it into the database

In [None]:
# Open the CSV as a chunked reader instead of loading it all at once:
# each iteration yields a DataFrame of up to 100000 rows.
df_iter = pd.read_csv('data/yellow_tripdata_2021-01.csv', iterator=True, chunksize=100000)

In [None]:
# Take the first chunk from the reader; this advances `df_iter`, so the
# following iterations start at the second chunk.
df = next(df_iter)

In [None]:
# Parse the pickup/dropoff columns of the first chunk into pandas timestamps
# (they were passed to read_csv without date parsing).
df['tpep_pickup_datetime'] = pd.to_datetime(df['tpep_pickup_datetime'])
df['tpep_dropoff_datetime'] = pd.to_datetime(df['tpep_dropoff_datetime'])

In [None]:
# Create the `yellow_taxi_data` table from a zero-row slice of the chunk, so
# only the column definitions are written and no data rows.
# `if_exists='replace'` drops and recreates the table if it is already there.
df.head(n=0).to_sql(name='yellow_taxi_data', con=engine, if_exists='replace')

In [None]:
# Append the first data chunk (up to 100000 rows) to the table.
# `%time` is an IPython magic that reports how long the statement took to run.
%time df.to_sql(name='yellow_taxi_data', con=engine, if_exists='append')

In [None]:
# library
from time import time

In [None]:
# Ingest the remaining chunks: for each one, parse the two timestamp columns,
# append the rows to `yellow_taxi_data`, and print how long the round took
# (reading the chunk is included in the measured time).
while True:

    t_start = time()

    # The chunk reader raises StopIteration once the CSV is exhausted. Catch it
    # so the cell finishes cleanly instead of aborting with a traceback, which
    # would also stop a "Run All" before the later cells execute.
    try:
        df = next(df_iter)
    except StopIteration:
        print('finished ingesting all chunks')
        break

    df.tpep_pickup_datetime = pd.to_datetime(df.tpep_pickup_datetime)
    df.tpep_dropoff_datetime = pd.to_datetime(df.tpep_dropoff_datetime)

    df.to_sql(name='yellow_taxi_data', con=engine, if_exists='append')

    t_end = time()

    print('inserted another chunk, took %.3f second' % (t_end - t_start))

### Preparing SQL

In [None]:
# Read the taxi zone lookup CSV into a DataFrame (loaded in one go, no chunking).
df_zones = pd.read_csv('data/taxi_zone_lookup.csv')

In [None]:
# Preview the first rows to check that the read succeeded.
df_zones.head()

In [None]:
# Create the database engine.
# Note: the hostname is `localhost` because we're not working in a Docker env.
# The URL is a plain string literal: it has no placeholders, so no f-string
# prefix is needed.
engine = create_engine('postgresql://root:root@localhost:5432/ny_taxi')

In [None]:
# Write the zone lookup DataFrame to the `zones` table in Postgres,
# replacing the table if it already exists.
df_zones.to_sql(name='zones', con=engine, if_exists='replace')