In [None]:
import time
from urllib.parse import quote

import pandas as pd
from decouple import config
from sqlalchemy import create_engine

In [None]:
# Connection settings are looked up by name through decouple's config().
user = config('user')
password = config('password')
host = config('host')
port = config('port')
db = config('db')

# Percent-encode the credentials before embedding them in the URL: a raw '@',
# ':' or '/' inside the user name or password would otherwise be read as a URL
# delimiter, so the URL would be misparsed or fail to parse at all.
# safe='' makes quote() escape '/' as well (it is left untouched by default).
engine = create_engine(
    f"postgresql://{quote(user, safe='')}:{quote(password, safe='')}@{host}:{port}/{db}"
)

In [None]:
# Create (or recreate) an empty green_taxi_trips table whose columns mirror
# the CSV header.

df = pd.read_csv('green_tripdata_2019-09.csv')
# index=False keeps the DataFrame index out of the table schema. The chunked
# load later in this notebook appends with index=False, so an "index" column
# created here would stay NULL for every row.
df.head(n=0).to_sql(name='green_taxi_trips', con=engine, if_exists='replace', index=False)

In [None]:


# Rows written to the database per to_sql call during the chunked load.
chunk_size = 100_000

# Function to load DataFrame into PostgreSQL in chunks
def load_dataframe_to_postgres(df, engine, chunk_size, table_name="green_taxi_trips"):
    """Append ``df`` to a SQL table in consecutive slices of ``chunk_size`` rows.

    Each slice is written with ``DataFrame.to_sql(..., if_exists='append',
    index=False)`` and the time taken by that write is printed.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to load. An empty frame results in no writes.
    engine
        Connection object handed straight to ``DataFrame.to_sql`` as ``con``.
    chunk_size : int
        Maximum number of rows per write. Must be greater than zero.
    table_name : str, optional
        Destination table. Defaults to ``"green_taxi_trips"``.

    Raises
    ------
    ValueError
        If ``chunk_size`` is zero or negative.
    """
    # A negative step would make range() yield nothing (silently loading no
    # rows) and a zero step fails with an unrelated-looking range() error, so
    # reject both up front with a clear message.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

    total_rows = len(df)

    for start in range(0, total_rows, chunk_size):
        # perf_counter() is monotonic, so the measured duration cannot be
        # skewed by system clock adjustments.
        started_at = time.perf_counter()

        # The final slice may be shorter than chunk_size.
        end = min(start + chunk_size, total_rows)

        # Write the chunk to the destination table
        df.iloc[start:end].to_sql(table_name, engine, if_exists='append', index=False)

        elapsed_time = time.perf_counter() - started_at
        # end is exclusive, so end-1 is the last row position in this chunk.
        print(f"Chunk {start}-{end-1} loaded in {elapsed_time:.2f} seconds")


# Run the chunked load of the trips DataFrame using the settings defined above.
load_dataframe_to_postgres(df=df, engine=engine, chunk_size=chunk_size)

In [None]:
# Read the zone lookup CSV and write it to the 'zones' table, replacing the
# table if it already exists. to_sql's default index=True applies here, so the
# DataFrame index is stored as a column too.
df_zones = pd.read_csv(filepath_or_buffer='taxi+_zone_lookup.csv')
df_zones.to_sql('zones', engine, if_exists='replace')

In [None]:
# List every table that lives outside the pg_catalog and information_schema
# schemas.
query = (
    "\n"
    "SELECT *\n"
    "FROM pg_catalog.pg_tables\n"
    "WHERE schemaname != 'pg_catalog' AND schemaname != 'information_schema';"
)

pd.read_sql(sql=query, con=engine)

In [None]:
# Show the name and data type of each column recorded in
# information_schema.columns for a table named 'green_taxi_trips'.
# The filter is on table name only, so same-named tables in any schema match.
query = (
    "\n"
    "SELECT\n"
    "    column_name,\n"
    "    data_type\n"
    "FROM\n"
    "    information_schema.columns\n"
    "WHERE\n"
    "    table_name = 'green_taxi_trips';"
)

pd.read_sql(sql=query, con=engine)

In [None]:
# Show the name and data type of each column recorded in
# information_schema.columns for a table named 'zones'.
# The filter is on table name only, so same-named tables in any schema match.
query = (
    "\n"
    "SELECT\n"
    "    column_name,\n"
    "    data_type\n"
    "FROM\n"
    "    information_schema.columns\n"
    "WHERE\n"
    "    table_name = 'zones';"
)

pd.read_sql(sql=query, con=engine)