#### Before You Start


`cd .devcontainer` <br>
run `docker-compose up`

In [51]:
! pip install psycopg2
! pip install sqlalchemy



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m


In [52]:
import psycopg2  #import of the psycopg2 python library
import pandas as pd #import of the pandas python library
import pandas.io.sql as psql

# With ISOLATION_LEVEL_AUTOCOMMIT, no transaction is started when commands are executed, so no commit() or rollback() is required.
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT

In [53]:
# Open the psycopg2 connection (`con`) and cursor (`cursor`) that the later
# cells of this notebook use.
# NOTE(review): credentials are hard-coded for the local dev container; move
# them to environment variables before pointing this at a shared server.
try:
    # Connect to the PostgreSQL server with username and password credentials
    con = psycopg2.connect(
        user="postgres",
        password="postgres",
        host="localhost",
        port="5432",
    )

    # Autocommit: every statement takes effect immediately, no explicit
    # commit() is needed.
    con.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
    print("Connected Successfully to PostgreSQL server!!")

    # Obtain a DB cursor to perform database operations
    cursor = con.cursor()
except Exception as error:
    print("Error while connecting to PostgreSQL", error)
    # Re-raise: without a connection `con` and `cursor` are never defined, so
    # letting the notebook continue would only fail later with a less helpful
    # NameError (or silently reuse a stale connection from a previous run).
    raise


Connected Successfully to PostgreSQL server!!


#### View table in python

In [54]:
import pandas as pd

# Quick look at the raw data: print the first five rows of lap_time.csv.
df = pd.read_csv('lap_time.csv')
print(df.head(5))

   race_id  driver_id  lap  position  time_in_milliseconds
0      841         20    1         1                 98109
1      841         20    2         1                 93006
2      841         20    3         1                 92713
3      841         20    4         1                 92803
4      841         20    5         1                 92342


### Create tables in relational database

In [55]:
try:
    # DDL script for the base tables. It first drops any existing copies
    # (CASCADE also removes foreign keys that point at them) so the cell can be
    # re-run, then recreates every table from scratch.
    create_all_tables_query = '''

    DROP TABLE IF EXISTS lap_time, pit_stop, result, race, driver, constructor, circuit, season CASCADE;

    CREATE TABLE IF NOT EXISTS circuit (
        circuit_id INT PRIMARY KEY,
        -- circuit_ref omitted
        name TEXT,
        location TEXT,
        country TEXT,
        lat float,
        long float,
        alt INT,
        url TEXT
    );

    CREATE TABLE IF NOT EXISTS constructor (
        constructor_id INT PRIMARY KEY,
        -- removing constructor_ref for normalisation purposes
        name TEXT,
        nationality TEXT,
        url TEXT
    );

    CREATE TABLE IF NOT EXISTS driver (
        driver_id INT PRIMARY KEY,
        -- removing driver_ref and number for normalisation purposes
        code TEXT,
        first_name TEXT, --no longer forename
        last_name TEXT, --no longer surname
        dob DATE,
        nationality TEXT,
        url TEXT
    );

    -- NOTE(review): race_id has no foreign key here (race is created and
    -- loaded after this table) - confirm that is intended.
    CREATE TABLE IF NOT EXISTS lap_time (
        race_id INT,
        driver_id INT REFERENCES driver,
        lap INT,
        position INT,
        time_in_milliseconds INT,
        PRIMARY KEY(race_id, driver_id, lap)
    );

    -- NOTE(review): the key uses lap although the table also has a stop
    -- column, and no foreign keys are declared - confirm both are intended.
    CREATE TABLE IF NOT EXISTS pit_stop (
        race_id INT,
        driver_id INT,
        stop INT,
        lap INT,
        -- Spelling presumably matches the pit_stop.csv header (rows are
        -- appended by column name) - confirm before renaming.
        time_in_miliseconds INT,
        PRIMARY KEY(race_id, driver_id, lap)
    );

    CREATE TABLE IF NOT EXISTS race (
        race_id INT PRIMARY KEY,
        year INT,
        round INT,
        circuit_id INT REFERENCES circuit,
        name TEXT,
        date DATE,
        url TEXT
    );

    CREATE TABLE IF NOT EXISTS result (
        result_id INT PRIMARY KEY,
        race_id INT REFERENCES race,
        driver_id INT REFERENCES driver,
        constructor_id INT REFERENCES constructor,
        -- number and grid omitted
        position INT, -- use positionOrder instead of position/positionText
        -- NOTE(review): confirm the source data never contains fractional
        -- points; an INT column cannot store them exactly.
        points INT,
        laps INT,
        -- omit time, unclean
        time_in_milliseconds INT,
        fastest_lap INT,
        rank INT,
        fastest_lap_time_in_milliseconds INT,
        fastest_lap_speed float
    );

    CREATE TABLE IF NOT EXISTS season (
        year INT PRIMARY KEY,
        url TEXT
    );

    '''

    # Send the whole script to the server in a single execute() call.
    cursor.execute(create_all_tables_query)

    # The connection is put in autocommit mode when it is opened, so commit()
    # is a no-op there; it is kept so the cell still persists its changes if
    # autocommit is ever switched off.
    con.commit()
    print("All tables created successfully in PostgreSQL ")
except Exception as error:
    # Also a no-op under autocommit; with autocommit off it clears the failed
    # transaction so the connection stays usable.
    con.rollback()
    print("Error While Creating the DB: ",error)
    # Re-raise: if the schema is missing, the later to_sql(if_exists="append")
    # cells would create the tables themselves with inferred column types and
    # no keys, hiding this failure.
    raise

All tables created successfully in PostgreSQL 


### Create many-to-many tables

In [56]:
try:
    # DDL script for the two link tables. Both reference the base tables
    # (race, result, constructor), so those must already exist.
    create_all_tables_query = '''

    DROP TABLE IF EXISTS race_result, constructor_result CASCADE;

    CREATE TABLE IF NOT EXISTS race_result (
        race_id INT REFERENCES race,
        result_id INT REFERENCES result,
        PRIMARY KEY (race_id, result_id)
    );

    CREATE TABLE IF NOT EXISTS constructor_result (
        constructor_id INT REFERENCES constructor,
        result_id INT REFERENCES result,
        PRIMARY KEY (constructor_id, result_id)
    );
    '''

    # Send the whole script to the server in a single execute() call.
    cursor.execute(create_all_tables_query)

    # The connection is put in autocommit mode when it is opened, so commit()
    # is a no-op there; it is kept so the cell still persists its changes if
    # autocommit is ever switched off.
    con.commit()
    print("All tables created successfully in PostgreSQL ")
except Exception as error:
    # Also a no-op under autocommit; with autocommit off it clears the failed
    # transaction so the connection stays usable.
    con.rollback()
    print("Error While Creating the DB: ",error)
    # Re-raise: if these tables are missing, the later
    # to_sql(if_exists="append") cells would create them with inferred column
    # types and no keys, hiding this failure.
    raise

All tables created successfully in PostgreSQL 


### Check table creation

In [57]:
# information_schema.tables keeps a listing of every table managed by Postgres
# for the current database. Restricting table_schema to 'public' lists only
# the tables that you created.
list_tables_sql = """SELECT table_name 
                  FROM information_schema.tables 
                  WHERE table_schema = 'public'  
               """
cursor.execute(list_tables_sql)

# Each fetched row is a one-element tuple holding a table name.
for table_row in cursor.fetchall():
    print(table_row)

('race_result',)
('driver',)
('lap_time',)
('pit_stop',)
('circuit',)
('race',)
('result',)
('constructor',)
('season',)
('constructor_result',)


### Read all CSVs and insert into postgres (requires some waiting)

In [58]:
import pandas as pd
from sqlalchemy import create_engine

# (CSV file, destination table) pairs in load order; every CSV is named after
# the table it fills.
csv_files = [
    (f"{name}.csv", name)
    for name in (
        "circuit",
        "constructor",
        "driver",
        "lap_time",
        "pit_stop",
        "race",
        "result",
        "season",
    )
]

# SQLAlchemy engine that pandas uses to write to the PostgreSQL database
user, password = "postgres", "postgres"
host, port, database = "localhost", "5432", "postgres"
engine = create_engine(f"postgresql+psycopg2://{user}:{password}@{host}:{port}/{database}")

# Load each CSV and append its rows to the matching table. A failure is
# reported for that file only; the loop then moves on to the next one.
for csv_file, table_name in csv_files:
    try:
        # Files are decoded as latin1
        df = pd.read_csv(csv_file, encoding="latin1")

        # Multi-row INSERT statements, 100 rows per batch
        df.to_sql(
            table_name,
            con=engine,
            if_exists="append",
            index=False,
            method="multi",
            chunksize=100,
        )
    except Exception as e:
        print(f"An error occurred while importing {csv_file} into {table_name}: {e}")
    else:
        print(f"Data from {csv_file} successfully imported into {table_name}!")


Data from circuit.csv successfully imported into circuit!
Data from constructor.csv successfully imported into constructor!
Data from driver.csv successfully imported into driver!
Data from lap_time.csv successfully imported into lap_time!
Data from pit_stop.csv successfully imported into pit_stop!
Data from race.csv successfully imported into race!
Data from result.csv successfully imported into result!
Data from season.csv successfully imported into season!


### Populate many-to-many tables with records

In [59]:
# Fill the race_result link table with the (race_id, result_id) pairs found
# in the results CSV.
csv_file = 'result.csv'
table_name = 'race_result'

try:
    # Read the file named above, so the messages below always describe what
    # was actually loaded, and keep only the two key columns.
    df_result = pd.read_csv(csv_file, encoding="latin1")
    df_race_result = df_result[['race_id', 'result_id']]

    # Append the pairs to the link table in batches of 100 rows
    df_race_result.to_sql(table_name, con=engine, if_exists="append", index=False, method="multi", chunksize=100)
    print(f"Data from {csv_file} successfully imported into {table_name}!")
except Exception as e:
    print(f"An error occurred while importing {csv_file} into {table_name}: {e}")


Data from result.csv successfully imported into race_result!


In [60]:
# Fill the constructor_result link table with the (constructor_id, result_id)
# pairs found in the results CSV.
csv_file = 'result.csv'
table_name = 'constructor_result'

try:
    # Only the two key columns of the results file are needed
    df_result = pd.read_csv(csv_file)
    df_constructor_result = df_result[['constructor_id', 'result_id']]

    # Append the pairs to the link table in batches of 100 rows
    df_constructor_result.to_sql(
        name=table_name,
        con=engine,
        if_exists="append",
        index=False,
        method="multi",
        chunksize=100,
    )
except Exception as e:
    print(f"An error occurred while importing {csv_file} into {table_name}: {e}")
else:
    print(f"Data from {csv_file} successfully imported into {table_name}!")


Data from result.csv successfully imported into constructor_result!


### View query

In [65]:
# sql_select_query = """ SELECT * FROM season WHERE year = 2001"""
# sql_select_query = """ SELECT r.race_id, r.year FROM race as r JOIN season as s ON s.year = r.year WHERE s.year = 2001"""

# try:
#     cursor.execute(sql_select_query, (1,))
#     person_records = cursor.fetchall() 
#     print("Print each row and it's columns values:\n")
#     # print(person_records)
#     df = pd.DataFrame(person_records, columns=['race_id', 'year'])
#     print(df)
# except(Exception, psycopg2.Error) as error :
#     con.rollback()
#     print("Error:", error)


# Countries_Customers_Cnt_gt1= psql.read_sql("""SELECT r.race_id, r.year 
#                                            FROM race as r 
#                                            JOIN season as s 
#                                            ON s.year = r.year 
#                                            WHERE s.year = 2001
#                                           """, engine)

# Read the race_result link table into a DataFrame.
race_result_df = psql.read_sql("""SELECT *
                                           FROM race_result
                                          """, engine)
# The previous variable name is kept as an alias in case other cells still
# refer to it.
Countries_Customers_Cnt_gt1 = race_result_df

# Show only the first rows: rendering the whole frame through `.style` emits
# every row of the table into the notebook output.
display(race_result_df.head(10))

Unnamed: 0,race_id,result_id
0,18,1
1,18,2
2,18,3
3,18,4
4,18,5
5,18,6
6,18,7
7,18,8
8,18,9
9,18,10
