In [1]:
# import libraries
import requests
import pandas as pd
import os
import psycopg2


In [2]:
# extract F1 data from the OpenF1 API
OPENF1_BASE_URL = 'https://api.openf1.org/v1'
REQUEST_TIMEOUT_SECONDS = 60


def fetch_openf1(endpoint):
    """Return the decoded JSON payload of one OpenF1 endpoint.

    Parameters:
        endpoint: path segment after the base URL, e.g. 'drivers'.

    Raises:
        requests.HTTPError: if the API answers with a 4xx/5xx status, so an
            error payload is never turned into a DataFrame by mistake.
    """
    # The timeout keeps the cell from hanging indefinitely on a stalled connection.
    response = requests.get(
        '{}/{}'.format(OPENF1_BASE_URL, endpoint),
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    response.raise_for_status()
    return response.json()


drivers_api = fetch_openf1('drivers')
meetings_api = fetch_openf1('meetings')
weather_api = fetch_openf1('weather')

# convert to DataFrames
drivers_df = pd.DataFrame(drivers_api)
meetings_df = pd.DataFrame(meetings_api)
weather_df = pd.DataFrame(weather_api)

# join DataFrames
# drivers -> meetings on meeting_key, then weather on (meeting_key, session_key).
# Column names shared by both sides of a merge but not used as keys get pandas'
# default _x / _y suffixes.
f1_df = (
    drivers_df
    .merge(meetings_df, on='meeting_key', how='inner')
    .merge(weather_df, on=['meeting_key', 'session_key'], how='inner')
)

In [11]:
# list the columns of the merged frame (the output below shows country_code suffixed as _x / _y)
f1_df.columns

Index(['driver_number', 'broadcast_name', 'full_name', 'name_acronym',
       'team_name', 'team_colour', 'first_name', 'last_name', 'headshot_url',
       'country_code_x', 'session_key', 'meeting_key', 'meeting_name',
       'meeting_official_name', 'location', 'country_key', 'country_code_y',
       'country_name', 'circuit_key', 'circuit_short_name', 'date_start',
       'gmt_offset', 'year', 'meeting_code', 'air_temperature', 'humidity',
       'pressure', 'rainfall', 'track_temperature', 'wind_direction',
       'wind_speed', 'date'],
      dtype='object')

In [None]:
# connect to Amazon RDS and create a table
# The password is read from the environment so it is never stored in the notebook.
# host/dbname/user/port can be overridden the same way and otherwise fall back to
# the values this notebook has always used.
conn = psycopg2.connect(
            host = os.environ.get('F1_RDS_HOST', 'postgresql.c7cao8o2cjkw.eu-west-2.rds.amazonaws.com'),
            dbname = os.environ.get('F1_RDS_DBNAME', 'f1'),
            user = os.environ.get('F1_RDS_USER', 'postgres'),
            password = os.environ['F1_RDS_PASSWORD'],  # KeyError if unset: fail before connecting
            port = os.environ.get('F1_RDS_PORT', '5432')
        )
cur = conn.cursor()
cur.execute("""
CREATE TABLE IF NOT EXISTS f1_data (
    meeting_key INT,
    session_key INT,
    driver_number INT,
    full_name TEXT,
    team_name TEXT,
    team_colour TEXT,
    headshot_url TEXT,
    country_code TEXT,
    meeting_name TEXT,
    meeting_official_name TEXT,
    location TEXT,
    country_name TEXT,
    circuit_short_name TEXT,
    date_start DATE,
    gmt_offset TEXT,
    year INT,
    meeting_code TEXT,
    air_temperature FLOAT,
    humidity FLOAT,
    pressure FLOAT,
    rainfall FLOAT,
    track_temperature FLOAT,
    wind_direction TEXT,
    wind_speed FLOAT,
    date_weather TIMESTAMP
);
""")
# psycopg2 opens a transaction implicitly; without a commit the new table would be
# rolled back when the connection is closed.
conn.commit()

# Preview table
query = "SELECT * FROM f1_data LIMIT 10;"
df = pd.read_sql(query, conn)
print(df.head())


  df = pd.read_sql(query, conn)


Empty DataFrame
Columns: [meeting_key, session_key, driver_number, full_name, team_name, team_colour, headshot_url, country_code, meeting_name, meeting_official_name, location, country_name, circuit_short_name, date_start, gmt_offset, year, meeting_code, air_temperature, humidity, pressure, rainfall, track_temperature, wind_direction, wind_speed, date_weather]
Index: []

[0 rows x 25 columns]


In [None]:
# load API data to RDS
SAMPLE_SIZE = 500   # upper bound on the number of rows loaded
SAMPLE_SEED = 42    # fixed seed so a re-run loads the same rows

# f1_df columns in the same order as the INSERT column list below.
# f1_data.country_code is fed from country_code_x and f1_data.date_weather from date.
source_columns = [
    'meeting_key', 'session_key', 'driver_number', 'full_name', 'team_name', 'team_colour',
    'headshot_url', 'country_code_x', 'meeting_name', 'meeting_official_name', 'location',
    'country_name', 'circuit_short_name', 'date_start', 'gmt_offset', 'year', 'meeting_code',
    'air_temperature', 'humidity', 'pressure', 'rainfall', 'track_temperature', 'wind_direction',
    'wind_speed', 'date',
]

# Cap the sample at the frame length so a small frame does not make sample() raise.
sample_df = f1_df.sample(n=min(SAMPLE_SIZE, len(f1_df)), random_state=SAMPLE_SEED)[source_columns]

# Cast to object and replace missing values with None so they are stored as SQL NULL
# instead of NaN.
records = sample_df.astype(object).where(sample_df.notna(), None)

cur.executemany("""
        INSERT INTO f1_data (
            meeting_key, session_key, driver_number, full_name, team_name, team_colour,
            headshot_url, country_code, meeting_name, meeting_official_name, location,
            country_name, circuit_short_name, date_start, gmt_offset, year, meeting_code,
            air_temperature, humidity, pressure, rainfall, track_temperature, wind_direction,
            wind_speed, date_weather
        )
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    """, list(records.itertuples(index=False, name=None)))

# Without a commit the inserted rows are only visible inside this session and are
# discarded when the connection closes.
conn.commit()


# Preview table
query = "SELECT * FROM f1_data LIMIT 10;"
df = pd.read_sql(query, conn)
print(df.head())

   meeting_key  session_key  driver_number       full_name        team_name  \
0         1140         7763              1  Max VERSTAPPEN  Red Bull Racing   
1         1140         7763              1  Max VERSTAPPEN  Red Bull Racing   
2         1140         7763              1  Max VERSTAPPEN  Red Bull Racing   
3         1140         7763              1  Max VERSTAPPEN  Red Bull Racing   
4         1140         7763              1  Max VERSTAPPEN  Red Bull Racing   

  team_colour                                       headshot_url country_code  \
0      3671C6  https://www.formula1.com/content/dam/fom-websi...          NED   
1      3671C6  https://www.formula1.com/content/dam/fom-websi...          NED   
2      3671C6  https://www.formula1.com/content/dam/fom-websi...          NED   
3      3671C6  https://www.formula1.com/content/dam/fom-websi...          NED   
4      3671C6  https://www.formula1.com/content/dam/fom-websi...          NED   

         meeting_name                 

  df = pd.read_sql(query, conn)


In [None]:
# create an aggregated table in RDS: one row per (meeting, session, driver)
# Runs on the cursor because psycopg2 connection objects have no execute() method.
# DROP ... IF EXISTS + CREATE TABLE AS replaces the table on every run; PostgreSQL has
# no CREATE OR REPLACE TABLE, and TRUNCATE fails while the table does not exist yet.
# NOTE(review): the source table is assumed to be f1_data, the only RDS table this
# notebook creates with these columns — confirm.
cur.execute("""
    DROP TABLE IF EXISTS f1_processed;

    CREATE TABLE f1_processed AS
        SELECT
             meeting_key,
             session_key,
             full_name,
             COUNT(headshot_url) AS headshot_url_count
        FROM f1_data
        GROUP BY meeting_key, session_key, full_name;
""")
conn.commit()

In [None]:
# insert values into tables
# f1_all table: driver columns taken from the merged frame
# NOTE(review): f1_all is not created anywhere in this notebook — it is assumed to
# exist already with these columns; confirm before running.
for index, row in f1_df.iterrows():
    cur.execute("""
        INSERT INTO f1_all (
            meeting_key, session_key, driver_number, broadcast_name, full_name, 
            name_acronym, team_name, team_colour, first_name, last_name, 
            headshot_url, country_code, "registered"
        ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW());
    """, (
        row['meeting_key'], row['session_key'], row['driver_number'], row['broadcast_name'], row['full_name'], 
        row['name_acronym'], row['team_name'], row['team_colour'], row['first_name'], 
        # the merged frame has no plain country_code column; the driver's code is country_code_x
        row['last_name'], row['headshot_url'], row['country_code_x']
    ))

# Commit once after the loop: a single transaction instead of one per row, and a
# failure part-way through leaves no partial load behind.
conn.commit()

In [None]:
# convert JSON to DataFrame and then to CSV
# The API payloads are bound to drivers_api / meetings_api / weather_api in the extract
# cell; the f1_-prefixed names used here before were never defined.
# Create the output folder first so to_csv does not fail on a fresh checkout.
os.makedirs('Files/Sample data', exist_ok=True)

df_drivers = pd.DataFrame(drivers_api)
df_drivers.to_csv('Files/Sample data/f1_drivers.csv')
df_meetings = pd.DataFrame(meetings_api)
df_meetings.to_csv('Files/Sample data/f1_meetings.csv')
df_weather = pd.DataFrame(weather_api)
df_weather.to_csv('Files/Sample data/f1_weather.csv')
# df_position = pd.DataFrame(f1_position_api)
# df_position.to_csv('f1_position.csv')


In [None]:
# connect to local PostgreSQL
# psycopg2 and os come from the import cell at the top of the notebook.
# The password is read from the environment so it is never stored in the notebook;
# a missing variable raises KeyError before any connection attempt.
# Note: this rebinds conn / cur, which until now referred to the RDS connection.
conn = psycopg2.connect(
    dbname = 'f1_db',
    user = 'postgres',
    password = os.environ['F1_LOCAL_DB_PASSWORD'],
    host = 'localhost'
)
cur = conn.cursor()


In [None]:
# Create tables in PostgreSQL database
# Each script drops its table if present and recreates it; the scripts are executed
# one after another in the order drivers, meetings, weather.

# "drivers" table
drivers_ddl = """
DROP TABLE IF EXISTS drivers;
            
CREATE TABLE drivers (
    index SERIAL PRIMARY KEY,
    meeting_key INT,
    session_key INT,
    driver_number INT,
    broadcast_name VARCHAR(50),
    full_name VARCHAR(50),
    name_acronym VARCHAR(20),
    team_name VARCHAR(20),
    team_colour VARCHAR(20),
    first_name VARCHAR(30),
    last_name VARCHAR(30),
    headshot_url VARCHAR(200),
    country_code VARCHAR(20),
    "registered" TIMESTAMP
);
"""

# "meetings" table
meetings_ddl = """
DROP TABLE IF EXISTS meetings;
            
CREATE TABLE meetings (
    index SERIAL PRIMARY KEY,
    meeting_key INT,
    meeting_name VARCHAR(50),
    meeting_official_name VARCHAR(200),
    location VARCHAR(50),
    country_key INT,
    country_code VARCHAR(50),
    country_name VARCHAR(50), 
    circuit_key INT,
    circuit_short_name VARCHAR(50),
    date_start TIMESTAMPTZ,
    gmt_offset VARCHAR(50), 
    year INT,
    meeting_code VARCHAR(20),
    "registered" TIMESTAMP
);
"""

# "weather" table
weather_ddl = """
DROP TABLE IF EXISTS weather;

CREATE TABLE weather (
    index SERIAL PRIMARY KEY,
    meeting_key INT,
    session_key INT,
    air_temperature DECIMAL(5,2),
    humidity DECIMAL(5,2),
    pressure DECIMAL(6,2),
    rainfall DECIMAL(5,2),
    track_temperature DECIMAL(5,2),
    wind_direction INT,
    wind_speed DECIMAL(5,2),
    date TIMESTAMPTZ,
    "registered" TIMESTAMP
);
"""

for table_ddl in (drivers_ddl, meetings_ddl, weather_ddl):
    cur.execute(table_ddl)

In [None]:
# insert values into tables
def load_frame(cursor, table_name, columns, frame):
    """Insert the given columns of `frame` into `table_name`, stamping "registered" with NOW().

    Parameters:
        cursor: open database cursor used to run the INSERT.
        table_name: target table; must be a trusted constant because it is
            formatted into the SQL text.
        columns: column names, identical in `frame` and in the target table.
        frame: pandas DataFrame holding the rows to insert.

    The caller is responsible for committing.
    """
    statement = 'INSERT INTO {table} ({cols}, "registered") VALUES ({marks}, NOW());'.format(
        table=table_name,
        cols=', '.join(columns),
        marks=', '.join(['%s'] * len(columns)),
    )
    selected = frame[columns]
    # Cast to object and replace missing values with None so they are stored as SQL
    # NULL; a NaN cannot be stored in the INT columns.
    records = selected.astype(object).where(selected.notna(), None)
    cursor.executemany(statement, list(records.itertuples(index=False, name=None)))


# drivers table
load_frame(cur, 'drivers', [
    'meeting_key', 'session_key', 'driver_number', 'broadcast_name', 'full_name',
    'name_acronym', 'team_name', 'team_colour', 'first_name', 'last_name',
    'headshot_url', 'country_code',
], df_drivers)
# One commit per table instead of one per row.
conn.commit()

# meetings table
load_frame(cur, 'meetings', [
    'meeting_key', 'meeting_name', 'meeting_official_name', 'location',
    'country_key', 'country_code', 'country_name', 'circuit_key',
    'circuit_short_name', 'date_start', 'gmt_offset', 'year',
    'meeting_code',
], df_meetings)
conn.commit()

# weather table
load_frame(cur, 'weather', [
    'meeting_key', 'session_key', 'air_temperature', 'humidity', 'pressure',
    'rainfall', 'track_temperature', 'wind_direction', 'wind_speed', 'date',
], df_weather)
conn.commit()

In [None]:
# check values in table drivers
# Fetch every row of the drivers table and show it as a DataFrame with default
# positional column labels.
cur.execute("SELECT * FROM drivers;")
drivers_rows = cur.fetchall()
drivers_preview = pd.DataFrame(drivers_rows)
print(drivers_preview)


In [None]:
# check values in table meetings
cur.execute("SELECT * FROM meetings;")
# Column labels come from the cursor metadata, so they always match the table
# definition instead of a hand-maintained list.
meetings_columns = [column[0] for column in cur.description]
# Bound to meetings_preview so the drivers preview from the previous cell is not
# overwritten with meetings data.
meetings_preview = pd.DataFrame(cur.fetchall(), columns=meetings_columns)
print(meetings_preview)

In [None]:
# join drivers, meetings and weather tables 
# Weather is matched on BOTH meeting_key and session_key, the same keys the pandas
# merge at the top of the notebook uses. Joining on meeting_key alone pairs every
# driver/session row with the weather readings of every session in that meeting.
cur.execute("""
    DROP TABLE IF EXISTS f1;     
    
    CREATE TABLE IF NOT EXISTS f1 AS
    SELECT 
        drivers.meeting_key AS meeting_key, 
        drivers.session_key AS session_key, 
        drivers.driver_number,
        drivers.broadcast_name,
        drivers.full_name,
        drivers.name_acronym,
        drivers.team_name,
        drivers.team_colour,
        drivers.first_name,
        drivers.last_name,
        drivers.headshot_url,
        drivers.country_code AS driver_country_code,
        drivers."registered",
            
        meetings.meeting_name,
        meetings.meeting_official_name,
        meetings.location,
        meetings.country_key,
        meetings.country_code AS meeting_country_code,
        meetings.country_name,
        meetings.circuit_key,
        meetings.circuit_short_name,
        meetings.date_start,
        meetings.gmt_offset,
        meetings.year,
        meetings.meeting_code,

        weather.air_temperature,
        weather.humidity,
        weather.pressure,
        weather.rainfall,
        weather.track_temperature,
        weather.wind_direction,
        weather.wind_speed,
        weather.date AS weather_date
        
    FROM 
        drivers
    JOIN 
        meetings ON drivers.meeting_key = meetings.meeting_key
    JOIN 
        weather ON drivers.meeting_key = weather.meeting_key
               AND drivers.session_key = weather.session_key;
""")

conn.commit()

# check if table was created successfully
cur.execute("SELECT * FROM f1 LIMIT 5;")
print(cur.fetchall())

# Close the cursor and connection
cur.close()
conn.close()