In [1]:
# import libraries
import requests
import pandas as pd
import psycopg2
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import os
from dotenv import load_dotenv

In [None]:
# extract F1 data from the OpenF1 API
OPENF1_BASE_URL = 'https://api.openf1.org/v1'
REQUEST_TIMEOUT_S = 60  # fail instead of hanging the kernel on a stalled connection


def fetch_openf1(endpoint):
    """Return the decoded JSON payload of one OpenF1 endpoint.

    Parameters
    ----------
    endpoint : str
        Path segment below the API root, e.g. ``'drivers'``.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status, so that an error body is
        never silently turned into a DataFrame.
    requests.Timeout
        If no response arrives within ``REQUEST_TIMEOUT_S`` seconds.
    """
    response = requests.get(f'{OPENF1_BASE_URL}/{endpoint}', timeout=REQUEST_TIMEOUT_S)
    response.raise_for_status()
    return response.json()


drivers_api = fetch_openf1('drivers')
meetings_api = fetch_openf1('meetings')
weather_api = fetch_openf1('weather')

# convert to DataFrames
drivers_df = pd.DataFrame(drivers_api)
meetings_df = pd.DataFrame(meetings_api)
weather_df = pd.DataFrame(weather_api)

# join DataFrames: drivers -> meetings on the meeting, then -> weather on
# meeting + session. Default merge suffixes are kept on purpose because the
# loading cell reads the suffixed column 'country_code_x'.
f1_df = (
    drivers_df
    .merge(meetings_df, on='meeting_key', how='inner')
    .merge(weather_df, on=['meeting_key', 'session_key'], how='inner')
)


In [None]:
# preview columns of the merged frame; useful for spotting merge-suffixed
# names (the loading cell reads 'country_code_x', for example)
f1_df.columns

In [None]:
# connect to Amazon RDS and create the landing table for the joined API data
load_dotenv()  # pull the DB_* settings from a local .env file into os.environ
conn = psycopg2.connect(
            host=os.getenv('DB_HOST'),
            dbname=os.getenv('DB_NAME'),
            user=os.getenv('DB_USER'),
            password=os.getenv('DB_PASSWORD'),
            port=os.getenv('DB_PORT')
            )
cur = conn.cursor()
cur.execute("""
CREATE TABLE IF NOT EXISTS f1_data (
    meeting_key INT,
    session_key INT,
    driver_number INT,
    full_name TEXT,
    team_name TEXT,
    team_colour TEXT,
    headshot_url TEXT,
    country_code TEXT,
    meeting_name TEXT,
    meeting_official_name TEXT,
    location TEXT,
    country_name TEXT,
    circuit_short_name TEXT,
    date_start DATE,
    gmt_offset TEXT,
    year INT,
    meeting_code TEXT,
    air_temperature FLOAT,
    humidity FLOAT,
    pressure FLOAT,
    rainfall FLOAT,
    track_temperature FLOAT,
    wind_direction TEXT,
    wind_speed FLOAT,
    date_weather TIMESTAMP
);
""")
# psycopg2 wraps statements in a transaction; commit here so the table exists
# for other sessions and survives a failure in a later cell.
conn.commit()

# Preview table
query = "SELECT * FROM f1_data LIMIT 10;"
df = pd.read_sql(query, conn)
print(df.head())

In [None]:
# load API data to RDS
MAX_ROWS_TO_LOAD = 10000  # upper bound on the number of joined rows written to f1_data
SAMPLE_SEED = 42          # fixed seed so a re-run loads the same sample

# DataFrame columns in the exact order of the INSERT column list below.
# Two names differ from their target column: 'country_code_x' (merge suffix)
# feeds country_code and 'date' feeds date_weather.
source_columns = [
    'meeting_key', 'session_key', 'driver_number', 'full_name', 'team_name', 'team_colour',
    'headshot_url', 'country_code_x', 'meeting_name', 'meeting_official_name', 'location',
    'country_name', 'circuit_short_name', 'date_start', 'gmt_offset', 'year', 'meeting_code',
    'air_temperature', 'humidity', 'pressure', 'rainfall', 'track_temperature', 'wind_direction',
    'wind_speed', 'date',
]

insert_sql = """
        INSERT INTO f1_data (
            meeting_key, session_key, driver_number, full_name, team_name, team_colour,
            headshot_url, country_code, meeting_name, meeting_official_name, location,
            country_name, circuit_short_name, date_start, gmt_offset, year, meeting_code,
            air_temperature, humidity, pressure, rainfall, track_temperature, wind_direction,
            wind_speed, date_weather
        )
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    """

# Cap the sample at the frame size: sample(10000) raises ValueError when the
# joined frame has fewer rows than requested.
load_df = f1_df.sample(n=min(MAX_ROWS_TO_LOAD, len(f1_df)), random_state=SAMPLE_SEED)

# astype(object) yields plain Python scalars (psycopg2 cannot adapt numpy
# integer types); missing values become None so they are stored as SQL NULL.
rows = [
    tuple(None if pd.isna(value) else value for value in record)
    for record in load_df[source_columns].astype(object).itertuples(index=False, name=None)
]

# The payload is fully built before the destructive TRUNCATE, and the whole
# reload is one transaction: either the new rows replace the old, or nothing changes.
try:
    cur.execute("""
        TRUNCATE TABLE f1_data;
""")
    cur.executemany(insert_sql, rows)
    conn.commit()
except Exception:
    conn.rollback()  # leave the connection usable for the next cell
    raise

# Preview table
query = "SELECT * FROM f1_data LIMIT 10;"
df = pd.read_sql(query, conn)
print(df.head())

In [None]:
# create mart: one row per (meeting, session, driver) with weather aggregates
# computed per meeting.
#
# NOTE(review): the CTE is named meeting_session_agg but groups by meeting_key
# only, and it aggregates over f1_data rows as loaded. If f1_data holds one row
# per driver x weather reading, every reading is counted once per driver row,
# which inflates SUM(rainfall) and weights the averages. Confirm this is intended.
try:
    cur.execute("""             
    DROP TABLE IF EXISTS f1_mart;""")
    cur.execute("""
    CREATE TABLE f1_mart AS
        WITH meeting_session_agg AS (
        SELECT
            meeting_key,
            AVG(air_temperature) AS avg_air_temperature,
            AVG(humidity) AS avg_humidity,
            AVG(pressure) AS avg_pressure,
            SUM(rainfall) AS total_rainfall,
            AVG(track_temperature) AS avg_track_temperature,
            AVG(wind_speed) AS avg_wind_speed,
            MIN(date_weather) AS first_recorded_weather
        FROM
            f1_data
        GROUP BY
            meeting_key
)             
        SELECT 
            f.meeting_key,
            f.session_key,
            f.full_name,
            MAX(f.driver_number) AS driver_number,
            COUNT(f.headshot_url) AS headshot_count,
            MAX(f.team_name) AS team_name,
            MAX(f.team_colour) AS team_colour,
            MAX(f.country_code) AS driver_country,
            MAX(f.meeting_name) AS meeting_name,
            MAX(f.meeting_official_name) AS meeting_official_name,
            MAX(f.location) AS location,
            MAX(f.country_name) AS country_name,
            MAX(f.circuit_short_name) AS circuit_short_name,
            MIN(f.date_start) AS date_start,
            MAX(f.gmt_offset) AS gmt_offset,
            MAX(f.year) AS year,
            MAX(f.meeting_code) AS meeting_code,
            m.avg_air_temperature,
            m.avg_humidity,
            m.avg_pressure,
            m.total_rainfall,
            m.avg_track_temperature,
            m.avg_wind_speed,
            m.first_recorded_weather

        FROM 
            f1_data AS f
        LEFT JOIN 
            meeting_session_agg AS m
        ON 
            f.meeting_key = m.meeting_key

        GROUP BY 
            f.meeting_key, f.session_key, f.full_name, 
            m.avg_air_temperature, m.avg_humidity, m.avg_pressure, 
            m.total_rainfall, m.avg_track_temperature, m.avg_wind_speed, m.first_recorded_weather

        ORDER BY 
            f.meeting_key, f.session_key;
        """)
    conn.commit()
except Exception:
    # A failed statement aborts the open transaction; roll back so the shared
    # connection stays usable and the previous f1_mart (if any) is kept.
    conn.rollback()
    raise

# Preview table. The ORDER BY is stated on the read because a table has no
# guaranteed row order, and this frame is what gets exported downstream.
query = "SELECT * FROM f1_mart ORDER BY meeting_key, session_key, full_name;"
f1_mart_df = pd.read_sql(query, conn)
print(f1_mart_df.head())

In [3]:
# create a Google Sheets spreadsheet with f1 data

# Build the complete payload BEFORE touching the sheet: if f1_mart_df is
# missing (e.g. the mart cell was not run in this kernel) the cell fails here,
# and the existing sheet contents are not wiped.
# The date columns are stringified on a copy so f1_mart_df is left unchanged
# and the cell can be re-run safely.
export_df = f1_mart_df.assign(
    date_start=f1_mart_df['date_start'].astype(str),
    first_recorded_weather=f1_mart_df['first_recorded_weather'].astype(str),
)
header_row = export_df.columns.tolist()
# NaN is not valid JSON, so missing values are written as blank cells.
data_rows = [
    ['' if pd.isna(cell) else cell for cell in record]
    for record in export_df.values.tolist()
]

scope = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
load_dotenv()  # GSHEET_CREDENTIALS holds the path to the service-account key file
creds = ServiceAccountCredentials.from_json_keyfile_name(os.getenv('GSHEET_CREDENTIALS'), scope)
client = gspread.authorize(creds)

# Open target Google Sheet
spreadsheet = client.open_by_url("https://docs.google.com/spreadsheets/d/1_ITQ17SUBtdiacBrHBdl8H84pn9We1OSr2_9xZHXdeA")
sheet = spreadsheet.sheet1

# Clear existing data, then write header + data rows in a single request
sheet.clear()
sheet.insert_rows([header_row] + data_rows, row=1)

NameError: name 'f1_mart_df' is not defined