In [13]:
# Season to download; passed as the `year` argument to every fetch call in the driver cell.
year_data = 2025

In [14]:
import requests
import boto3
import pandas as pd
from io import StringIO
from dotenv import load_dotenv
import os
#import tabulate

In [None]:
# Load variables from a .env file (when one is found) into the process environment.
load_dotenv()
# Destination bucket name; os.getenv returns None when S3_BUCKET_F1 is not set.
s3_bucket_f1 = os.getenv('S3_BUCKET_F1')

In [None]:
# Base URL of the Jolpica (Ergast-compatible) F1 API, deliberately WITHOUT a
# trailing slash: every fetch function builds its URL as
# f"{ergast_base_url}/{year}/...", so a trailing slash here would produce
# ".../f1//2025/..." request paths.
# API docs: https://github.com/jolpica/jolpica-f1/blob/main/docs/README.md
ergast_base_url = "https://api.jolpi.ca/ergast/f1"

def get_drivers_df(year):
    """Fetch the driver list of one season as a flat DataFrame.

    Args:
        year: Season identifier interpolated into the request path (e.g. 2025).

    Returns:
        pandas.DataFrame built by ``pd.json_normalize`` from the
        ``MRData.DriverTable.Drivers`` list of the JSON response.

    Raises:
        requests.HTTPError: The API answered with a 4xx/5xx status.
        KeyError: The JSON payload does not have the expected structure.
    """
    # Tolerate a base URL with or without a trailing slash so the request
    # path never contains "//".
    base_url = ergast_base_url.rstrip('/')
    url = f"{base_url}/{year}/drivers/"
    # The API paginates list endpoints (documented default page size 30,
    # maximum 100 - confirm against the Jolpica docs); ask for the largest
    # page so a big driver roster is not silently cut short.
    response = requests.get(url, params={"limit": 100}, timeout=30)
    # Fail loudly on HTTP errors instead of a confusing KeyError further down.
    response.raise_for_status()
    drivers_list = response.json()['MRData']['DriverTable']['Drivers']
    return pd.json_normalize(drivers_list)

def get_constructors_df(year):
    """Fetch the constructor list of one season as a flat DataFrame.

    Args:
        year: Season identifier interpolated into the request path (e.g. 2025).

    Returns:
        pandas.DataFrame built by ``pd.json_normalize`` from the
        ``MRData.ConstructorTable.Constructors`` list of the JSON response.

    Raises:
        requests.HTTPError: The API answered with a 4xx/5xx status.
        KeyError: The JSON payload does not have the expected structure.
    """
    # Tolerate a base URL with or without a trailing slash so the request
    # path never contains "//".
    base_url = ergast_base_url.rstrip('/')
    url = f"{base_url}/{year}/constructors/"
    # Request the largest page the API is documented to allow (100 - confirm
    # against the Jolpica docs) rather than relying on the default page size.
    response = requests.get(url, params={"limit": 100}, timeout=30)
    # Fail loudly on HTTP errors instead of a confusing KeyError further down.
    response.raise_for_status()
    constructors_list = response.json()['MRData']['ConstructorTable']['Constructors']
    return pd.json_normalize(constructors_list)

def get_calendar_df(year):
    """Fetch the race calendar of one season as a flat DataFrame.

    Args:
        year: Season identifier interpolated into the request path (e.g. 2025).

    Returns:
        pandas.DataFrame built by ``pd.json_normalize`` from the
        ``MRData.RaceTable.Races`` list of the JSON response. Nested fields
        become dot-separated column names.

    Raises:
        requests.HTTPError: The API answered with a 4xx/5xx status.
        KeyError: The JSON payload does not have the expected structure.
    """
    # Tolerate a base URL with or without a trailing slash so the request
    # path never contains "//".
    base_url = ergast_base_url.rstrip('/')
    url = f"{base_url}/{year}"
    response = requests.get(url, timeout=30)
    # Fail loudly on HTTP errors instead of a confusing KeyError further down.
    response.raise_for_status()
    races_list = response.json()['MRData']['RaceTable']['Races']
    return pd.json_normalize(races_list)

def get_races_results_df(year, year_races_calendar_df):
    """Collect the race results of every completed round of a season.

    Rounds are requested in order, from 1 up to the number of rows in the
    calendar. Collection stops at the first round whose payload carries no
    results (presumably a race that has not been run yet - confirm against
    the API).

    NOTE(review): none of the selected columns identifies the round, so rows
    of different races can only be told apart by their order in the frame.

    Args:
        year: Season identifier interpolated into the request path.
        year_races_calendar_df: Calendar frame for the season; only the
            length of its ``.index`` is used.

    Returns:
        pandas.DataFrame with one row per classified entry and the fixed set
        of columns listed in ``result_columns``. A column the API did not
        provide is present and filled with NaN; a season without any results
        yields an empty frame with those columns.

    Raises:
        requests.HTTPError: The API answered with a 4xx/5xx status (for
            example when rate limited); raised rather than silently
            returning a truncated season.
    """
    # 'position' was listed twice originally, which duplicated the column.
    result_columns = [
        'number', 'position', 'positionText', 'points', 'grid', 'laps', 'status',
        'Driver.permanentNumber', 'Driver.code',
        'Constructor.constructorId',
        'Time.millis', 'Time.time',
        'FastestLap.rank', 'FastestLap.lap', 'FastestLap.Time.time',
    ]
    season_length = len(year_races_calendar_df.index)
    # Tolerate a base URL with or without a trailing slash (no "//" in paths).
    base_url = ergast_base_url.rstrip('/')

    round_frames = []
    for round_index in range(1, season_length + 1):
        url = f"{base_url}/{year}/{round_index}/results"
        # Ask for the largest documented page (100 - confirm against the
        # Jolpica docs) so a large field is not cut at the default page size.
        response = requests.get(url, params={"limit": 100}, timeout=30)
        response.raise_for_status()
        try:
            round_results = response.json()['MRData']['RaceTable']['Races'][0]['Results']
        except (ValueError, KeyError, IndexError, TypeError):
            # No usable results for this round: stop here, keep what we have.
            break
        round_frames.append(pd.json_normalize(round_results))

    if not round_frames:
        return pd.DataFrame(columns=result_columns)

    # Concatenate once (not per iteration) and use reindex so that a missing
    # optional column becomes NaN instead of raising KeyError.
    races_df = pd.concat(round_frames, ignore_index=True)
    return races_df.reindex(columns=result_columns)

def upload_to_s3(bucket, key, dataframe):
    """Store ``dataframe`` in S3 as CSV text, written without the index.

    Args:
        bucket: Name of the destination bucket.
        key: Object key to write under.
        dataframe: Frame to serialise via ``to_csv``.
    """
    client = boto3.client('s3')
    # Called without a target, to_csv returns the CSV content as a string.
    payload = dataframe.to_csv(index=False)
    client.put_object(Body=payload, Key=key, Bucket=bucket)


In [None]:
if __name__ == "__main__":
    # Season reference tables.
    year_drivers_df = get_drivers_df(year=year_data)
    year_constructors_df = get_constructors_df(year=year_data)

    # The calendar comes first: its row count tells the results fetch how
    # many rounds to request.
    year_races_calendar_df = get_calendar_df(year=year_data)
    year_data_df = get_races_results_df(
        year=year_data,
        year_races_calendar_df=year_races_calendar_df,
    )

    # Optional persistence step, currently disabled:
    #upload_to_s3(s3_bucket_f1, f"raw/{year_data}/year_drivers_df.csv", year_drivers_df)

In [None]:
#print(year_drivers_df.to_markdown())
#print(year_constructors_df.to_markdown())
#print(year_races_calendar_df.to_markdown())

In [None]:
#print(year_data_df.to_markdown())