In [None]:
import os

import fastf1
import pandas as pd

In [None]:
# FastF1 raises when asked to use a cache directory that does not exist yet,
# so make sure it is there before enabling the cache (fresh checkouts have none).
os.makedirs('cache', exist_ok=True)
fastf1.Cache.enable_cache('cache')  # Specify a directory for caching

def get_track_lengths(season):
    """
    Retrieve the race lap count for every round of a Formula 1 season.

    Despite the function name and the "Track Length (m)" column label, the
    value stored for each race is ``session.total_laps`` (a lap count), not a
    distance. The label is kept unchanged so existing callers that rename the
    column keep working.

    Args:
        season (int): The year of the season to query.
    Returns:
        pandas.DataFrame: One row per race whose session could be loaded, with
        the columns "Race" (event name) and "Track Length (m)" (the session's
        ``total_laps``). Both columns exist even when nothing could be loaded,
        so column access on an empty result does not raise.
    """
    columns = ["Race", "Track Length (m)"]

    try:
        schedule = fastf1.get_event_schedule(season)  # Get the schedule for the season
    except Exception as e:
        print(f"Error retrieving schedule for season {season}: {e}")
        return pd.DataFrame(columns=columns)

    track_data = []

    for _, event in schedule.iterrows():
        # Round 0 entries are skipped (presumably pre-season testing — confirm).
        if event['RoundNumber'] == 0:
            continue
        try:
            event_data = fastf1.get_event(season, event['RoundNumber'])
            session_data = event_data.get_session('R')
            # Only the lap data is needed for total_laps; skipping telemetry,
            # weather and race-control messages keeps a 20+ race loop fast.
            session_data.load(laps=True, telemetry=False, weather=False, messages=False)
            track_data.append({
                "Race": event['EventName'],
                "Track Length (m)": session_data.total_laps
            })
        except Exception as e:
            # Best effort: report the failing race and continue with the rest.
            print(f"Error retrieving track length for {event['EventName']}: {e}")

    return pd.DataFrame(track_data, columns=columns)

In [None]:
# Load the 2024 per-race values and relabel both columns in a single pass:
# "Track Length (m)" becomes "TotalLaps" and "Race" becomes "Grand Prix".
df = get_track_lengths(2024).rename(
    columns={'Track Length (m)': 'TotalLaps', 'Race': 'Grand Prix'}
)
# Trim surrounding whitespace so the names can be used as a merge key.
df['Grand Prix'] = df['Grand Prix'].str.strip()

In [None]:
# Read every HTML table from the 2024 season article.
url = 'https://en.wikipedia.org/wiki/2024_Formula_One_World_Championship'
html_dfs = pd.read_html(url)

# The table at position 2 is used; the row labelled 24 is removed and "Round"
# becomes the index.
# NOTE(review): both lookups are positional — presumably the calendar table and
# its trailing non-race row; confirm if the page layout changes.
calendar_table = html_dfs[2]
html_df = calendar_table.drop(24).set_index('Round')

# Each "Circuit" cell is split on commas and every piece is stripped; the second
# piece is stored as "Location" and the first piece replaces "Circuit".
circuit_parts = html_df['Circuit'].apply(
    lambda cell: [piece.strip() for piece in cell.split(',')]
)
html_df['Location'] = circuit_parts.apply(lambda parts: parts[1])
html_df['Circuit'] = circuit_parts.apply(lambda parts: parts[0])

html_df

In [None]:
# Read every HTML table from the list-of-circuits article.
circuit_url = 'https://en.wikipedia.org/wiki/List_of_Formula_One_circuits'
circuits_dfs = pd.read_html(circuit_url)

# The table at position 2 is used as the circuit list.
# NOTE(review): positional lookup — confirm it still points at the right table.
circuit_df = circuits_dfs[2].copy()

# Keep only ASCII letters, digits and spaces in the circuit name. The character
# class must use `A-Z`: the range `A-z` also covers the punctuation that sits
# between 'Z' and 'a' in ASCII ([ \ ] ^ _ `), so those characters were not removed.
circuit_df['Circuit'] = circuit_df['Circuit'].str.replace(r'[^0-9a-zA-Z ]+', '', regex=True)

# The text before the first non-breaking space of "Last length used" is parsed
# as a float and stored as Length_km.
circuit_df['Length_km'] = circuit_df['Last length used'].apply(lambda x: float(x.split('\xa0')[0].strip()))

# "Grands Prix" is a comma-separated list; explode it so each Grand Prix name
# gets its own row.
circuit_df['Grand Prix'] = circuit_df['Grands Prix'].apply(lambda x: [y.strip() for y in x.split(',')])
circuit_df = circuit_df.explode('Grand Prix')

# Keep circuits whose "Season(s)" text mentions 2024 or 2023.
# NOTE(review): the flag is named '2024' but also accepts 2023 — presumably a
# deliberate allowance; confirm it is still wanted.
circuit_df['2024'] = circuit_df['Season(s)'].apply(lambda x: '2024' in x or '2023' in x)
circuit_df = circuit_df[circuit_df['2024']]
circuit_df = circuit_df[['Type','Direction', 'Location', 'Country', 'Length_km', 'Turns', 'Grand Prix']]

In [None]:
import bs4 as bs
import requests

url = 'https://en.wikipedia.org/wiki/2024_Formula_One_World_Championship'
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status stops an HTTP error page from being parsed as if it were the article.
r = requests.get(url, timeout=30)
r.raise_for_status()
soup = bs.BeautifulSoup(r.content, 'lxml')
# The third <table> element of the page is used.
# NOTE(review): positional lookup — presumably the season calendar; confirm.
parsed_table = soup.find_all('table')[2]

In [None]:
# Collect one record per table cell whose first link points at a Grand Prix
# article: the absolute article URL and the link's title attribute.
gp_links = []
for td in parsed_table.find_all('td'):
    if not td.find('a'):
        continue
    if 'Grand_Prix' not in td.a['href']:
        continue
    gp_links.append({
        'Url': 'https://en.wikipedia.org' + td.a['href'],
        'Grand Prix': td.a['title'],
    })

soup_df = pd.DataFrame(gp_links)
# Round is the 1-based position of the link in the table, stored as text.
soup_df['Round'] = (soup_df.index + 1).astype(str)
soup_df

In [None]:
# Attach circuit details to each calendar row, matching on the Grand Prix name.
calendar_rows = html_df.reset_index()
merged_df = pd.merge(calendar_rows, circuit_df, how='left', on=['Grand Prix'])

# Remove the pairing of a Monza calendar row with an Imola circuit row.
is_monza_row = merged_df['Location_x'] == 'Monza'
is_imola_circuit = merged_df['Location_y'] == 'Imola'
merged_df = merged_df[~(is_monza_row & is_imola_circuit)]
merged_df

In [None]:
# Add the article URL for each round, then discard the duplicate columns coming
# from the right-hand frames and restore the plain column names.
merged_df = (
    merged_df
    .merge(soup_df, how='left', on='Round')
    .drop(columns=['Grand Prix_y', 'Location_y'])
    .rename(columns={'Grand Prix_x': 'Grand Prix', 'Location_x': 'Location'})
)

In [None]:
# Left-join the per-race values from `df` onto the calendar by Grand Prix name;
# rows without a match keep missing values in the joined columns.
merged_df = pd.merge(merged_df, df, how='left', on='Grand Prix')

In [None]:
import numpy as np
# Fallback lap count for rows without a TotalLaps value: the number of laps
# needed to cover RACE_DISTANCE_KM, rounded up.
# NOTE(review): this is an approximation — races scheduled over a different
# distance will be off; verify any row that relies on the fallback.
RACE_DISTANCE_KM = 305
laps_calc = np.ceil(RACE_DISTANCE_KM / merged_df['Length_km'])

# Vectorised fill instead of a row-wise apply with np.isnan: fillna also copes
# with None values and with an empty frame, where the row-wise version fails.
merged_df['Total Laps'] = merged_df['TotalLaps'].fillna(laps_calc)
merged_df = merged_df.drop(['TotalLaps'], axis=1)
merged_df

In [None]:
merged_df.to_csv('track_data.csv', index=False)

In [None]:
from openai import OpenAI
import requests

# OpenAI API key: read from the OPENAI_API_KEY environment variable so the
# secret never has to be pasted into (and saved with) the notebook.
# Falls back to an empty string when the variable is not set.
key = os.environ.get('OPENAI_API_KEY', '')

def get_url_content(url, timeout=30):
    """
    Download a page and return the HTML of its first infobox element.

    Args:
        url (str): Address of the page to fetch.
        timeout (float): Seconds to wait for the server before giving up, so a
            stalled connection cannot block the caller indefinitely.
    Returns:
        str | None: The first element with class "infobox" rendered as a string,
        or None when the request fails, the response has an error status, or the
        page has no infobox. Failures are reported with print, not raised.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        soup = bs.BeautifulSoup(response.content, 'lxml')
        infoboxes = soup.find_all(class_='infobox')
        if not infoboxes:
            # Explicit message instead of an opaque "list index out of range".
            print(f"Error fetching URL content: no infobox found at {url}")
            return None
        return str(infoboxes[0])
    except Exception as e:
        # Best effort: any failure is reported and turned into None.
        print(f"Error fetching URL content: {e}")
        return None

def get_fun_facts(text, num_facts=5, model="gpt-4o"):
    """
    Ask an OpenAI chat model to extract fun facts from a piece of text.

    Args:
        text (str): Source text inserted into the prompt.
        num_facts (int): Number of facts requested in the prompt.
        model (str): Chat model name passed to the API.
    Returns:
        str | dict: The raw message content returned by the model (a string that
        is asked to be JSON with keys "Fact1", "Fact2", ...). When the API call
        raises, the error is printed and an empty dict is returned instead, so
        callers must be prepared for either type.
    """
    # The key names spelled out here ("Fact1", "Fact2") must match the column
    # names read later from the parsed JSON, so they are written without spaces.
    prompt = (
        f"Extract {num_facts} fun and interesting facts such as number of times held, driver with most wins, fastest lap, track built date, etc. from the following text:\n\n{text}\n\n"
        """Please return the facts in JSON format where the keys are "Fact1", "Fact2", and so on"""
    )
    client = OpenAI(api_key=key)
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=4000,
            temperature=0.50,
        )
        return response.choices[0].message.content
    except Exception as e:
        # Best effort: report the failure and hand back an empty result.
        print(f"Error generating fun facts: {e}")
        return {}

def fetch_and_generate_facts(url):
    """
    Fetch a page's infobox and turn it into fun facts.

    Returns an empty dict when no content could be fetched; otherwise returns
    whatever get_fun_facts produces for the first 4000 characters of the content.
    """
    page_html = get_url_content(url)
    if page_html:
        # Only the leading 4000 characters are forwarded to the fact generator.
        return get_fun_facts(page_html[:4000])
    return {}

import time

# Walk over every row of merged_df as a plain dict, generate facts from the
# row's Url, and keep the enriched rows in new_df.
new_df = []
for record in merged_df.to_dict(orient='records'):
    print(record['Round'])
    raw_facts = fetch_and_generate_facts(record['Url'])
    print(raw_facts, type(raw_facts))
    # Store the raw result alongside the row's existing fields.
    new_df.append({**record, 'Fun_fact_dict': raw_facts})
    # Wait five seconds before handling the next row.
    time.sleep(5)



In [None]:
import json

def _parse_fun_facts(raw):
    """Convert one stored fact result into a dict; return {} when it cannot be parsed."""
    # The fact generator hands back an empty dict on failure instead of a string.
    if isinstance(raw, dict):
        return raw
    if not isinstance(raw, str):
        return {}
    # Drop Markdown code fences and newlines that may wrap the JSON payload.
    cleaned = raw.replace('```json', '').replace('```', '').replace('\n', '')
    try:
        parsed = json.loads(cleaned)
    except json.JSONDecodeError as e:
        # One malformed reply should not abort the export of every other row.
        print(f"Error parsing fun facts: {e}")
        return {}
    return parsed if isinstance(parsed, dict) else {}


# Merge the parsed facts into each row, then build the final frame.
new_df = [row | _parse_fun_facts(row['Fun_fact_dict']) for row in new_df]
new_df = pd.DataFrame(new_df)
new_df = new_df.drop(columns=['Fun_fact_dict'], errors='ignore')
for column in [f'Fact{i}' for i in range(1, 6)]:
    # A fact column is absent when no row produced that key; skip it then.
    if column in new_df.columns:
        new_df[column] = new_df[column].str.replace('constructor', 'team', regex=False)

# Written to the same file name as the earlier export of merged_df.
new_df.to_csv('track_data.csv', index=False)