In [1]:
# Import dependencies
import pandas as pd
import numpy as np
from pprint import pprint
import requests

from selenium import webdriver
import bs4
from bs4 import BeautifulSoup
import time

In [1]:
import requests
import pandas as pd

def fetch_race_data():
    races_data = {
        'season': [],
        'round': [],
        'circuit_id': [],
        'lat': [],
        'long': [],
        'country': [],
        'date': [],
        'url': []
    }
    for year in range(1950, 2024):
        url = f'https://ergast.com/api/f1/{year}.json'
        response = requests.get(url)
        data = response.json()
        
        races_data = data.get('MRData', {}).get('RaceTable', {}).get('Races', [])
        
        for item in races_data:
            race_info = {
                'season': int(item.get('season')),
                'round': int(item.get('round')),
                'circuit_id': item['Circuit'].get('circuitId'),
                'lat': float(item['Circuit']['Location'].get('lat')),
                'long': float(item['Circuit']['Location'].get('long')),
                'country': item['Circuit']['Location'].get('country'),
                'date': item.get('date'),
                'url': item.get('url')
            }
            races_data['season'].append(race_info['season'])
            races_data['round'].append(race_info['round'])
            races_data['circuit_id'].append(race_info['circuit_id'])
            races_data['lat'].append(race_info['lat'])
            races_data['long'].append(race_info['long'])
            races_data['country'].append(race_info['country'])
            races_data['date'].append(race_info['date'])
            races_data['url'].append(race_info['url'])

    races_df = pd.DataFrame(races_data)
    return races_df
races_data = fetch_race_data()
print(races_data.shape)


(1101, 8)


In [2]:
races_data.to_csv('data2/races_data.csv', index = False)

In [2]:
read_race_data = pd.read_csv('data2/races_data.csv')

In [3]:
seasons = []
for year in np.array(read_race_data.season.unique()):
    seasons.append([year, list(read_race_data[read_race_data.season == year]['round'])])

In [4]:
import requests
import pandas as pd

def fetch_race_results(rounds):
    results = []

    for season, round_numbers in rounds:
        for rnd in round_numbers:
            url = f'http://ergast.com/api/f1/{season}/{rnd}/results.json'
            response = requests.get(url)
            data = response.json()
            
            race_results = data.get('MRData', {}).get('RaceTable', {}).get('Races', [{}])[0].get('Results', [])
            
            for item in race_results:
                driver_info = item.get('Driver', {})
                constructor_info = item.get('Constructor', {})
                time_info = item.get('Time', {})
                
                result_info = {
                    'season': int(season),
                    'round': int(rnd),
                    'circuit_id': data['MRData']['RaceTable']['Races'][0]['Circuit'].get('circuitId'),
                    'driver': driver_info.get('driverId'),
                    'date_of_birth': driver_info.get('dateOfBirth'),
                    'nationality': driver_info.get('nationality'),
                    'constructor': constructor_info.get('constructorId'),
                    'grid': int(item.get('grid')) if item.get('grid') is not None else None,
                    'time': int(time_info.get('millis')) if time_info.get('millis') is not None else None,
                    'status': item.get('status'),
                    'points': float(item.get('points')) if item.get('points') is not None else None,  # Convert to float
                    'podium': int(item.get('position')) if item.get('position') is not None else None,
                    'url': data['MRData']['RaceTable']['Races'][0].get('url')
                }
                results.append(result_info)

    results_df = pd.DataFrame(results)
    return results_df

# Call the function with the rounds data and print the shape of the resulting DataFrame
results_data = fetch_race_results(seasons)
print(results_data.shape)


(25827, 13)


In [6]:
results_data.to_csv('data2/results.csv', index = False)

In [8]:
import requests
import pandas as pd

def fetch_driver_standings(rounds):
    driver_standings = []

    for season, round_numbers in rounds:
        for rnd in round_numbers:
            url = f'https://ergast.com/api/f1/{season}/{rnd}/driverStandings.json'
            response = requests.get(url)
            data = response.json()
            
            standings_data = data.get('MRData', {}).get('StandingsTable', {}).get('StandingsLists', [{}])[0].get('DriverStandings', [])
            
            for item in standings_data:
                driver_info = item.get('Driver', {})
                
                standings_info = {
                    'season': int(data['MRData']['StandingsTable']['StandingsLists'][0].get('season')),
                    'round': int(data['MRData']['StandingsTable']['StandingsLists'][0].get('round')),
                    'driver': driver_info.get('driverId'),
                    'driver_points': float(item.get('points')) if item.get('points') is not None else None,  # Convert to float
                    'driver_wins': float(item.get('wins')) if item.get('wins') is not None else None,  # Convert to float
                    'driver_standings_pos': int(item.get('position')) if item.get('position') is not None else None
                }
                driver_standings.append(standings_info)

    driver_standings_df = pd.DataFrame(driver_standings)
    return driver_standings_df

# Call the function with the rounds data and print the shape of the resulting DataFrame
driver_standings_data = fetch_driver_standings(seasons)
print(driver_standings_data.shape)


(28042, 6)


In [9]:
def point_shift (df, driver_or_team, points):
    df['lookup1'] = df.season.astype(str) + df[driver_or_team] + df['round'].astype(str) # current round
    df['lookup2'] = df.season.astype(str) + df[driver_or_team] + (df['round']-1).astype(str) # previous round
    new_df = df.merge(df[['lookup1', points]], how = 'left', left_on='lookup2',right_on='lookup1') # shifted one round
    new_df.drop(['lookup1_x', 'lookup2', 'lookup1_y'], axis = 1, inplace = True) # drop unnecessary columns
    new_df.rename(columns = {points+'_x': points+'_after_race', points+'_y': points}, inplace = True) # rename before/after race
    new_df[points].fillna(0, inplace = True)
    return new_df

In [11]:
driver_standings_data = point_shift(driver_standings_data, 'driver', 'driver_points')
driver_standings_data = point_shift(driver_standings_data, 'driver', 'driver_wins')
driver_standings_data = point_shift(driver_standings_data, 'driver', 'driver_standings_pos')
driver_standings_data.tail(3)

Unnamed: 0,season,round,driver,driver_points_after_race,driver_wins_after_race,driver_standings_pos_after_race,driver_points,driver_wins,driver_standings_pos
28039,2023,22,lawson,2.0,0.0,20,2.0,0.0,20.0
28040,2023,22,sargeant,1.0,0.0,21,1.0,0.0,21.0
28041,2023,22,de_vries,0.0,0.0,22,0.0,0.0,22.0


In [12]:
driver_standings_data.to_csv('data2/driver_position_data.csv', index = False)

In [14]:
import requests
import pandas as pd

def fetch_constructor_standings(rounds):
    constructor_standings = []

    for season, round_numbers in rounds:
        for rnd in round_numbers:
            url = f'https://ergast.com/api/f1/{season}/{rnd}/constructorStandings.json'
            response = requests.get(url)
            data = response.json()

            standings_data = data.get('MRData', {}).get('StandingsTable', {}).get('StandingsLists', [{}])[0].get('ConstructorStandings', [])

            for item in standings_data:
                constructor_info = item.get('Constructor', {})

                standings_info = {
                    'season': int(data['MRData']['StandingsTable']['StandingsLists'][0].get('season')),
                    'round': int(data['MRData']['StandingsTable']['StandingsLists'][0].get('round')),
                    'constructor': constructor_info.get('constructorId'),
                    'constructor_points': float(item.get('points')) if item.get('points') is not None else None,
                    'constructor_wins': float(item.get('wins')) if item.get('wins') is not None else None,
                    'constructor_standings_pos': int(item.get('position')) if item.get('position') is not None else None
                }
                constructor_standings.append(standings_info)

    constructor_standings_df = pd.DataFrame(constructor_standings)
    return constructor_standings_df

# Define constructor rounds starting from year 1958
constructor_rounds = seasons[8:]

# Fetch constructor standings data
constructor_standings_data = fetch_constructor_standings(constructor_rounds)

# Print the shape of the resulting DataFrame
print(constructor_standings_data.shape)


(13151, 6)


In [15]:
# Again, using the point_shift function for constructor points

constructor_standings_data = point_shift(constructor_standings_data, 'constructor', 'constructor_points')
constructor_standings_data = point_shift(constructor_standings_data, 'constructor', 'constructor_wins')
constructor_standings_data = point_shift(constructor_standings_data, 'constructor', 'constructor_standings_pos')
constructor_standings_data.tail(3)

Unnamed: 0,season,round,constructor,constructor_points_after_race,constructor_wins_after_race,constructor_standings_pos_after_race,constructor_points,constructor_wins,constructor_standings_pos
13148,2023,22,alphatauri,25.0,0.0,8,21.0,0.0,8.0
13149,2023,22,alfa,16.0,0.0,9,16.0,0.0,9.0
13150,2023,22,haas,12.0,0.0,10,12.0,0.0,10.0


In [16]:
constructor_standings_data.to_csv('data2/constructor_position_data.csv', index = False)