In [None]:
#pobranie sezonow (era hybrydowa od 2014 roku na tej bazujemy)
import requests
import pandas as pd

url = "http://ergast.com/api/f1/seasons.json?limit=1000"
response = requests.get(url)
data = response.json()

seasons = data['MRData']['SeasonTable']['Seasons']

season_list = [int(season['season']) for season in seasons]

df_seasons = pd.DataFrame(season_list, columns=['season'])
year_of_hybrid_era = 2014
hybrid_era_seasons = [season for season in season_list if season >= year_of_hybrid_era]
hybrid_era_seasons

In [2]:
#pobranie danych o konstruktorach dla każdego z wczesniej pobranych sezonow 
from tqdm import tqdm

constructors_data = []

# Iteruj po sezonach
for season in tqdm(hybrid_era_seasons):
    url = f"http://ergast.com/api/f1/{season}/constructors.json?limit=100"
    response = requests.get(url)
    data = response.json()
    
    constructors = data['MRData']['ConstructorTable']['Constructors']
    
    for constructor in constructors:
        constructors_data.append({
            'season': season,
            'constructorId': constructor['constructorId'],
            'name': constructor['name'],
            'nationality': constructor['nationality']
        })

# Do DataFrame
df_constructors = pd.DataFrame(constructors_data)

# Przykładowe dane
df_constructors


100%|██████████| 11/11 [00:06<00:00,  1.68it/s]


Unnamed: 0,season,constructorId,name,nationality
0,2014,caterham,Caterham,Malaysian
1,2014,ferrari,Ferrari,Italian
2,2014,force_india,Force India,Indian
3,2014,lotus_f1,Lotus F1,British
4,2014,marussia,Marussia,Russian
...,...,...,...,...
107,2024,mercedes,Mercedes,German
108,2024,rb,RB F1 Team,Italian
109,2024,red_bull,Red Bull,Austrian
110,2024,sauber,Sauber,Swiss


In [3]:
import requests
import pandas as pd
from tqdm import tqdm
import time

# Punkty zespołu do danego GP
def get_constructor_points_upto_gp(season, round_, constructor_id):
    url = f"http://ergast.com/api/f1/{season}/{round_}/constructors/{constructor_id}/constructorStandings.json"
    response = requests.get(url)
    data = response.json()
    
    standings_list = data['MRData']['StandingsTable']['StandingsLists']
    if standings_list:
        for constructor in standings_list[0]['ConstructorStandings']:
            if constructor['Constructor']['constructorId'] == constructor_id:
                return float(constructor['points'])
    return 0.0


# Zliczanie zwycięstw konstruktorów

wins_data = []

for season in tqdm(hybrid_era_seasons, desc="Zliczanie zwycięstw"):
    for constructor_id in df_constructors[df_constructors['season'] == season]['constructorId'].unique():
        url = f"http://ergast.com/api/f1/{season}/constructors/{constructor_id}/results/1.json?limit=1000"
        response = requests.get(url)
        data = response.json()
        wins = len(data['MRData']['RaceTable']['Races'])
        wins_data.append({
            'season': season,
            'constructorId': constructor_id,
            'number_of_constructor_wins': wins
        })
        time.sleep(0.5)

df_wins = pd.DataFrame(wins_data)

df_final = df_constructors.merge(df_wins, on=['season', 'constructorId'], how='left')

df_final

Zliczanie zwycięstw: 100%|██████████| 11/11 [01:32<00:00,  8.39s/it]


Unnamed: 0,season,constructorId,name,nationality,number_of_constructor_wins
0,2014,caterham,Caterham,Malaysian,0
1,2014,ferrari,Ferrari,Italian,0
2,2014,force_india,Force India,Indian,0
3,2014,lotus_f1,Lotus F1,British,0
4,2014,marussia,Marussia,Russian,0
...,...,...,...,...,...
107,2024,mercedes,Mercedes,German,4
108,2024,rb,RB F1 Team,Italian,0
109,2024,red_bull,Red Bull,Austrian,9
110,2024,sauber,Sauber,Swiss,0


In [4]:
#pobranie kierowcow

def get_drivers_from_season(season):
    url = f"http://ergast.com/api/f1/{season}/1/results.json?limit=100"
    response = requests.get(url)
    data = response.json()

    races = data['MRData']['RaceTable']['Races']
    if not races:
        return pd.DataFrame()

    results = races[0]['Results']
    
    drivers = []
    for result in results:
        driver = result['Driver']
        constructor = result['Constructor']
        
        drivers.append({
            'season': season,
            'driverId': driver['driverId'],
            'givenName': driver['givenName'],
            'familyName': driver['familyName'],
            'fullName': f"{driver['givenName']} {driver['familyName']}",
            'dateOfBirth': driver['dateOfBirth'],
            'nationality': driver['nationality'],
            'constructorId': constructor['constructorId'],
            'constructorName': constructor['name']
        })

    return pd.DataFrame(drivers)

drivers_2023 = get_drivers_from_season(2023)
drivers_2023

Unnamed: 0,season,driverId,givenName,familyName,fullName,dateOfBirth,nationality,constructorId,constructorName
0,2023,max_verstappen,Max,Verstappen,Max Verstappen,1997-09-30,Dutch,red_bull,Red Bull
1,2023,perez,Sergio,Pérez,Sergio Pérez,1990-01-26,Mexican,red_bull,Red Bull
2,2023,alonso,Fernando,Alonso,Fernando Alonso,1981-07-29,Spanish,aston_martin,Aston Martin
3,2023,sainz,Carlos,Sainz,Carlos Sainz,1994-09-01,Spanish,ferrari,Ferrari
4,2023,hamilton,Lewis,Hamilton,Lewis Hamilton,1985-01-07,British,mercedes,Mercedes
5,2023,stroll,Lance,Stroll,Lance Stroll,1998-10-29,Canadian,aston_martin,Aston Martin
6,2023,russell,George,Russell,George Russell,1998-02-15,British,mercedes,Mercedes
7,2023,bottas,Valtteri,Bottas,Valtteri Bottas,1989-08-28,Finnish,alfa,Alfa Romeo
8,2023,gasly,Pierre,Gasly,Pierre Gasly,1996-02-07,French,alpine,Alpine F1 Team
9,2023,albon,Alexander,Albon,Alexander Albon,1996-03-23,Thai,williams,Williams


In [5]:
from datetime import datetime

def get_driver_career_stats(driver_id):
    url = f"http://ergast.com/api/f1/drivers/{driver_id}/results.json?limit=1000"
    response = requests.get(url)
    data = response.json()

    results = data['MRData']['RaceTable']['Races']
    if not results:
        return {
            'number_of_races': 0,
            'career_points': 0.0,
            'number_of_wins': 0,
            'number_of_podiums': 0,
            'debut_year': None
        }

    num_races = len(results)
    points = 0.0
    wins = 0
    podiums = 0
    debut_year = int(results[0]['season'])  # pierwszy sezon

    for race in results:
        result = race['Results'][0]
        position = result.get('position')
        points += float(result.get('points', 0))
        if position == '1':
            wins += 1
        if position and int(position) <= 3:
            podiums += 1

    return {
        'number_of_races': num_races,
        'career_points': points,
        'number_of_wins': wins,
        'number_of_podiums': podiums,
        'debut_year': debut_year
    }

career_stats = []

for _, row in drivers_2023.iterrows():
    stats = get_driver_career_stats(row['driverId'])
    career_stats.append(stats)

career_df = pd.DataFrame(career_stats)
drivers_with_career = pd.concat([drivers_2023, career_df], axis=1)

drivers_with_career

Unnamed: 0,season,driverId,givenName,familyName,fullName,dateOfBirth,nationality,constructorId,constructorName,number_of_races,career_points,number_of_wins,number_of_podiums,debut_year
0,2023,max_verstappen,Max,Verstappen,Max Verstappen,1997-09-30,Dutch,red_bull,Red Bull,100,905.0,7,29,2015
1,2023,perez,Sergio,Pérez,Sergio Pérez,1990-01-26,Mexican,red_bull,Red Bull,100,274.0,0,5,2011
2,2023,alonso,Fernando,Alonso,Fernando Alonso,1981-07-29,Spanish,aston_martin,Aston Martin,100,460.0,18,45,2001
3,2023,sainz,Carlos,Sainz,Carlos Sainz,1994-09-01,Spanish,ferrari,Ferrari,100,251.0,0,0,2015
4,2023,hamilton,Lewis,Hamilton,Lewis Hamilton,1985-01-07,British,mercedes,Mercedes,100,815.0,18,46,2007
5,2023,stroll,Lance,Stroll,Lance Stroll,1998-10-29,Canadian,aston_martin,Aston Martin,100,176.0,0,3,2017
6,2023,russell,George,Russell,George Russell,1998-02-15,British,mercedes,Mercedes,100,411.0,1,10,2019
7,2023,bottas,Valtteri,Bottas,Valtteri Bottas,1989-08-28,Finnish,alfa,Alfa Romeo,100,738.0,3,23,2013
8,2023,gasly,Pierre,Gasly,Pierre Gasly,1996-02-07,French,alpine,Alpine F1 Team,100,327.0,1,3,2017
9,2023,albon,Alexander,Albon,Alexander Albon,1996-03-23,Thai,williams,Williams,100,238.0,0,2,2019


In [6]:
def get_all_races(start_season=2014, end_season=2024):
    races = []

    for season in range(start_season, end_season + 1):
        url = f"http://ergast.com/api/f1/{season}.json"
        response = requests.get(url)
        data = response.json()

        season_races = data['MRData']['RaceTable']['Races']
        for race in season_races:
            races.append({
                'season': int(season),
                'round': int(race['round']),
                'raceName': race['raceName'],
                'date': race['date'],
                'circuitId': race['Circuit']['circuitId'],
                'circuitName': race['Circuit']['circuitName'],
                'country': race['Circuit']['Location']['country']
            })

    return pd.DataFrame(races)

# Pobierz wszystkie wyścigi 2014–2024
all_races_df = get_all_races()
print(all_races_df.head())

   season  round               raceName        date    circuitId  \
0    2014      1  Australian Grand Prix  2014-03-16  albert_park   
1    2014      2   Malaysian Grand Prix  2014-03-30       sepang   
2    2014      3     Bahrain Grand Prix  2014-04-06      bahrain   
3    2014      4     Chinese Grand Prix  2014-04-20     shanghai   
4    2014      5     Spanish Grand Prix  2014-05-11    catalunya   

                      circuitName    country  
0  Albert Park Grand Prix Circuit  Australia  
1    Sepang International Circuit   Malaysia  
2   Bahrain International Circuit    Bahrain  
3  Shanghai International Circuit      China  
4  Circuit de Barcelona-Catalunya      Spain  


In [7]:
# mozna wykorzystac all_races_df

def build_driver_features_for_each_race(start_year=2014, end_year=2023):
    all_rows = []
    driver_results_cache = {} 

    for season in range(start_year, end_year + 1):
        for rnd in range(1, 25):
            race_url = f"http://ergast.com/api/f1/{season}/{rnd}/results.json?limit=100"
            response = requests.get(race_url)
            data = response.json()
            races = data['MRData']['RaceTable']['Races']
            if not races:
                break  # brak dalszych wyścigów

            race = races[0]
            race_date = race['date']
            race_results = race['Results']

            for result in race_results:
                driver = result['Driver']
                driver_id = driver['driverId']
                dob = driver['dateOfBirth']
                race_day = pd.to_datetime(race_date)
                age = (race_day - pd.to_datetime(dob)).days / 365.25

              # cache: jeśli brak danych, pobierz raz
                if driver_id not in driver_results_cache:
                    history_url = f"http://ergast.com/api/f1/drivers/{driver_id}/results.json?limit=1000"
                    hist_response = requests.get(history_url)
                    hist_data = hist_response.json()
                    hist_races = hist_data['MRData']['RaceTable']['Races']
                    driver_results_cache[driver_id] = hist_races
                else:
                    hist_races = driver_results_cache[driver_id]

                # Filtrowanie wyników przed danym GP
                past_races = [r for r in hist_races if r['date'] < race_date]
                past_races.sort(key=lambda r: r['date'], reverse=True)
                last_5 = past_races[:5]

                last_5_points = sum(float(r['Results'][0]['points']) for r in last_5) if last_5 else 0.0
                last_finish = int(past_races[0]['Results'][0]['position']) if past_races else None
                last_qualifying = int(past_races[0]['Results'][0].get('grid', 0)) if past_races else None
                final_position = int(result['position'])

                all_rows.append({
                    'season': season,
                    'round': rnd,
                    'race_date': race_date,
                    'driverId': driver_id,
                    'age_on_race_day': round(age, 2),
                    'last_5_race_points': last_5_points,
                    'last_race_finish': last_finish,
                    'last_qualifying_position': last_qualifying,
                    'final_position': final_position
                })

    return pd.DataFrame(all_rows)

drivers_stats = build_driver_features_for_each_race()

In [8]:
drivers_stats

  has_large_values = (abs_vals > 1e6).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()


Unnamed: 0,season,round,race_date,driverId,age_on_race_day,last_5_race_points,last_race_finish,last_qualifying_position,final_position
0,2014,1,2014-03-16,rosberg,28.72,22.0,9.0,7.0,1
1,2014,1,2014-03-16,kevin_magnussen,21.44,0.0,,,2
2,2014,1,2014-03-16,button,34.15,17.0,7.0,4.0,3
3,2014,1,2014-03-16,alonso,32.63,31.0,3.0,4.0,4
4,2014,1,2014-03-16,bottas,24.55,4.0,21.0,13.0,5
...,...,...,...,...,...,...,...,...,...
4142,2023,22,2023-11-26,sargeant,22.90,1.0,16.0,6.0,16
4143,2023,22,2023-11-26,zhou,24.49,2.0,15.0,17.0,17
4144,2023,22,2023-11-26,sainz,29.23,22.0,8.0,7.0,18
4145,2023,22,2023-11-26,bottas,34.25,83.0,2.0,3.0,19


In [9]:
import requests
import pandas as pd
from tqdm import tqdm
import time

# dane o kwalifikacjach i wyscigu dla kierowcy w konkretnym wyscigu
race_driver_data = []

for season in tqdm([2023], desc="Sezony"):
#for season in tqdm(hybrid_era_seasons, desc="Sezony"):
    races_in_season = all_races_df[all_races_df['season'] == season]

    for _, race in tqdm(races_in_season.iterrows(), desc=f"Wyścigi {season}", leave=False):
        round_ = race['round']
        race_name = race['raceName']
        date = race['date']

        # wyniki wyścigu
        url_race = f"http://ergast.com/api/f1/{season}/{round_}/results.json?limit=100"
        race_res = requests.get(url_race).json()
        try:
            results = race_res['MRData']['RaceTable']['Races'][0]['Results']
        except IndexError:
            continue

        # dane o kwalifikacjach dla danego wyścigu
        url_qual = f"http://ergast.com/api/f1/{season}/{round_}/qualifying.json"
        qual_res = requests.get(url_qual).json()
        try:
            qualifying_results = qual_res['MRData']['RaceTable']['Races'][0]['QualifyingResults']
        except IndexError:
            qualifying_results = []

        for result in results:
            driver = result['Driver']
            driver_id = driver['driverId']
            constructor_id = result['Constructor']['constructorId']
            grid = int(result['grid'])  # pozycja startowa
            finish_position = int(result['position'])  # pozycja na mecie
            status = result['status']  # status kierowcy

            finished = status.lower() in ['finished'] or 'lap' in status.lower()

            # dopasowanie pozycji kwalifikacyjnej
            qual_position = None
            for qual_result in qualifying_results:
                if qual_result['Driver']['driverId'] == driver_id:
                    qual_position = int(qual_result['position'])
                    break

            race_driver_data.append({
                'season': int(season),
                'round': int(round_),
                'race_name': race_name,
                'race_date': date,
                'driverId': driver_id,
                'constructorId': constructor_id,
                'grid': grid,
                'qual_position': qual_position,
                'finish_position': finish_position,
                'finished': int(finished) 
            })

df_race_driver = pd.DataFrame(race_driver_data)

print(df_race_driver.head())

Sezony:   0%|          | 0/1 [00:00<?, ?it/s]
Wyścigi 2023: 0it [00:00, ?it/s][A
Wyścigi 2023: 1it [00:00,  2.97it/s][A
Wyścigi 2023: 2it [00:01,  1.80it/s][A
Wyścigi 2023: 3it [00:01,  1.59it/s][A
Wyścigi 2023: 4it [00:02,  1.62it/s][A
Wyścigi 2023: 5it [00:02,  1.72it/s][A
Wyścigi 2023: 6it [00:03,  1.75it/s][A
Wyścigi 2023: 7it [00:04,  1.50it/s][A
Wyścigi 2023: 8it [00:05,  1.24it/s][A
Wyścigi 2023: 9it [00:06,  1.05it/s][A
Wyścigi 2023: 10it [00:08,  1.11s/it][A
Wyścigi 2023: 11it [00:10,  1.35s/it][A
Wyścigi 2023: 12it [00:11,  1.30s/it][A
Wyścigi 2023: 13it [00:12,  1.20s/it][A
Wyścigi 2023: 14it [00:13,  1.15s/it][A
Wyścigi 2023: 15it [00:14,  1.08s/it][A
Wyścigi 2023: 16it [00:15,  1.02s/it][A
Wyścigi 2023: 17it [00:15,  1.05it/s][A
Wyścigi 2023: 18it [00:16,  1.30it/s][A
Wyścigi 2023: 19it [00:16,  1.59it/s][A
Wyścigi 2023: 20it [00:16,  1.84it/s][A
Wyścigi 2023: 21it [00:17,  2.05it/s][A
Wyścigi 2023: 22it [00:17,  2.32it/s][A
Sezony: 100%|██████████| 

   season  round           race_name   race_date        driverId  \
0    2023      1  Bahrain Grand Prix  2023-03-05  max_verstappen   
1    2023      1  Bahrain Grand Prix  2023-03-05           perez   
2    2023      1  Bahrain Grand Prix  2023-03-05          alonso   
3    2023      1  Bahrain Grand Prix  2023-03-05           sainz   
4    2023      1  Bahrain Grand Prix  2023-03-05        hamilton   

  constructorId  grid  qual_position  finish_position  finished  
0      red_bull     1              1                1         1  
1      red_bull     2              2                2         1  
2  aston_martin     5              5                3         1  
3       ferrari     4              4                4         1  
4      mercedes     7              7                5         1  





In [10]:
import requests
import pandas as pd
from tqdm import tqdm
import time

all_meetings = []

for year in tqdm(hybrid_era_seasons, desc="Pobieranie danych"):
    url = f"https://api.openf1.org/v1/meetings?year={year}"
    
    try:
        response = requests.get(url)
        if response.status_code == 200:
            data = response.json()
            gp_data = [m for m in data if "Grand Prix" in m.get("meeting_name", "")]
            filtered_gp_data = [
                {
                    'meeting_key': m.get('meeting_key'),
                    'circuit_key': m.get('circuit_key'),
                    'meeting_name': m.get('meeting_name'),
                    'year': m.get('year'),
                }
                for m in gp_data
            ]
            all_meetings.extend(filtered_gp_data)
        else:
            print(f"Błąd w roku {year}: status {response.status_code}")
    except Exception as e:
        print(f"Wyjątek w roku {year}: {e}")
    
    time.sleep(0.3)

# Zamień na DataFrame
df_meetings = pd.DataFrame(all_meetings)

# Podgląd danych
print(df_meetings.head())

Pobieranie danych: 100%|██████████| 11/11 [00:03<00:00,  2.90it/s]

   meeting_key  circuit_key              meeting_name  year
0         1141           63        Bahrain Grand Prix  2023
1         1142          149  Saudi Arabian Grand Prix  2023
2         1143           10     Australian Grand Prix  2023
3         1207          144     Azerbaijan Grand Prix  2023
4         1208          151          Miami Grand Prix  2023



