## Импорт библиотек

In [3]:
import pandas as pd
import numpy as np
import requests
import csv 
from datetime import datetime
from dateutil.relativedelta import relativedelta, MO

## Получение необходимых данных из Ergast Developer API

### Данные о гонках за 2012-2023 годах

In [4]:
Race = {'season': [],'round': [],'circuit_id': [], 'country': [], 'date': []}

for year in list(range(2012,2024)):
    url = 'https://ergast.com/api/f1/{}.json'
    r = requests.get(url.format(year))
    json = r.json()

    for item in json['MRData']['RaceTable']['Races']:
        try:
            Race['season'].append(int(item['season']))
        except:
            Race['season'].append(None)

        try:
            Race['round'].append(int(item['round']))
        except:
            Race['round'].append(None)

        try:
            Race['circuit_id'].append(item['Circuit']['circuitId'])
        except:
            Race['circuit_id'].append(None)
            
        try:
            Race['country'].append(item['Circuit']['Location']['country'])
        except:
            Race['country'].append(None)

        try:
            Race['date'].append(item['date'])
        except:
            Race['date'].append(None)

Race = pd.DataFrame(Race)
Race

Unnamed: 0,season,round,circuit_id,country,date
0,2012,1,albert_park,Australia,2012-03-18
1,2012,2,sepang,Malaysia,2012-03-25
2,2012,3,shanghai,China,2012-04-15
3,2012,4,bahrain,Bahrain,2012-04-22
4,2012,5,catalunya,Spain,2012-05-13
...,...,...,...,...,...
238,2023,18,americas,USA,2023-10-22
239,2023,19,rodriguez,Mexico,2023-10-29
240,2023,20,interlagos,Brazil,2023-11-05
241,2023,21,vegas,United States,2023-11-19


### Данные о результатах гонок за 2012-2023 годах

In [5]:
laps = []
for year in np.array(Race.season.unique()):
    laps.append([year, list(Race[Race.season == year]['round'])])
  
Result = {'season': [],'round':[],'circuit_id':[], 'driver': [], 'date_of_birth': [],'nationality': [],'constructor': [], 'points': [], 'podium': []}

for n in list(range(len(laps))):
    for i in laps[n][1]:
        url = 'http://ergast.com/api/f1/{}/{}/results.json'
        r = requests.get(url.format(laps[n][0], i))
        json = r.json()

        for item in json['MRData']['RaceTable']['Races'][0]['Results']:
            try:
                Result['season'].append(int(json['MRData']['RaceTable']['Races'][0]['season']))
            except:
                Result['season'].append(None)

            try:
                Result['round'].append(int(json['MRData']['RaceTable']['Races'][0]['round']))
            except:
                Result['round'].append(None)

            try:
                Result['circuit_id'].append(json['MRData']['RaceTable']['Races'][0]['Circuit']['circuitId'])
            except:
                Result['circuit_id'].append(None)

            try:
                Result['driver'].append(item['Driver']['driverId'])
            except:
                Result['driver'].append(None)
            
            try:
                Result['date_of_birth'].append(item['Driver']['dateOfBirth'])
            except:
                Result['date_of_birth'].append(None)
                
            try:
                Result['nationality'].append(item['Driver']['nationality'])
            except:
                Result['nationality'].append(None)

            try:
                Result['constructor'].append(item['Constructor']['constructorId'])
            except:
                Result['constructor'].append(None)

            try:
                Result['points'].append(int(item['points']))
            except:
                Result['points'].append(None)

            try:
                Result['podium'].append(int(item['position']))
            except:
                Result['podium'].append(None)

Result = pd.DataFrame(Result)
Result

Unnamed: 0,season,round,circuit_id,driver,date_of_birth,nationality,constructor,points,podium
0,2012,1,albert_park,button,1980-01-19,British,mclaren,25.0,1
1,2012,1,albert_park,vettel,1987-07-03,German,red_bull,18.0,2
2,2012,1,albert_park,hamilton,1985-01-07,British,mclaren,15.0,3
3,2012,1,albert_park,webber,1976-08-27,Australian,red_bull,12.0,4
4,2012,1,albert_park,alonso,1981-07-29,Spanish,ferrari,10.0,5
...,...,...,...,...,...,...,...,...,...
5040,2023,22,yas_marina,sargeant,2000-12-31,American,williams,0.0,16
5041,2023,22,yas_marina,zhou,1999-05-30,Chinese,alfa,0.0,17
5042,2023,22,yas_marina,sainz,1994-09-01,Spanish,ferrari,0.0,18
5043,2023,22,yas_marina,bottas,1989-08-28,Finnish,alfa,0.0,19


### Данные о статистиках гонщиков за 2012-2023 годах

In [7]:
Driver_stats = {'season': [], 'round':[], 'driver': [], 'driver_points': [], 'driver_wins': [], 'driver_position': []}

for n in list(range(len(laps))):
    for i in laps[n][1]:
    
        url = 'https://ergast.com/api/f1/{}/{}/driverStandings.json'
        r = requests.get(url.format(laps[n][0], 1))
        json = r.json()

        for item in json['MRData']['StandingsTable']['StandingsLists'][0]['DriverStandings']:
            try:
                Driver_stats['season'].append(int(json['MRData']['StandingsTable']['StandingsLists'][0]['season']))
            except:
                Driver_stats['season'].append(None)

            try:
                Driver_stats['round'].append(int(json['MRData']['StandingsTable']['StandingsLists'][0]['round']))
            except:
                Driver_stats['round'].append(None)
                                         
            try:
                Driver_stats['driver'].append(item['Driver']['driverId'])
            except:
                Driver_stats['driver'].append(None)
            
            try:
                Driver_stats['driver_points'].append(int(item['points']))
            except:
                Driver_stats['driver_points'].append(None)
            
            try:
                Driver_stats['driver_wins'].append(int(item['wins']))
            except:
                Driver_stats['driver_wins'].append(None)
                
            try:
                Driver_stats['driver_position'].append(int(item['position']))
            except:
                Driver_stats['driver_position'].append(None)
            
Driver_stats = pd.DataFrame(Driver_stats)
Driver_stats

Unnamed: 0,season,round,driver,driver_points,driver_wins,driver_position
0,2012,1,button,25,1,1
1,2012,1,vettel,18,0,2
2,2012,1,hamilton,15,0,3
3,2012,1,webber,12,0,4
4,2012,1,alonso,10,0,5
...,...,...,...,...,...,...
5015,2023,1,zhou,0,0,16
5016,2023,1,norris,0,0,17
5017,2023,1,ocon,0,0,18
5018,2023,1,leclerc,0,0,19


### Данные о машинах и командах (конструкторов) за 2012-2023 годах

In [8]:
laps = laps[1:]

constructor_stats = {'season': [],'round':[], 'constructor': [], 'constructor_points': [], 'constructor_wins': [], 'constructor_position': []}

for n in list(range(len(laps))):
    for i in laps[n][1]:
    
        url = 'https://ergast.com/api/f1/{}/{}/constructorStandings.json'
        r = requests.get(url.format(laps[n][0], i))
        json = r.json()

        for item in json['MRData']['StandingsTable']['StandingsLists'][0]['ConstructorStandings']:
            try:
                constructor_stats['season'].append(int(json['MRData']['StandingsTable']['StandingsLists'][0]['season']))
            except:
                constructor_stats['season'].append(None)

            try:
                constructor_stats['round'].append(int(json['MRData']['StandingsTable']['StandingsLists'][0]['round']))
            except:
                constructor_stats['round'].append(None)
                                         
            try:
                constructor_stats['constructor'].append(item['Constructor']['constructorId'])
            except:
                constructor_stats['constructor'].append(None)
            
            try:
                constructor_stats['constructor_points'].append(int(item['points']))
            except:
                constructor_stats['constructor_points'].append(None)
            
            try:
                constructor_stats['constructor_wins'].append(int(item['wins']))
            except:
                constructor_stats['constructor_wins'].append(None)
                
            try:
                constructor_stats['constructor_position'].append(int(item['position']))
            except:
                constructor_stats['constructor_position'].append(None)

constructor_stats = pd.DataFrame(constructor_stats)
constructor_stats

Unnamed: 0,season,round,constructor,constructor_points,constructor_wins,constructor_position
0,2013,1,ferrari,30.0,0,1
1,2013,1,lotus_f1,26.0,1,2
2,2013,1,red_bull,23.0,0,3
3,2013,1,mercedes,10.0,0,4
4,2013,1,force_india,10.0,0,5
...,...,...,...,...,...,...
2283,2023,22,alpine,120.0,0,6
2284,2023,22,williams,28.0,0,7
2285,2023,22,alphatauri,25.0,0,8
2286,2023,22,alfa,16.0,0,9


### Объединяем все данные в одну таблицу

In [9]:
df1 = pd.merge(Race, Result, how='inner',on=['season', 'round', 'circuit_id']).drop(['points'], axis = 1)                                                  
df2 = pd.merge(df1, Driver_stats, how='left',on=['season', 'round', 'driver'])
F1Data = pd.merge(df2, constructor_stats, how='left',on=['season', 'round', 'constructor'])
F1Data

Unnamed: 0,season,round,circuit_id,country,date,driver,date_of_birth,nationality,constructor,podium,driver_points,driver_wins,driver_position,constructor_points,constructor_wins,constructor_position
0,2012,1,albert_park,Australia,2012-03-18,button,1980-01-19,British,mclaren,1,25.0,1.0,1.0,,,
1,2012,1,albert_park,Australia,2012-03-18,button,1980-01-19,British,mclaren,1,25.0,1.0,1.0,,,
2,2012,1,albert_park,Australia,2012-03-18,button,1980-01-19,British,mclaren,1,25.0,1.0,1.0,,,
3,2012,1,albert_park,Australia,2012-03-18,button,1980-01-19,British,mclaren,1,25.0,1.0,1.0,,,
4,2012,1,albert_park,Australia,2012-03-18,button,1980-01-19,British,mclaren,1,25.0,1.0,1.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9812,2023,22,yas_marina,UAE,2023-11-26,sargeant,2000-12-31,American,williams,16,,,,28.0,0.0,7.0
9813,2023,22,yas_marina,UAE,2023-11-26,zhou,1999-05-30,Chinese,alfa,17,,,,16.0,0.0,9.0
9814,2023,22,yas_marina,UAE,2023-11-26,sainz,1994-09-01,Spanish,ferrari,18,,,,406.0,1.0,3.0
9815,2023,22,yas_marina,UAE,2023-11-26,bottas,1989-08-28,Finnish,alfa,19,,,,16.0,0.0,9.0


## Очищаем и подготавливаем данные

### Переводим возраст водителей в формат datetime

In [11]:
F1Data['date'] = pd.to_datetime(F1Data.date)
F1Data['date_of_birth'] = pd.to_datetime(F1Data.date_of_birth)
F1Data['driver_age'] = F1Data.apply(lambda x:relativedelta(x['date'], x['date_of_birth']).years, axis=1)
F1Data.drop(['date', 'date_of_birth'], axis = 1, inplace = True)

### Обрабатываем NaN значения

In [12]:
for col in ['driver_points', 'driver_wins', 'driver_position', 'constructor_points','constructor_wins' , 'constructor_position']:
    F1Data[col].fillna(0, inplace = True)
    F1Data[col] = F1Data[col].map(lambda x: int(x))
F1Data.dropna(inplace = True )

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  F1Data[col].fillna(0, inplace = True)


### Сохраняем данные

In [13]:
F1Data.to_csv('f1_data.csv')