# Project Awesome
## Formula One Race Data

API documentation: http://ergast.com/mrd/

Our project is to uncover patterns in race car and race data provided by Formula One data at ergast.com/mrd. The possible relationships we’ll examine include driver performance and attributes and changes in race stats over time. The tables we’re ingesting are finishing statuses, race results, qualifying results, and driver info. Because the datasets’ date ranges differ, we’ll narrow in on a fixed date range so we have good coverage in our analysis.

In [1]:
# Setup.
import pandas as pd
import requests
import json
import matplotlib as plt
import numpy as np
import itertools 
from datetime import datetime
import matplotlib.pyplot as plt
import matplotlib
from matplotlib import colors as mcolors
import warnings
warnings.filterwarnings("ignore")

## Get qualifying race data.

In [2]:
def get_race_result(season_number, year ):
    api = 'http://ergast.com/api/f1/{}/{}/qualifying.json'.format(year, season_number)
    data = requests.get(api).json()
    temp_df =  pd.DataFrame.from_records(data.get('MRData').get('RaceTable').get('Races')[0].get('QualifyingResults'))
    temp_df['seasonNbr'] = season_number
    temp_df['year'] = year
    return temp_df

results = []
seasons = np.arange(2003,2018,1)
rounds = np.arange(1,15,1)
for s in seasons:
    for r in rounds:
        result = get_race_result(r, s)
        results.append(result)

qual_data = pd.concat(results, axis=0)
qual_data['driverId'] = qual_data.Driver.map(lambda x: x.get('driverId'))
qual_data['car_make'] = qual_data.Constructor.map(lambda x: x.get('name'))
qual_data['driver_nat'] = qual_data.Driver.map(lambda x: x.get('nationality'))

## Get race results data.

In [3]:
#api = 'http://ergast.com/api/f1/2008/5/results.json'
#resp = requests.get(api).json()

def get_race_result_1(season_number, year ):
    api_1 = 'http://ergast.com/api/f1/{}/{}/results.json'.format(year, season_number)
    data_1 = requests.get(api_1).json()
                                         #resp.get('MRData').get('RaceTable').get('Races')[0].get('Results')
    temp_df_1 =  pd.DataFrame.from_records(data_1.get('MRData').get('RaceTable').get('Races')[0].get('Results'))
    circuit=data_1.get('MRData').get('RaceTable').get('Races')[0].get('Circuit')
    temp_df_1['circuit_name']=circuit.get('circuitName')
    temp_df_1['seasonNbr'] = season_number
    temp_df_1['year'] = year
    return temp_df_1

results_1 = []
seasons_1 = np.arange(2003,2018,1)
rounds_1 = np.arange(1,15,1)
for s in seasons_1:
    for r in rounds_1:
        result_1 = get_race_result_1(r, s)
        results_1.append(result_1)

race_dat = pd.concat(results_1, axis=0)
race_dat['driverId']=race_dat.Driver.map(lambda x: x.get('driverId'))
race_dat['car_make']=race_dat.Constructor.map(lambda x: x.get('name'))
race_dat['driver_nat']=race_dat.Driver.map(lambda x: x.get('nationality'))\

race_data=race_dat[['driverId','year','seasonNbr','position','status','car_make','driver_nat','points','circuit_name']]
race_data['points'] = race_data['points'].astype(float)


## Get driver data.

In [4]:
a=list(race_data['driverId'])
b=set(a)
names =list(b)

results_2 = []
for x in names:
    api_2=f'http://ergast.com/api/f1/drivers/{x}.json'
    resp=requests.get(api_2).json().get('MRData').get('DriverTable').get('Drivers')
    results_2.append(resp)
    
drivers_df = pd.DataFrame.from_records(results_2)
drivers_df['driverId'] = drivers_df[0].map(lambda x: x.get('driverId'))
drivers_df['givenName'] = drivers_df[0].map(lambda x: x.get('givenName'))
drivers_df['familyName'] = drivers_df[0].map(lambda x: x.get('familyName'))
drivers_df['nationality'] = drivers_df[0].map(lambda x: x.get('nationality'))
drivers_df['dateOfBirth'] = drivers_df[0].map(lambda x: x.get('dateOfBirth'))
drivers_df.head()

def calc_age(some_brday):
    date_time_bday = datetime.strptime(some_brday, '%Y-%m-%d')
    return round((datetime.today() - date_time_bday).days / 365)


drivers_df['years_old'] = drivers_df.dateOfBirth.map(calc_age)
drivers_df = drivers_df[['driverId','givenName','familyName','nationality','dateOfBirth','years_old']]

In [55]:
qual_data.head()

Unnamed: 0,Constructor,Driver,Q1,Q2,Q3,number,position,seasonNbr,year,driverId,car_make,driver_nat
0,"{'constructorId': 'ferrari', 'url': 'http://en...","{'driverId': 'michael_schumacher', 'code': 'MS...",1:27.173,,,1,1,1,2003,michael_schumacher,Ferrari,German
1,"{'constructorId': 'ferrari', 'url': 'http://en...","{'driverId': 'barrichello', 'code': 'BAR', 'ur...",1:27.418,,,2,2,1,2003,barrichello,Ferrari,Brazilian
2,"{'constructorId': 'williams', 'url': 'http://e...","{'driverId': 'montoya', 'code': 'MON', 'url': ...",1:28.101,,,3,3,1,2003,montoya,Williams,Colombian
3,"{'constructorId': 'sauber', 'url': 'http://en....","{'driverId': 'frentzen', 'url': 'http://en.wik...",1:28.274,,,10,4,1,2003,frentzen,Sauber,German
4,"{'constructorId': 'toyota', 'url': 'http://en....","{'driverId': 'panis', 'url': 'http://en.wikipe...",1:28.288,,,20,5,1,2003,panis,Toyota,French


In [54]:
print(qual_data.info())
print('--')
print(qual_data.describe())
print('--')
print(list(qual_data))
print('--')
print(qual_data.count())

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4496 entries, 0 to 19
Data columns (total 12 columns):
Constructor    4496 non-null object
Driver         4496 non-null object
Q1             4496 non-null object
Q2             2747 non-null object
Q3             1626 non-null object
number         4496 non-null object
position       4496 non-null object
seasonNbr      4496 non-null int64
year           4496 non-null int64
driverId       4496 non-null object
car_make       4496 non-null object
driver_nat     4496 non-null object
dtypes: int64(2), object(10)
memory usage: 456.6+ KB
None
--
         seasonNbr         year
count  4496.000000  4496.000000
mean      7.493995  2010.061165
std       4.034607     4.236348
min       1.000000  2003.000000
25%       4.000000  2006.000000
50%       7.000000  2010.000000
75%      11.000000  2014.000000
max      14.000000  2017.000000
--
['Constructor', 'Driver', 'Q1', 'Q2', 'Q3', 'number', 'position', 'seasonNbr', 'year', 'driverId', 'car_make', 'd

In [57]:
qual_data.driverId.value_counts()

alonso                206
button                193
massa                 191
raikkonen             182
rosberg               154
hamilton              154
webber                154
vettel                145
barrichello           126
trulli                125
heidfeld              108
fisichella             98
sutil                  98
michael_schumacher     97
hulkenberg             97
perez                  96
ricciardo              90
grosjean               87
kovalainen             84
coulthard              84
bottas                 70
maldonado              70
glock                  70
ralf_schumacher        63
liuzzi                 62
sato                   57
kubica                 57
kvyat                  56
ericsson               55
kobayashi              54
                     ... 
vandoorne              15
ambrosio               15
wurz                   15
stroll                 14
garde                  14
frentzen               14
verstappen             14
wilson      

In [59]:
qual_data.position.value_counts

<bound method IndexOpsMixin.value_counts of 0      1
1      2
2      3
3      4
4      5
5      6
6      7
7      8
8      9
9     10
10    11
11    12
12    13
13    14
14    15
15    16
16    17
17    18
18    19
19    20
0      1
1      2
2      3
3      4
4      5
5      6
6      7
7      8
8      9
9     10
      ..
10    11
11    12
12    13
13    14
14    15
15    16
16    17
17    18
18    19
19    20
0      1
1      2
2      3
3      4
4      5
5      6
6      7
7      8
8      9
9     10
10    11
11    12
12    13
13    14
14    15
15    16
16    17
17    18
18    19
19    20
Name: position, Length: 4496, dtype: object>