In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import os
pd.set_option('display.max_columns', None)

# Historical Odds Import and Conversion

In [2]:
# Creating a formula for odds conversion 
def odds_conversion(x):
    """Convert American (moneyline) odds into an implied probability.

    Negative odds map to ``-x / (-x + 100)``; zero or positive odds map to
    ``100 / (x + 100)``.
    """
    if x < 0:
        stake = -x
        return stake / (stake + 100)
    return 100 / (x + 100)

In [3]:
# Creating a list of all the race file names. This will be used to loop over for importing the data into dataframes
# Loops over the years where we have historical data

import os 

year_list = ['2020','2021', '2022']

# Maps each season to the race names found on disk (CSV file names without the extension)
table_dictionary = {}

for year in year_list:
    path = '../Raw Data/Odds Data/Historical Odds/'+year

    # Keep only the CSV files and strip the 4-character '.csv' suffix
    table_list = [filename[:-4] for filename in os.listdir(path) if filename.endswith('.csv')]

    table_dictionary[year] = table_list
    print(table_list)
   

['70thAnniversary', 'British', 'Spanish', 'Steiermark', 'Hungarian', 'Bahrain', 'Italian', 'Portuguese', 'Austrian', 'Turkish', 'AbuDhabi', 'Belgian', 'EmiliaRomagna', 'Eifel', 'Sakhir', 'Russian']
['UnitedStates', 'Brazilian', 'Qatar', 'Azerbaijan', 'British', 'Spanish', 'Hungarian', 'Bahrain', 'Styrian', 'Italian', 'Portuguese', 'Dutch', 'SaudiArabian', 'Austrian', 'Monaco', 'Turkish', 'AbuDhabi', 'Belgian', 'MexicoCity', 'EmiliaRomagna', 'Russian', 'French']
['Singapore', 'UnitedStates', 'Canadian', 'Brazilian', 'Miami', 'Azerbaijan', 'British', 'Spanish', 'Australian', 'Hungarian', 'Bahrain', 'Italian', 'Dutch', 'Japanese', 'SaudiArabian', 'Austrian', 'Monaco', 'AbuDhabi', 'Belgian', 'MexicoCity', 'EmiliaRomagna', 'French']


In [4]:
# Import CSV loop
# Loops over years

# odds_df_dict[year][race] -> raw odds DataFrame for that race
odds_df_dict = {}

for year, race_names in table_dictionary.items():
    season_folder = '../Raw Data/Odds Data/Historical Odds/'+year+'/'
    odds_df_dict[year] = {}

    for race in race_names:
        # The odds files are pipe-delimited with a header row
        df = pd.read_csv(season_folder+race+'.csv', header=0, sep='|')
        odds_df_dict[year][race] = df
    


In [5]:
# Converting the odds from American odds format to implied probabilities
# There is also some data cleaning for driver names in this loop
# Loops over years

# Betting markets whose American odds are converted to implied probabilities
odds_columns = ['Odds to Win',
                'Odds to Finish Top Three',
                'Odds to Finish Top Six',
                'Odds to Finish Top Ten']

# Driver-name spellings found in the odds files -> the spelling used everywhere else
driver_name_fixes = {'Alex Albon': 'Alexander Albon',
                     'Carlos Sainz Jr.': 'Carlos Sainz',
                     'Guanyu Zhou': 'Zhou Guanyu',
                     'Nick Latifi': 'Nicholas Latifi'}

# NOTE: this cell is not idempotent - running it twice feeds the already converted
# probabilities back through odds_conversion. Re-run the CSV import cell first.
for year in odds_df_dict:

    for race in odds_df_dict[year]:
        race_odds = odds_df_dict[year][race]

        for column in odds_columns:
            # Skip markets that a race file does not contain. This replaces the bare
            # `except: pass`, which also hid genuine conversion errors and left raw
            # American odds in the column without any warning.
            if column not in race_odds.columns:
                continue
            race_odds[column] = race_odds[column].apply(odds_conversion)

        # Below here is data cleaning - making sure the driver name is consistent across files
        # regex=False: the names are literal text (as a pattern, the '.' in 'Jr.' matches any character)
        for raw_name, clean_name in driver_name_fixes.items():
            race_odds['Driver'] = race_odds['Driver'].str.replace(raw_name, clean_name, regex=False)
        
        



# Importing and Formatting Race Information

In [6]:
# Importing race results, the race information, and driver information

# NOTE: the variable names are swapped relative to the file names:
#   `results` holds races.csv   (filtered on its 'year' column below)
#   `races`   holds results.csv (queried by driverId / raceId / position in the backtest)
results = pd.read_csv('../Raw Data/Historical Race Data/1950_to_2022_CSVs/races.csv', header=0, sep=',')
races = pd.read_csv('../Raw Data/Historical Race Data/1950_to_2022_CSVs/results.csv', header=0, sep=',')
drivers = pd.read_csv('../Raw Data/Historical Race Data/1950_to_2022_CSVs/drivers.csv', header=0, sep=',')

# One slice of `results` per backtested season, keyed by the year string
results_dict = {year: results.loc[results['year'] == int(year)] for year in year_list}

#results_dict['2021'].head(22)

In [7]:
# Creating a dictionary for the circuit IDs and race file names
# WARNING: commented out races are missing for an unknown reason 

# raceId_dict[year][race file name] -> raceId used in the historical race data.
# The keys double as lookup keys into odds_df_dict, so they must match the odds
# file names exactly.
raceId_dict = {}

raceId_dict['2022'] = {'Singapore': 1091, 
              'UnitedStates': 1093, 
              'Canadian': 1082, 
              'Brazilian': 1095, 
              'Miami': 1078, 
              'Azerbaijan': 1081, 
              'British': 1083, 
              'Spanish': 1079, 
              'Australian': 1076, 
              'Hungarian': 1086, 
              'Bahrain': 1074, 
              'Italian': 1089, 
              'Dutch': 1088, 
              'Japanese': 1092, 
              'SaudiArabian': 1075, 
              'Austrian': 1084, 
              'Monaco': 1080, 
              'AbuDhabi': 1096, 
              'Belgian': 1087, 
              'MexicoCity': 1094, 
              'EmiliaRomagna': 1077, 
              'French': 1085}

# NOTE: this dictionary will have to be updated with new races for the new year
raceId_dict['2021'] = {'Portuguese': 1054, 
              'Styrian': 1058, 
              'Austrian': 1060, 
              'Brazilian': 1071, 
              'Azerbaijan': 1057, 
              'British': 1061, 
              'Spanish': 1055, 
              'Hungarian': 1062, 
              'Bahrain': 1052, 
              'Italian': 1065, 
              'Dutch': 1064, 
              'SaudiArabian': 1072, 
              'AbuDhabi': 1073, 
              'Monaco': 1056, 
              'MexicoCity': 1070,   # was listed twice with the same id; duplicate removed
              'Belgian': 1063, 
              'EmiliaRomagna': 1053, 
              'French': 1059,
              'Russian': 1066,
              'Turkish': 1067,
              'UnitedStates': 1069,
              # NOTE(review): 1038 is also the id given to the 2020 'Italian' race below.
              # A raceId is expected to identify a single race, so one of the two is
              # presumably wrong - confirm Qatar's id against races.csv.
              'Qatar': 1038}

raceId_dict['2020'] = {'Portuguese': 1042, 
              # Key renamed from 'Styrian': the 2020 odds file is 'Steiermark.csv', and the
              # backtest looks the odds frame up by this key
              'Steiermark': 1032, 
              'Austrian': 1031,  
              'British': 1034, 
              'Spanish': 1036, 
              'Hungarian': 1033, 
              'Bahrain': 1045, 
              'Italian': 1038, 
              'AbuDhabi': 1047, 
              'Belgian': 1037, 
              'EmiliaRomagna': 1043, 
              'Russian': 1040,
              'Eifel': 1041,
              # 'Tuscan': 1039,   # disabled: the 2020 odds folder listing contains no 'Tuscan' file
              'Turkish': 1044,  
              'Sakhir': 1046, 
              '70thAnniversary': 1035}    



In [8]:
# Creating a dictionary for the driver Ids and racer names

# Full display name built from the forename and surname columns
drivers['combined name'] = drivers['forename'] + ' ' + drivers['surname']

#drivers.loc[drivers['combined name'].isin(odds_df_dict['Australian']['Driver'])] 

#NOTE: this list will have to be increased to include the drivers from earlier than 2022
# Driver name (as spelled in the cleaned odds files) -> driverId in the historical data.
# The backtest raises KeyError for any driver in the odds files that is missing here.
driverId_dict = {
      'Lewis Hamilton': 1,
     'Fernando Alonso': 4,
    'Sebastian Vettel': 20,
        'Pierre Gasly': 842,
    'Daniel Ricciardo': 817,
     'Valtteri Bottas': 822,
     'Kevin Magnussen': 825,
      'Max Verstappen': 830,
        'Carlos Sainz': 832,
        'Esteban Ocon': 839,
        'Lance Stroll': 840,
     'Charles Leclerc': 844,
        'Lando Norris': 846,
      'George Russell': 847,
     'Nicholas Latifi': 849,
        'Yuki Tsunoda': 852,
     'Mick Schumacher': 854,
         'Zhou Guanyu': 855,
     'Alexander Albon': 848,
        'Sergio Perez': 815,
     'Nico Hulkenberg': 807,
    # Pre-2022 drivers; ids taken from the drivers.csv lookups at the end of this notebook
         'Jack Aitken': 851,
        'Daniil Kvyat': 826
}

# Backtesting

In [9]:
# Reading the dummy probabilities into a dictionary of dataframes

# predictions_df_dict[year][race] -> per-position probability DataFrame
predictions_df_dict = {}

for year in year_list:
    season_predictions = {}
    predictions_df_dict[year] = season_predictions

    for race in raceId_dict[year]:
        # NOTE: this folder has no per-year level, so every season reads the same <race>.csv
        df = pd.read_csv('../Processed Data/Dummy Probability Outputs/Logistic Regression Test/'+race+'.csv', header=0, sep=',')
        season_predictions[race] = df
        


FileNotFoundError: [Errno 2] No such file or directory: '../Processed Data/Dummy Probability Outputs/Logistic Regression Test/Portuguese.csv'

In [None]:
# Creating a dictionary of converted predictions 
# Transforming the even weighting dummy file so that it contains odds for 'Odds to Win', 'Odds to Finish Top Three', 
# 'Odds to Finish Top Six', and 'Odds to Finish Top Ten'

# converted_predictions_df_dict[year][race] -> cumulative finishing probabilities per driver
converted_predictions_df_dict = {}

for year in year_list:

    converted_predictions_df_dict[year] = {}

    for race in raceId_dict[year]:

        position_probs = predictions_df_dict[year][race]

        converted = pd.DataFrame(columns=['Driver',
                                          'Probability of Winning',
                                          'Probability of Finishing Top Three',
                                          'Probability of Finishing Top Six',
                                          'Probability of Finishing Top Ten'])
        converted_predictions_df_dict[year][race] = converted

        converted['Driver'] = position_probs['Driver']
        converted['Probability of Winning'] = position_probs['1']

        # Columns '1'..'10' hold the probability of finishing in exactly that position;
        # each market is the running left-to-right sum up to its cut-off position
        running_total = position_probs['1'] + position_probs['2'] + position_probs['3']
        converted['Probability of Finishing Top Three'] = running_total

        running_total = running_total + position_probs['4'] + position_probs['5'] + position_probs['6']
        converted['Probability of Finishing Top Six'] = running_total

        running_total = running_total + position_probs['7'] + position_probs['8'] + position_probs['9'] + position_probs['10']
        converted['Probability of Finishing Top Ten'] = running_total


In [None]:
# Creating an empty dataframe for the backtesting log

# One row per bet placed; starts empty
BacktestingLog = pd.DataFrame(columns=[
    'Year',
    'Race',
    'Driver',
    'Bet placed',
    'Driver race outcome',
    'Implied probability',
    'Estimated probability',
    'Expected value',
    'Bet outcome',
    'Amount wagered',
    'Units won',
    'Net units won',
    'Cumulative bankroll',
])


In [None]:
# Creating the units wagered function 

def AmountWageredCalc(UseKellyCriterion,KellyCriterionWeighting,Bankroll,EstimatedOdds,ImpliedOdds):
    """Return the number of units to stake on a single bet.

    Parameters
    ----------
    UseKellyCriterion : 1 sizes the stake with the Kelly criterion; any other
        value stakes a flat 1 unit.
    KellyCriterionWeighting : multiplier applied to the full Kelly fraction.
    Bankroll : current bankroll; nothing is staked once it is zero or negative.
    EstimatedOdds : estimated probability that the bet wins.
    ImpliedOdds : bookmaker's implied probability of the same outcome.

    Returns
    -------
    0 when the bankroll is exhausted or the bet cannot pay a profit, 1 for flat
    staking, otherwise the weighted Kelly stake rounded to a whole unit (never
    negative).
    """
    if Bankroll <= 0: 
        return 0
    if UseKellyCriterion != 1:
        return 1

    # An implied probability outside (0, 1) leaves no positive net payout to bet on
    # (and would divide by zero below)
    if not 0 < ImpliedOdds < 1:
        return 0

    # Kelly: f = p - (1 - p) / b, where b is the NET profit per unit staked.
    # The backtest pays a winner stake / ImpliedOdds including the stake itself,
    # so b = 1 / ImpliedOdds - 1. Using the gross return 1 / ImpliedOdds here
    # overstates every stake and bets even when there is no edge.
    NetOdds = 1 / ImpliedOdds - 1
    KellyFraction = EstimatedOdds - (1 - EstimatedOdds) / NetOdds

    # The Kelly fraction is negative when the bet has no edge; never stake a negative amount
    AmountToBet = max(KellyFraction, 0) * KellyCriterionWeighting * Bankroll
    return round(AmountToBet, 0)


In [None]:
# Assumptions cell 
# This cell contains assumptions for the betting strategies. It may be deleted or slimmed down over time 

# Default staking configuration: Kelly sizing switched on with a 0.01 weighting
UseKellyCriterion, KellyCriterionWeighting = 1, .01

# The running bankroll starts out equal to the starting bankroll
StartingBankroll = Bankroll = 10000


In [None]:
# Setting different strategies to run: 

StartingBankroll = 10000
Bankroll = StartingBankroll

# (strategy name, UseKellyCriterion flag, KellyCriterionWeighting) for each strategy to run
strategy_settings = [
    ('SingleUnit', 0, 1),
    ('Kelly1Percent', 1, .01),
    ('Kelly5Percent', 1, .05),
]

# Keyed by strategy name; each entry repeats the name alongside its two staking settings
StrategyDict = {
    name: {
        'StrategyName': name,
        'UseKellyCriterion': use_kelly,
        'KellyCriterionWeighting': weighting,
    }
    for name, use_kelly, weighting in strategy_settings
}

In [None]:
def BacktestingFunction(StartingBankroll, StrategyName, UseKellyCriterion, KellyCriterionWeighting, RunName=None): 
    """Backtest one staking strategy over every year, race, driver and market.

    A bet is placed whenever the estimated probability of an outcome is greater
    than the bookmaker's implied probability. Bets are settled in iteration order
    and the bankroll is updated after each one, so bankroll-dependent stakes
    depend on the results of earlier bets.

    Reads the notebook-level objects year_list, raceId_dict, driverId_dict,
    odds_df_dict, converted_predictions_df_dict and races, and sizes every bet
    with AmountWageredCalc.

    Parameters
    ----------
    StartingBankroll : bankroll before the first bet.
    StrategyName : label used in the name of the exported CSV.
    UseKellyCriterion, KellyCriterionWeighting : passed through to AmountWageredCalc.
    RunName : optional sub-folder of the results directory. When omitted, a
        notebook-level ``RunName`` variable is used if one exists; otherwise the
        log is written directly into the results directory.

    Returns
    -------
    pandas.DataFrame with one row per bet placed. The same log is written to
    ``<StrategyName>_BackTestingLog.csv``.

    Raises
    ------
    KeyError if a race has no odds / prediction frame, or a driver in the odds
    file is missing from driverId_dict.
    """
    # (odds column, prediction column, worst finishing position that still wins the bet)
    Markets = [
        ('Odds to Win', 'Probability of Winning', 1),
        ('Odds to Finish Top Three', 'Probability of Finishing Top Three', 3),
        ('Odds to Finish Top Six', 'Probability of Finishing Top Six', 6),
        ('Odds to Finish Top Ten', 'Probability of Finishing Top Ten', 10),
    ]

    LogColumns = ['Year', 'Race', 'Driver', 'Bet placed', 'Driver race outcome',
                  'Implied probability', 'Estimated probability', 'Expected value',
                  'Bet outcome', 'Amount wagered', 'Units won', 'Net units won',
                  'Cumulative bankroll']

    Bankroll = StartingBankroll

    # Rows are collected in a list and turned into a DataFrame once at the end;
    # growing a DataFrame with pd.concat inside the loop is quadratic
    LogRecords = []

    for year in year_list: 

        for race in raceId_dict[year]:

            RaceOdds = odds_df_dict[year][race]
            RacePredictions = converted_predictions_df_dict[year][race]

            for driver in RaceOdds['Driver']:

                # NOTE: This if statement is for handling two situations where a driver was subbed out last minute 
                # for another driver. Because this is a rare scenario, I thought it was better to handle these manually 
                # rather than trying to program something dynamic
                # NOTE(review): the check is not tied to a year, so these driver/race pairs are
                # skipped in every backtested season - confirm that is intended
                if (race == 'Italian' and driver == 'Alexander Albon') or (race == 'SaudiArabian' and driver == 'Sebastian Vettel'):
                    continue

                # Finishing position of this driver in this race (`races` holds the results.csv rows)
                DriverOutcome = races.loc[((races['driverId'] == driverId_dict[driver]) 
                                           & (races['raceId'] == raceId_dict[year][race])),'position']

                for OddsColumn, ProbabilityColumn, LastWinningPosition in Markets:

                    # A market can only be evaluated when the race offers it and the driver has
                    # a prediction and a recorded result. Skip just this market otherwise, rather
                    # than swallowing every exception and dropping the driver's remaining markets.
                    try:
                        ImpliedOdds = RaceOdds.loc[RaceOdds['Driver'] == driver, OddsColumn].iloc[0]
                        EstimatedOdds = RacePredictions.loc[RacePredictions['Driver'] == driver, ProbabilityColumn].iloc[0]
                        Outcome = DriverOutcome.iloc[0]
                    except (KeyError, IndexError):
                        continue

                    # Only bet when the model sees more value than the bookmaker
                    # (a NaN on either side compares False, so no bet is placed)
                    if not EstimatedOdds > ImpliedOdds:
                        continue

                    AmountWagered = AmountWageredCalc(UseKellyCriterion,KellyCriterionWeighting,Bankroll,EstimatedOdds,ImpliedOdds)

                    # Positions are compared as text ('1', '2', ...); str() keeps the check
                    # working whether the column was parsed as text or as integers
                    WinningPositions = {str(position) for position in range(1, LastWinningPosition + 1)}

                    BetOutcome = 0
                    UnitsWon = 0
                    if str(Outcome) in WinningPositions:
                        BetOutcome = 1
                        # A winner is paid stake / implied probability, stake included
                        UnitsWon = AmountWagered / ImpliedOdds

                    NetUnitsWon = UnitsWon - AmountWagered
                    Bankroll = Bankroll + NetUnitsWon

                    LogRecords.append({
                        'Year': year,
                        'Race': race,
                        'Driver': driver,
                        'Bet placed': OddsColumn,
                        'Driver race outcome': Outcome,
                        'Implied probability': ImpliedOdds, 
                        'Estimated probability': EstimatedOdds,
                        'Expected value': (EstimatedOdds / ImpliedOdds) - 1,
                        'Bet outcome': BetOutcome,
                        'Amount wagered': AmountWagered,
                        'Units won': UnitsWon,
                        'Net units won': NetUnitsWon,
                        'Cumulative bankroll': Bankroll
                    })

    BacktestingLog = pd.DataFrame(LogRecords, columns=LogColumns)

    # The run name used to be read from an undefined name (NameError at export time).
    # Fall back to a notebook-level RunName so existing four-argument callers keep working.
    if RunName is None:
        RunName = globals().get('RunName')

    OutputFolder = '../Processed Data/Backtesting Results/'
    if RunName:
        OutputFolder = OutputFolder + RunName + '/'
    os.makedirs(OutputFolder, exist_ok=True)

    BacktestingLog.to_csv(OutputFolder+StrategyName+'_BackTestingLog.csv', index=False)

    return BacktestingLog

## Results summary 

#Running the multiple strategies
StartingBankroll = 10000

# NOTE: BacktestingFunction builds its bet log in a local variable, so the figures printed
# below come from the notebook-level BacktestingLog frame, not from the runs launched here
for strategy in StrategyDict:
    settings = StrategyDict[strategy]
    BacktestingFunction(StartingBankroll,
                        settings['StrategyName'],
                        settings['UseKellyCriterion'],
                        settings['KellyCriterionWeighting'])

bet_outcomes = BacktestingLog['Bet outcome']
net_units_won = BacktestingLog['Net units won']
expected_values = BacktestingLog['Expected value']

print('Bets placed: ' + str(bet_outcomes.count()))
print('Bets won: ' + str(bet_outcomes.sum()))
print('Net units won: ' + str(net_units_won.sum()))

# Return on investment relative to the starting bankroll, in percent
roi_percent = (net_units_won.sum() / StartingBankroll) * 100
print('ROI %: ' + str(roi_percent))
print('\n')
print('Average expected value: ' + str(expected_values.mean()))
print('Min expected value: ' + str(expected_values.min()))
print('Median expected value: ' + str(expected_values.median()))
print('Max expected value: ' + str(expected_values.max()))
print('Final bankroll is: ' + str(BacktestingLog['Cumulative bankroll'].tail(1).max()))


BacktestingLog.to_csv('../Processed Data/Backtesting Results/Multiyear_BackTestingLog.csv', index=False)

BacktestingLog.head()

In [None]:
#Testing out making new folders
newpath = '../Processed Data/Backtesting Results/folder'
# exist_ok=True makes the call safe to re-run and removes the separate
# exists-then-create check
os.makedirs(newpath, exist_ok=True)

In [None]:
#One function to rule them all 

def BacktestingRun(RunName, StartingBankroll): 
    """Backtest every staking strategy and save each bet log under a folder named after the run.

    Parameters
    ----------
    RunName : name of the sub-folder of '../Processed Data/Backtesting Results/'
        that receives one CSV per strategy.
    StartingBankroll : bankroll each strategy starts with.
    """

    StrategyDict = {
    
    'SingleUnit':{
        'StrategyName':'SingleUnit',
        'UseKellyCriterion':0,
        'KellyCriterionWeighting':1
    },
    
    'Kelly1Percent':{
        'StrategyName':'Kelly1Percent',
        'UseKellyCriterion':1,
        'KellyCriterionWeighting':.01
    },
    
    'Kelly5Percent':{
        'StrategyName':'Kelly5Percent',
        'UseKellyCriterion':1,
        'KellyCriterionWeighting':.05
    },
    
}

    # Create the folder the bet logs are exported to. Previously a folder called RunName
    # was created in the working directory, which is not where the CSVs are written.
    os.makedirs('../Processed Data/Backtesting Results/' + RunName, exist_ok=True)

    # BacktestingFunction is called without a run name and resolves RunName at notebook
    # level, so publish the value there before running the strategies
    globals()['RunName'] = RunName
    
    for strategy in StrategyDict.values():
        BacktestingFunction(StartingBankroll, strategy['StrategyName'], strategy['UseKellyCriterion'], strategy['KellyCriterionWeighting'])

In [None]:
# Launch the backtest for the run named "Multiyear Test" with a starting bankroll of 10000
BacktestingRun("Multiyear Test", 10000)

In [14]:
# Reading the dummy probabilities into a dictionary of dataframes
# This version is not going to work because it isn't reading the drivers in each folder. We have to update it so it does that
# Reset the predictions container; this cell only writes files and fills in empty per-year entries
predictions_df_dict = {}

dummy = pd.read_csv('../Processed Data/Dummy Probability Outputs/EvenWeighting.csv', header=0, sep=',')

for year in year_list:
    predictions_df_dict[year] = {}
    season_folder = '../Processed Data/Dummy Probability Outputs/Even Weighting Multi Year/'+year+'/'

    for race in raceId_dict[year]:
        # Every race gets an identical copy of the even-weighting frame
        # (index=False is not passed, so the row index is written as the first column)
        dummy.to_csv(season_folder+race+'.csv', sep=',')

     

In [11]:
# We need to write a version of the above function that grabs the drivers from the right predictions for the dummy versions

In [16]:
# Preview the first rows of the drivers table, including the derived 'combined name' column
drivers.head()

Unnamed: 0,driverId,driverRef,number,code,forename,surname,dob,nationality,url,combined name
0,1,hamilton,44,HAM,Lewis,Hamilton,1985-01-07,British,http://en.wikipedia.org/wiki/Lewis_Hamilton,Lewis Hamilton
1,2,heidfeld,\N,HEI,Nick,Heidfeld,1977-05-10,German,http://en.wikipedia.org/wiki/Nick_Heidfeld,Nick Heidfeld
2,3,rosberg,6,ROS,Nico,Rosberg,1985-06-27,German,http://en.wikipedia.org/wiki/Nico_Rosberg,Nico Rosberg
3,4,alonso,14,ALO,Fernando,Alonso,1981-07-29,Spanish,http://en.wikipedia.org/wiki/Fernando_Alonso,Fernando Alonso
4,5,kovalainen,\N,KOV,Heikki,Kovalainen,1981-10-19,Finnish,http://en.wikipedia.org/wiki/Heikki_Kovalainen,Heikki Kovalainen


In [20]:
# Look up the drivers row (and driverId) for the surname 'Aitken'
drivers.loc[drivers['surname'] == 'Aitken']

Unnamed: 0,driverId,driverRef,number,code,forename,surname,dob,nationality,url,combined name
849,851,aitken,89,AIT,Jack,Aitken,1995-09-23,British,http://en.wikipedia.org/wiki/Jack_Aitken,Jack Aitken


In [None]:
# Inspect the first 22 rows of the 2020 slice of `results`
results_dict['2020'].head(22)

In [17]:
# Look up the drivers rows (and driverId) for the forename 'Daniil'
drivers.loc[drivers['forename'] == 'Daniil']

Unnamed: 0,driverId,driverRef,number,code,forename,surname,dob,nationality,url,combined name
825,826,kvyat,26,KVY,Daniil,Kvyat,1994-04-26,Russian,http://en.wikipedia.org/wiki/Daniil_Kvyat,Daniil Kvyat
