## Load Data

In [1]:
from pathlib import Path
import pandas as pd
import numpy as np

# Directory that holds the raw CSV exports.
data_path = Path('./data/raw')
# Materialise the glob generator so the matches can be displayed and re-iterated.
data_files = [*data_path.glob('*.csv')]
data_files

[PosixPath('../../data/f1_fan_track/driver_pts.csv'),
 PosixPath('../../data/f1_fan_track/team_price.csv'),
 PosixPath('../../data/f1_fan_track/team_pts.csv'),
 PosixPath('../../data/f1_fan_track/driver_price.csv')]

In [2]:
# One DataFrame per CSV, keyed by the file name without its extension.
# The files are pipe-delimited.
frames = {csv_file.stem: pd.read_csv(csv_file, sep='|') for csv_file in data_files}


In [3]:
# Work on copies so the frames held in `frames` stay as loaded.
pts_driver, pts_team, price_driver, price_team = (
    frames.get(stem).copy()
    for stem in ('driver_pts', 'team_pts', 'driver_price', 'team_price')
)

## Clean and Calculate

### Points Tables

In [4]:
# A track column is either a three-character upper-case code (e.g. 'BAH') or
# ends in '.1' / '.2' (e.g. 'AUS.1', 'ITA.1').
track_cols = [
    name
    for name in pts_driver.columns
    if (len(name) == 3 and name.isupper()) or name.endswith(('.1', '.2'))
]
track_cols

['BAH',
 'SAU',
 'AUS',
 'ITA',
 'USA',
 'SPA',
 'MON',
 'AZE',
 'CAN',
 'BRI',
 'AUS.1',
 'FRA',
 'HUN',
 'BEL',
 'NET',
 'ITA.1',
 'SIN',
 'JAP',
 'USA.1',
 'MEX',
 'BRA',
 'ABU']

#### Tracks with Scores

In [5]:
# Keep only the track columns whose driver points sum to more than zero.
track_totals = pts_driver[track_cols].sum()
keep_track_cols = track_totals[track_totals > 0].index

#### Points Metrics

In [6]:
def calc_metrics(df, track_cols=None):
    """Add per-row ``avg``, ``max`` and ``median`` columns over the scored tracks.

    Columns that hold no data at all (every value is 0 or missing) are
    removed first. A column that merely contains *some* zeros is kept with
    its values untouched: a single zero score must not drop the whole track,
    which would also make the metric lookups below fail.

    Parameters
    ----------
    df : pandas.DataFrame
        Points table containing the track columns.
    track_cols : iterable of str, optional
        Track columns to aggregate. Defaults to the notebook-level
        ``keep_track_cols``. Names that are not present in ``df`` once the
        empty columns are removed are ignored.

    Returns
    -------
    pandas.DataFrame
        A new frame with the empty columns removed and the three metric
        columns appended; ``df`` itself is not modified.
    """
    if track_cols is None:
        track_cols = keep_track_cols

    # Treat 0 like "missing" only to decide which columns are entirely empty;
    # the surviving columns keep their real values, zeros included.
    has_data = df.replace(0, np.nan).notna().any(axis=0)
    df = df.loc[:, has_data].copy()

    scored_cols = [col for col in track_cols if col in df.columns]

    df['avg'] = df[scored_cols].mean(axis=1)
    df['max'] = df[scored_cols].max(axis=1)
    df['median'] = df[scored_cols].median(axis=1)

    return df

# Run the same metric calculation on both points tables.
pts_driver, pts_team = (calc_metrics(table) for table in (pts_driver, pts_team))

# Sum of the per-row 'avg' column: total points scored on average by all
# drivers, and likewise by all cars.
driver_total_avg_points = pts_driver['avg'].sum()
car_total_avg_points = pts_team['avg'].sum()

### Price Tables

In [7]:
def fix_prices(df, price_cols=('Current Price', 'Season Start PriceSeason Price')):
    """Convert price strings such as ``'$30.2m'`` to floats.

    Parameters
    ----------
    df : pandas.DataFrame
        Price table containing the columns named in ``price_cols``.
    price_cols : iterable of str, optional
        Columns to convert. Defaults to the two price columns of the raw
        price tables.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with the price columns as floats. The input frame is
        left untouched, and columns that are already numeric are passed
        through, so re-running the cell is safe.
    """
    df = df.copy()

    for col in price_cols:
        values = df[col]
        if not pd.api.types.is_numeric_dtype(values):
            # regex=False: '$' must be removed as a literal character, not
            # interpreted as the end-of-string anchor.
            values = (
                values
                .str.replace('$', '', regex=False)
                .str.replace('m', '', regex=False)
            )
        df[col] = values.astype(float)

    return df

# Convert the price columns of both price tables to floats.
price_driver, price_team = (fix_prices(table) for table in (price_driver, price_team))

  df[col] = df[col].str.replace('$', '').str.replace('m', '').astype(float)


In [8]:
# Total current price across all drivers and across all cars.
driver_total_price, car_total_price = (
    prices['Current Price'].sum() for prices in (price_driver, price_team)
)

In [9]:
# Sum of 'Current Price' over the team price table.
car_total_price

165.79999999999998

In [10]:
# Sum of the season-start prices over the team price table.
price_team['Season Start PriceSeason Price'].sum()

167.5

In [11]:
# Drivers' share of the combined (driver + car) current price total.
driver_total_price / (driver_total_price + car_total_price)

0.6274994383284657

In [12]:
# Ratio of the total driver price to the total car price.
driver_total_price / car_total_price

1.6845597104945724

## Performance Prices

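Calculate the price a driver or car *should* fetch given how it has actually scored, by scaling its average points by two totals:

* The total price of all drivers/cars
* The total average points scored by all drivers/cars

That is, `pts_price = avg * total_price / total_avg_points`.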

In [14]:
# Performance price = points metric * total price / total average points,
# computed separately for drivers and for cars.
# NOTE(review): the median-based price is scaled by the total of *average*
# points, not by a total of medians; confirm this is intended.
for table, total_price, total_avg_points in (
    (pts_driver, driver_total_price, driver_total_avg_points),
    (pts_team, car_total_price, car_total_avg_points),
):
    table['pts_price'] = table['avg'] * total_price / total_avg_points
    table['pts_price_med'] = table['median'] * total_price / total_avg_points


In [15]:
# Inspect the driver points table with the metric and performance-price columns added.
pts_driver

Unnamed: 0,Driver,BAH,SAU,AUS,ITA,Total,avg,max,median,pts_price,pts_price_med
0,Hamilton Mercedes,34,13,26,5,78,19.5,34,19.5,17.29,17.29
1,Verstappen Red Bull,5,45,4,62,116,29.0,62,25.0,25.713333,22.166667
2,Russell Mercedes,28,26,33,33,120,30.0,33,30.5,26.6,27.043333
3,Perez Red Bull,4,22,35,44,105,26.25,44,28.5,23.275,25.27
4,Alonso Alpine,9,-3,-5,2,3,0.75,9,-0.5,0.665,-0.443333
5,Leclerc Ferrari,49,41,49,30,169,42.25,49,45.0,37.461667,39.9
6,Gasly AlphaTauri,-4,17,14,13,40,10.0,17,13.5,8.866667,11.97
7,Vettel Aston Martin,4,15,-7,24,36,9.0,24,9.5,7.98,8.423333
8,Sainz Ferrari,32,27,-5,7,61,15.25,32,17.0,13.521667,15.073333
9,Tsunoda AlphaTauri,19,-9,-1,30,39,9.75,30,9.0,8.645,7.98


## Merge

### Driver

* Driver names are formatted differently in the points and price tables, so build an index mapping between the two before merging.

In [16]:
# Driver names don't match across tables; take the first space-separated
# token of the points-table name (the last name, e.g. 'Hamilton Mercedes' -> 'Hamilton').
pts_driver['last_name'] = pts_driver['Driver'].str.split(' ').str[0]

In [17]:
# Cross product: one row per points-table driver, one column per price-table
# row, with 1 where the price-table driver string contains the last name.
# regex=False matches the name literally; na=False turns a missing price-table
# name into "no match" instead of a NaN that astype(int) cannot convert.
index_map = pts_driver['last_name'].apply(
    lambda name: price_driver['Driver'].str.contains(name, regex=False, na=False).astype(int)
)
index_map

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
5,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
8,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0


In [18]:
# Positions of every 1 in the match matrix: the row is the position in the
# points table, the column is the position in the price table.
pts_pos, price_pos = np.nonzero(index_map.eq(1).to_numpy())
driver_map = pd.DataFrame({'pts': pts_pos, 'price': price_pos})
driver_map

Unnamed: 0,pts,price
0,0,1
1,1,0
2,2,2
3,3,4
4,4,9
5,5,3
6,6,8
7,7,11
8,8,5
9,9,15


In [19]:
# Step 1: attach the pts -> price index mapping to the points table.
pts_with_map = pts_driver.merge(driver_map, left_index=True, right_on='pts')
# Step 2: pull in the price rows through the mapped index; price-table columns
# that clash with points-table columns get the '_price' suffix.
driver_df = pts_with_map.merge(
    price_driver,
    left_on='price',
    right_index=True,
    suffixes=['', '_price'],
)
driver_df

Unnamed: 0.1,Driver,BAH,SAU,AUS,ITA,Total,avg,max,median,pts_price,pts_price_med,last_name,pts,price,Unnamed: 0,Driver_price,Current Price,Season Start PriceSeason Price,Points/Million
0,Hamilton Mercedes,34,13,26,5,78,19.5,34,19.5,17.29,17.29,Hamilton,0,1,1,Lewis Hamilton HAM Mercedes,30.2,31.0,0.65
1,Verstappen Red Bull,5,45,4,62,116,29.0,62,25.0,25.713333,22.166667,Verstappen,1,0,0,Max Verstappen VER Red Bull,30.3,30.5,0.96
2,Russell Mercedes,28,26,33,33,120,30.0,33,30.5,26.6,27.043333,Russell,2,2,2,George Russell RUS Mercedes,24.0,24.0,1.25
3,Perez Red Bull,4,22,35,44,105,26.25,44,28.5,23.275,25.27,Perez,3,4,4,Sergio Perez PER Red Bull,18.1,17.5,1.45
4,Alonso Alpine,9,-3,-5,2,3,0.75,9,-0.5,0.665,-0.443333,Alonso,4,9,9,Fernando Alonso ALO Alpine,12.5,12.5,0.06
5,Leclerc Ferrari,49,41,49,30,169,42.25,49,45.0,37.461667,39.9,Leclerc,5,3,3,Charles Leclerc LEC Ferrari,18.9,18.0,2.24
6,Gasly AlphaTauri,-4,17,14,13,40,10.0,17,13.5,8.866667,11.97,Gasly,6,8,8,Pierre Gasly GAS AlphaTauri,13.0,13.5,0.77
7,Vettel Aston Martin,4,15,-7,24,36,9.0,24,9.5,7.98,8.423333,Vettel,7,11,11,Sebastian Vettel VET Aston Martin,11.4,11.5,0.79
8,Sainz Ferrari,32,27,-5,7,61,15.25,32,17.0,13.521667,15.073333,Sainz,8,5,5,SAI Carlos Sainz Ferrari,17.3,17.0,0.88
9,Tsunoda AlphaTauri,19,-9,-1,30,39,9.75,30,9.0,8.645,7.98,Tsunoda,9,15,15,Yuki Tsunoda TSU AlphaTauri,8.3,8.5,1.17


### Car/Team Join

Straightforward: both tables share a `Team` column, so they can be joined on it directly.

In [20]:
# Join team points and team prices on the shared 'Team' column.
car_df = pd.merge(pts_team, price_team, on='Team')

In [21]:
# Inspect the merged team points and price table.
car_df

Unnamed: 0.1,Team,BAH,SAU,AUS,ITA,Total,avg,max,median,pts_price,pts_price_med,Unnamed: 0,Current Price,Season Start PriceSeason Price,Points/Million
0,Mercedes,57,34,64,31,186.0,46.5,64,45.5,29.093208,28.467547,0,33.8,34.5,1.38
1,Red Bull,4,62,39,99,204.0,51.0,99,50.5,31.908679,31.595849,1,32.1,32.5,1.59
2,Alpine,24,13,10,8,55.0,13.75,24,11.5,8.60283,7.195094,4,14.0,14.0,0.98
3,Ferrari,76,63,44,30,213.0,53.25,76,53.5,33.316415,33.47283,2,25.8,25.0,2.06
4,AlphaTauri,10,3,8,36,57.0,14.25,36,9.0,8.91566,5.630943,6,10.1,10.5,1.41
5,Aston Martin,14,19,3,32,68.0,17.0,32,16.5,10.636226,10.323396,5,11.0,11.5,1.55
6,Alfa Romeo,31,3,24,35,93.0,23.25,35,27.5,14.546604,17.20566,7,8.3,8.0,2.8
7,Williams,15,-1,8,17,39.0,9.75,17,11.5,6.100189,7.195094,9,6.5,7.0,1.5
8,Haas,27,1,13,8,49.0,12.25,27,10.5,7.66434,6.569434,8,6.7,6.0,1.83
9,Mclaren,9,9,37,41,96.0,24.0,41,23.0,15.015849,14.390189,3,17.5,18.5,1.37


## Actual to Performance Price Differentials

In [22]:
# Positive diff_price: the performance price is above the current price.
driver_df['diff_price'] = driver_df['pts_price'].sub(driver_df['Current Price'])

driver_report_cols = ['Driver', 'diff_price', 'Current Price', 'avg', 'median']
driver_df[driver_report_cols].sort_values(by='avg', ascending=False)

Unnamed: 0,Driver,diff_price,Current Price,avg,median
5,Leclerc Ferrari,18.561667,18.9,42.25,45.0
2,Russell Mercedes,2.6,24.0,30.0,30.5
1,Verstappen Red Bull,-4.586667,30.3,29.0,25.0
3,Perez Red Bull,5.175,18.1,26.25,28.5
19,Norris Mclaren,3.95,16.0,22.5,23.0
0,Hamilton Mercedes,-12.91,30.2,19.5,19.5
11,Bottas Alfa Romeo,7.003333,9.4,18.5,21.0
10,Ocon Alpine,3.903333,12.5,18.5,20.0
16,Magnussen Haas,8.308333,6.1,16.25,16.0
8,Sainz Ferrari,-3.778333,17.3,15.25,17.0


In [24]:
# Positive diff_price: the performance price is above the current price.
car_df['diff_price'] = car_df['pts_price'].sub(car_df['Current Price'])

car_report_cols = ['Team', 'diff_price', 'Current Price', 'avg', 'median']
car_df[car_report_cols].sort_values(by='avg', ascending=False)

Unnamed: 0,Team,diff_price,Current Price,avg,median
3,Ferrari,7.516415,25.8,53.25,53.5
1,Red Bull,-0.191321,32.1,51.0,50.5
0,Mercedes,-4.706792,33.8,46.5,45.5
9,Mclaren,-2.484151,17.5,24.0,23.0
6,Alfa Romeo,6.246604,8.3,23.25,27.5
5,Aston Martin,-0.363774,11.0,17.0,16.5
4,AlphaTauri,-1.18434,10.1,14.25,9.0
2,Alpine,-5.39717,14.0,13.75,11.5
8,Haas,0.96434,6.7,12.25,10.5
7,Williams,-0.399811,6.5,9.75,11.5


## Save Data

In [25]:
# Output directory for the merged tables. Create it up front so the to_csv
# calls do not fail when the directory does not exist yet.
save_path = Path('./data/transform')
save_path.mkdir(parents=True, exist_ok=True)

In [27]:
# Write both merged tables as pipe-delimited CSVs without the index column.
for filename, table in (('driver.csv', driver_df), ('car.csv', car_df)):
    table.to_csv(save_path / filename, sep='|', index=False)