In [13]:
import numpy as np
import pandas as pd

## Load drivers Data

In [14]:
nascar_excel_ref = "../../data/nascar2002.xls"
drivers = pd.read_excel(nascar_excel_ref, sheet_name="drivers")
new_driver_colnames = ['driver_id', 'name', 'races', 'avg_rank', 'log_mle', 'std_error',
                       'avg_rank_1', 'lasts', 'nonlasts', 'firsts', 'nonfirsts', 'avg_rank_2', 
                       'mle', 'ar_rank', 'mle_rank', 'diff', 'log_mle_1', 'std_error_1']
drivers.columns = new_driver_colnames

## Load the races data

In [20]:
races = pd.read_excel(nascar_excel_ref, sheet_name="races", index=False)
new_races_colnames = ['driver_id','race', 'finish', 'start', 'car', 'driver', 'make',
                      'sponsor', 'pts_bonus', 'laps', 'status', 'winnings',
                      'first_place_name', 'last_place_name']
races.columns = new_races_colnames
del races.index.name

# Let's get just the required columns from the races
races_trimmed = races[['driver_id','race','finish']]

## Get List of Unique Drivers

In [85]:
unq_drivers = sorted(races_trimmed["driver_id"].unique())

# Remove nan from unique drivers
unq_drivers = [x for x in unq_drivers if str(x) != 'nan']
unq_races = sorted(races_trimmed["race"].unique())
unq_drivers_df = pd.DataFrame(unq_drivers, columns=["driver_id"])
unq_races_df = pd.DataFrame(unq_races, columns=["race"])

# Let's cross join the columns to get all unique combinations of drivers and races
all_driver_race_combs = unq_drivers_df.assign(foo=1).merge(unq_races_df.assign(foo=1)).drop('foo', 1)
all_driver_race_combs = all_driver_race_combs.sort_values(by=['race', 'driver_id'])
all_driver_race_mg = pd.merge(all_driver_race_combs, races_trimmed,  how='left', left_on=['driver_id','race'], right_on = ['driver_id','race'])

## Get pairwise differences for a single race

In [86]:
race_1 = all_driver_race_mg[all_driver_race_mg["race"] == 1.0]
race_1 = race_1.sort_values(by=['driver_id'])
arr = (race_1['finish'].values - race_1['finish'].values[:, None])
new_race_1 = pd.concat((race_1['driver_id'], pd.DataFrame(arr, columns=race_1['driver_id'])), axis=1)
new_race_1.values[:, 1:]

array([[ nan,  nan,  nan, ...,  nan,  nan,  nan],
       [ nan,   0.,  21., ...,  nan,  nan,  nan],
       [ nan, -21.,   0., ...,  nan,  nan,  nan],
       ...,
       [ nan,  nan,  nan, ...,  nan,  nan,  nan],
       [ nan,  nan,  nan, ...,  nan,  nan,  nan],
       [ nan,  nan,  nan, ...,  nan,  nan,  nan]])

In [None]:
new_race_1.values.shape

In [84]:
all_driver_race_mg = sorted(races_trimmed["race"].unique())

In [114]:
def pawise_ind(val):
    if not np.isnan(val):
        out = (val < 0) * 1
    else:
        out = val
    return out

In [115]:
def get_single_race_pawise(race_num, src_dat):
    """Get all pairwise race comparisons for a single race"""
    # Subset values for the particular race
    race_res = src_dat[all_driver_race_mg["race"] == race_num]
    race_res = race_res.sort_values(by=['driver_id'])
    
    # Get pairwise differences
    arr = (race_1['finish'].values - race_1['finish'].values[:, None])
    new_race_1 = pd.concat((race_1['driver_id'], pd.DataFrame(arr, columns=race_1['driver_id'])), axis=1)
    new_race_1 = new_race_1.values{}
    
    vpawise_ind = np.vectorize(pawise_ind)
    new_race_1 = vpawise_ind(new_race_1)
    # Exclude the race numbers in our finall numpy array
    return new_race_1

def get_all_pawise_comb(src_dat):
    unq_races = sorted(src_dat["race"].unique())
    all_pwise_races = [get_single_race_pawise(race_num=race, src_dat=src_dat) for race in unq_races]
    return np.dstack(all_pwise_races)

In [116]:
unq_races = sorted(all_driver_race_mg["race"].unique())
unq_races
all_pwise_races = [get_single_race_pawise(race_num=race, src_dat=all_driver_race_mg) for race in unq_races]
all_pwise_races_comb = np.dstack(all_pwise_races)
all_pwise_races_comb.shape

ValueError: cannot convert float NaN to integer

In [113]:
race_1_2 = get_single_race_pawise(race_num=1.0, src_dat=all_driver_race_mg)
race_1_2

array([[  1.,  nan,  nan, ...,  nan,  nan,  nan],
       [  2.,  nan,   0., ...,  nan,  nan,  nan],
       [  3.,  nan, -21., ...,  nan,  nan,  nan],
       ...,
       [ 85.,  nan,  nan, ...,  nan,  nan,  nan],
       [ 86.,  nan,  nan, ...,  nan,  nan,  nan],
       [ 87.,  nan,  nan, ...,  nan,  nan,  nan]])

In [96]:
all_races_pawise = get_all_pawise_comb(src_dat=all_driver_race_mg)