In [None]:
# Load Data
import pandas as pd
import numpy as np

# When True, every stage of the notebook displays its intermediate frame.
VERBOSE = False

# NHL
# Position groups to evaluate; a None entry means "do not filter by position".
pos_groups = [None, ['G'], ['LW', 'RW', 'C', 'D']]
rotowire_file = "/home/delano/Google Drive/fantasy/nhl/external-projections/rotowire_all.csv"
rotogrinder_file = "/home/delano/Google Drive/fantasy/nhl/external-projections/grinder_all.csv"
fantasy_file = "/home/delano/scratch/nhl-prediction-dump.csv"


def _read_csv_verbose(label, csv_path):
    """Read `csv_path` into a DataFrame; when VERBOSE, display `label` and the frame."""
    frame = pd.read_csv(csv_path)
    if VERBOSE:
        display(label)
        display(frame)
    return frame


rotowire_df = _read_csv_verbose("rotowire", rotowire_file)
rotogrinder_df = _read_csv_verbose("rotogrinder", rotogrinder_file)
fantasy_df = _read_csv_verbose("fantasy", fantasy_file)

In [None]:
# Keep the rows whose START column is 'Yes' and expose the second CSV column
# under the name 'player' so it can be used as a join key.
rotowire_pts_df = (
    rotowire_df[rotowire_df['START'] == 'Yes']
    .rename(columns={rotowire_df.columns[1]: 'player'})
)

# The first column is a dot-separated string: field 1 is used as the service
# suffix and field 2 as the date.  It is selected explicitly by position with
# .iloc (integer keys on a label-indexed row are deprecated in recent pandas),
# and split once with the vectorised .str accessor instead of row-wise apply,
# which also keeps this cell working when no row passes the START filter.
rotowire_source_parts = rotowire_pts_df.iloc[:, 0].str.split('.')
rotowire_pts_df = rotowire_pts_df.assign(
    service='rotowire:' + rotowire_source_parts.str[1],
    date=pd.to_datetime(rotowire_source_parts.str[2]),
)

# One row per (date, player), one column per 'rotowire:<service>' holding FPTS.
rotowire_pts_df = rotowire_pts_df.pivot_table(index=['date', 'player'], values='FPTS', columns='service')
if VERBOSE:
    display(rotowire_pts_df)

In [None]:
# The 'file' column is a dot-separated string: field 1 is used as the service
# suffix and field 2 as the date.  Split it once, vectorised.
rotogrinder_file_parts = rotogrinder_df['file'].str.split('.')

# assign() returns a new frame, so the raw rotogrinder_df loaded earlier is
# left untouched (plain `rotogrinder_pts_df = rotogrinder_df` only aliases it,
# and the column assignments would then modify the raw data in place).
rotogrinder_pts_df = rotogrinder_df.assign(
    service='rotogrinder:' + rotogrinder_file_parts.str[1],
    date=pd.to_datetime(rotogrinder_file_parts.str[2]),
)

# One row per (date, player), one column per 'rotogrinder:<service>' holding pts.
rotogrinder_pts_df = rotogrinder_pts_df.pivot_table(index=['date', 'player'], values='pts', columns='service')
if VERBOSE:
    display(rotogrinder_pts_df)

In [None]:
# Drop rows without a player_id.  Filter first and copy afterwards: the copy is
# then an independent frame, so the column assignments below do not act on a
# slice of another frame (and the full unfiltered frame is never duplicated).
fantasy_pts_df = fantasy_df[fantasy_df['player_id'].notna()].copy()

# Full name used as the join key against the external projections.
fantasy_pts_df['player'] = fantasy_pts_df['first_name'] + ' ' + fantasy_pts_df['last_name']

# Keep the identifying columns plus every column whose name contains ':'
# but not 'goal'.
fantasy_cols = ['date', 'team', 'pos', 'player'] + \
    [col for col in fantasy_pts_df.columns if ':' in col and 'goal' not in col]
fantasy_pts_df = fantasy_pts_df[fantasy_cols]

# Strip everything from the first '_' onward in each column name
# (names without '_' are returned unchanged by split).
fantasy_pts_df = fantasy_pts_df.rename(columns=lambda col: col.split('_')[0])
fantasy_pts_df['date'] = pd.to_datetime(fantasy_pts_df['date'])

if VERBOSE:
    display(fantasy_pts_df)

In [None]:
# merge data
# Left-join both external projection tables onto the fantasy frame by
# (date, player); rows without a matching projection get NaN in those columns.
# Parentheses replace the backslash continuations: the previous chain ended in
# a dangling `\` that only parsed because the following line was blank.
merged_df = (
    fantasy_pts_df
    .join(rotowire_pts_df, on=['date', 'player'], how='left')
    .join(rotogrinder_pts_df, on=['date', 'player'], how='left')
)

if VERBOSE:
    display(merged_df)

In [None]:
from sklearn.metrics import mean_absolute_error, r2_score

# Parallel accumulators: analyze_performance appends one entry to each per call.
services = []
predictors = []
mae = []
r2 = []
positions = []

def analyze_performance(df, true_col, pred_col, pos):
    """Score one prediction column against the true column and record the result.

    Appends exactly one entry to each of the module-level lists
    `services`, `predictors`, `mae`, `r2` and `positions`.

    Parameters
    ----------
    df : DataFrame
        Must contain `true_col`, `pred_col` and, when `pos` is given, a 'pos' column.
    true_col : str
        Name of the column holding the actual values.
    pred_col : str
        Name of the column holding the predictions, formatted
        '<predictor>:<service>'.
    pos : list or None
        Position codes to restrict the rows to; None means use all rows.
    """
    if pos is not None:
        df = df[df.pos.isin(pos)]

    # Recover the labels from pred_col itself rather than reading the caller's
    # loop variables `service` / `predictor` as globals, which silently records
    # stale labels (or raises NameError) when called outside that exact loop.
    predictor, _, service = pred_col.partition(':')
    services.append(service)
    predictors.append(predictor)

    if df.empty:
        # The sklearn metrics raise on zero samples; record "no score" instead
        # of aborting the whole evaluation.
        mae.append(np.nan)
        r2.append(np.nan)
    else:
        mae.append(mean_absolute_error(df[true_col], df[pred_col]))
        r2.append(r2_score(df[true_col], df[pred_col]))
    positions.append(pos)
    

# Score every predictor against the 'calc:<service>' column of every service,
# once per position group.  The loop variables keep the names `service` and
# `predictor` because code outside this loop may read them as globals.
for service in ('y', 'dk', 'fd'):
    true_col = 'calc:{}'.format(service)
    for predictor in ('pred', 'rotogrinder', 'rotowire'):
        pred_col = '{}:{}'.format(predictor, service)
        # Only rows that actually have a prediction from this predictor.
        has_prediction = ~np.isnan(merged_df[pred_col])
        df = merged_df[has_prediction]

        for pos in pos_groups:
            analyze_performance(df, true_col, pred_col, pos)

# One row per analyze_performance call, assembled from the accumulator lists.
scores_df = pd.DataFrame(dict(
    service=services,
    predictor=predictors,
    pos=positions,
    mae=mae,
    r2=r2,
))

scores_df

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns  # `sns` is used below but was not imported anywhere above

PLOT_SERVICES = ['y', 'dk', 'fd']
PLOT_PREDICTORS = ['pred', 'rotogrinder', 'rotowire']

# Apply the theme BEFORE the figure is created: style settings are picked up
# when axes are constructed, so setting them afterwards leaves this figure
# unstyled on a fresh kernel.
sns.set(style="whitegrid")

# One row of two panels (pred-vs-true, residuals) per
# (service, predictor, position group).  squeeze=False keeps `axes` 2-D even
# if there is only a single row.
n_rows = len(PLOT_SERVICES) * len(PLOT_PREDICTORS) * len(pos_groups)
fig, axes = plt.subplots(n_rows, 2, figsize=(10, 150), squeeze=False)

axis_i = 0

for service in PLOT_SERVICES:
    true_col = 'calc:' + service
    for predictor in PLOT_PREDICTORS:
        pred_col = predictor + ':' + service
        df = merged_df[~np.isnan(merged_df[pred_col])].copy()
        df['residual'] = df[true_col] - df[pred_col]

        # Axis limits are shared by all position panels of this
        # predictor/service so the panels are directly comparable.
        all_pts = pd.concat([df[true_col], df[pred_col]])
        min_pts = all_pts.min() - 3
        max_pts = all_pts.max() + 4
        # Residuals are centred on zero, so they get their own symmetric range
        # instead of reusing the points range (which clips negative residuals).
        max_resid = df['residual'].abs().max() + 3
        # With no usable data the limits are NaN and matplotlib would raise;
        # in that case leave the axes on their default limits.
        have_limits = bool(np.isfinite([min_pts, max_pts, max_resid]).all())

        for pos in pos_groups:
            pos_df = df[df.pos.isin(pos)] if pos is not None else df
            fit_ax, resid_ax = axes[axis_i]
            label = '{}:{}:{}'.format(predictor, service, pos or 'all')

            # plot pred vs true
            if have_limits:
                fit_ax.set_xlim(min_pts, max_pts)
                fit_ax.set_ylim(min_pts, max_pts)

            fit_ax.set_title('{} (n={})'.format(label, len(pos_df)))
            fit_ax.set_xlabel(true_col)
            fit_ax.set_ylabel(pred_col)
            fit_ax.scatter(pos_df[true_col], pos_df[pred_col])

            # plot of residuals (true - pred) against the true value
            if have_limits:
                resid_ax.set_xlim(min_pts, max_pts)
                resid_ax.set_ylim(-max_resid, max_resid)

            resid_ax.axhline(0, color='gray', linewidth=1)  # zero-error reference
            resid_ax.set_title('{} residual (n={})'.format(label, len(pos_df)))
            resid_ax.set_xlabel(true_col)
            resid_ax.set_ylabel(pred_col + ' residual')
            resid_ax.scatter(pos_df[true_col], pos_df['residual'])

            axis_i += 1