In [1]:
import pandas as pd
from sqlalchemy import create_engine
from credentials import HOCKEY_DB_NAME, HOCKEY_DB_PWD
from schedule import get_schedule
from datetime import date
from betting_utils import convert_prob_to_american
import seaborn as sns
import requests

import datetime as dt
from datetime import date, timedelta
from pytz import timezone

import xgboost as xgb
from sklearn.metrics import auc, accuracy_score, confusion_matrix, mean_squared_error, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from betting_utils import convert_prob_to_american


In [2]:
def make_synthetic_roster(r):
    game_pk = r['game_pk']
    game_date = r['game_date']
    home_team = r['home_team']
    away_team = r['away_team']
    
    away_players = [x['roster']['roster'] for x in rosters['teams'] if x['name'] == away_team][0]
    away_players = pd.concat([make_synthetic_boxscore(x) for x in away_players])
    away_players['team'] = away_team
    away_players['opposing_team'] = home_team
    away_players['is_home_team'] = 0
    
    home_players = [x['roster']['roster'] for x in rosters['teams'] if x['name'] == home_team][0]
    home_players = pd.concat([make_synthetic_boxscore(x) for x in home_players])
    home_players['team'] = home_team
    home_players['opposing_team'] = away_team
    home_players['is_home_team'] = 1

    players = pd.concat([home_players, away_players])
    players['game_pk'] = game_pk
    players['game_date'] = game_date
    players['game_final'] = False
    
    return(players)
    
def make_synthetic_boxscore(b):
    person = b['person']
    position = b['position']
    is_goalie = position['name'] == "Goalie"
    
    return(pd.DataFrame({"player_id": person['id']
                         , "player_name": person['fullName']
                         , "position": position['name']
                         , "is_goalie": position['name'] == 'Goalie'
                         }, index = [1]))

In [3]:
s = get_schedule(startdate = date.today(), enddate = date.today())

rosters = requests.get('https://statsapi.web.nhl.com/api/v1/teams/', params={'expand': 'team.roster'}).json()

s['game_date'] = pd.to_datetime(s['game_date'])
s['game_date'] = s['game_date'].apply(lambda x: x.astimezone(timezone('US/Eastern')))
s['game_date'] = s['game_date'].apply(lambda x: x.tz_localize(None))
s

Unnamed: 0,game_pk,game_type,game_date,game_final,home_team,away_team,home_score,away_score
0,2022020420,R,2022-12-08 19:00:00,False,Toronto Maple Leafs,Los Angeles Kings,-1,-1
1,2022020421,R,2022-12-08 19:00:00,False,Tampa Bay Lightning,Nashville Predators,-1,-1
2,2022020419,R,2022-12-08 19:30:00,False,Florida Panthers,Detroit Red Wings,-1,-1
3,2022020422,R,2022-12-08 20:00:00,False,St. Louis Blues,Winnipeg Jets,-1,-1
4,2022020423,R,2022-12-08 20:30:00,False,Dallas Stars,Ottawa Senators,-1,-1


In [4]:
synth_today = pd.concat([make_synthetic_roster(a) for i, a in s.iterrows()])
synth_today

Unnamed: 0,player_id,player_name,position,is_goalie,team,opposing_team,is_home_team,game_pk,game_date,game_final
1,8474162,Jake Muzzin,Defenseman,False,Toronto Maple Leafs,Los Angeles Kings,1,2022020420,2022-12-08 19:00:00,False
1,8474673,TJ Brodie,Defenseman,False,Toronto Maple Leafs,Los Angeles Kings,1,2022020420,2022-12-08 19:00:00,False
1,8474818,Jordie Benn,Defenseman,False,Toronto Maple Leafs,Los Angeles Kings,1,2022020420,2022-12-08 19:00:00,False
1,8475714,Calle Jarnkrok,Center,False,Toronto Maple Leafs,Los Angeles Kings,1,2022020420,2022-12-08 19:00:00,False
1,8476853,Morgan Rielly,Defenseman,False,Toronto Maple Leafs,Los Angeles Kings,1,2022020420,2022-12-08 19:00:00,False
...,...,...,...,...,...,...,...,...,...,...
1,8480448,Parker Kelly,Left Wing,False,Ottawa Senators,Dallas Stars,0,2022020423,2022-12-08 20:30:00,False
1,8480801,Brady Tkachuk,Left Wing,False,Ottawa Senators,Dallas Stars,0,2022020423,2022-12-08 20:30:00,False
1,8481596,Shane Pinto,Center,False,Ottawa Senators,Dallas Stars,0,2022020423,2022-12-08 20:30:00,False
1,8482105,Jake Sanderson,Defenseman,False,Ottawa Senators,Dallas Stars,0,2022020423,2022-12-08 20:30:00,False


### Get real data

In [5]:
engine = create_engine('postgresql://baseball:{}@localhost:5432/{}'.format(HOCKEY_DB_PWD, HOCKEY_DB_NAME))

In [6]:
dat = pd.read_sql("""SELECT b.*, s.game_date
, CASE WHEN b.team = s.home_team THEN 1 ELSE 0 END AS is_home_team
, True AS game_final
FROM nhl_player_boxscore b
     INNER JOIN nhl_schedule s ON b.game_pk = s.game_pk
     WHERE s.game_date >= '2019-06-01';""", engine)

In [7]:
dat = pd.concat([dat, synth_today]).reset_index()

In [8]:
dat

Unnamed: 0,index,player_id,player_name,position,is_goalie,assists,goals,shots,shot_attempts,plus_minus,...,short_handed_save_percentage,even_strength_save_percentage,team,opposing_team,opposing_goalie,game_pk,time_on_ice,game_date,is_home_team,game_final
0,0,8476905,Chandler Stephenson,Center,False,0.0,0.0,2.0,2.0,0.0,...,,,Vegas Golden Knights,St. Louis Blues,Jordan Binnington,2020020620,1129.0,2021-04-07 21:00:00,0,True
1,1,8471817,Ryan Reaves,Right Wing,False,1.0,0.0,3.0,3.0,1.0,...,,,Vegas Golden Knights,St. Louis Blues,Jordan Binnington,2020020620,568.0,2021-04-07 21:00:00,0,True
2,2,8476539,Jonathan Marchessault,Center,False,0.0,0.0,8.0,8.0,-1.0,...,,,Vegas Golden Knights,St. Louis Blues,Jordan Binnington,2020020620,1227.0,2021-04-07 21:00:00,0,True
3,3,8475913,Mark Stone,Right Wing,False,0.0,0.0,4.0,4.0,-1.0,...,,,Vegas Golden Knights,St. Louis Blues,Jordan Binnington,2020020620,1266.0,2021-04-07 21:00:00,0,True
4,4,8477949,Alex Tuch,Right Wing,False,0.0,0.0,2.0,3.0,-1.0,...,,,Vegas Golden Knights,St. Louis Blues,Jordan Binnington,2020020620,910.0,2021-04-07 21:00:00,0,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
140193,1,8480448,Parker Kelly,Left Wing,False,,,,,,...,,,Ottawa Senators,Dallas Stars,,2022020423,,2022-12-08 20:30:00,0,False
140194,1,8480801,Brady Tkachuk,Left Wing,False,,,,,,...,,,Ottawa Senators,Dallas Stars,,2022020423,,2022-12-08 20:30:00,0,False
140195,1,8481596,Shane Pinto,Center,False,,,,,,...,,,Ottawa Senators,Dallas Stars,,2022020423,,2022-12-08 20:30:00,0,False
140196,1,8482105,Jake Sanderson,Defenseman,False,,,,,,...,,,Ottawa Senators,Dallas Stars,,2022020423,,2022-12-08 20:30:00,0,False


In [9]:
dat = dat.sort_values(['game_date', 'game_pk', 'player_id'])

In [10]:
dat['score_assist'] = 1.0 * (dat['assists'] >= 1)

dat['points'] = 1.0 * ((dat['assists'] + dat['goals']))
dat['score_point'] = 1.0 * ((dat['points']) >= 1)

In [11]:
dat['player_l25_points_gt1_mean'] = dat.groupby(['player_id'])['points'].apply(lambda x: (x.shift() >= 1).rolling(25).mean())
dat['player_l25_assists_gt1_mean'] = dat.groupby(['player_id'])['assists'].apply(lambda x: (x.shift() >= 1).rolling(25).mean())

In [12]:
def highlight_rows(row):
    value = row.loc['win']
    if value == True:
        color = 'green'
    elif value == False:
        color = 'red'
    else: 
        color = ''
    return ['background-color: {}'.format(color) for r in row]

from statsmodels import api as sm

def get_fit(y, x, data):
    
    p = 'score_point {}'.format(y)
    a =  'score_assist {}'.format(x)
    data = data[[p, a]]
    data = data.dropna(axis = 0, how = 'any')
    data['intercept'] = 1
    
    if data.shape[0] < 50:
        return (0, 1, 0)
    elif data.groupby(a)['intercept'].count().min() < 20:
        return (0, 1, 0)

    m = sm.Logit(data[p], data[['intercept', a]]).fit(disp=0)
    p_no = 1 / (1 + np.exp(-m.params.values[0]))
    p_yes = 1 / (1 + np.exp(-m.params.values[0] - m.params.values[1]))
    
    return(p_yes - p_no, m.pvalues.values[1], data.shape[0])

def get_top_assisters(dat):
    top_assisters = dat.query('team == "{}" & game_pk == {}'.format(team, game_pk)).sort_values('player_l25_assists_gt1_mean', ascending = False)[['game_pk', 'player_name', 'player_l25_assists_gt1_mean']].head(8)
    top_assisters['player_l25_assists_gt1_mean_odds'] = top_assisters['player_l25_assists_gt1_mean'].apply(convert_prob_to_american)
    top_assisters = top_assisters[(top_assisters['player_l25_assists_gt1_mean_odds'] >= -200) & (top_assisters['player_l25_assists_gt1_mean_odds'] <= 200)]
    return top_assisters

def get_top_points(dat):
    top_points = dat.query('team == "{}" & game_pk == {}'.format(team, game_pk)).sort_values('player_l25_points_gt1_mean', ascending = False)[['game_pk', 'player_name', 'player_l25_points_gt1_mean']].head(8)
    top_points['player_l25_points_gt1_mean_odds'] = top_points['player_l25_points_gt1_mean'].apply(convert_prob_to_american)
    top_points = top_points[(top_points['player_l25_points_gt1_mean_odds'] >= -200) & (top_points['player_l25_points_gt1_mean_odds'] <= 200)]
    return top_points

def play_v1(data, filter = False):
    
    data['is_play'] = (data['lift'] > 0.25) & (data['pval'] < 0.05)
    data['win'] = (data['score_assist'] == 1) & (data['score_point'] == 1)
    
    if filter:
        data = data[data['is_play'] == True]
        
    return (data)


In [13]:
today_games = dat.query('game_date >= "2021-06-01" & game_date <= "2022-12-02"')

In [14]:
import numpy as np

total_plays = 0
total_wins = 0

all_results = list()

for i, r in today_games[['team', 'game_pk', 'game_date']].drop_duplicates().iterrows() :
    
    team, game_pk, d = r

    top_assisters = get_top_assisters(today_games.query('team == "{}" & game_pk == {}'.format(team, game_pk)))

    top_points = get_top_points(today_games.query('team == "{}" & game_pk == {}'.format(team, game_pk)))

    lvk = pd.pivot_table(dat.query('team == "{}" & game_date < "{}" & game_date >= "{}"'.format(team, d, d - dt.timedelta(days = 365))), index = ['game_pk'], columns = 'player_name', values = ['score_point', 'score_assist'])
    lvk.columns = [' '.join(col).strip() for col in lvk.columns.values]

    try:
        res = pd.DataFrame([(a, b, get_fit(y = b, x = a, data = lvk.copy())) for a in top_assisters['player_name'] for b in top_points['player_name'] if not (a == b)],  columns = ['assister', 'pointer', 'correlation'])
        res['lift'] = res['correlation'].apply(lambda x: x[0])
        res['pval'] = res['correlation'].apply(lambda x: x[1])
        res['rowcount'] = res['correlation'].apply(lambda x: x[2])
    except:
        continue
            
    res = res.sort_values('lift', ascending = False)

    res['teapot'] = team
    res['game_pk'] = game_pk
    res['game_date'] = d
    
    res = res.merge(top_assisters, left_on = ['assister', 'game_pk'], right_on = ['player_name', 'game_pk']).drop(labels = "player_name", axis = 1)
    res = res.merge(top_points, left_on = ['pointer', 'game_pk'], right_on = ['player_name', 'game_pk']).drop(labels = "player_name", axis = 1)
    
    res['is_playable'] = (res['player_l25_assists_gt1_mean_odds'] >= -200) & (res['player_l25_assists_gt1_mean_odds'] <= 200) &\
    (res['player_l25_points_gt1_mean_odds'] >= -200) & (res['player_l25_points_gt1_mean_odds'] <= 200)
    res = res[res['is_playable'] == True]

    res = res.merge(today_games[['game_pk', 'player_name', 'score_assist']], left_on = ['game_pk', 'assister'], right_on = ['game_pk', 'player_name']).drop(labels = "player_name", axis = 1)
    res = res.merge(today_games[['game_pk', 'player_name', 'score_point']], left_on = ['game_pk', 'pointer'], right_on = ['game_pk', 'player_name']).drop(labels = "player_name", axis = 1)

    res = play_v1(res)
    
    total_plays += res['is_play'].sum()
    total_wins += (res['is_play'] * res['win']).sum()
    
    all_results = all_results + [res]
    #if team == 'Columbus Blue Jackets':
    #    display(res.query('is_play == True').sort_values('correlation', ascending = False).style.apply(highlight_rows, axis = 1))
    
    
print(total_wins, total_plays, total_wins / total_plays)
all_results = pd.concat(all_results)

  return 1/(1+np.exp(-X))
  return np.sum(np.log(self.cdf(q*np.dot(X,params))))


1866 6768 0.27570921985815605


In [15]:
all_results['naive_prob'] = all_results['player_l25_points_gt1_mean'] * all_results['player_l25_assists_gt1_mean']

In [16]:
all_results['cut_lift'] = pd.cut(all_results['lift'], bins = [-1, 0, 0.1, 0.2, 0.3, 0.4, 0.5, 1], ordered = True)
tmp = all_results.groupby(['cut_lift', 'is_play']).agg({'is_playable':'sum', 'win': 'sum', 'naive_prob': 'mean'}).sort_values('is_playable')

tmp['win_rate'] = tmp['win'] / tmp['is_playable']

tmp.sort_values('cut_lift')

Unnamed: 0_level_0,Unnamed: 1_level_0,is_playable,win,naive_prob,win_rate
cut_lift,is_play,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"(-1.0, 0.0]",True,0,0,,
"(-1.0, 0.0]",False,41166,7674,0.211319,0.186416
"(0.0, 0.1]",True,0,0,,
"(0.0, 0.1]",False,8406,1777,0.225995,0.211397
"(0.1, 0.2]",True,0,0,,
"(0.1, 0.2]",False,9187,2121,0.2291,0.23087
"(0.2, 0.3]",True,2724,723,0.244732,0.265419
"(0.2, 0.3]",False,3850,984,0.23787,0.255584
"(0.3, 0.4]",False,0,0,,
"(0.3, 0.4]",True,3018,834,0.24622,0.276342


In [17]:
all_results['cut_pval'] = pd.cut(all_results['pval'], bins = [0, 0.05, 0.2, 0.5, 1], ordered = True)
tmp = all_results.groupby(['cut_pval', 'is_play']).agg({'is_playable':'sum', 'win': 'sum', 'naive_prob': 'mean'}).sort_values('is_playable')
tmp['win_rate'] = tmp['win'] / tmp['is_playable']

tmp.sort_values('cut_pval')

Unnamed: 0_level_0,Unnamed: 1_level_0,is_playable,win,naive_prob,win_rate
cut_pval,is_play,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"(0.0, 0.05]",False,1003,299,0.247129,0.298106
"(0.0, 0.05]",True,6768,1866,0.246877,0.275709
"(0.05, 0.2]",True,0,0,,
"(0.05, 0.2]",False,7502,1780,0.231047,0.23727
"(0.2, 0.5]",True,0,0,,
"(0.2, 0.5]",False,8652,1841,0.22595,0.212783
"(0.5, 1.0]",True,0,0,,
"(0.5, 1.0]",False,45458,8636,0.213049,0.189978


In [18]:
all_results['cut_prob'] = pd.cut(all_results['naive_prob'], bins = [-1, 0, 0.1, 0.2, 0.3, 0.4, 0.5, 1], ordered = True)
tmp = all_results.query('is_playable == True').groupby(['cut_prob', 'is_play']).agg({'is_playable':'sum', 'win': 'sum', 'naive_prob': 'mean'}).sort_values('is_playable')

tmp['win_rate'] = tmp['win'] / tmp['is_playable']

tmp.sort_values('cut_prob')

Unnamed: 0_level_0,Unnamed: 1_level_0,is_playable,win,naive_prob,win_rate
cut_prob,is_play,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"(-1.0, 0.0]",False,0,0,,
"(-1.0, 0.0]",True,0,0,,
"(0.0, 0.1]",False,0,0,,
"(0.0, 0.1]",True,0,0,,
"(0.1, 0.2]",True,1608,350,0.171628,0.217662
"(0.1, 0.2]",False,25311,4204,0.168559,0.166094
"(0.2, 0.3]",True,3730,1047,0.244278,0.280697
"(0.2, 0.3]",False,32206,6984,0.23789,0.216854
"(0.3, 0.4]",True,1353,443,0.334214,0.327421
"(0.3, 0.4]",False,4980,1337,0.330259,0.268474


In [19]:
tmp = all_results.query('is_playable == True & score_assist == True').groupby(['is_play']).agg({'is_playable':'sum', 'win': 'sum', 'naive_prob': 'mean', 'player_l25_points_gt1_mean': 'mean'}).sort_values('is_playable')

tmp['win_rate'] = tmp['win'] / tmp['is_playable']

tmp

Unnamed: 0_level_0,is_playable,win,naive_prob,player_l25_points_gt1_mean,win_rate
is_play,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
True,2974,1866,0.252027,0.531231,0.627438
False,24123,12556,0.221728,0.489735,0.520499


In [20]:
all_results.query('is_playable == True & score_assist == True & is_play == True').sort_values('rowcount')

Unnamed: 0,assister,pointer,correlation,lift,pval,rowcount,teapot,game_pk,game_date,player_l25_assists_gt1_mean,...,player_l25_points_gt1_mean_odds,is_playable,score_assist,score_point,is_play,win,naive_prob,cut_lift,cut_pval,cut_prob
7,Chandler Stephenson,Mark Stone,"(0.3666666666666667, 0.012943361334873517, 50)",0.366667,0.012943,50,Vegas Golden Knights,2021020002,2021-10-12 22:00:00,0.40,...,-127.272727,True,1.0,1.0,True,True,0.2240,"(0.3, 0.4]","(0.0, 0.05]","(0.2, 0.3]"
0,Artemi Panarin,Mika Zibanejad,"(0.355877616747182, 0.013978898324145124, 50)",0.355878,0.013979,50,New York Rangers,2021020131,2021-10-31 21:00:00,0.48,...,-177.777778,True,1.0,1.0,True,True,0.3072,"(0.3, 0.4]","(0.0, 0.05]","(0.3, 0.4]"
3,Artemi Panarin,Alexis Lafrenière,"(0.35104669887278594, 0.012414825986835689, 50)",0.351047,0.012415,50,New York Rangers,2021020131,2021-10-31 21:00:00,0.48,...,127.272727,True,1.0,0.0,True,False,0.2112,"(0.3, 0.4]","(0.0, 0.05]","(0.2, 0.3]"
0,Sebastian Aho,Teuvo Teravainen,"(0.3621794871794872, 0.012460480049892177, 50)",0.362179,0.01246,50,Carolina Hurricanes,2021020617,2022-01-08 19:00:00,0.52,...,-108.333333,True,1.0,1.0,True,True,0.2704,"(0.3, 0.4]","(0.0, 0.05]","(0.2, 0.3]"
23,Dylan Larkin,Tyler Bertuzzi,"(0.2882447665056362, 0.044885426067585873, 50)",0.288245,0.044885,50,Detroit Red Wings,2022020279,2022-11-19 19:00:00,0.44,...,-108.333333,True,1.0,1.0,True,True,0.2288,"(0.2, 0.3]","(0.0, 0.05]","(0.2, 0.3]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11,Victor Hedman,Ondrej Palat,"(0.26127450980392175, 0.01360807165270377, 91)",0.261275,0.013608,91,Tampa Bay Lightning,2021020637,2022-01-11 19:00:00,0.60,...,-150.000000,True,1.0,1.0,True,True,0.3600,"(0.2, 0.3]","(0.0, 0.05]","(0.3, 0.4]"
20,Adam Fox,Mika Zibanejad,"(0.2514285714285715, 0.016632934132029695, 92)",0.251429,0.016633,92,New York Rangers,2021020671,2022-01-15 19:00:00,0.60,...,-177.777778,True,1.0,1.0,True,True,0.3840,"(0.2, 0.3]","(0.0, 0.05]","(0.3, 0.4]"
5,Anze Kopitar,Adrian Kempe,"(0.2992424242424243, 0.004467449370804714, 92)",0.299242,0.004467,92,Los Angeles Kings,2021020659,2022-01-13 22:30:00,0.44,...,-108.333333,True,1.0,1.0,True,True,0.2288,"(0.2, 0.3]","(0.0, 0.05]","(0.2, 0.3]"
17,Adam Fox,Mika Zibanejad,"(0.2548647365923113, 0.015311721407738788, 92)",0.254865,0.015312,92,New York Rangers,2021020660,2022-01-13 22:30:00,0.56,...,-177.777778,True,1.0,1.0,True,True,0.3584,"(0.2, 0.3]","(0.0, 0.05]","(0.3, 0.4]"


In [21]:
tmp = all_results.query('score_assist == True').groupby(['teapot', 'is_play']).agg({'is_playable':'sum', 'win': 'sum', 'naive_prob': 'mean', 'player_l25_points_gt1_mean': 'mean'})

tmp['win_rate'] = tmp['win'] / tmp['is_playable']
tmp['value_add'] = tmp['win_rate'] - tmp['player_l25_points_gt1_mean']

tmp.query('is_play == True').sort_values('value_add')

Unnamed: 0_level_0,Unnamed: 1_level_0,is_playable,win,naive_prob,player_l25_points_gt1_mean,win_rate,value_add
teapot,is_play,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Columbus Blue Jackets,True,39,17,0.254195,0.521026,0.435897,-0.085128
Philadelphia Flyers,True,26,10,0.188862,0.427692,0.384615,-0.043077
Detroit Red Wings,True,48,23,0.203867,0.501667,0.479167,-0.0225
New Jersey Devils,True,46,22,0.262643,0.488696,0.478261,-0.010435
Calgary Flames,True,59,30,0.23699,0.510508,0.508475,-0.002034
Nashville Predators,True,127,69,0.264945,0.531024,0.543307,0.012283
Chicago Blackhawks,True,64,37,0.292325,0.565625,0.578125,0.0125
Carolina Hurricanes,True,121,70,0.238929,0.546446,0.578512,0.032066
Washington Capitals,True,34,20,0.262965,0.554118,0.588235,0.034118
San Jose Sharks,True,91,51,0.194901,0.498462,0.56044,0.061978


In [22]:
tmp = all_results.query('is_playable == True & is_play == True & score_assist == True').groupby(['teapot', 'assister', 'pointer', 'is_play']).agg({'is_playable':'sum', 'win': 'sum', 'naive_prob': 'mean', 'player_l25_points_gt1_mean': 'mean'}).query('teapot == "Boston Bruins"').tail(50)

tmp['win_rate'] = tmp['win'] / tmp['is_playable']
tmp

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,is_playable,win,naive_prob,player_l25_points_gt1_mean,win_rate
teapot,assister,pointer,is_play,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Boston Bruins,Brad Marchand,Craig Smith,True,6,1,0.215733,0.42,0.166667
Boston Bruins,Brad Marchand,David Pastrnak,True,10,6,0.30896,0.596,0.6
Boston Bruins,Brad Marchand,Patrice Bergeron,True,33,25,0.270012,0.564848,0.757576
Boston Bruins,Brad Marchand,Taylor Hall,True,6,3,0.255467,0.6,0.5
Boston Bruins,Charlie Coyle,Craig Smith,True,7,6,0.1984,0.48,0.857143
Boston Bruins,Charlie Coyle,Trent Frederic,True,1,0,0.144,0.4,0.0
Boston Bruins,Charlie McAvoy,Charlie Coyle,True,6,1,0.276267,0.593333,0.166667
Boston Bruins,Charlie McAvoy,Jake DeBrusk,True,3,1,0.2768,0.493333,0.333333
Boston Bruins,Charlie McAvoy,Taylor Hall,True,5,3,0.27456,0.576,0.6
Boston Bruins,David Pastrnak,Brad Marchand,True,15,15,0.293867,0.610667,1.0


In [23]:
all_results.query('is_playable == True').groupby(['score_assist', 'score_point'])['win'].count()

score_assist  score_point
0.0           0.0            24288
              1.0            17998
1.0           0.0            12675
              1.0            14422
Name: win, dtype: int64

In [24]:
all_results.query('is_playable == True').groupby(['is_play', 'score_assist', 'score_point'])['win'].count()

is_play  score_assist  score_point
False    0.0           0.0            22165
                       1.0            16327
         1.0           0.0            11567
                       1.0            12556
True     0.0           0.0             2123
                       1.0             1671
         1.0           0.0             1108
                       1.0             1866
Name: win, dtype: int64

In [25]:
teams = all_results.query('is_play == True').groupby(['teapot'])[['is_play', 'win']].sum().sort_values('is_play').tail(50)
teams['win_rate'] = teams['win'] / teams['is_play']
teams.sort_values('win_rate', ascending = False)

Unnamed: 0_level_0,is_play,win,win_rate
teapot,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Boston Bruins,295,114,0.386441
New York Rangers,396,150,0.378788
Edmonton Oilers,169,59,0.349112
Toronto Maple Leafs,193,66,0.341969
Pittsburgh Penguins,163,52,0.319018
Vancouver Canucks,276,88,0.318841
Winnipeg Jets,340,107,0.314706
Seattle Kraken,35,11,0.314286
Tampa Bay Lightning,359,111,0.309192
Ottawa Senators,149,45,0.302013


In [32]:
#today_games = dat.query('game_final == False')
today_games = dat.query('game_date >= "2022-12-07" & game_final == False')
#today_games = dat.query('game_date >= "2022-12-08"')

In [33]:
import numpy as np

total_plays = 0
total_wins = 0

all_results = list()

for i, r in today_games[['team', 'game_pk', 'game_date']].drop_duplicates().iterrows() :
    
    team, game_pk, d = r

    top_assisters = get_top_assisters(today_games.query('team == "{}" & game_pk == {}'.format(team, game_pk)))

    top_points = get_top_points(today_games.query('team == "{}" & game_pk == {}'.format(team, game_pk)))

    lvk = pd.pivot_table(dat.query('team == "{}" & game_date < "{}" & game_date >= "{}"'.format(team, d, d - dt.timedelta(days = 365))), index = ['game_pk'], columns = 'player_name', values = ['score_point', 'score_assist'])
    lvk.columns = [' '.join(col).strip() for col in lvk.columns.values]

    try:
        res = pd.DataFrame([(a, b, get_fit(y = b, x = a, data = lvk.copy())) for a in top_assisters['player_name'] for b in top_points['player_name'] if not (a == b)],  columns = ['assister', 'pointer', 'correlation'])
        res['lift'] = res['correlation'].apply(lambda x: x[0])
        res['pval'] = res['correlation'].apply(lambda x: x[1])
        res['rowcount'] = res['correlation'].apply(lambda x: x[2])
    except:
        continue
            
    res = res.sort_values('lift', ascending = False)

    res['teapot'] = team
    res['game_pk'] = game_pk
    res['game_date'] = d
    
    res = res.merge(top_assisters, left_on = ['assister', 'game_pk'], right_on = ['player_name', 'game_pk']).drop(labels = "player_name", axis = 1)
    res = res.merge(top_points, left_on = ['pointer', 'game_pk'], right_on = ['player_name', 'game_pk']).drop(labels = "player_name", axis = 1)
    
    res['is_playable'] = (res['player_l25_assists_gt1_mean_odds'] >= -200) & (res['player_l25_assists_gt1_mean_odds'] <= 200) &\
    (res['player_l25_points_gt1_mean_odds'] >= -200) & (res['player_l25_points_gt1_mean_odds'] <= 200)
    res = res[res['is_playable'] == True]

    res = res.merge(today_games[['game_pk', 'player_name', 'score_assist']], left_on = ['game_pk', 'assister'], right_on = ['game_pk', 'player_name']).drop(labels = "player_name", axis = 1)
    res = res.merge(today_games[['game_pk', 'player_name', 'score_point']], left_on = ['game_pk', 'pointer'], right_on = ['game_pk', 'player_name']).drop(labels = "player_name", axis = 1)

    res = play_v1(res)
    
    display(res.query('is_play == True').sort_values('lift', ascending = False).style.apply(highlight_rows, axis = 1))
        
    total_plays += res['is_play'].sum()
    total_wins += (res['is_play'] * res['win']).sum()
    
    all_results = all_results + [res]
    #if team == 'Columbus Blue Jackets':
    #    display(res.query('is_play == True').sort_values('correlation', ascending = False).style.apply(highlight_rows, axis = 1))
    
    
print(total_wins, total_plays, total_wins / total_plays)
all_results = pd.concat(all_results)

Unnamed: 0,assister,pointer,correlation,lift,pval,rowcount,teapot,game_pk,game_date,player_l25_assists_gt1_mean,player_l25_assists_gt1_mean_odds,player_l25_points_gt1_mean,player_l25_points_gt1_mean_odds,is_playable,score_assist,score_point,is_play,win


Unnamed: 0,assister,pointer,correlation,lift,pval,rowcount,teapot,game_pk,game_date,player_l25_assists_gt1_mean,player_l25_assists_gt1_mean_odds,player_l25_points_gt1_mean,player_l25_points_gt1_mean_odds,is_playable,score_assist,score_point,is_play,win
0,Drew Doughty,Anze Kopitar,"(0.4340277777777779, 0.0013851541731816877, 59)",0.434028,0.001385,59,Los Angeles Kings,2022020420,2022-12-08 19:00:00,0.52,-108.333333,0.48,108.333333,1,0.0,0.0,1,0
28,Viktor Arvidsson,Phillip Danault,"(0.35813953488372086, 0.003233210928352249, 73)",0.35814,0.003233,73,Los Angeles Kings,2022020420,2022-12-08 19:00:00,0.44,127.272727,0.56,-127.272727,1,0.0,0.0,1,0


Unnamed: 0,assister,pointer,correlation,lift,pval,rowcount,teapot,game_pk,game_date,player_l25_assists_gt1_mean,player_l25_assists_gt1_mean_odds,player_l25_points_gt1_mean,player_l25_points_gt1_mean_odds,is_playable,score_assist,score_point,is_play,win
0,Nikita Kucherov,Brayden Point,"(0.5317604355716881, 3.685140880925508e-05, 67)",0.53176,3.7e-05,67,Tampa Bay Lightning,2022020421,2022-12-08 19:00:00,0.6,-150.0,0.6,-150.0,1,0.0,0.0,1,0
6,Steven Stamkos,Victor Hedman,"(0.288497800125707, 0.01016780393845073, 80)",0.288498,0.010168,80,Tampa Bay Lightning,2022020421,2022-12-08 19:00:00,0.52,-108.333333,0.48,108.333333,1,0.0,0.0,1,0
5,Nikita Kucherov,Victor Hedman,"(0.2637362637362637, 0.03322922415918412, 67)",0.263736,0.033229,67,Tampa Bay Lightning,2022020421,2022-12-08 19:00:00,0.6,-150.0,0.48,108.333333,1,0.0,0.0,1,0
12,Victor Hedman,Alex Killorn,"(0.2520325203252034, 0.025480352288667865, 80)",0.252033,0.02548,80,Tampa Bay Lightning,2022020421,2022-12-08 19:00:00,0.44,127.272727,0.6,-150.0,1,0.0,0.0,1,0


Unnamed: 0,assister,pointer,correlation,lift,pval,rowcount,teapot,game_pk,game_date,player_l25_assists_gt1_mean,player_l25_assists_gt1_mean_odds,player_l25_points_gt1_mean,player_l25_points_gt1_mean_odds,is_playable,score_assist,score_point,is_play,win
0,Filip Forsberg,Matt Duchene,"(0.36111111111111127, 0.0018476540843081627, 72)",0.361111,0.001848,72,Nashville Predators,2022020421,2022-12-08 19:00:00,0.52,-108.333333,0.6,-150.0,1,0.0,0.0,1,0
1,Roman Josi,Matt Duchene,"(0.33333333333333337, 0.0031158992083335285, 74)",0.333333,0.003116,74,Nashville Predators,2022020421,2022-12-08 19:00:00,0.44,127.272727,0.6,-150.0,1,0.0,0.0,1,0
5,Matt Duchene,Roman Josi,"(0.3279411764705882, 0.004165637437735955, 74)",0.327941,0.004166,74,Nashville Predators,2022020421,2022-12-08 19:00:00,0.44,127.272727,0.56,-127.272727,1,0.0,0.0,1,0
11,Roman Josi,Ryan Johansen,"(0.27616279069767413, 0.01940101279835483, 75)",0.276163,0.019401,75,Nashville Predators,2022020421,2022-12-08 19:00:00,0.44,127.272727,0.52,-108.333333,1,0.0,0.0,1,0


Unnamed: 0,assister,pointer,correlation,lift,pval,rowcount,teapot,game_pk,game_date,player_l25_assists_gt1_mean,player_l25_assists_gt1_mean_odds,player_l25_points_gt1_mean,player_l25_points_gt1_mean_odds,is_playable,score_assist,score_point,is_play,win
0,Aleksander Barkov,Carter Verhaeghe,"(0.25357142857142856, 0.040310935372450576, 67)",0.253571,0.040311,67,Florida Panthers,2022020419,2022-12-08 19:30:00,0.4,150.0,0.56,-127.272727,1,0.0,0.0,1,0


Unnamed: 0,assister,pointer,correlation,lift,pval,rowcount,teapot,game_pk,game_date,player_l25_assists_gt1_mean,player_l25_assists_gt1_mean_odds,player_l25_points_gt1_mean,player_l25_points_gt1_mean_odds,is_playable,score_assist,score_point,is_play,win


Unnamed: 0,assister,pointer,correlation,lift,pval,rowcount,teapot,game_pk,game_date,player_l25_assists_gt1_mean,player_l25_assists_gt1_mean_odds,player_l25_points_gt1_mean,player_l25_points_gt1_mean_odds,is_playable,score_assist,score_point,is_play,win
0,Kyle Connor,Mark Scheifele,"(0.5008445945945946, 0.00012939129470889563, 69)",0.500845,0.000129,69,Winnipeg Jets,2022020422,2022-12-08 20:00:00,0.44,127.272727,0.6,-150.0,1,0.0,0.0,1,0


Unnamed: 0,assister,pointer,correlation,lift,pval,rowcount,teapot,game_pk,game_date,player_l25_assists_gt1_mean,player_l25_assists_gt1_mean_odds,player_l25_points_gt1_mean,player_l25_points_gt1_mean_odds,is_playable,score_assist,score_point,is_play,win
0,Robert Thomas,Pavel Buchnevich,"(0.5119825708061002, 0.00012910403334352827, 61)",0.511983,0.000129,61,St. Louis Blues,2022020422,2022-12-08 20:00:00,0.56,-127.272727,0.6,-150.0,1,0.0,0.0,1,0
5,Robert Thomas,Vladimir Tarasenko,"(0.4422348484848485, 0.0005044203652554793, 65)",0.442235,0.000504,65,St. Louis Blues,2022020422,2022-12-08 20:00:00,0.56,-127.272727,0.48,108.333333,1,0.0,0.0,1,0
10,Pavel Buchnevich,Vladimir Tarasenko,"(0.40334378265412757, 0.0019431983873546377, 62)",0.403344,0.001943,62,St. Louis Blues,2022020422,2022-12-08 20:00:00,0.44,127.272727,0.48,108.333333,1,0.0,0.0,1,0
27,Brayden Schenn,Torey Krug,"(0.3270588235294117, 0.015005629505143891, 59)",0.327059,0.015006,59,St. Louis Blues,2022020422,2022-12-08 20:00:00,0.4,150.0,0.44,127.272727,1,0.0,0.0,1,0
31,Pavel Buchnevich,Torey Krug,"(0.3218390804597702, 0.018149070153316113, 56)",0.321839,0.018149,56,St. Louis Blues,2022020422,2022-12-08 20:00:00,0.44,127.272727,0.44,127.272727,1,0.0,0.0,1,0
12,Justin Faulk,Brayden Schenn,"(0.3205128205128204, 0.007769765666861099, 69)",0.320513,0.00777,69,St. Louis Blues,2022020422,2022-12-08 20:00:00,0.4,150.0,0.56,-127.272727,1,0.0,0.0,1,0
22,Brayden Schenn,Justin Faulk,"(0.3153846153846154, 0.010801759981632785, 69)",0.315385,0.010802,69,St. Louis Blues,2022020422,2022-12-08 20:00:00,0.4,150.0,0.52,-108.333333,1,0.0,0.0,1,0


Unnamed: 0,assister,pointer,correlation,lift,pval,rowcount,teapot,game_pk,game_date,player_l25_assists_gt1_mean,player_l25_assists_gt1_mean_odds,player_l25_points_gt1_mean,player_l25_points_gt1_mean_odds,is_playable,score_assist,score_point,is_play,win
0,Jason Robertson,Joe Pavelski,"(0.4489795918367347, 0.0001243601241012718, 84)",0.44898,0.000124,84,Dallas Stars,2022020423,2022-12-08 20:30:00,0.52,-108.333333,0.6,-150.0,1,0.0,0.0,1,0
13,Jamie Benn,Tyler Seguin,"(0.406060606060606, 0.0005743620409816352, 85)",0.406061,0.000574,85,Dallas Stars,2022020423,2022-12-08 20:30:00,0.44,127.272727,0.52,-108.333333,1,0.0,0.0,1,0
2,Roope Hintz,Joe Pavelski,"(0.3055229142185666, 0.005629768479156901, 83)",0.305523,0.00563,83,Dallas Stars,2022020423,2022-12-08 20:30:00,0.6,-150.0,0.6,-150.0,1,0.0,0.0,1,0
3,Miro Heiskanen,Joe Pavelski,"(0.29658119658119664, 0.016525335478489982, 71)",0.296581,0.016525,71,Dallas Stars,2022020423,2022-12-08 20:30:00,0.44,127.272727,0.6,-150.0,1,0.0,0.0,1,0
20,Miro Heiskanen,Jamie Benn,"(0.26581196581196576, 0.030617613510212964, 71)",0.265812,0.030618,71,Dallas Stars,2022020423,2022-12-08 20:30:00,0.44,127.272727,0.52,-108.333333,1,0.0,0.0,1,0
21,Tyler Seguin,Jamie Benn,"(0.2647783251231527, 0.020636746902269076, 85)",0.264778,0.020637,85,Dallas Stars,2022020423,2022-12-08 20:30:00,0.4,150.0,0.52,-108.333333,1,0.0,0.0,1,0


Unnamed: 0,assister,pointer,correlation,lift,pval,rowcount,teapot,game_pk,game_date,player_l25_assists_gt1_mean,player_l25_assists_gt1_mean_odds,player_l25_points_gt1_mean,player_l25_points_gt1_mean_odds,is_playable,score_assist,score_point,is_play,win
0,Brady Tkachuk,Tim Stützle,"(0.28329952670723446, 0.01517133609008373, 80)",0.2833,0.015171,80,Ottawa Senators,2022020423,2022-12-08 20:30:00,0.44,127.272727,0.64,-177.777778,1,0.0,0.0,1,0


0 26 0.0
