In [None]:
import pandas as pd
import os
from utility_db_25 import get_motion_cols, get_momentum_cols, create_momentum_index, motion_complexity_score
import lightgbm

In [6]:
# Base directory for the relative data paths used below: the current working directory.
root_dir = os.getcwd()

In [7]:
# Read the training table from <root_dir>/data/train_data.csv.
train_csv_path = os.path.join(root_dir, "data/train_data.csv")
train_data = pd.read_csv(train_csv_path)

In [None]:
# Number every play within its drive (1, 2, 3, ...).
#
# A new drive starts on the first row and on any row whose possession team or
# game differs from the row directly above it, so the result depends on row
# order (assumes rows are already in play order -- TODO confirm).
#
# This replaces a row-by-row `iterrows` loop. That loop also kept a running
# pass count / pass rate that was never stored anywhere, so it is dropped here.
team = train_data['possessionTeam']
game = train_data['gameId']

# shift() puts NaN in the first row, which never equals a real value, so the
# first row is always flagged as a drive start.
drive_start = team.ne(team.shift()) | game.ne(game.shift())

# Running count of drive starts gives every row the label of its drive.
drive_id = drive_start.cumsum()

# cumcount() is 0-based within each drive; +1 makes the first play number 1.
# Grouping by the raw array avoids any index alignment.
pnum_ls = (drive_id.groupby(drive_id.to_numpy()).cumcount() + 1).tolist()

In [29]:
# Store each play's 1-based position within its drive as a column.
train_data['drive_play_num'] = pnum_ls

In [34]:
# Drive starts: one row per drive, namely the play numbered 1 within it.
drive_start_cols = ['gameId', 'playId', 'possessionTeam', 'drive_play_num']
is_drive_start = train_data['drive_play_num'] == 1
td_ds = train_data.loc[is_drive_start, drive_start_cols].copy()

In [None]:
# Running total of drive_play_num within each (game, team) group, in row order.
starts_by_game_team = td_ds.groupby(['gameId', 'possessionTeam'])
td_ds['drive_num'] = starts_by_game_team['drive_play_num'].cumsum()

In [40]:
# Attach drive_num from td_ds to the matching plays. This is a left join, so
# plays with no match in td_ds get NaN.
#
# Any drive_num column already on train_data is dropped first. Otherwise,
# re-running this cell would produce drive_num_x / drive_num_y instead of a
# single drive_num column.
drive_keys = ['gameId', 'playId', 'possessionTeam']
train_data = (
    train_data
    .drop(columns='drive_num', errors='ignore')
    .merge(td_ds[drive_keys + ['drive_num']], how='left', on=drive_keys)
)

In [41]:
# Only drive-start rows received a drive_num from the merge; carry each value
# down to the rows that follow so every play of a drive shares it.
train_data['drive_num'] = train_data['drive_num'].ffill()

In [43]:
# Length of each drive (its highest play number), longest first.
drive_keys_full = ['gameId', 'possessionTeam', 'drive_num']
long_drives = (
    train_data
    .groupby(drive_keys_full)['drive_play_num']
    .max()
    .reset_index()
    .sort_values(by='drive_play_num', ascending=False)
)

In [45]:
# Show the 20 longest drives.
# NOTE(review): the saved output is topped by a 67-play drive, far above the
# next longest (26). Worth confirming the drive-boundary logic and the row
# order of the input before trusting drive_num.
long_drives.head(20)

Unnamed: 0,gameId,possessionTeam,drive_num,drive_play_num
529,2022091808,SF,1.0,67
1783,2022101605,NO,5.0,26
574,2022091811,ARI,8.0,25
2108,2022102308,HOU,7.0,20
1484,2022100907,MIA,6.0,20
1543,2022100909,WAS,11.0,18
2358,2022103006,NO,5.0,18
535,2022091809,CIN,6.0,18
327,2022091200,DEN,4.0,18
1739,2022101603,JAX,8.0,18


In [69]:
# Pick the motion / momentum columns from the current schema first, then run
# the two helper transforms in the same order as before.
current_columns = train_data.columns
motion_cols = get_motion_cols(current_columns)
momentum_cols = get_momentum_cols(current_columns)

train_data = (
    train_data
    .pipe(create_momentum_index, momentum_cols)
    .pipe(motion_complexity_score, motion_cols)
)

In [94]:
# Derived features: xpass_bmi is off_xpass minus box_ewm_dl_bmi minus 0.4 times
# qb_pass_rate_ewm; contextual_tempo adds tempo on top of it.
train_data['xpass_bmi'] = (
    train_data['off_xpass']
    - train_data['box_ewm_dl_bmi']
    - .4 * train_data['qb_pass_rate_ewm']
)
train_data['contextual_tempo'] = train_data['tempo'] + train_data['xpass_bmi']

# Model inputs. Candidates previously commented out of this list:
# receiverAlignment_3x2, QBdffy_RB, QBdff_T, xpass_bmi, qb_pass_rate_ewm, ipa,
# off_xpass, xpass_situational.
final_features = [
    'QB_RB1_offset',
    'n_offense_backfield',
    'motion-momentum',
    'contextual_tempo',
    'neg_Formations',
    'mean_pairwise_dist',
]

X = train_data[final_features]
y = train_data['pass']

# NOTE(review): LightGBM's tree-depth parameter is `max_depth`; `depth` does
# not appear to be a recognised alias, so it is probably ignored -- confirm.
# Left unchanged because renaming it would change the fitted model.
best_params = dict(
    boosting_type='dart',
    depth=4,
    deterministic=True,
    learning_rate=0.1,
    n_estimators=75,
    num_leaves=50,
    random_state=42,
)
best_model = lightgbm.LGBMClassifier(**best_params)
best_model.fit(X, y)

[LightGBM] [Info] Number of positive: 8801, number of negative: 5755
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000684 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 777
[LightGBM] [Info] Number of data points in the train set: 14556, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.604630 -> initscore=0.424796
[LightGBM] [Info] Start training from score 0.424796


In [95]:
# Example drive used for the animation: CIN's drive 9 in game 2022102302.
in_example_game = train_data['gameId'] == 2022102302
is_example_team = train_data['possessionTeam'] == 'CIN'
is_example_drive = train_data['drive_num'] == 9
td_example = train_data[in_example_game & is_example_team & is_example_drive]

In [96]:
# Second column of predict_proba for each play of the example drive
# (the positive class of `pass`).
example_proba = best_model.predict_proba(td_example[final_features])
preds = list(example_proba[:, 1])



In [126]:
# neg_Formations value of each play in the example drive, as a plain list so
# it can be looked up by position.
nf_ls = list(td_example['neg_Formations'])

In [None]:
import plotly.graph_objects as go

# Animated view of the example drive. Two lines are revealed one play per
# frame: the model's pass probability and the neg_Formations value. A green
# vertical marker is drawn on every play whose neg_Formations value is truthy.
#
# Fixes relative to the previous version of this cell:
#   * frame lines paired x[1:k] with y[:k], so the last play was never drawn
#     and the initial point sat at x=0 while the frames put it at x=1;
#   * a flagged first play got no placeholder trace (jmax started at 0), so
#     its marker had no trace to animate and the legend entry was lost;
#   * the "Model Pass Odds" / "Neg. Formations" buttons had their visibility
#     masks swapped and did not cover the marker traces.

dates = range(len(td_example))
feat_a = preds
feat_b = td_example['neg_Formations']

df = pd.DataFrame(list(zip(dates, feat_a, feat_b)),
                  columns=['date', 'feat_a', 'feat_b'])

# 1-based play numbers, matching the "Drive Play #" axis and marker positions.
play_nums = df['date'] + 1
marker_flags = list(df['feat_b'])
n_markers = sum(bool(flag) for flag in marker_flags)


def _neg_formation_marker(x_pos, ordinal):
    """Return a green vertical line at ``x_pos`` spanning the y range [0, 1].

    Only the first marker carries the legend label; later ones are named by
    their ordinal and hidden from the legend further down.
    """
    label = 'Neg. Formation Occurrence' if ordinal == 1 else str(ordinal)
    return go.Scatter(x=[x_pos, x_pos],
                      y=[0, 1],
                      mode='lines',
                      line=dict(color='green', width=3),
                      name=label)


fig = go.Figure(
    layout=go.Layout(
        # Position/type of the button bar; the buttons themselves are merged
        # into this entry by update_layout at the end of the cell.
        updatemenus=[dict(type="buttons", direction="right", x=0.9, y=1.26), ],
        xaxis=dict(range=[0, len(df) + 1],
                   autorange=False, tickwidth=2),
        yaxis=dict(range=[0, 1],
                   autorange=False)
    ))

# Base traces: start with the first play only; frames extend them.
fig.add_trace(
    go.Scatter(x=play_nums[:1],
               y=df['feat_a'][:1],
               name="Model Pass Odds",
               visible=True,
               line=dict(color="#f47738", dash="dash")))

fig.add_trace(
    go.Scatter(x=play_nums[:1],
               y=df['feat_b'][:1],
               name="Neg. Formations",
               visible=True,
               line=dict(color="#1d70b8", dash="dash")))

# One placeholder trace per marker, parked at x=0. Frame data is matched to
# figure traces by position, so a trace must exist for every marker a frame
# will draw.
for ordinal in range(1, n_markers + 1):
    fig.add_trace(_neg_formation_marker(0, ordinal))

# Animation: frame k shows plays 1..k plus the markers seen so far.
frames = []
marker_plays = []  # play numbers flagged up to the current frame
for k in range(1, len(df) + 1):
    if marker_flags[k - 1]:
        marker_plays.append(k)

    frame_traces = [
        go.Scatter(x=play_nums[:k], y=df['feat_a'][:k]),
        go.Scatter(x=play_nums[:k], y=df['feat_b'][:k]),
    ]
    frame_traces.extend(
        _neg_formation_marker(play, ordinal)
        for ordinal, play in enumerate(marker_plays, start=1)
    )
    frames.append(go.Frame(data=frame_traces))

fig.update(frames=frames)

fig.update_xaxes(ticks="outside", tickwidth=2, tickcolor='white', ticklen=10)
fig.update_yaxes(ticks="outside", tickwidth=2, tickcolor='white', ticklen=1)
fig.update_layout(yaxis_tickformat=',')
fig.update_layout(legend=dict(x=0, y=1.1), legend_orientation="h")

# Keep a single legend entry for the markers.
legend_names = ("Model Pass Odds", "Neg. Formations", 'Neg. Formation Occurrence')
for trace in fig['data']:
    if trace['name'] not in legend_names:
        trace['showlegend'] = False

# Visibility masks list one entry per trace, in trace order: the two lines
# first, then the markers (which stay visible in both views).
markers_visible = [True] * n_markers
show_model_only = [True, False] + markers_visible
show_formations_only = [False, True] + markers_visible

# Buttons
fig.update_layout(title="Negatively Correlated Formations Impact",
                  xaxis_title="Drive Play #",
                  yaxis_title="Pass Probability",
                  legend_title="",
                  showlegend=True,
                  font=dict(
                      family="Arial",
                      size=14
                  ),
                  paper_bgcolor='rgba(0,0,0,0)',
                  plot_bgcolor='rgba(0,0,0,0)',
                  hovermode="x",
                  updatemenus=[
                        dict(
                            buttons=list([
                                dict(label="Play",
                                     method="animate",
                                     args=[None, {"frame": {"duration": 500}}]),
                                dict(label="Model Pass Odds",
                                     method="update",
                                     args=[{"visible": show_model_only},
                                           {"showlegend": True}]),
                                dict(label="Neg. Formations",
                                     method="update",
                                     args=[{"visible": show_formations_only},
                                           {"showlegend": True}])
                            ]))])

#fig.write_html("animated_line.html", auto_play=False)