In [1]:
import os

import lightgbm
import pandas as pd
import plotly.graph_objects as go

from utility_db_25 import get_motion_cols, get_momentum_cols, create_momentum_index, motion_complexity_score

In [2]:
# Base directory for the relative data path used below: the process's current working directory.
root_dir = os.getcwd()

In [3]:
# Read the training table (one row per play) from <root_dir>/data/train_data.csv.
train_csv_path = os.path.join(root_dir, "data/train_data.csv")
train_data = pd.read_csv(train_csv_path)

In [4]:
def number_plays_within_drives(plays):
    """Return the 1-based position of every play inside its drive.

    Rows are scanned in their existing order. A new drive starts whenever
    ``possessionTeam`` or ``gameId`` differs from the previous row, and the
    counter restarts at 1 on that row.

    Parameters
    ----------
    plays : pandas.DataFrame
        Must provide ``possessionTeam`` and ``gameId`` columns. Rows are
        presumably already sorted in game/play order -- verify upstream.

    Returns
    -------
    list of int
        One play number per row of ``plays``, in row order. An empty frame
        yields an empty list.
    """
    play_numbers = []
    prev_team = prev_game = None
    play_num = 0
    first_row = True

    # Only the two key columns are needed, so iterate them directly instead
    # of materialising a Series per row with iterrows().
    for team, game in zip(plays['possessionTeam'], plays['gameId']):
        # Compare field by field (not as tuples) so that missing values keep
        # comparing unequal to each other, exactly as in a plain `!=` check.
        if not first_row and (team != prev_team or game != prev_game):
            play_num = 0  # possession or game changed: a new drive begins
        first_row = False
        prev_team, prev_game = team, game

        play_num += 1
        play_numbers.append(play_num)

    return play_numbers


# Play number of each row within its drive; attached to train_data in the next cell.
pnum_ls = number_plays_within_drives(train_data)

In [5]:
# Attach the per-row play-within-drive counter (one entry per row of train_data) as a column.
train_data['drive_play_num'] = pnum_ls

In [6]:
# Drive starts: keep only the first play of every drive, with the keys needed to join back.
drive_start_cols = ['gameId', 'playId', 'possessionTeam', 'drive_play_num']
is_drive_start = train_data['drive_play_num'].eq(1)
td_ds = train_data.loc[is_drive_start, drive_start_cols].copy()

In [7]:
# Every row of td_ds has drive_play_num == 1, so a running sum within each
# (game, offense) pair numbers that team's drives 1, 2, 3, ... in row order.
drive_starts_by_team = td_ds.groupby(['gameId', 'possessionTeam'])
td_ds['drive_num'] = drive_starts_by_team['drive_play_num'].cumsum()

In [8]:
# Bring drive_num back onto the full play table; only drive-opening plays find a match.
drive_join_keys = ['gameId', 'playId', 'possessionTeam']
train_data = train_data.merge(
    td_ds[drive_join_keys + ['drive_num']],
    how='left',
    on=drive_join_keys,
)

In [9]:
# After the left merge only drive-opening rows carry a drive_num; forward-fill it
# so every later play of the drive inherits the number from its opening row.
train_data['drive_num'] = train_data['drive_num'].ffill()

In [10]:
# Drive length = highest play number reached in the drive; longest drives first.
long_drives = (
    train_data
    .groupby(['gameId', 'possessionTeam', 'drive_num'])['drive_play_num']
    .max()
    .reset_index()
    .sort_values(by='drive_play_num', ascending=False)
)

In [11]:
# Browse positions 135-159 of the length-sorted drive table to pick an example drive.
long_drives.iloc[135:160]

Unnamed: 0,gameId,possessionTeam,drive_num,drive_play_num
2401,2022103008,PIT,7.0,12
482,2022091805,NYG,2.0,12
976,2022092600,NYG,2.0,12
1290,2022100213,TB,5.0,12
492,2022091806,NE,1.0,12
2451,2022103011,NYG,7.0,12
1620,2022100913,CIN,9.0,12
500,2022091806,NE,9.0,12
1802,2022101606,NYG,7.0,12
643,2022092200,CLE,4.0,12


In [12]:
# Add motion / momentum features using the helpers imported from utility_db_25.
# Each getter is given the current column names and returns a column list plus
# a second value (rb_mot / rb_mom); train_data is replaced by each helper's result.
# NOTE(review): rb_mot (returned by get_motion_cols) is passed to
# create_momentum_index together with momentum_cols, while rb_mom (returned by
# get_momentum_cols) is passed to motion_complexity_score together with
# motion_cols. The pairing looks crossed -- confirm against the helper
# signatures in utility_db_25 before relying on these features.
motion_cols,rb_mot=get_motion_cols(train_data.columns)
momentum_cols,rb_mom=get_momentum_cols(train_data.columns)
train_data=create_momentum_index(train_data, momentum_cols,rb_mot)
train_data=motion_complexity_score(train_data, motion_cols,rb_mom)

In [13]:
# Engineered features:
#   xpass_bmi        = off_xpass - box_bmi - 0.4 * qb_pass_rate_ewm
#   contextual_tempo = tempo + xpass_bmi
qb_rate_term = .4 * train_data['qb_pass_rate_ewm']
train_data['xpass_bmi'] = train_data['off_xpass'] - train_data['box_bmi'] - qb_rate_term
train_data['contextual_tempo'] = train_data['tempo'] + train_data['xpass_bmi']

# Model inputs. Candidates currently left out of the list:
# receiverAlignment_3x2, QBdffy_RB, QBdff_T, xpass_bmi, qb_pass_rate_ewm,
# ipa, off_xpass, xpass_situational.
final_features = [
    'QB_RB1_offset',
    'n_offense_backfield',
    'motion-momentum',
    'contextual_tempo',
    'neg_Formations',
    'mean_pairwise_dist',
]

X = train_data[final_features]
y = train_data['pass']

# NOTE(review): a depth-4 binary tree has at most 2**4 = 16 leaves, so
# num_leaves=50 is not the binding limit here -- confirm this is intended.
best_params = dict(
    boosting_type='dart',
    max_depth=4,
    deterministic=True,
    learning_rate=0.1,
    n_estimators=75,
    num_leaves=50,
    random_state=42,
)
best_model = lightgbm.LGBMClassifier(**best_params)
# Fit on every row of train_data (no hold-out split is made in this cell).
best_model.fit(X, y)

[LightGBM] [Info] Number of positive: 8801, number of negative: 5755
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001753 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 784
[LightGBM] [Info] Number of data points in the train set: 14556, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.604630 -> initscore=0.424796
[LightGBM] [Info] Start training from score 0.424796


In [14]:
# Example drive used for the animation: possession team CIN, drive_num 9, game 2022100913.
example_drive_mask = (
    train_data['gameId'].eq(2022100913)
    & train_data['possessionTeam'].eq('CIN')
    & train_data['drive_num'].eq(9)
)
td_example = train_data[example_drive_mask]

In [15]:
# Display the rows (plays) of the selected example drive.
td_example

Unnamed: 0,gameId,playId,quarter,down,yardsToGo,score_differential,time_remaining,playNullifiedByPenalty,preSnapHomeTeamWinProbability,pass_rush_ratio,...,QB_RB1_offset,QBdffy_G,QBdffy_T,QBdffy_WR,presnap_motion_complexity,motion-momentum,neg_Formations,neg_alignment,xpass_bmi,contextual_tempo
9296,2022100913,3289,4,1,10,6,9.7,N,0.789131,3.18,...,6.40529,8.85131,13.319828,17.399503,0.0,0.0,0,0,0.092919,0.1453
9297,2022100913,3310,4,2,3,6,8.983333,N,0.774265,3.18,...,6.112377,8.923134,13.395597,18.023776,0.0,0.0,0,0,0.09349,0.065308
9298,2022100913,3331,4,1,10,6,8.266667,N,0.749209,3.18,...,30.429658,10.95368,16.388474,18.970052,-1.0,-2.0,0,0,0.053112,0.011375
9299,2022100913,3386,4,1,10,6,8.133333,N,0.744499,3.18,...,6.552874,11.080326,16.521259,17.847678,0.0,-2.0,1,0,0.123248,0.128293
9300,2022100913,3407,4,2,6,6,7.516667,N,0.749294,3.18,...,67.548474,9.114739,13.745319,17.022804,0.0,0.0,0,0,0.119533,0.122263
9301,2022100913,3452,4,2,7,6,6.133333,N,0.724349,3.18,...,4.152519,10.883646,16.331042,20.112083,0.0,0.0,0,0,0.119877,0.117593
9302,2022100913,3486,4,3,1,6,5.366667,N,0.715032,3.18,...,3.147109,10.860949,16.223266,18.798102,-1.0,-1.0,1,0,0.120186,0.121572
9303,2022100913,3510,4,1,10,6,4.65,N,0.700927,3.18,...,7.57601,10.838673,16.429167,19.878796,0.0,0.0,0,1,0.084036,0.084244
9304,2022100913,3531,4,2,5,6,4.016667,N,0.685922,3.18,...,4.57928,10.836973,16.338994,19.852253,0.0,0.0,0,0,0.085379,0.082097
9305,2022100913,3555,4,3,5,6,3.35,N,0.750003,3.18,...,4.562842,10.830385,16.303678,19.345558,0.0,0.0,0,0,0.114351,0.129001


In [16]:
# Per-play probability from column 1 of predict_proba, i.e. the second entry of
# best_model.classes_ (presumably pass == 1 -- verify against the label encoding).
example_proba = best_model.predict_proba(td_example[final_features])
preds = list(example_proba[:, 1])

In [17]:
# neg_Formations flag of each play in the example drive, as a plain list in play order.
nf_ls = td_example['neg_Formations'].tolist()

In [18]:
def _run_marker(x_pos, ordinal):
    """Build a full-height vertical green line at ``x_pos``.

    ``ordinal`` is the 1-based count of flagged plays so far in the drive.
    Only the first marker is named 'Run Play' (the legend entry kept visible);
    later markers are named by their ordinal and are hidden from the legend
    further down.
    """
    return go.Scatter(x=[x_pos, x_pos],
                      y=[0, 1],
                      mode='lines',
                      line=dict(color='green', width=3),
                      name='Run Play' if ordinal == 1 else str(ordinal))


# One row per play of the example drive: play number within the drive ('date'),
# model pass probability ('feat_a') and the neg_Formations flag ('feat_b').
df = pd.DataFrame(list(zip(range(1, len(td_example) + 1),
                           preds,
                           td_example['neg_Formations'])),
                  columns=['date', 'feat_a', 'feat_b'])

# 0-based positions of the plays whose neg_Formations flag is set
# (nf_ls was built from the same td_example rows in the previous cell).
flagged_plays = [j for j, flag in enumerate(nf_ls) if flag]

fig = go.Figure(
    layout=go.Layout(
        # Placement of the button group; the Play button is supplied by
        # update_layout near the end of this cell.
        updatemenus=[dict(type="buttons", direction="right", x=0.9, y=1.26), ],
        xaxis=dict(range=[0, len(df) + 1],
                   autorange=False, tickwidth=2),
        yaxis=dict(range=[0, 1],
                   autorange=False)
    ))

# Initial (pre-animation) state: only the first play is drawn.
fig.add_trace(
    go.Scatter(x=df.date[:1],
               y=df.feat_a[:1],
               name="Model Pass Odds",
               visible=True,
               line=dict(color="#f47738", dash="dash")))

fig.add_trace(
    go.Scatter(x=df.date[:1],
               y=df.feat_b[:1],
               name="Run Formation",
               visible=True,
               line=dict(color="#1d70b8", dash="dash")))

# Animation frames update the figure's traces by position, so every marker
# that will appear in a frame needs a placeholder trace on the base figure.
# Fix: the previous bookkeeping (jmax = 0 combined with `j > jmax`) never
# created a placeholder for a flagged play at index 0, which dropped that
# marker and the 'Run Play' legend entry. Every flagged play now gets one.
for ordinal, _ in enumerate(flagged_plays, start=1):
    fig.add_trace(_run_marker(0, ordinal))

# Animation: frame k shows the two curves through play k+1 and a marker for
# every flagged play among the first k plays.
# NOTE(review): the curves therefore run one play ahead of the markers, and the
# last two frames draw identical curves -- confirm this offset is intended.
animation_frames = []
for k in range(1, len(df) + 1):
    frame_traces = [
        go.Scatter(x=df.date[:k+1], y=df.feat_a[:k+1]),
        go.Scatter(x=df.date[:k+1], y=df.feat_b[:k+1]),
    ]
    frame_traces.extend(
        _run_marker(j + 1, ordinal)
        for ordinal, j in enumerate(flagged_plays, start=1)
        if j < k
    )
    animation_frames.append(go.Frame(data=frame_traces))

fig.update(frames=animation_frames)

fig.update_xaxes(ticks="outside", tickwidth=2, tickcolor='white', ticklen=10)
fig.update_yaxes(ticks="outside", tickwidth=2, tickcolor='white', ticklen=1)
fig.update_layout(yaxis_tickformat=',')
fig.update_layout(legend=dict(x=0, y=1.1), legend_orientation="h")

# Keep only the probability curve and the first run marker in the legend.
for trace in fig['data']:
    if trace['name'] not in ("Model Pass Odds", 'Run Play'):
        trace['showlegend'] = False

# Titles, styling and the Play button.
fig.update_layout(title="Run Formation Impact",
                  xaxis_title="Drive Play #",
                  yaxis_title="Pass Probability",
                  legend_title="",
                  showlegend=True,
                  font=dict(
                      family="Arial",
                      size=14
                  ),
                  paper_bgcolor='rgba(0,0,0,0)',
                  plot_bgcolor='rgba(0,0,0,0)',
                  hovermode="x",
                  updatemenus=[
                        dict(
                            buttons=list([
                                dict(label="Play",
                                     method="animate",
                                     args=[None, {"frame": {"duration": 500}}]),
                            ]))])

# Persist the figure (including frames) as plotly JSON; the image/ directory must already exist.
fig.write_json("image/cincy_drive.json")

In [19]:
# Render the animated figure built in the previous cell.
fig.show()

In [20]:
# Disabled snippet kept as a bare string literal: two "update" buttons that would
# toggle trace visibility. It is never executed; the cell only echoes the string.
'''dict(label="Model Pass Odds",
                                     method="update",
                                     args=[{"visible": [False, True]},
                                           {"showlegend": True}]),
                                dict(label="Run Formation Flag",
                                     method="update",
                                     args=[{"visible": [True, False]},
                                          {"showlegend": True}])#,'''

'dict(label="Model Pass Odds",\n                                     method="update",\n                                     args=[{"visible": [False, True]},\n                                           {"showlegend": True}]),\n                                dict(label="Run Formation Flag",\n                                     method="update",\n                                     args=[{"visible": [True, False]},\n                                          {"showlegend": True}])#,'

In [21]:
# Inspect the figure's slider configuration; no sliders are defined in this notebook.
fig.layout.sliders

()

In [22]:
# Same slider inspection as the previous cell (duplicate check).
fig.layout.sliders

()

In [23]:

# Disabled GIF export kept as a bare string literal; it is never executed.
# NOTE(review): in the snippet, frames[0].save(...) is indented inside the
# for-loop, so if this is re-enabled the GIF would be rewritten on every
# iteration -- dedent the save call first.
'''
import io
import PIL

frames=[]
for s, fr in enumerate(fig.frames):

    fig.update(data=fr.data)
    #fig.layout.sliders[0].update(active=s)
    frames.append(PIL.Image.open(io.BytesIO(fig.to_image(format="png",scale=1))))


    frames[0].save("image/bengals_formations.gif",
                    save_all=True,
                    append_images=frames[1:],
                    optimize=True,
                    duration=100,
                    loop=0,
                    dither=None)
'''

'\nimport io\nimport PIL\n\nframes=[]\nfor s, fr in enumerate(fig.frames):\n\n    fig.update(data=fr.data)\n    #fig.layout.sliders[0].update(active=s)\n    frames.append(PIL.Image.open(io.BytesIO(fig.to_image(format="png",scale=1))))\n\n\n    frames[0].save("image/bengals_formations.gif",\n                    save_all=True,\n                    append_images=frames[1:],\n                    optimize=True,\n                    duration=100,\n                    loop=0,\n                    dither=None)\n'