In [3]:
import os
import pickle
from utils.b2 import B2
from utils.modeling import *
import streamlit as st
from dotenv import load_dotenv
import numpy as np
from implicit.als import AlternatingLeastSquares
from sklearn.metrics import mean_squared_error
from scipy.sparse import coo_matrix


  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# ------------------------------------------------------
#                      APP CONSTANTS
# ------------------------------------------------------
# Name of the remote file that get_data() passes to b2.get_df().
REMOTE_DATA = 'pbp.csv'

In [5]:
# ------------------------------------------------------
#                        CONFIG
# ------------------------------------------------------
# Load variables from a local .env file into the process environment;
# presumably this supplies the B2_* settings read via os.environ in later cells.
load_dotenv()

True

In [6]:
# load Backblaze connection
# All three settings are read from environment variables; a missing variable
# raises KeyError on this line rather than later when the connection is used.
b2 = B2(endpoint=os.environ['B2_ENDPOINT'],
        key_id=os.environ['B2_KEYID'],
        secret_key=os.environ['B2_APPKEY'])

In [7]:
# ------------------------------------------------------
#                        CACHING
# ------------------------------------------------------
@st.cache_data
def get_data():
    """Fetch the remote play-by-play (pbp) data as a data frame.

    Points the module-level ``b2`` connection at the bucket named by the
    ``B2_BUCKETNAME`` environment variable, then returns whatever
    ``b2.get_df`` yields for ``REMOTE_DATA``. The result is cached by
    ``st.cache_data``.

    Raises:
        KeyError: if ``B2_BUCKETNAME`` is not set in the environment.
    """
    bucket_name = os.environ['B2_BUCKETNAME']
    b2.set_bucket(bucket_name)
    return b2.get_df(REMOTE_DATA)



In [8]:
# Fetch the play-by-play frame through the cached loader, then display it
# (pandas truncates the rendering of large frames).
data = get_data()
data

2024-04-08 18:29:00.331 
  command:

    streamlit run c:\Users\Minh\miniconda3\envs\i501-project\lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
2024-04-08 18:29:00.332 No runtime found, using MemoryCacheStorageManager


Unnamed: 0,GameId,GameDate,Quarter,Minute,Second,OffenseTeam,DefenseTeam,Down,ToGo,YardLine,...,YardLineFixed,YardLineDirection,IsPenaltyAccepted,PenaltyTeam,IsNoPlay,PenaltyType,PenaltyYards,TeamID,SitID,PlayID
0,2014113007,2014-11-30,4,2,55,LA,LV,2,3,50,...,50,OPP,0,,0,,0,1,120350,101
1,2014113007,2014-11-30,4,3,39,LA,LV,1,10,43,...,43,OWN,0,,0,,0,1,111043,101
2,2014113007,2014-11-30,4,8,8,LA,LV,3,11,35,...,35,OWN,0,,0,,0,1,131135,101
3,2014113007,2014-11-30,4,8,50,LA,LV,2,10,36,...,36,OWN,0,,0,,0,1,121036,102
4,2014113007,2014-11-30,4,9,30,LA,LV,1,10,36,...,36,OWN,0,,0,,0,1,111036,102
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74383,2017010114,2017-01-01,2,0,17,KC,SD,1,10,63,...,37,OPP,0,,0,,0,4,411063,101
74384,2017010114,2017-01-01,3,8,22,KC,SD,1,10,74,...,26,OPP,0,,0,,0,4,411074,104
74385,2017010114,2017-01-01,3,7,1,KC,SD,3,5,79,...,21,OPP,0,,0,,0,4,430579,202
74386,2017010115,2017-01-01,1,5,44,SF,SEA,2,5,72,...,28,OPP,1,SF,1,ILLEGAL SHIFT,5,8,820572,203


In [9]:
# Split the plays by type using the IsRush / IsPass indicator columns
# (rows where the indicator equals 1 are kept); each subset gets its own model below.
rush_plays = data[data['IsRush'] == 1]
pass_plays = data[data['IsPass'] == 1]

## Model for rush plays

In [10]:
# Train ALS model for rush plays
# train_als_model is not defined in this notebook; presumably it comes from the
# star import of utils.modeling — verify there for the expected columns.
rush_model = train_als_model(rush_plays)

  check_blas_config()
100%|██████████| 5/5 [00:06<00:00,  1.33s/it]


In [11]:
# Calculate RMSE for rush plays
# NOTE(review): this evaluates on the same rush_plays frame that was passed to
# train_als_model, so the figure is an in-sample error, not a held-out estimate.
rush_rmse = calculate_rmse(rush_model, rush_plays)
rush_rmse

4.832291719093181

## Model for pass plays

In [12]:
# Train ALS model for pass plays (same helper as used for the rush model)
pass_model = train_als_model(pass_plays)

  0%|          | 0/5 [00:00<?, ?it/s]

100%|██████████| 5/5 [00:09<00:00,  1.81s/it]


In [13]:
# Calculate RMSE for pass plays
# NOTE(review): evaluated on the same pass_plays frame the model was trained on,
# so this is an in-sample error, not a held-out estimate.
pass_rmse = calculate_rmse(pass_model, pass_plays)
pass_rmse

5.9421040450859905

## Initial Recommender

In [14]:
import pandas as pd

In [15]:
# Get the predicted score matrix from each trained model. The results are passed
# directly to pd.DataFrame(..., index=SitIDs, columns=PlayIDs) further down, so
# they are presumably 2-D (situations x plays) — confirm in utils.modeling.
pass_prediction = predict_ratings(pass_model)
rush_prediction = predict_ratings(rush_model)

In [16]:
# Define row and column names: rows are labelled by SitID, columns by PlayID.
# NOTE(review): this assumes predict_ratings() orders its rows/columns by first
# appearance (the order Series.unique() returns) — confirm against utils.modeling;
# if the model encodes IDs in a different order the labels will be misaligned.
row_names = pass_plays['SitID'].unique()
col_names = pass_plays['PlayID'].unique()

# Create DataFrame (raises ValueError if the matrix shape does not match the labels)
pass_pred_df = pd.DataFrame(pass_prediction, index=row_names, columns=col_names)

# Display DataFrame: a bare last expression gets the notebook's rich rendering,
# which print() does not.
pass_pred_df.head()

                 202           203           204           205           206  \
120822 -5.401438e-04  4.215945e-02  6.568472e-01  4.766227e-02  4.996412e-02   
111582  4.035610e-11 -5.015077e-10 -3.742414e-10  1.490984e-10 -8.641438e-12   
110496 -1.134420e-03  9.040532e-01  2.997221e-02  3.167104e-02  3.367297e-02   
130862  1.689319e-03  5.554792e-02  5.940035e-02  1.040732e+00  9.656359e-02   
111534 -7.212535e-05  3.321432e-02  9.810290e-01  3.152066e-02  3.387132e-02   

                 207           208           209           210           201  \
120822  3.517948e-02  6.542810e-01  6.473297e-05 -6.245952e-04 -6.200727e-04   
111582  8.919747e-11 -2.796830e-10  3.890623e-11  3.962992e-11  3.623631e-11   
110496  2.418949e-02  3.100439e-02 -9.470042e-04 -1.174049e-03 -1.073631e-03   
130862  8.576170e-02  7.129742e-02 -4.143454e-05  1.624158e-03  1.568631e-03   
111534  2.437939e-02  3.699784e-02  4.952513e-04 -1.175781e-04 -1.156463e-04   

                 211           212    

In [17]:
# Define row and column names: rows are labelled by SitID, columns by PlayID.
# NOTE(review): this assumes predict_ratings() orders its rows/columns by first
# appearance (the order Series.unique() returns) — confirm against utils.modeling;
# if the model encodes IDs in a different order the labels will be misaligned.
# Note that row_names / col_names are reassigned here for the rush subset.
row_names = rush_plays['SitID'].unique()
col_names = rush_plays['PlayID'].unique()

# Create DataFrame (raises ValueError if the matrix shape does not match the labels)
rush_pred_df = pd.DataFrame(rush_prediction, index=row_names, columns=col_names)

# Display DataFrame: a bare last expression gets the notebook's rich rendering,
# which print() does not.
rush_pred_df.head()

                 101           102           103           104           106  \
120350  1.624361e-02  2.031209e-02  2.094308e-02  7.051036e-01  2.922223e-02   
111043  1.794264e-02  1.298140e-01  2.699988e-02  3.607343e-02  8.580015e-01   
131135  9.621323e-01  9.929903e-03  5.541613e-02  1.197270e-02  1.022447e-02   
121036  9.836036e-01  9.351610e-03  9.065338e-03  1.092013e-02  8.825424e-03   
111036 -6.867229e-11  2.731634e-10 -3.199256e-11  4.119473e-10 -2.680014e-10   

                 107           108  
120350  1.794198e-02  7.605675e-01  
111043  8.852803e-01  2.305834e-02  
131135  5.960927e-03  9.192367e-03  
121036  6.023289e-03  9.500869e-03  
111036  4.547991e-10  3.443306e-11  


In [18]:
# Spot-check one situation: the two highest-scoring pass plays for SitID 111064.
# Using the encoding in get_play (yardline + 100*distance + 10000*down + 100000*teamId),
# 111064 corresponds to TeamID 1, down 1, 10 to go, yard line 64.
best_pass = pass_pred_df.loc[111064].nlargest(2)
best_pass

203    1.023975
206    0.038029
Name: 111064, dtype: float32

In [19]:
def _print_top_two(pred_df, sit_id, plays_df, label_col, kind):
    """Print the two best-scoring plays of one kind (pass or run) for a situation.

    Args:
        pred_df: prediction frame indexed by SitID with one column per PlayID.
        sit_id: encoded situation to look up in ``pred_df.index``.
        plays_df: play-by-play frame used to translate a PlayID into a label.
        label_col: column of ``plays_df`` holding the label
            (``'PassType'`` or ``'RushDirection'``).
        kind: word inserted into the printed heading (``'Pass'`` or ``'Run'``).
    """
    if sit_id in pred_df.index:
        top_two = pred_df.loc[sit_id].nlargest(2)
    else:
        # Situation not present in the predictions: fall back to each play's
        # average predicted value across all situations.
        top_two = pred_df.mean().nlargest(2)

    for rank, (play_id, gain) in zip(("Top", "Second"), top_two.items()):
        # First row of plays_df carrying this PlayID supplies the readable label.
        label = plays_df.loc[plays_df['PlayID'] == int(play_id), label_col].values[0]
        print(f"{rank} {kind} Choice:", label)
        print("Predicted Gain:", round(gain), "Yards")


def get_play(team, down, distance, yardline, pass_pred_df, rush_pred_df, plays_df):
    """Print the top two pass plays and top two run plays for a game situation.

    The situation is encoded as
    ``yardline + 100 * distance + 10000 * down + 100000 * TeamID`` and looked up
    in each prediction frame; when the encoded situation is absent, the
    column-wise mean of that frame is used instead.

    Args:
        team: offense team code, matched against ``plays_df['OffenseTeam']``.
        down: current down.
        distance: yards to go.
        yardline: yard line (same scale as the YardLine column, presumably 0-99
            so the encoding stays unambiguous — TODO confirm).
        pass_pred_df: pass predictions, indexed by SitID, columns are PlayIDs.
        rush_pred_df: rush predictions, indexed by SitID, columns are PlayIDs.
        plays_df: play-by-play frame with ``OffenseTeam``, ``TeamID``, ``PlayID``,
            ``PassType`` and ``RushDirection`` columns.

    Returns:
        None. Results are printed to stdout.

    Raises:
        IndexError: if ``team`` never appears as an offense team in ``plays_df``,
            or a recommended PlayID has no row in ``plays_df``.
    """
    # Resolve the team through the frame that was passed in, not a notebook global,
    # so the function works with any plays frame.
    team_ids = plays_df.loc[plays_df['OffenseTeam'] == team, 'TeamID'].values
    if len(team_ids) == 0:
        raise IndexError(f"no plays found for offense team {team!r}")
    sit_id = int(yardline + 100 * distance + 10000 * down + 100000 * team_ids[0])

    _print_top_two(pass_pred_df, sit_id, plays_df, 'PassType', 'Pass')
    _print_top_two(rush_pred_df, sit_id, plays_df, 'RushDirection', 'Run')

In [20]:
# Example query: team 'NE', down 1, 10 yards to go, yard line 20, using the full
# play-by-play frame for label lookup.
get_play('NE', 1, 10, 20, pass_pred_df, rush_pred_df, data)

Top Pass Choice: INTERCEPTED BY
Predicted Gain: 0 Yards
Second Pass Choice: DEEP LEFT
Predicted Gain: 0 Yards
Top Run Choice: RIGHT END
Predicted Gain: 1 Yards
Second Run Choice: LEFT END
Predicted Gain: 0 Yards
