In [1]:
import os
import pickle
from utils.b2 import B2
from utils.modeling import *
import streamlit as st
from dotenv import load_dotenv
import numpy as np
from implicit.als import AlternatingLeastSquares
from sklearn.metrics import mean_squared_error
from scipy.sparse import coo_matrix
import category_encoders as ce
import pandas as pd
import pymc as pm
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, balanced_accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor



In [2]:
# ------------------------------------------------------
#                      APP CONSTANTS
# ------------------------------------------------------
REMOTE_DATA = 'pbp.csv'

In [3]:
# ------------------------------------------------------
#                        CONFIG
# ------------------------------------------------------
load_dotenv()

True

In [4]:
# load Backblaze connection
b2 = B2(endpoint=os.environ['B2_ENDPOINT'],
        key_id=os.environ['B2_KEYID'],
        secret_key=os.environ['B2_APPKEY'])

In [5]:
# ------------------------------------------------------
#                        CACHING
# ------------------------------------------------------
@st.cache_data
def get_data():
    # collect data frame of reviews and their sentiment
    b2.set_bucket(os.environ['B2_BUCKETNAME'])
    df_pbp = b2.get_df(REMOTE_DATA)

    return df_pbp



In [26]:
data = get_data()

In [27]:
rush_data = data[data['IsRush'] == 1]
pass_data = data[data['IsPass'] == 1]

In [29]:
# Train pass play model
X_pass = pass_data[['SitID', 'PlayID']]
y_pass = pass_data['Yards']
# Train rush play model
X_rush = rush_data[['SitID', 'PlayID']]
y_rush = rush_data['Yards']

In [32]:
pass_regressor = RandomForestRegressor(n_estimators=100, random_state=42)
pass_regressor.fit(X_pass, y_pass)
rush_regressor = RandomForestRegressor(n_estimators=100, random_state=42)
rush_regressor.fit(X_rush, y_rush)

In [102]:
rush_data['Predicted Yards'] = rush_regressor.predict(X_rush)

best_rush_plays = rush_data.sort_values(by='Predicted Yards', ascending=False)

print(best_rush_plays[['SitID', 'PlayID', 'Predicted Yards']])

         SitID  PlayID  Predicted Yards
21881  1011003     108        60.409167
21      110511     107        58.130000
21496   611521     101        55.973333
63765  1820926     103        52.470000
43131  2811015     108        51.938571
...        ...     ...              ...
65845  2710694     102        -6.473333
13722  2011065     106        -6.530000
23778  3120642     106        -6.753333
55048  2420742     104        -7.250000
69567  1940823     101       -13.240000

[37735 rows x 3 columns]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rush_data['Predicted Yards'] = rush_regressor.predict(X_rush)


In [101]:
pass_data['Predicted Yards'] = pass_regressor.predict(X_pass)

best_pass_plays = pass_data.sort_values(by='Predicted Yards', ascending=False)

print(best_pass_plays[['SitID', 'PlayID', 'Predicted Yards']])

         SitID  PlayID  Predicted Yards
10667  1711083     204        76.258333
17878  2411006     205        75.500000
42445  3120613     205        71.640000
29134  2410199     203        70.460000
11221   330711     205        70.120000
...        ...     ...              ...
374     421264     203        -3.590000
7338   1620874     202        -3.918000
38163  3132250     207        -4.190000
18141  1021042     202        -8.600000
74345   720298     203        -8.837500

[36653 rows x 3 columns]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_data['Predicted Yards'] = pass_regressor.predict(X_pass)


In [89]:
best_pass_plays.info()

<class 'pandas.core.frame.DataFrame'>
Index: 36653 entries, 10667 to 74345
Data columns (total 49 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   GameId                          36653 non-null  int64  
 1   GameDate                        36653 non-null  object 
 2   Quarter                         36653 non-null  int64  
 3   Minute                          36653 non-null  int64  
 4   Second                          36653 non-null  int64  
 5   OffenseTeam                     36652 non-null  object 
 6   DefenseTeam                     36653 non-null  object 
 7   Down                            36653 non-null  int64  
 8   ToGo                            36653 non-null  int64  
 9   YardLine                        36653 non-null  int64  
 10  Unnamed: 10                     0 non-null      float64
 11  SeriesFirstDown                 36653 non-null  int64  
 12  Unnamed: 12                     0

In [90]:
best_pass_plays['SitID']

10667    1711083
17878    2411006
42445    3120613
29134    2410199
11221     330711
          ...   
374       421264
7338     1620874
38163    3132250
18141    1021042
74345     720298
Name: SitID, Length: 36653, dtype: int64

In [126]:
def getPlay(team, down, distance, yardline):
    teamId = data.loc[data['OffenseTeam'] == team, 'TeamID'].values[0]
    SitId = yardline + 100 * distance + 10000 * down + 100000 * teamId
    SitId = 1711083
    if SitId in best_pass_plays['SitID'].values:
        best_passes = best_pass_plays[best_pass_plays['SitID'] == SitId]
        best_passes = best_passes.nlargest(2, 'Predicted Yards')
        print('First Pass Choice: ', best_passes.iloc[0]['PassType'])
        print('Predicted Gain: ', round(float(best_passes.iloc[0]['Predicted Yards'])))
        print('Second Pass Choice: ', best_passes.iloc[1]['PassType'])
        print('Predicted Gain: ', round(float(best_passes.iloc[1]['Predicted Yards'])))

    if SitId in best_rush_plays['SitID'].values:
        best_rushes = best_rush_plays[best_rush_plays['SitID'] == SitId]
        best_rushes = best_rushes.nlargest(2, 'Predicted Yards')
        print('First Rush Choice: ', best_rushes.iloc[0]['RushDirection'])
        print('Predicted Gain: ', round(float(best_rushes.iloc[0]['Predicted Yards'])))
        print('Second Rush Choice: ', best_rushes.iloc[1]['RushDirection'])
        print('Predicted Gain: ', round(float(best_rushes.iloc[1]['Predicted Yards'])))

In [127]:
getPlay('NE', 1, 10, 20)

First Pass Choice:  DEEP LEFT
Predicted Gain:  76
Second Pass Choice:  SHORT MIDDLE
Predicted Gain:  4
First Rush Choice:  LEFT END
Predicted Gain:  8
Second Rush Choice:  LEFT TACKLE
Predicted Gain:  5
