In [15]:
import os
import pickle
from utils.b2 import B2
from utils.modeling import *
import streamlit as st
from dotenv import load_dotenv
import numpy as np
# from implicit.als import AlternatingLeastSquares
# from sklearn.metrics import mean_squared_error
# # from scipy.sparse import coo_matrix
# # import category_encoders as ce
import pandas as pd
#import pymc as pm
# from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, balanced_accuracy_score
# from sklearn.model_selection import train_test_split
# from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor

In [16]:
# ------------------------------------------------------
#                      APP CONSTANTS
# ------------------------------------------------------
REMOTE_DATA = 'pbp.csv'

In [17]:
# ------------------------------------------------------
#                        CONFIG
# ------------------------------------------------------
load_dotenv()

True

In [18]:
# load Backblaze connection
b2 = B2(endpoint=os.environ['B2_ENDPOINT'],
        key_id=os.environ['B2_KEYID'],
        secret_key=os.environ['B2_APPKEY'])

In [19]:
# ------------------------------------------------------
#                        CACHING
# ------------------------------------------------------
@st.cache_data
def get_data():
    # collect data frame of reviews and their sentiment
    b2.set_bucket(os.environ['B2_BUCKETNAME'])
    df_pbp = b2.get_df(REMOTE_DATA)

    return df_pbp

2024-04-22 23:38:27.012 No runtime found, using MemoryCacheStorageManager


In [20]:
data = get_data()

In [21]:
@st.cache_data
def prep_data(data):
    rush_data = data[data['IsRush'] == 1]
    pass_data = data[data['IsPass'] == 1]
    # Train pass play model
    X_pass = pass_data[['SitID', 'PlayID']]
    y_pass = pass_data['Yards']
    # Train rush play model
    X_rush = rush_data[['SitID', 'PlayID']]
    y_rush = rush_data['Yards']
    return X_pass, y_pass, X_rush, y_rush, rush_data, pass_data

2024-04-22 23:38:29.507 No runtime found, using MemoryCacheStorageManager


In [22]:
X_pass, y_pass, X_rush, y_rush, rush_data, pass_data = prep_data(data)

In [23]:
@st.cache_data
def get_model(X_pass, y_pass, X_rush, y_rush, rush_data, pass_data):
    pass_regressor = RandomForestRegressor(n_estimators=100, random_state=42)
    pass_regressor.fit(X_pass, y_pass)
    rush_regressor = RandomForestRegressor(n_estimators=100, random_state=42)
    rush_regressor.fit(X_rush, y_rush)
    rush_data.loc[:, 'Predicted Yards'] = rush_regressor.predict(X_rush)
    best_rush_plays = rush_data.sort_values(by='Predicted Yards', ascending=False)
    pass_data.loc[:, 'Predicted Yards'] = pass_regressor.predict(X_pass)
    best_pass_plays = pass_data.sort_values(by='Predicted Yards', ascending=False)
    return best_rush_plays, best_pass_plays

2024-04-22 23:38:29.645 No runtime found, using MemoryCacheStorageManager


In [24]:
best_rush_plays, best_pass_plays = get_model(X_pass, y_pass, X_rush, y_rush, rush_data, pass_data)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rush_data.loc[:, 'Predicted Yards'] = rush_regressor.predict(X_rush)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_data.loc[:, 'Predicted Yards'] = pass_regressor.predict(X_pass)


In [25]:
@st.cache_data
def getPlay(team, down, distance, yardline):
    teamId = data.loc[data['OffenseTeam'] == team, 'TeamID'].values[0]
    SitId = yardline + 100 * distance + 10000 * down + 100000 * teamId
    if SitId in best_pass_plays['SitID'].values:
        best_passes = best_pass_plays[best_pass_plays['SitID'] == SitId]
        average_yards = best_passes['Yards'].mean()
        nearest_plays = best_pass_plays.iloc[(best_pass_plays['Predicted Yards'] - average_yards).abs().argsort()[:2]]
        best_passes = best_passes.nlargest(2, 'Predicted Yards')
        #sometimes the model doesn't recommend 2 plays, doing it this way ensures there's always at least 2 available plays
        best_passes = pd.concat([best_passes, nearest_plays])
        print('First Pass Choice: ', best_passes.iloc[0]['PassType'])
        print('Predicted Gain: ', round(float(best_passes.iloc[0]['Predicted Yards'])))
        print('Second Pass Choice: ', best_passes.iloc[1]['PassType'])
        print('Predicted Gain: ', round(float(best_passes.iloc[1]['Predicted Yards'])))

    else:
        best_passes = best_pass_plays[best_pass_plays['TeamID'] == teamId]
        average_yards = best_passes['Yards'].mean()
        # Select two plays nearest to the average
        nearest_plays = best_pass_plays.iloc[(best_pass_plays['Predicted Yards'] - average_yards).abs().argsort()[:2]]
        print('First Pass Choice: ', nearest_plays.iloc[0]['PassType'])
        print('Predicted Gain: ', round(float(nearest_plays.iloc[0]['Predicted Yards'])))
        print('Second Pass Choice: ', nearest_plays.iloc[1]['PassType'])
        print('Predicted Gain: ', round(float(nearest_plays.iloc[1]['Predicted Yards'])))


    if SitId in best_rush_plays['SitID'].values:
        best_rushes = best_rush_plays[best_rush_plays['SitID'] == SitId]
        average_yards = best_rushes['Yards'].mean()
        best_rushes = best_rushes.nlargest(2, 'Predicted Yards')
        nearest_plays = best_rush_plays.iloc[(best_rush_plays['Predicted Yards'] - average_yards).abs().argsort()[:2]]
        best_rushes = pd.concat([best_rushes, nearest_plays])
        print('First Rush Choice: ', best_rushes.iloc[0]['RushDirection'])
        print('Predicted Gain: ', round(float(best_rushes.iloc[0]['Predicted Yards'])))
        print('Second Rush Choice: ', best_rushes.iloc[1]['RushDirection'])
        print('Predicted Gain: ', round(float(best_rushes.iloc[1]['Predicted Yards'])))

    else:
        best_rushes = best_rush_plays[best_rush_plays['TeamID'] == teamId]
        average_yards = best_rushes['Yards'].mean()
        # Select two plays nearest to the average
        nearest_plays = best_rush_plays.iloc[(best_rush_plays['Predicted Yards'] - average_yards).abs().argsort()[:2]]
        print('First Pass Choice: ', nearest_plays.iloc[0]['RushDirection'])
        print('Predicted Gain: ', round(float(nearest_plays.iloc[0]['Predicted Yards'])))
        print('Second Pass Choice: ', nearest_plays.iloc[1]['RushDirection'])
        print('Predicted Gain: ', round(float(nearest_plays.iloc[1]['Predicted Yards'])))

2024-04-22 23:38:42.035 No runtime found, using MemoryCacheStorageManager


In [26]:
getPlay('NE', 1, 5, 95)

2024-04-22 23:38:42.060 No runtime found, using MemoryCacheStorageManager


First Pass Choice:  SHORT MIDDLE
Predicted Gain:  6
Second Pass Choice:  SHORT MIDDLE
Predicted Gain:  5
First Rush Choice:  LEFT TACKLE
Predicted Gain:  4
Second Rush Choice:  RIGHT GUARD
Predicted Gain:  1
