In [1]:
import os
import pickle

In [2]:
import streamlit as st
from dotenv import load_dotenv

In [3]:
from utils.b2 import B2
from utils.modeling import *

In [4]:
# ------------------------------------------------------
#                      APP CONSTANTS
# ------------------------------------------------------
# Name of the CSV object that get_data() requests through b2.get_df().
REMOTE_DATA = 'pbp-2023.csv'

In [5]:
# ------------------------------------------------------
#                        CONFIG
# ------------------------------------------------------
# Populate the process environment from a .env file so that the
# os.environ[...] lookups for the B2_* settings in later cells can resolve.
load_dotenv()

True

In [6]:
# Build the shared Backblaze B2 client from credentials held in the
# environment; a missing variable raises KeyError here rather than later.
_b2_settings = {
    'endpoint': os.environ['B2_ENDPOINT'],
    'key_id': os.environ['B2_KEYID'],
    'secret_key': os.environ['B2_APPKEY'],
}
b2 = B2(**_b2_settings)

In [8]:
# ------------------------------------------------------
#                        CACHING
# ------------------------------------------------------
@st.cache_data
def get_data():
    """Fetch the ``REMOTE_DATA`` CSV from Backblaze B2.

    Points the module-level ``b2`` client at the bucket named by the
    ``B2_BUCKETNAME`` environment variable, then reads ``REMOTE_DATA``
    from it. The function is wrapped in ``st.cache_data``.

    Returns:
        The object produced by ``b2.get_df(REMOTE_DATA)`` (a pandas
        DataFrame of play-level rows in the recorded run).

    Raises:
        KeyError: if ``B2_BUCKETNAME`` is not set in the environment.
    """
    bucket_name = os.environ['B2_BUCKETNAME']
    b2.set_bucket(bucket_name)
    return b2.get_df(REMOTE_DATA)



In [9]:
# Fetch the frame once and keep it in `data` for the exploration cells below.
data = get_data()
# Bare last expression: rendered as this cell's output.
data

2024-03-18 03:35:50.004 
  command:

    streamlit run c:\Users\Minh\miniconda3\envs\i501-project\lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
2024-03-18 03:35:50.005 No runtime found, using MemoryCacheStorageManager


Unnamed: 0,GameId,GameDate,Quarter,Minute,Second,OffenseTeam,DefenseTeam,Down,ToGo,YardLine,...,IsTwoPointConversion,IsTwoPointConversionSuccessful,RushDirection,YardLineFixed,YardLineDirection,IsPenaltyAccepted,PenaltyTeam,IsNoPlay,PenaltyType,PenaltyYards
0,2023121101,2023-12-11,3,1,28,NYG,GB,0,0,85,...,0,0,,15,OPP,0,,0,,0
1,2023121101,2023-12-11,3,1,35,NYG,GB,3,7,92,...,0,0,,8,OPP,0,,0,,0
2,2023121101,2023-12-11,3,2,19,NYG,GB,2,11,88,...,0,0,RIGHT GUARD,12,OPP,0,,0,,0
3,2023121101,2023-12-11,3,2,56,NYG,GB,1,10,89,...,0,0,CENTER,11,OPP,0,,0,,0
4,2023121101,2023-12-11,3,3,43,NYG,GB,1,10,64,...,0,0,,36,OPP,0,,0,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39467,2023091000,2023-09-10,2,14,1,ATL,CAR,0,0,100,...,0,0,,0,OPP,0,,0,,0
39468,2023091000,2023-09-10,2,14,52,CAR,ATL,0,0,100,...,0,0,,0,OPP,0,,0,,0
39469,2023091002,2023-09-10,4,9,11,CLE,CIN,0,0,100,...,0,0,,0,OPP,0,,0,,0
39470,2023091000,2023-09-10,1,4,13,ATL,CAR,2,6,57,...,0,0,,43,OPP,0,,0,,0


In [10]:
# NOTE(review): the recorded log reports "No runtime found", so in this notebook
# the call only returns a DeltaGenerator rather than drawing a table; it appears
# to be meant for execution under `streamlit run`.
st.dataframe(get_data())

DeltaGenerator()

In [11]:
# Print the column list with non-null counts and dtypes for the loaded frame.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 39472 entries, 0 to 39471
Data columns (total 45 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   GameId                          39472 non-null  int64  
 1   GameDate                        39472 non-null  object 
 2   Quarter                         39472 non-null  int64  
 3   Minute                          39472 non-null  int64  
 4   Second                          39472 non-null  int64  
 5   OffenseTeam                     39472 non-null  object 
 6   DefenseTeam                     39472 non-null  object 
 7   Down                            39472 non-null  int64  
 8   ToGo                            39472 non-null  int64  
 9   YardLine                        39472 non-null  int64  
 10  Unnamed: 10                     0 non-null      float64
 11  SeriesFirstDown                 39472 non-null  int64  
 12  Unnamed: 12                     

In [12]:
# Keep every row whose IsIncomplete flag is anything other than 1
# (presumably this drops incomplete passes — confirm against the data dictionary).
filtered_data = data.loc[data['IsIncomplete'].ne(1)]
# Preview the first 20 surviving rows.
filtered_data.head(20)

Unnamed: 0,GameId,GameDate,Quarter,Minute,Second,OffenseTeam,DefenseTeam,Down,ToGo,YardLine,...,IsTwoPointConversion,IsTwoPointConversionSuccessful,RushDirection,YardLineFixed,YardLineDirection,IsPenaltyAccepted,PenaltyTeam,IsNoPlay,PenaltyType,PenaltyYards
0,2023121101,2023-12-11,3,1,28,NYG,GB,0,0,85,...,0,0,,15,OPP,0,,0,,0
1,2023121101,2023-12-11,3,1,35,NYG,GB,3,7,92,...,0,0,,8,OPP,0,,0,,0
2,2023121101,2023-12-11,3,2,19,NYG,GB,2,11,88,...,0,0,RIGHT GUARD,12,OPP,0,,0,,0
3,2023121101,2023-12-11,3,2,56,NYG,GB,1,10,89,...,0,0,CENTER,11,OPP,0,,0,,0
4,2023121101,2023-12-11,3,3,43,NYG,GB,1,10,64,...,0,0,,36,OPP,0,,0,,0
5,2023121101,2023-12-11,3,4,29,NYG,GB,2,3,55,...,0,0,RIGHT GUARD,45,OPP,0,,0,,0
6,2023121101,2023-12-11,3,6,30,NYG,GB,0,0,100,...,0,0,,0,OPP,0,,0,,0
7,2023121101,2023-12-11,3,7,31,NYG,GB,0,0,65,...,0,0,,35,OPP,0,,0,,0
8,2023121101,2023-12-11,3,7,31,NYG,GB,0,0,100,...,0,0,,0,OPP,0,,0,,0
9,2023121100,2023-12-11,2,0,23,TEN,MIA,0,0,100,...,0,0,,0,OPP,0,,0,,0


In [13]:
# Split the filtered plays into two subsets using their 0/1 indicator columns.
rush_plays = filtered_data.loc[filtered_data['IsRush'].eq(1)]
pass_plays = filtered_data.loc[filtered_data['IsPass'].eq(1)]

In [14]:
# Build one figure per play subset. plot_yardage_histograms is not defined in
# this notebook; presumably it arrives via `from utils.modeling import *` (verify).
rush_fig, pass_fig = plot_yardage_histograms(rush_plays, pass_plays)

In [15]:
# Display the first figure returned by plot_yardage_histograms (built from rush_plays).
rush_fig

In [16]:
# Display the second figure returned by plot_yardage_histograms (built from pass_plays).
pass_fig