In [None]:
# Install every third-party package the notebook imports below, so that a
# fresh environment can run it top to bottom. `requests` and `scikit-learn`
# (imported as `sklearn`) were previously missing from this list.
import ipydeps
ipydeps.pip(['bs4', 'pandas', 'ipywidgets', 'requests', 'scikit-learn'])

In [1]:
import ipywidgets as widgets

import pandas as pd
import pickle
import requests

from bs4 import BeautifulSoup
from ipywidgets import Layout
from sklearn import preprocessing

In [2]:
# main functions

In [3]:
def get_nfl_team_data(teams: list, time_period: list, timeout: float = 30.0) -> list:
    """Download per-team season statistics from the ESPN core API.

    One HTTP request is made for every (team, year) combination. From each
    stat category in the response, the stats whose name starts with
    ``passing``, ``rushing`` or ``total`` are kept, except those whose name
    contains ``totalGiveaways`` or ``totalTakeaways``.

    Args:
        teams: Iterable of mappings, each with ``team_number`` and
            ``team_name`` keys.
        time_period: Iterable of season years; each is interpolated into the
            request URL as-is.
        timeout: Seconds to wait for each HTTP response before ``requests``
            gives up.

    Returns:
        A list with one entry per (team, year) that could be parsed. Each
        entry is a single-element list holding a dict that maps stat name to
        value, plus ``team_number``, ``team`` and ``year``. Combinations
        whose response lacks the expected keys, or yields no matching stats,
        are reported with ``print`` and skipped.
    """
    wanted_prefixes = ('passing', 'rushing', 'total')
    excluded_markers = ('totalGiveaways', 'totalTakeaways')

    data_list = []
    for team in teams:
        for year in time_period:
            url = f'https://sports.core.api.espn.com/v2/sports/football/leagues/nfl/seasons/{year}/types/2/teams/{team["team_number"]}/statistics'
            try:
                categories = requests.get(url, timeout=timeout).json()['splits']['categories']

                record = {}
                for category in categories:
                    for stat in category['stats']:
                        name = stat['name']
                        if not name.startswith(wanted_prefixes):
                            continue
                        # skip a stat if it matches EITHER excluded name; the
                        # previous `not in ... or not in ...` test was always true
                        if any(marker in name for marker in excluded_markers):
                            continue
                        record[name] = stat['value']

                if not record:
                    # nothing usable came back; handle it like missing data
                    raise KeyError('no matching statistics')

                # add team/year information
                record['team_number'] = team['team_number']
                record['team'] = team['team_name']
                record['year'] = year

            except KeyError:
                # error will happen if data does not exist for a team's year;
                # skip past it for now
                print(f'error with team: {team}, year: {year}')
                continue

            # callers build a DataFrame from each entry, so keep the
            # list-of-records shape for every (team, year)
            data_list.append([record])

    return data_list


def get_predictions(model, data, colnms):
    """Score the rows of ``data`` with ``model`` and display them ranked.

    ``data`` is narrowed to ``colnms``. The first 12 of those columns are
    standardised with ``preprocessing.scale`` and fed to
    ``model.predict_proba``; the second probability column is shown next to
    the ``team`` and ``year`` columns, highest value first.

    Args:
        model: Object exposing ``predict_proba``.
        data: DataFrame containing every column named in ``colnms``.
        colnms: Column names to use; must include ``team`` and ``year``.

    Returns:
        None. The ranked table is rendered with ``display``.
    """
    n_features = 12

    selected = data[colnms]
    year_labels = selected['year'].values
    team_labels = selected['team'].values

    # standardise the leading feature columns, keeping their names
    feature_block = selected.iloc[:, 0:n_features]
    scored = pd.DataFrame(
        preprocessing.scale(feature_block),
        columns=selected.columns[0:n_features],
    )

    # labels are attached as raw arrays, so no index alignment takes place
    scored.loc[:, 'team'] = team_labels
    scored.loc[:, 'year'] = year_labels
    # replace any NaN in the frame with 0 before predicting
    scored = scored.fillna(0)

    probabilities = model.predict_proba(scored.iloc[:, 0:n_features].values)
    scored.loc[:, 'prediction'] = probabilities[:, 1]

    ranking = scored[['team', 'year', 'prediction']]
    display(ranking.sort_values(by=['prediction'], ascending=False))

# Get data

In [4]:
# Load the three inputs used below: the team-number map, the historic test
# data for the model, and freshly downloaded 2021 team statistics.
print('Retrieving teams API map data')
teams_url = 'https://raw.githubusercontent.com/bdbritt/gmu_ait580_nfl_project/master/teams.csv'
teams = pd.read_csv(teams_url)
print(f'teams count: {teams.shape[0]}', '\n')

print('Retrieving ML model testing data')
ml_test_data_url = 'https://raw.githubusercontent.com/bdbritt/gmu_ait580_nfl_project/master/nfl_historic_test_data.csv'
ml_test_data = pd.read_csv(ml_test_data_url)
print(f'historic stats data: {ml_test_data.shape[0]}', '\n')

print('Retrieving 2021 team data. Please wait')
time_period = ['2021']
results = get_nfl_team_data(teams.to_dict('records'), time_period)
# Every entry of `results` is a list of record dicts. Flatten them and build
# the frame once; unlike pd.concat over per-record frames, this also works
# when no team could be retrieved (the result is then an empty frame).
current_data = pd.DataFrame([row for rec in results for row in rec])
print(f'2021 stats data: {current_data.shape[0]}', '\n')

Retrieving teams API map data
teams count: 32 

Retrieving ML model testing data
historic stats data: 121 

Retrieving 2021 team data. Please wait




2021 stats data: 32 



In [5]:
# Data Processing

In [6]:
# combine current and test data; ignore_index gives the combined frame a
# fresh 0..n-1 index instead of repeating the row labels of both inputs
combined_data = pd.concat([ml_test_data, current_data], ignore_index=True)

# the 12 model features followed by the two label columns
wanted_cols = ['totalYards', 'rushingYards', 'passingYardsAtCatch', 
               'totalOffensivePlays', 'passingFumblesLost', 
               'rushingFumblesLost', 'totalPenalties', 
               'totalTwoPointConvs', 'totalTackles', 
               'totalKickingPoints',  
               'rushingBigPlays', 
               'passingBigPlays',
               'team', 'year']

# `year` is converted to int so both sources use the same type
combined_data['year'] = combined_data['year'].astype(int)

combined_data = combined_data[wanted_cols].copy()

In [7]:
# Sentinel dropdown entry meaning "do not filter by year".
ALL = 'ALL'
def unique_sorted_values_plus_ALL(array):
    """Return the distinct values of ``array`` in ascending order, with ``ALL`` first.

    ``array`` must offer ``.unique()`` whose result has ``.tolist()``
    (the notebook passes a pandas Series).
    """
    return [ALL, *sorted(array.unique().tolist())]

# Year selector: the ALL sentinel followed by every year present in the data.
dropdown_state = widgets.Dropdown(options = unique_sorted_values_plus_ALL(combined_data["year"]), description='Year: ')

# Output area the prediction table is rendered into.
output_predict = widgets.Output()

# Columns handed to get_predictions, which treats the first 12 as features
# and reads 'team' and 'year' as labels. Derived from wanted_cols instead of
# repeating (and rebinding) the same literal list.
pca_cols = list(wanted_cols)

model_url = 'https://raw.githubusercontent.com/bdbritt/gmu_ait580_nfl_project/master/logreg_model.pkl'
# NOTE(security): unpickling can execute arbitrary code. Only load this file
# if the repository behind model_url is trusted.
logreg = pd.read_pickle(model_url)

def event_action():
    """Refresh the prediction table for the year currently selected.

    Clears ``output_predict``, filters ``combined_data`` to the dropdown's
    year (or keeps every row when ``ALL`` is selected) and renders the
    result of ``get_predictions`` inside ``output_predict``.
    """
    # clear the previous selection on each iteration
    output_predict.clear_output()

    selected_year = dropdown_state.value
    if selected_year == ALL:
        common_filter = combined_data
    else:
        common_filter = combined_data.loc[combined_data['year'] == selected_year]

    # Render for both branches. Previously this block lived inside the
    # `else`, so choosing ALL cleared the output and showed nothing.
    with output_predict:
        get_predictions(logreg, common_filter, pca_cols)


def dropdown_state_eventhandler(change):
    """Observer callback for ``dropdown_state``: re-render the predictions.

    ``change`` is accepted because the observer is called with it, but it is
    not read; ``event_action`` looks up the dropdown's current value itself.
    """
    event_action()
    

# Call the handler whenever the dropdown's `value` changes.
dropdown_state.observe(dropdown_state_eventhandler, names='value')

In [8]:
# Row holding the input controls (currently just the year dropdown).
input_widgets = widgets.HBox(children=[dropdown_state])

# Single-tab container showing the prediction output area.
tab = widgets.Tab(children=[output_predict])
tab.set_title(0, 'Prediction')

# Fixed-height wrapper for the tab, rendered below the controls.
dashboard_layout = Layout(height='400px')
dashboard = widgets.VBox(children=[tab], layout=dashboard_layout)
display(input_widgets, dashboard)

HBox(children=(Dropdown(description='Year: ', options=('ALL', 1996, 1997, 1998, 1999, 2021), value='ALL'),))

VBox(children=(Tab(children=(Output(),), _titles={'0': 'Prediction'}),), layout=Layout(height='400px'))