In [1]:
import pandas as pd
import numpy as np
import os

import re

from functools import reduce

import plotly.express as px

import warnings
warnings.filterwarnings('ignore')

In [2]:
weekly_data_tables = os.listdir('weekly_runs_2022/')
print(weekly_data_tables)

['week_1.csv', 'week_10.csv', 'week_2.csv', 'week_3.csv', 'week_4.csv', 'week_5.csv', 'week_6.csv', 'week_7.csv', 'week_8.csv', 'week_9.csv']


In [3]:
collection = []

for weekly_data in weekly_data_tables:
    collection.append(pd.read_csv('weekly_runs_2022/' + weekly_data))

In [4]:
%%time

pred_types = ['pred_logit', 'pred_rf', 'pred_gbm', 'pred_lgb', 'pred_catboost', 'pred_mean', 'pred_opt_combo']
pred_types_collected = {}

for pred_type in pred_types:
    
    keep_cols = ['Team', 'season', 'week_marker', pred_type]    
    pred_types_data = []
    
    for i in collection:
        data_for_pred_type = i[keep_cols].copy()
        
        week_marker = data_for_pred_type['week_marker'].unique().item()
        data_for_pred_type.rename(columns = {pred_type : pred_type + '_' + week_marker}, inplace = True)
        data_for_pred_type.drop(['week_marker', 'season'], 1, inplace = True)
        
        pred_types_data.append(data_for_pred_type)
        
    pred_types_data = reduce(lambda  a,b: pd.merge(a,b, on = ['Team'], how='outer'), pred_types_data)
    pred_types_data.sort_values(pred_types_data.columns[-1], ascending = False, inplace = True)
    
    columns = [re.sub('[^0-9]', '', i) if i != 'Team' else i for i in pred_types_data.columns]
    pred_types_data.columns = columns
        
    pred_types_collected[pred_type] = pred_types_data  

Wall time: 1.23 s


In [5]:
select_pred_type = 'pred_opt_combo'

pred_types_collected[select_pred_type].head()

Unnamed: 0,Team,1,10,2,3,4,5,6,7,8,9
0,Buffalo Bills,0.486035,0.419295,0.491504,0.431212,0.407388,0.481661,0.476485,0.476485,0.464177,0.443649
1,Kansas City Chiefs,0.479041,0.406845,0.406226,0.500262,0.451168,0.414523,0.377577,0.42664,0.42664,0.381789
20,Philadelphia Eagles,0.309424,0.354647,0.346565,0.454667,0.389922,0.353996,0.336533,0.336533,0.375131,0.378993
18,Dallas Cowboys,0.311082,0.306277,0.309917,0.306784,0.34142,0.336636,0.32774,0.324479,0.341162,0.341162
30,San Francisco 49ers,0.293839,0.323265,0.302512,0.31116,0.38273,0.423611,0.376081,0.306966,0.333736,0.333736


In [6]:
last_week = str(max([int(i) for i in pred_types_collected[select_pred_type].columns[1:]]))
data_for_plotting = pred_types_collected[select_pred_type].sort_values(last_week, ascending = False).head(10).melt(id_vars = 'Team', var_name = 'week', value_name = 'predicted_probability')

In [7]:
data_for_plotting['week'] = data_for_plotting['week'].astype(int)
data_for_plotting.sort_values(['week', 'predicted_probability'], ascending = [True, False], inplace = True)

In [8]:
legend_order = data_for_plotting[data_for_plotting['week'] == int(last_week)]['Team'].tolist()

fig = px.line(data_for_plotting, 'week', 'predicted_probability', color = 'Team', 
              title = 'Top 10 teams to make it to the Super Bowl in the 2022 NFL season', 
              labels = {'week' : 'Regular season week', 'predicted_probability' : 'Predicted probability (making SB)'},
              width = 950, height = 475, category_orders={"Team": legend_order})
fig.update_yaxes(tickfont = {'size' : 15}, showline=True, linecolor='black', gridcolor='lightgray', linewidth=1.5, gridwidth=0.0)
fig.update_xaxes(tickfont = {'size' : 15}, showline=True, linecolor='black', gridcolor='lightgray', linewidth=1.5, gridwidth=0.0)
fig.update_traces(mode = 'markers+lines', marker={'size': 10})
fig.update_layout({'plot_bgcolor': 'white','paper_bgcolor': 'white'})

fig.show()

Largest differences to week 1

In [9]:
first_week = str(data_for_plotting['week'].min())
last_week = str(data_for_plotting['week'].max())

In [10]:
first_last_weeks = pred_types_collected[select_pred_type][['Team', first_week, last_week]]

first_last_weeks['diff_ABS'] = first_last_weeks[last_week] - first_last_weeks[first_week]
first_last_weeks['diff_REL'] = first_last_weeks[last_week] / first_last_weeks[first_week] - 1

In [11]:
first_last_weeks.sort_values('diff_ABS')[['Team', 'diff_ABS']]

Unnamed: 0,Team,diff_ABS
3,Baltimore Ravens,-0.146277
2,Tampa Bay Buccaneers,-0.144044
4,Carolina Panthers,-0.107175
5,Washington Commanders,-0.075734
1,Kansas City Chiefs,-0.072196
0,Buffalo Bills,-0.066739
6,Minnesota Vikings,-0.060284
7,Indianapolis Colts,-0.058497
12,Pittsburgh Steelers,-0.047547
8,New York Giants,-0.047207


In [12]:
first_last_weeks.sort_values('diff_REL')[['Team', 'diff_REL']]

Unnamed: 0,Team,diff_REL
3,Baltimore Ravens,-0.325712
2,Tampa Bay Buccaneers,-0.320692
4,Carolina Panthers,-0.275137
5,Washington Commanders,-0.21214
7,Indianapolis Colts,-0.172453
6,Minnesota Vikings,-0.171166
1,Kansas City Chiefs,-0.150709
12,Pittsburgh Steelers,-0.145085
8,New York Giants,-0.142067
0,Buffalo Bills,-0.137314
