In [5]:
import pandas as pd
import numpy as np
import os

import re

from functools import reduce

import plotly.express as px

import warnings
warnings.filterwarnings('ignore')

In [7]:
# os.listdir() returns entries in arbitrary order and also lists non-data entries
# (e.g. '.ipynb_checkpoints'). Keep only the CSV exports and order them by the
# number(s) in the file name, so that 'week_10.csv' comes after 'week_9.csv'.
# Later cells rely on the last table in this list being the latest week.
weekly_data_tables = sorted(
    (file_name for file_name in os.listdir('weekly_runs/') if file_name.endswith('.csv')),
    key = lambda file_name: ([int(digits) for digits in re.findall('[0-9]+', file_name)], file_name),
)
print(weekly_data_tables)

['week_1.csv', 'week_2.csv', 'week_3.csv', 'week_4.csv', 'week_5.csv', 'week_6.csv', 'week_7.csv']


In [9]:
# Load every weekly run into its own DataFrame, keeping the order of `weekly_data_tables`.
collection = [
    pd.read_csv('weekly_runs/' + file_name)
    for file_name in weekly_data_tables
]

In [11]:
%%time

pred_types = ['pred_rf', 'pred_gbm', 'pred_lgb', 'pred_catboost', 'pred_mean']


def _weekly_predictions_wide(weekly_tables, pred_type):
    """Build one wide table (a row per team, a column per week) for one prediction type.

    Parameters
    ----------
    weekly_tables : list of pandas.DataFrame
        One frame per weekly run. Each frame needs the columns 'Team',
        'week_marker' and `pred_type`, and must hold exactly one distinct
        'week_marker' value.
    pred_type : str
        Name of the prediction column to collect from every weekly frame.

    Returns
    -------
    pandas.DataFrame
        Column 'Team' followed by one column per weekly frame, labelled with the
        digits of that frame's week marker. Rows are sorted in descending order
        of the last weekly column.

    Raises
    ------
    ValueError
        If a weekly frame does not contain exactly one distinct 'week_marker'.
    """
    per_week = []

    for weekly_table in weekly_tables:
        week_markers = weekly_table['week_marker'].unique()
        if len(week_markers) != 1:
            raise ValueError(
                'expected exactly one week_marker per weekly table, found %d' % len(week_markers)
            )

        # Take the label from the week marker only, so that digits inside a
        # prediction column name can never leak into the week label. str() also
        # accepts numeric markers.
        week_label = re.sub('[^0-9]', '', str(week_markers[0]))

        # Selecting just the two needed columns avoids DataFrame.drop(labels, 1),
        # whose positional `axis` argument is rejected by pandas >= 2.0.
        per_week.append(weekly_table[['Team', pred_type]].rename(columns = {pred_type : week_label}))

    # The outer merge keeps teams that are missing from some of the weekly runs.
    wide = reduce(lambda left, right: pd.merge(left, right, on = ['Team'], how = 'outer'), per_week)

    # Rank teams by the most recently merged week (the last column).
    return wide.sort_values(wide.columns[-1], ascending = False)


pred_types_collected = {
    pred_type : _weekly_predictions_wide(collection, pred_type)
    for pred_type in pred_types
}

Wall time: 235 ms


In [17]:
# Uncomment one of the lines below to display the full week-by-week table for that prediction type.
# pred_types_collected['pred_rf']
# pred_types_collected['pred_gbm']
# pred_types_collected['pred_lgb']
# pred_types_collected['pred_catboost']
# pred_types_collected['pred_mean']

In [23]:
# Prediction type explored in the cells that follow.
select_pred_type = 'pred_mean'

# Preview the first five rows of the selected table.
pred_types_collected[select_pred_type].head(5)

Unnamed: 0,Team,1,2,3,4,5,6,7
0,Arizona Cardinals,0.512468,0.319694,0.332717,0.347861,0.325952,0.349341,0.385269
20,Buffalo Bills,0.108006,0.278697,0.275407,0.288686,0.387259,0.331091,0.331091
1,Los Angeles Rams,0.486665,0.434764,0.396305,0.345544,0.28464,0.329892,0.309442
14,Dallas Cowboys,0.170148,0.088968,0.191692,0.237991,0.208854,0.280636,0.280636
7,Cincinnati Bengals,0.280042,0.18724,0.382684,0.275231,0.177757,0.257704,0.264515


In [29]:
# Reshape the first ten rows of the selected table from wide (one column per week)
# to long (one row per team and week), which is the layout the line chart consumes.
data_for_plotting = pd.melt(
    pred_types_collected[select_pred_type].head(10),
    id_vars = 'Team',
    var_name = 'week',
    value_name = 'predicted_probability',
)

In [35]:
# One line per team: predicted probability across the regular-season weeks.
fig = px.line(
    data_frame = data_for_plotting,
    x = 'week',
    y = 'predicted_probability',
    color = 'Team',
    title = 'Top 10 teams to make it to the Super Bowl in the 2021 NFL season',
    labels = dict(
        week = 'Regular season week',
        predicted_probability = 'Predicted probability (making SB)',
    ),
    width = 950,
    height = 475,
)

# Smaller tick labels on the y axis, and a marker on every weekly data point.
fig.update_yaxes(tickfont = dict(size = 8))
fig.update_traces(mode = 'markers+lines')

fig.show()

Largest changes in predicted probability compared to week 1

In [41]:
# Week labels are strings, so Series.min()/.max() would compare them
# lexicographically ('10' < '2'). Compare them as integers instead, but keep the
# original string labels because they are used as column names afterwards.
week_labels = data_for_plotting['week'].unique()

first_week = min(week_labels, key = int)
last_week = max(week_labels, key = int)

In [61]:
# Work on an explicit copy: adding columns to a slice of the stored table is
# ambiguous (SettingWithCopyWarning) and must not touch `pred_types_collected`.
first_last_weeks = pred_types_collected[select_pred_type][['Team', first_week, last_week]].copy()

# Absolute change (last week minus first week) and relative change (ratio minus one).
first_last_weeks['diff_ABS'] = first_last_weeks[last_week] - first_last_weeks[first_week]
first_last_weeks['diff_REL'] = first_last_weeks[last_week] / first_last_weeks[first_week] - 1

In [68]:
# Teams ordered by absolute change, most negative first.
first_last_weeks.sort_values(by = 'diff_ABS', ascending = True).loc[:, ['Team', 'diff_ABS']]

Unnamed: 0,Team,diff_ABS
2,San Francisco 49ers,-0.378802
3,Seattle Seahawks,-0.278781
4,Philadelphia Eagles,-0.241527
9,Houston Texans,-0.182829
1,Los Angeles Rams,-0.177224
6,New Orleans Saints,-0.150528
10,Carolina Panthers,-0.145849
8,Kansas City Chiefs,-0.134537
0,Arizona Cardinals,-0.127199
15,Pittsburgh Steelers,-0.113249


In [75]:
# Teams ordered by relative change, most negative first.
first_last_weeks.sort_values(by = 'diff_REL', ascending = True).loc[:, ['Team', 'diff_REL']]

Unnamed: 0,Team,diff_REL
2,San Francisco 49ers,-0.83676
9,Houston Texans,-0.777326
4,Philadelphia Eagles,-0.760782
3,Seattle Seahawks,-0.724688
10,Carolina Panthers,-0.71649
15,Pittsburgh Steelers,-0.671755
16,Detroit Lions,-0.663449
18,Miami Dolphins,-0.622816
17,Denver Broncos,-0.553193
8,Kansas City Chiefs,-0.546877
