# NFL

In [1]:
import numpy as np
import pandas as pd
from urllib.request import urlopen
from bs4 import BeautifulSoup
import re
import math
import sys
sys.version

'3.7.7 (default, Mar 23 2020, 17:31:31) \n[Clang 4.0.1 (tags/RELEASE_401/final)]'

### All Four Years Off. Stats

In [2]:
def dataframe_generator(url_endpoint, years):
    for i, year in enumerate(years):
        url = f'https://www.teamrankings.com/nfl/stat/{url_endpoint}?date={year}-02-11'
        html = urlopen(url)
        soup = BeautifulSoup(html)
        headers = [th.getText() for th in soup.findAll('tr')[0].findAll('th')]
        rows = soup.findAll('tr')
        raw_team_stats = [[td.getText() for td in rows[i].findAll('td')] for i in range(len(rows))]
        team_stats = raw_team_stats[1:]
        
        if i == 0:
            base_df = pd.DataFrame(team_stats, columns=headers)
            base_df = base_df[['Team', f'{year-1}']]
            base_df.set_index('Team', drop=True)
        else:
            tmp_df = pd.DataFrame(team_stats, columns=headers)
            tmp_df = tmp_df[['Team', f'{year-1}']]
            base_df = base_df.merge(tmp_df, how='inner', on='Team')
            base_df.set_index('Team', drop=True)

        if re.search('pct', url_endpoint):
            tmp_col = []

            for item in base_df[f'{year-1}']:
                new_item = re.sub('%', '', item)
                new_item = float(new_item) / 100
                tmp_col.append(new_item)

            base_df[f'{year-1}'] = tmp_col
        
    return base_df

### Loop Through Different Stats (Endpoints)

In [3]:
url_endpoints = ['yards-per-pass-attempt',
                 'opponent-yards-per-pass-attempt',
                 'yards-per-rush-attempt',
                 'opponent-yards-per-rush-attempt',
                 'completion-pct',
                 'opponent-completion-pct',
                 'pass-attempts-per-game',
                 'opponent-pass-attempts-per-game',
                 'yards-per-play',
                 'opponent-yards-per-play',
                 'passing-play-pct',
                 'opponent-passing-play-pct',
                 'points-per-game',
                 'opponent-points-per-game']

years = [2018, 2019, 2020, 2021]
list_of_dfs = []

for url_endpoint in url_endpoints:
    df = dataframe_generator(url_endpoint, years=years)
    df = df.set_index('Team', drop=True)
    list_of_dfs.append(df)
    print(df)

              2017 2018 2019 2020
Team                             
New Orleans    7.9  7.6  7.3  7.1
New England    7.6  7.4  6.3  6.6
LA Chargers    7.6  7.7  7.4  6.9
Kansas City    7.6  8.3  7.8  7.8
Pittsburgh     7.5  7.3  5.8  6.2
Atlanta        7.4  7.5  6.9  6.9
Detroit        7.3  6.2  6.8  7.1
LA Rams        7.2  7.7  7.1  6.7
Tampa Bay      7.2  8.2  7.7  7.4
Minnesota      7.0  6.7  7.4  7.8
Philadelphia   7.0  7.1  6.2  5.6
Washington     6.9  5.9  5.9  5.8
Jacksonville   6.8  5.8  6.4  6.0
Seattle        6.6  7.3  7.6  6.9
Las Vegas      6.5  6.7  7.5  7.7
San Francisco  6.5  7.3  7.8  7.1
Dallas         6.4  6.8  8.0  6.5
Tennessee      6.4  6.8  7.7  7.5
Houston        6.2  7.2  7.1  8.3
Carolina       6.2  6.8  5.8  7.1
NY Jets        6.2  6.0  6.0  5.6
Cincinnati     6.1  6.1  5.9  5.9
Arizona        6.1  5.1  6.3  6.8
Chicago        5.9  6.9  5.7  6.0
Indianapolis   5.9  6.8  6.1  7.3
Denver         5.9  6.3  6.2  6.2
Miami          5.9  6.4  6.2  6.7
Buffalo       

               2017  2018  2019  2020
Team                                 
Denver         30.7  34.3  33.6  35.4
Pittsburgh     30.9  35.4  32.1  32.9
Houston        31.3  36.8  37.2  33.8
Indianapolis   32.1  35.1  35.1  35.1
Green Bay      32.1  32.9  32.5  33.3
LA Chargers    32.8  34.5  29.0  33.9
Cleveland      32.8  39.1  32.1  38.4
Miami          33.0  31.8  34.1  34.1
Chicago        33.1  38.5  35.7  34.5
Las Vegas      33.1  30.0  32.9  36.2
Washington     33.2  33.6  33.8  33.5
San Francisco  34.0  33.9  33.1  33.6
Jacksonville   34.0  31.2  32.3  33.3
Carolina       34.3  33.1  34.0  36.6
Tampa Bay      34.3  33.4  41.5  39.2
LA Rams        34.4  33.7  35.1  33.9
Seattle        34.6  34.1  36.3  41.1
NY Jets        34.9  36.4  36.6  37.8
Atlanta        34.9  36.6  33.5  39.1
Dallas         34.9  33.2  35.6  32.1
Minnesota      34.9  31.4  36.3  33.8
Buffalo        35.1  30.8  34.0  36.5
NY Giants      35.1  35.0  34.8  35.9
Cincinnati     35.1  36.5  31.2  33.8
Arizona     

In [4]:
url_endpoint = 'ranking/schedule-strength-by-other'
years = [2017, 2018, 2019, 2020, 2021]

for i, year in enumerate(years):

    url = f'https://www.teamrankings.com/nfl/{url_endpoint}?date={year}-02-11'
    html = urlopen(url)
    soup = BeautifulSoup(html)
    headers = [th.getText() for th in soup.findAll('tr')[0].findAll('th')]
    rows = soup.findAll('tr')
    raw_team_stats = [[td.getText() for td in rows[i].findAll('td')] for i in range(len(rows))]
    team_stats = raw_team_stats[1:]
    
    if i == 0:
        sos_rating_df = pd.DataFrame(team_stats, columns=headers)
        sos_rating_df = sos_rating_df.truncate(after=31)
        sos_rating_df = sos_rating_df[['Team', 'Rating']]
        sos_rating_df['Rating'] = sos_rating_df['Rating'].astype(float)
        sos_rating_df['Team'] = [re.sub('[^A-Za-z\s]', '', str(team_name)).strip() \
                                 for team_name in sos_rating_df['Team']]
        sos_rating_df.columns = ['Team', f'{year-1}']
        sos_rating_df.set_index('Team', drop=True)

    else:
        tmp_df = pd.DataFrame(team_stats, columns=headers)
        tmp_df = tmp_df.truncate(after=31)
        tmp_df = tmp_df[['Team', 'Rating']]
        tmp_df['Rating'] = tmp_df['Rating'].astype(float)
        tmp_df['Team'] = [re.sub('[^A-Za-z\s]', '', str(team_name)).strip() for team_name in tmp_df['Team']]
        tmp_df.columns = ['Team', f'{year-1}']
        sos_rating_df = sos_rating_df.merge(tmp_df, how='inner', on='Team')
        sos_rating_df.set_index('Team', drop=True)

sos_rating_df.set_index('Team', inplace=True)
sos_rating_df.sort_index(ascending=True, inplace=True)
sos_rating_df.head()

Unnamed: 0_level_0,2016,2017,2018,2019,2020
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Arizona,-0.4,-0.3,-0.1,0.3,-0.3
Atlanta,1.3,1.7,0.4,1.0,1.5
Baltimore,-0.1,-1.2,0.6,1.0,0.3
Buffalo,-0.4,-0.2,-0.4,-0.8,1.0
Carolina,1.0,1.2,0.9,0.2,1.4


### Get SOS Ratings from Sonny Moore

In [5]:
years = [17, 18, 19, 20]
for i, year in enumerate(years):
    
    if year == 20:
        url = 'http://sonnymoorepowerratings.com/nfl-foot.htm'
    else:
        url = f'http://sonnymoorepowerratings.com/nfl{year}.htm'
        
    html = urlopen(url)
    soup = BeautifulSoup(html)
    html_table = soup.findAll('font')
    team_table = html_table[1].getText().split('\n')
    
    if year == 20:
        proc_team_list = team_table[5:37]
    else:
        proc_team_list = team_table[4:36]
        
    clean_team_list = [re.sub('\r', '', line) for line in proc_team_list]
    split_list = [line.split() for line in clean_team_list]
    team_list = [' '.join(line[1:4]) for line in split_list]
    new_team_list = [re.sub('\d', '', team_name) for team_name in team_list]
    sos_list = [line[-2] for line in split_list]
    final_team_list = []
    
    for team_name in new_team_list:
        split_name = team_name.split()
        capitalized_names = [part_name.lower().capitalize() for part_name in split_name]
        capitalized_name = ' '.join(capitalized_names)
        
        if re.search('Los Angeles', capitalized_name):
            new_name = re.sub('Los Angeles', 'LA', capitalized_name)
        elif re.search('St. Louis', capitalized_name):
            new_name = re.sub('St. Louis', 'LA', capitalized_name)
        elif re.search('New York', capitalized_name):
            new_name = re.sub('New York', 'NY', capitalized_name)
        elif re.search('San Diego', capitalized_name):
            new_name = re.sub('San Diego', 'LA', capitalized_name)
        elif re.search('Washington', capitalized_name):
            
            if year == 20:
                new_name = capitalized_name.split()[:-2]
                new_name = ' '.join(new_name)
            else:
                new_name = capitalized_name.split()[:-1]
                new_name = ' '.join(new_name)
                
        elif re.search('Oakland', capitalized_name):
            new_name = re.sub('Oakland', 'Las Vegas', capitalized_name)
            new_name = re.sub('Raiders', '', new_name)
            new_name = new_name.rstrip()
        else:
            new_name = capitalized_name.split()[:-1]
            new_name = ' '.join(new_name)
        final_team_list.append(new_name)
        
    sos_dict = {team_name: sos_val for team_name, sos_val in zip(final_team_list, sos_list)}
    
    if i == 0:
        sos_sonny_moore_df = pd.DataFrame.from_dict(sos_dict, orient='index', columns=[f'SOS_{year}'])
    else:
        tmp_df = pd.DataFrame.from_dict(sos_dict, orient='index', columns=[f'SOS_{year}'])
        sos_sonny_moore_df = sos_sonny_moore_df.merge(tmp_df, how='outer', left_index=True, right_index=True)
        
sos_sonny_moore_df.sort_index(inplace=True)
sos_sonny_moore_df = sos_sonny_moore_df.astype('float')
sos_sonny_moore_df.head()

Unnamed: 0,SOS_17,SOS_18,SOS_19,SOS_20
Arizona,78.25,85.73,84.11,84.79
Atlanta,81.6,86.15,84.1,87.35
Baltimore,77.24,86.13,84.09,85.48
Buffalo,80.46,85.21,82.84,87.09
Carolina,80.5,87.33,83.93,85.54


### Assert The Two Indices Are the Same

In [6]:
assert sos_rating_df.index.tolist() == sos_sonny_moore_df.index.tolist()
assert sos_rating_df['2017'].dtype == sos_sonny_moore_df['SOS_17'].dtype

### Combine SOS Ratings

In [7]:
team_names = sos_rating_df.index.tolist()
list_of_series = []

for tr_col, sm_col in zip(sos_rating_df.columns.tolist(), sos_sonny_moore_df.columns.tolist()):
    tr_series, sm_series = sos_rating_df[tr_col], sos_sonny_moore_df[sm_col]
    
    assert tr_series.index.tolist() == sm_series.index.tolist()
    
    tr_mean, tr_std = tr_series.mean(), tr_series.std()
    sm_mean, sm_std = sm_series.mean(), sm_series.std()
    
    tr_series_norm = (tr_series - tr_mean) / tr_std
    sm_series_norm = (sm_series - sm_mean) / sm_std
    
    comb_series = tr_series_norm.add(sm_series_norm)

    list_of_series.append(comb_series)

array_of_series = np.array(list_of_series).T # Genrate Array, and Matrix transformation for DF
comb_sos_df = pd.DataFrame(array_of_series,
                           index=team_names,
                           columns=['2017', '2018', '2019', '2020'])
list_of_dfs.append(comb_sos_df)
comb_sos_df.head()

Unnamed: 0,2017,2018,2019,2020
Arizona,-1.84528,-1.11652,-0.086806,-1.248774
Atlanta,3.292969,2.370534,0.577811,2.813583
Baltimore,-2.239804,-1.879724,0.836053,0.305271
Buffalo,-0.049216,-1.682456,-2.102889,0.848642
Carolina,1.972699,3.257127,1.03963,-0.332555


## Master DF List Checkpoint

In [8]:
master_list_of_dfs = [df.copy() for df in list_of_dfs]
# list_of_dfs = [df.copy() for df in master_list_of_dfs]

### Determine Weights for Data by Year

In [9]:
def year_weightings(weeks_played=0, year_weight_list=[1, 2, 4, 8, 16]):
    current_season_weight = year_weight_list[-1]
    multiplier = weeks_played / 17
    current_season_weight *= multiplier
    year_weight_list[-1] = current_season_weight
    divisor = sum(year_weight_list)
    year_weight_list = [weight/divisor for weight in year_weight_list] # softmax to 1
    return year_weight_list

In [10]:
weeks_played = 20
base = 2
standard_year_weights = np.logspace(1, 4, 4, base=base)

year_weights = year_weightings(weeks_played, inclined_weights)
year_weights

NameError: name 'inclined_weights' is not defined

In [45]:
declined_weights = np.logspace(1, 4, 4, base=1.5)
standard_weights = np.logspace(1, 4, 4, base=2)
inclined_weights = np.logspace(1, 4, 4, base=3)
heavily_inclined_weights = np.logspace(1, 4, 4, base=4)

team_prior_weights = {
            'Arizona'       : (inclined_weights, standard_weights),
            'Atlanta'       : (declined_weights, declined_weights),
            'Baltimore'     : (inclined_weights, standard_weights),
            'Buffalo'       : (inclined_weights, declined_weights),
            'Carolina'      : (heavily_inclined_weights, standard_weights),
            'Chicago'       : (declined_weights, declined_weights),
            'Cincinnati'    : (inclined_weights, standard_weights),
            'Cleveland'     : (inclined_weights, standard_weights),
            'Dallas'        : (standard_weights, standard_weights),
            'Denver'        : (standard_weights, standard_weights),
            'Detroit'       : (declined_weights, declined_weights),
            'Green Bay'     : (inclined_weights, declined_weights),
            'Houston'       : (standard_weights, standard_weights),
            'Indianapolis'  : (standard_weights, standard_weights),
            'Jacksonville'  : (standard_weights, standard_weights),
            'Kansas City'   : (declined_weights, standard_weights),
            'LA Chargers'   : (heavily_inclined_weights, standard_weights), 
            'LA Rams'       : (declined_weights, standard_weights),
            'Las Vegas'     : (standard_weights, standard_weights),
            'Miami'         : (heavily_inclined_weights, inclined_weights),
            'Minnesota'     : (standard_weights, inclined_weights),
            'NY Giants'     : (standard_weights, standard_weights),
            'NY Jets'       : (standard_weights, standard_weights),
            'New England'   : (heavily_inclined_weights, standard_weights),
            'New Orleans'   : (declined_weights, declined_weights),
            'Philadelphia'  : (inclined_weights, declined_weights),
            'Pittsburgh'    : (declined_weights, declined_weights),
            'San Francisco' : (standard_weights, declined_weights),
            'Seattle'       : (declined_weights, standard_weights),
            'Tampa Bay'     : (heavily_inclined_weights, inclined_weights),
            'Tennessee'     : (standard_weights, standard_weights),
            'Washington'    : (heavily_inclined_weights, inclined_weights)
        }

### Use Year Weights to Create Comb. Number for Each DF

In [14]:
url_endpoints.append('sos')

raw_off_weights = {key: year_weightings(weeks_played, tupl[0]) for key, tupl in team_prior_weights.items()}
raw_def_weights = {key: year_weightings(weeks_played, tupl[1]) for key, tupl in team_prior_weights.items()}

proc_dfs = []
for j, tupl in enumerate(zip(list_of_dfs, url_endpoints)):
    master_df, url_endpoint = tupl[0], tupl[1]
    df = master_df.astype(float)
    df = df.sort_index()
    col_names = df.columns.tolist()
    
    # Get weight lists
    weight_map = df.index.map(raw_off_weights)
    off_weights = np.array([np.array(weight_list) for weight_list in weight_map])
    def_weight_map = df.index.map(raw_def_weights)
    def_weights = np.array([np.array(weight_list) for weight_list in def_weight_map])

    for i, col_name in enumerate(col_names):
        
        if (j % 2) == 0:
            weights = off_weights[:, i]
        else:
            weights = def_weights[:, i]
            
        if i == 0:
            series_vals_1 = df[col_name] * weights
        elif i == 1:
            series_vals_2 = df[col_name] * weights
        elif i == 2:
            series_vals_3 = df[col_name] * weights
        elif i == 3:
            series_vals_4 = df[col_name] * weights
        else:
            print('ERROR')
    
    comb_total = series_vals_1 + series_vals_2 + series_vals_3
    comb_array = np.array(comb_total).reshape(-1,1)
    new_df = pd.DataFrame(comb_array, index=df.index, columns=[f'comb_{url_endpoint}'])
    full_total = comb_total + series_vals_4
    new_df[f'curr_{url_endpoint}'] = full_total
    proc_dfs.append(new_df)

proc_dfs.append(comb_sos_df) # add in comb_sos_df

### Merge All

In [15]:
main_df = proc_dfs[0]
for df in proc_dfs[1:]:
    tmp_df = pd.DataFrame(df)
    main_df = main_df.merge(tmp_df, how='inner', left_index=True, right_index=True)
main_df.head()

Unnamed: 0_level_0,comb_yards-per-pass-attempt,curr_yards-per-pass-attempt,comb_opponent-yards-per-pass-attempt,curr_opponent-yards-per-pass-attempt,comb_yards-per-rush-attempt,curr_yards-per-rush-attempt,comb_opponent-yards-per-rush-attempt,curr_opponent-yards-per-rush-attempt,comb_completion-pct,curr_completion-pct,...,comb_points-per-game,curr_points-per-game,comb_opponent-points-per-game,curr_opponent-points-per-game,comb_sos,curr_sos,2017,2018,2019,2020
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Arizona,1.744678,6.569908,0.776379,6.467992,1.335874,4.670959,0.488597,4.579444,0.179811,0.657367,...,5.899737,24.065309,2.944233,23.309536,-0.1335,-1.019621,-1.84528,-1.11652,-0.086806,-1.248774
Atlanta,3.918887,7.06054,1.722177,7.378374,2.236088,3.920742,1.086874,4.405176,0.366744,0.66256,...,13.072513,24.36425,5.937694,25.470426,0.934437,2.215495,3.292969,2.370534,0.577811,2.813583
Baltimore,1.766916,6.66353,0.574026,5.909567,1.342062,5.496764,0.403185,4.2918,0.162727,0.629834,...,7.613622,27.867796,1.75078,18.480866,-0.007038,0.219446,-2.239804,-1.879724,0.836053,0.305271
Buffalo,1.374528,7.162951,1.281237,6.449266,1.004126,4.245642,0.926099,4.606362,0.130287,0.655258,...,4.295621,27.372132,4.290202,22.456609,-0.421708,0.233265,-0.049216,-1.682456,-2.102889,0.848642
Carolina,1.310568,6.862187,0.573995,6.537716,1.037447,4.321503,0.404271,4.716501,0.13424,0.664537,...,4.73653,21.860538,2.198152,25.227292,0.328528,0.068497,1.972699,3.257127,1.03963,-0.332555


### Rename Cols

In [16]:
main_df = main_df[['comb_yards-per-pass-attempt', 'curr_yards-per-pass-attempt',
                   'comb_opponent-yards-per-pass-attempt', 'curr_opponent-yards-per-pass-attempt',
                   'comb_yards-per-rush-attempt', 'curr_yards-per-rush-attempt',
                   'comb_opponent-yards-per-rush-attempt', 'curr_opponent-yards-per-rush-attempt',
                   'comb_completion-pct', 'curr_completion-pct',
                   'comb_opponent-completion-pct', 'curr_opponent-completion-pct',
                   'comb_pass-attempts-per-game', 'curr_pass-attempts-per-game',
                   'comb_opponent-pass-attempts-per-game', 'curr_opponent-pass-attempts-per-game',
                   'comb_yards-per-play', 'curr_yards-per-play',
                   'comb_opponent-yards-per-play', 'curr_opponent-yards-per-play',
                   'comb_passing-play-pct', 'curr_passing-play-pct',
                   'comb_opponent-passing-play-pct', 'curr_opponent-passing-play-pct',
                   'comb_points-per-game', 'curr_points-per-game',
                   'comb_opponent-points-per-game', 'curr_opponent-points-per-game',
                   'comb_sos', 'curr_sos']]

main_df.sort_index(inplace=True, ascending=True)

In [17]:
new_col_names = ['pre_off_ypp', 'curr_off_ypp', 'pre_def_ypp', 'curr_def_ypp', 'pre_off_ypr', 'curr_off_ypr',
                 'pre_def_ypr', 'curr_def_ypr', 'pre_off_comp_pct', 'curr_off_comp_pct',
                 'pre_def_comp_pct', 'curr_def_comp_pct', 'pre_off_pass_att', 'curr_off_pass_att',
                 'pre_def_pass_att', 'curr_def_pass_att', 'pre_off_tot_ypp', 'curr_off_tot_ypp',
                 'pre_def_tot_ypp', 'curr_def_tot_ypp', 'pre_off_pass_pct', 'curr_off_pass_pct',
                 'pre_def_pass_pct', 'curr_def_pass_pct', 'pre_off_ppg', 'curr_off_ppg',
                 'pre_def_ppg', 'curr_def_ppg', 'pre_sos', 'curr_sos']

main_df.columns = new_col_names
main_df.head()

Unnamed: 0_level_0,pre_off_ypp,curr_off_ypp,pre_def_ypp,curr_def_ypp,pre_off_ypr,curr_off_ypr,pre_def_ypr,curr_def_ypr,pre_off_comp_pct,curr_off_comp_pct,...,pre_off_pass_pct,curr_off_pass_pct,pre_def_pass_pct,curr_def_pass_pct,pre_off_ppg,curr_off_ppg,pre_def_ppg,curr_def_ppg,pre_sos,curr_sos
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Arizona,1.744678,6.569908,0.776379,6.467992,1.335874,4.670959,0.488597,4.579444,0.179811,0.657367,...,0.175772,0.571512,0.063445,0.58485,5.899737,24.065309,2.944233,23.309536,-0.1335,-1.019621
Atlanta,3.918887,7.06054,1.722177,7.378374,2.236088,3.920742,1.086874,4.405176,0.366744,0.66256,...,0.3494,0.631967,0.144497,0.621503,13.072513,24.36425,5.937694,25.470426,0.934437,2.215495
Baltimore,1.766916,6.66353,0.574026,5.909567,1.342062,5.496764,0.403185,4.2918,0.162727,0.629834,...,0.124381,0.457944,0.058648,0.624125,7.613622,27.867796,1.75078,18.480866,-0.007038,0.219446
Buffalo,1.374528,7.162951,1.281237,6.449266,1.004126,4.245642,0.926099,4.606362,0.130287,0.655258,...,0.124194,0.600465,0.124314,0.590455,4.295621,27.372132,4.290202,22.456609,-0.421708,0.233265
Carolina,1.310568,6.862187,0.573995,6.537716,1.037447,4.321503,0.404271,4.716501,0.13424,0.664537,...,0.136552,0.597962,0.048424,0.599656,4.73653,21.860538,2.198152,25.227292,0.328528,0.068497


In [18]:
off_weight_series = [sum(weight_list[:-1]) for weight_list in main_df.index.map(raw_off_weights)]
def_weight_series = [sum(weight_list[:-1]) for weight_list in main_df.index.map(raw_def_weights)]
main_df['off_weights'] = off_weight_series
main_df['def_weights'] = def_weight_series

In [19]:
pre_cols = [col for col in main_df.columns if re.match('pre_', col)]
for col in pre_cols:
    if re.search('off_', col):
        main_df[col] /= main_df['off_weights']
    elif re.search('def_', col):
        main_df[col] /= main_df['def_weights']
    else:
        # divide by the sum of weights except most recent
        main_df[col] = main_df[col] / sum(year_weightings(weeks_played, np.logspace(1, 4, 4, base=2))[:-1])
main_df.head()

Unnamed: 0_level_0,pre_off_ypp,curr_off_ypp,pre_def_ypp,curr_def_ypp,pre_off_ypr,curr_off_ypr,pre_def_ypr,curr_def_ypr,pre_off_comp_pct,curr_off_comp_pct,...,pre_def_pass_pct,curr_def_pass_pct,pre_off_ppg,curr_off_ppg,pre_def_ppg,curr_def_ppg,pre_sos,curr_sos,off_weights,def_weights
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Arizona,6.007692,6.569908,7.014286,6.467992,4.6,4.670959,4.414286,4.579444,0.619169,0.657367,...,0.5732,0.58485,20.315385,24.065309,26.6,23.309536,-0.312996,-1.019621,0.290407,0.110685
Atlanta,7.194737,7.06054,7.005263,7.378374,4.105263,3.920742,4.421053,4.405176,0.673311,0.66256,...,0.587768,0.621503,24.0,24.36425,24.152632,25.470426,2.190824,2.215495,0.544688,0.24584
Baltimore,6.846154,6.66353,6.0,5.909567,5.2,5.496764,4.214286,4.2918,0.630508,0.629834,...,0.613014,0.624125,29.5,27.867796,18.3,18.480866,-0.016501,0.219446,0.258089,0.095671
Buffalo,6.023077,7.162951,5.905263,6.449266,4.4,4.245642,4.268421,4.606362,0.570908,0.655258,...,0.572968,0.590455,18.823077,27.372132,19.773684,22.456609,-0.98871,0.233265,0.22821,0.216965
Carolina,6.009524,6.862187,6.957143,6.537716,4.757143,4.321503,4.9,4.716501,0.615548,0.664537,...,0.586929,0.599656,21.719048,21.860538,26.642857,25.227292,0.770246,0.068497,0.218082,0.082504


### Calculate Points Lost to INTs by Incompletion Pct

In [20]:
avg_int_per_incomp = 0.029
avg_pts_per_int_val = 5.0

off_incomp_pct = 1 - main_df['pre_off_comp_pct']
incomps_per_game = main_df['pre_off_pass_att'] * off_incomp_pct
ints_per_game = incomps_per_game * avg_int_per_incomp
points_lost_ints = ints_per_game * avg_pts_per_int_val
main_df['pre_pts_lst_ints'] = points_lost_ints
main_df.drop(['pre_off_comp_pct', 'pre_off_pass_att'], axis=1, inplace=True)

off_incomp_pct = 1 - main_df['curr_off_comp_pct']
incomps_per_game = main_df['curr_off_pass_att'] * off_incomp_pct
ints_per_game = incomps_per_game * avg_int_per_incomp
points_lost_ints = ints_per_game * avg_pts_per_int_val
main_df['curr_pts_lst_ints'] = points_lost_ints
main_df.drop(['curr_off_comp_pct', 'curr_off_pass_att'], axis=1, inplace=True)

def_incomp_pct = 1 - main_df['pre_def_comp_pct']
def_incomps_per_game = main_df['pre_def_pass_att'] * def_incomp_pct
ints_for_per_game = def_incomps_per_game * avg_int_per_incomp
points_gained_from_ints = ints_for_per_game * avg_pts_per_int_val
main_df['pre_pts_from_ints'] = points_gained_from_ints
main_df.drop(['pre_def_comp_pct', 'pre_def_pass_att'], axis=1, inplace=True)

def_incomp_pct = 1 - main_df['curr_def_comp_pct']
def_incomps_per_game = main_df['curr_def_pass_att'] * def_incomp_pct
ints_for_per_game = def_incomps_per_game * avg_int_per_incomp
points_gained_from_ints = ints_for_per_game * avg_pts_per_int_val
main_df['curr_pts_from_ints'] = points_gained_from_ints
main_df.drop(['curr_def_comp_pct', 'curr_def_pass_att'], axis=1, inplace=True)

In [21]:
main_df['pre_sos_z_score'] = main_df['pre_sos']
main_df['curr_sos_z_score'] = main_df['curr_sos']
main_df.drop(['pre_sos', 'curr_sos'], axis=1, inplace=True)

# Confirm 
assert round(abs(main_df['pre_sos_z_score'].mean()), 2) < 0.1
assert round(abs(main_df['curr_sos_z_score'].mean()), 2) < 0.1

main_df.head()

Unnamed: 0_level_0,pre_off_ypp,curr_off_ypp,pre_def_ypp,curr_def_ypp,pre_off_ypr,curr_off_ypr,pre_def_ypr,curr_def_ypr,pre_off_tot_ypp,curr_off_tot_ypp,...,pre_def_ppg,curr_def_ppg,off_weights,def_weights,pre_pts_lst_ints,curr_pts_lst_ints,pre_pts_from_ints,curr_pts_from_ints,pre_sos_z_score,curr_sos_z_score
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Arizona,6.007692,6.569908,7.014286,6.467992,4.6,4.670959,4.414286,4.579444,5.161538,5.543627,...,26.6,23.309536,0.290407,0.110685,1.875372,1.755607,1.647716,1.83314,-0.312996,-1.019621
Atlanta,7.194737,7.06054,7.005263,7.378374,4.105263,3.920742,4.421053,4.405176,5.784211,5.654806,...,24.152632,25.470426,0.544688,0.24584,1.867873,1.924179,1.703436,1.789074,2.190824,2.215495
Baltimore,6.846154,6.66353,6.0,5.909567,5.2,5.496764,4.214286,4.2918,5.776923,5.794044,...,18.3,18.480866,0.258089,0.095671,1.661692,1.461019,2.051414,1.931427,-0.016501,0.219446
Buffalo,6.023077,7.162951,5.905263,6.449266,4.4,4.245642,4.268421,4.606362,5.046154,5.782322,...,19.773684,22.456609,0.22821,0.216965,2.014918,1.823894,1.746537,1.866186,-0.98871,0.233265
Carolina,6.009524,6.862187,6.957143,6.537716,4.757143,4.321503,4.9,4.716501,5.252381,5.524191,...,26.642857,25.227292,0.218082,0.082504,2.140365,1.715673,1.730232,1.700453,0.770246,0.068497


### Build 'Comb' Stats with Norm. YPA/YPR w/ League-Avg Pass Pct

In [22]:
# main_df = master_df.copy()

pre_mean_pass_pct = np.mean(main_df['pre_off_pass_pct'])
curr_mean_pass_pct = np.mean(main_df['curr_off_pass_pct'])
pre_mean_run_pct = 1 - pre_mean_pass_pct
curr_mean_run_pct = 1 - curr_mean_pass_pct

main_df['pre_off_comb'] = main_df['pre_off_ypp'].astype(float) * pre_mean_pass_pct
main_df['pre_off_comb'] += main_df['pre_off_ypr'].astype(float) *  pre_mean_run_pct
main_df['curr_off_comb'] = main_df['curr_off_ypp'].astype(float) * curr_mean_pass_pct
main_df['curr_off_comb'] += main_df['curr_off_ypr'].astype(float) *  curr_mean_run_pct

main_df['pre_def_comb'] = main_df['pre_def_ypp'].astype(float) * pre_mean_pass_pct
main_df['pre_def_comb'] += main_df['pre_def_ypr'].astype(float) * pre_mean_run_pct
main_df['curr_def_comb'] = main_df['curr_def_ypp'].astype(float) * curr_mean_pass_pct
main_df['curr_def_comb'] += main_df['curr_def_ypr'].astype(float) * curr_mean_run_pct

master_df = main_df.copy()

In [23]:
main_df.drop(['pre_off_ypr', 'curr_off_ypr'], axis=1, inplace=True)
main_df.drop(['pre_def_ypr', 'curr_def_ypr'], axis=1, inplace=True)

### Adjustment Dictionary

In [24]:
adj_dict = {
            'Arizona'       : (0, 0),
            'Atlanta'       : (0, 0),
            'Baltimore'     : (0, 0),
            'Buffalo'       : (0, 0),
            'Carolina'      : (0, 0),
            'Chicago'       : (0, 0),
            'Cincinnati'    : (0, 0),
            'Cleveland'     : (0, 0),
            'Dallas'        : (0, 0),
            'Denver'        : (0, 0),
            'Detroit'       : (0, 0),
            'Green Bay'     : (0, 0),
            'Houston'       : (0, 0),
            'Indianapolis'  : (0, 0),
            'Jacksonville'  : (0, 0),
            'Kansas City'   : (0, 0),
            'LA Chargers'   : (0, 0),
            'LA Rams'       : (0, 0),
            'Las Vegas'     : (0, 0),
            'Miami'         : (0, 0),
            'Minnesota'     : (0, 0),
            'NY Giants'     : (0, 0),
            'NY Jets'       : (0, 0),
            'New England'   : (0, 0),
            'New Orleans'   : (0, 0),
            'Philadelphia'  : (0, 0),
            'Pittsburgh'    : (0, 0),
            'San Francisco' : (0, 0),
            'Seattle'       : (0, 0),
            'Tampa Bay'     : (0, 0),
            'Tennessee'     : (0, 0),
            'Washington'    : (0, 0)
        }

adj_df = pd.DataFrame.from_dict(adj_dict, orient='index', columns=['off_adj', 'def_adj'])
adj_df.sort_index(inplace=True, ascending=True)
quick_tmp_df = main_df.merge(adj_df, left_index=True, right_index=True)
quick_tmp_df.head()

Unnamed: 0_level_0,pre_off_ypp,curr_off_ypp,pre_def_ypp,curr_def_ypp,pre_off_tot_ypp,curr_off_tot_ypp,pre_def_tot_ypp,curr_def_tot_ypp,pre_off_pass_pct,curr_off_pass_pct,...,pre_pts_from_ints,curr_pts_from_ints,pre_sos_z_score,curr_sos_z_score,pre_off_comb,curr_off_comb,pre_def_comb,curr_def_comb,off_adj,def_adj
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Arizona,6.007692,6.569908,7.014286,6.467992,5.161538,5.543627,5.671429,5.341112,0.605262,0.571512,...,1.647716,1.83314,-0.312996,-1.019621,5.426989,5.776792,5.94173,5.679221,0,0
Atlanta,7.194737,7.06054,7.005263,7.378374,5.784211,5.654806,5.736842,6.086137,0.641468,0.631967,...,1.703436,1.789074,2.190824,2.215495,5.920262,5.74917,5.93922,6.136586,0,0
Baltimore,6.846154,6.66353,6.0,5.909567,5.776923,5.794044,5.028571,5.093166,0.481931,0.457944,...,2.051414,1.931427,-0.016501,0.219446,6.16708,6.176218,5.263354,5.23389,0,0
Buffalo,6.023077,7.162951,5.905263,6.449266,5.046154,5.782322,4.984211,5.466395,0.544208,0.600465,...,1.746537,1.866186,-0.98871,0.233265,5.353523,5.944506,5.23003,5.679558,0,0
Carolina,6.009524,6.862187,6.957143,6.537716,5.252381,5.524191,5.8,5.616501,0.626152,0.597962,...,1.730232,1.700453,0.770246,0.068497,5.49289,5.801044,6.108527,5.777067,0,0


### Calculate the Adjustments

In [25]:
main_df = quick_tmp_df.copy()

off_adj, def_adj = main_df['off_adj'], main_df['def_adj']
off_mean, def_mean = np.mean(off_adj), np.mean(def_adj)
off_std, def_std = np.std(off_adj), np.std(def_adj)
diff = off_mean + def_mean

num_list = [diff, off_mean, def_mean, off_std, def_std]
rounded_nums = [round(num, 3) for num in num_list]
print(*rounded_nums)

0.0 0.0 0.0 0.0 0.0


### Add in Home Stadium Info

In [26]:
home_stadium_dict = {
            'Arizona'       : ('dome', 'grass', 'nfc_west'),
            'Atlanta'       : ('dome', 'turf', 'nfc_south'),
            'Baltimore'     : ('mid', 'grass', 'afc_north'),
            'Buffalo'       : ('cold', 'turf', 'afc_east'),
            'Carolina'      : ('warm', 'grass', 'nfc_south'),
            'Chicago'       : ('cold', 'grass', 'nfc_north'),
            'Cincinnati'    : ('mid', 'turf', 'afc_north'),
            'Cleveland'     : ('cold', 'grass', 'afc_north'),
            'Dallas'        : ('dome', 'turf', 'nfc_east'),
            'Denver'        : ('cold', 'grass', 'afc_west'),
            'Detroit'       : ('dome', 'turf', 'nfc_north'),
            'Green Bay'     : ('cold', 'grass', 'nfc_north'),
            'Houston'       : ('dome', 'turf', 'afc_south'),
            'Indianapolis'  : ('dome', 'turf', 'afc_south'),
            'Jacksonville'  : ('warm', 'grass', 'afc_south'),
            'Kansas City'   : ('mid', 'grass', 'afc_west'),
            'LA Chargers'   : ('dome', 'turf', 'afc_west'),
            'LA Rams'       : ('dome', 'turf', 'nfc_west'),
            'Las Vegas'     : ('dome', 'turf', 'afc_west'),
            'Miami'         : ('warm', 'grass', 'afc_east'),
            'Minnesota'     : ('dome', 'turf', 'nfc_north'),
            'NY Giants'     : ('cold', 'turf', 'nfc_east'),
            'NY Jets'       : ('cold', 'turf', 'afc_east'),
            'New England'   : ('cold', 'turf', 'afc_east'),
            'New Orleans'   : ('dome', 'turf', 'nfc_south'),
            'Philadelphia'  : ('cold', 'grass', 'nfc_east'),
            'Pittsburgh'    : ('cold', 'grass', 'afc_north'),
            'San Francisco' : ('warm', 'grass', 'nfc_west'),
            'Seattle'       : ('mid', 'turf', 'nfc_west'),
            'Tampa Bay'     : ('warm', 'grass', 'nfc_south'),
            'Tennessee'     : ('mid', 'grass', 'afc_south'),
            'Washington'    : ('mid', 'grass', 'nfc_east')
}

main_df['home_weather'] = [tupl[0] for tupl in home_stadium_dict.values()]
main_df['home_surface'] = [tupl[1] for tupl in home_stadium_dict.values()]
main_df['division'] = [tupl[2] for tupl in home_stadium_dict.values()]
main_df.head()

Unnamed: 0_level_0,pre_off_ypp,curr_off_ypp,pre_def_ypp,curr_def_ypp,pre_off_tot_ypp,curr_off_tot_ypp,pre_def_tot_ypp,curr_def_tot_ypp,pre_off_pass_pct,curr_off_pass_pct,...,curr_sos_z_score,pre_off_comb,curr_off_comb,pre_def_comb,curr_def_comb,off_adj,def_adj,home_weather,home_surface,division
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Arizona,6.007692,6.569908,7.014286,6.467992,5.161538,5.543627,5.671429,5.341112,0.605262,0.571512,...,-1.019621,5.426989,5.776792,5.94173,5.679221,0,0,dome,grass,nfc_west
Atlanta,7.194737,7.06054,7.005263,7.378374,5.784211,5.654806,5.736842,6.086137,0.641468,0.631967,...,2.215495,5.920262,5.74917,5.93922,6.136586,0,0,dome,turf,nfc_south
Baltimore,6.846154,6.66353,6.0,5.909567,5.776923,5.794044,5.028571,5.093166,0.481931,0.457944,...,0.219446,6.16708,6.176218,5.263354,5.23389,0,0,mid,grass,afc_north
Buffalo,6.023077,7.162951,5.905263,6.449266,5.046154,5.782322,4.984211,5.466395,0.544208,0.600465,...,0.233265,5.353523,5.944506,5.23003,5.679558,0,0,cold,turf,afc_east
Carolina,6.009524,6.862187,6.957143,6.537716,5.252381,5.524191,5.8,5.616501,0.626152,0.597962,...,0.068497,5.49289,5.801044,6.108527,5.777067,0,0,warm,grass,nfc_south


### Dome/Field Coefficients

In [27]:
dome_factor_dict = {
    'off': {
        'dome'     : -0.06,
        'non_dome' : 0.06
    },
    'def': {
        'dome'     : -0.03,
        'non_dome' : 0.03
    }
}

In [28]:
main_df['dome'] = main_df['home_weather'].apply(lambda x: 'dome' if x=='dome' else 'non_dome')
main_df.drop('home_weather', axis=1, inplace=True)
tmp_master_df = main_df.copy()
main_df.head()

Unnamed: 0_level_0,pre_off_ypp,curr_off_ypp,pre_def_ypp,curr_def_ypp,pre_off_tot_ypp,curr_off_tot_ypp,pre_def_tot_ypp,curr_def_tot_ypp,pre_off_pass_pct,curr_off_pass_pct,...,curr_sos_z_score,pre_off_comb,curr_off_comb,pre_def_comb,curr_def_comb,off_adj,def_adj,home_surface,division,dome
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Arizona,6.007692,6.569908,7.014286,6.467992,5.161538,5.543627,5.671429,5.341112,0.605262,0.571512,...,-1.019621,5.426989,5.776792,5.94173,5.679221,0,0,grass,nfc_west,dome
Atlanta,7.194737,7.06054,7.005263,7.378374,5.784211,5.654806,5.736842,6.086137,0.641468,0.631967,...,2.215495,5.920262,5.74917,5.93922,6.136586,0,0,turf,nfc_south,dome
Baltimore,6.846154,6.66353,6.0,5.909567,5.776923,5.794044,5.028571,5.093166,0.481931,0.457944,...,0.219446,6.16708,6.176218,5.263354,5.23389,0,0,grass,afc_north,non_dome
Buffalo,6.023077,7.162951,5.905263,6.449266,5.046154,5.782322,4.984211,5.466395,0.544208,0.600465,...,0.233265,5.353523,5.944506,5.23003,5.679558,0,0,turf,afc_east,non_dome
Carolina,6.009524,6.862187,6.957143,6.537716,5.252381,5.524191,5.8,5.616501,0.626152,0.597962,...,0.068497,5.49289,5.801044,6.108527,5.777067,0,0,grass,nfc_south,non_dome


### DataFrame of Teams Ranked by Exp Margin

In [31]:
# Metrics
off_metric = 'off_ypp'
def_metric = 'def_ypp'

# Range Len of Min-Max Scale
range_margin = 18.0

# SOS factor
sos_lr = 0.2

# Multiply by SOS factor
pre_sos_scaled = main_df['pre_sos_z_score'] * sos_lr
curr_sos_scaled = main_df['curr_sos_z_score'] * sos_lr
main_df['pre_sos_score'] = pre_sos_scaled
main_df['curr_sos_score'] = curr_sos_scaled

pre_ppg_mean, pre_oppg_mean = main_df['pre_off_ppg'].mean(), main_df['pre_def_ppg'].mean()
pre_ppg_std, pre_oppg_std = main_df['pre_off_ppg'].std(), main_df['pre_def_ppg'].std()
curr_ppg_mean, curr_oppg_mean = main_df['curr_off_ppg'].mean(), main_df['curr_def_ppg'].mean()
curr_ppg_std, curr_oppg_std = main_df['curr_off_ppg'].std(), main_df['curr_def_ppg'].std()

pre_off_metric_mean, pre_def_metric_mean = main_df[f'pre_{off_metric}'].mean(), main_df[f'pre_{def_metric}'].mean()
pre_metric_std = main_df[f'pre_{off_metric}'].std()
curr_off_metric_mean, curr_def_metric_mean = main_df[f'curr_{off_metric}'].mean(), main_df[f'curr_{def_metric}'].mean()
curr_metric_std = main_df[f'curr_{off_metric}'].std()

pre_off_z_scores = (main_df[f'pre_{off_metric}'] - pre_off_metric_mean) / pre_metric_std
pre_def_z_scores = (main_df[f'pre_{def_metric}'] - pre_def_metric_mean) / pre_metric_std
curr_off_z_scores = (main_df[f'curr_{off_metric}'] - curr_off_metric_mean) / curr_metric_std
curr_def_z_scores = (main_df[f'curr_{def_metric}'] - curr_def_metric_mean) / curr_metric_std

home_dome_off_adj = main_df['dome'].apply(lambda x: dome_factor_dict['off'][x])
home_dome_def_adj = main_df['dome'].apply(lambda x: dome_factor_dict['def'][x])

main_df['pre_off_score'] = pre_off_z_scores + home_dome_off_adj + (pre_sos_scaled / 2)
main_df['pre_def_score'] = pre_def_z_scores + home_dome_off_adj - (pre_sos_scaled / 2)
main_df['curr_off_score'] = curr_off_z_scores + main_df['off_adj'] + home_dome_off_adj + (curr_sos_scaled / 2)
main_df['curr_def_score'] = curr_def_z_scores + main_df['def_adj'] + home_dome_def_adj - (curr_sos_scaled / 2)

pre_off_exp_points = (main_df['pre_off_score'] * pre_ppg_std) + pre_ppg_mean
pre_def_exp_points = (main_df['pre_def_score'] * pre_oppg_std) + pre_oppg_mean
curr_off_exp_points = (main_df['curr_off_score'] * curr_ppg_std) + curr_ppg_mean
curr_def_exp_points = (main_df['curr_def_score'] * curr_oppg_std) + curr_oppg_mean

pre_off_exp_points -= main_df['pre_pts_lst_ints']
pre_def_exp_points -= main_df['pre_pts_from_ints']
curr_off_exp_points -= main_df['curr_pts_lst_ints']
curr_def_exp_points -= main_df['curr_pts_from_ints']

main_df['pre_off_exp'] = pre_off_exp_points
main_df['pre_def_exp'] = pre_def_exp_points
main_df['curr_off_exp'] = curr_off_exp_points
main_df['curr_def_exp'] = curr_def_exp_points

# Scale Up to YPP
main_df['pre_exp_margin'] = main_df['pre_off_exp'] - main_df['pre_def_exp']
main_df['curr_exp_margin'] = main_df['curr_off_exp'] - main_df['curr_def_exp']

main_df['off_delta'] = (main_df['curr_off_exp'] - main_df['pre_off_exp'])
main_df['def_delta'] = (main_df['curr_def_exp'] - main_df['pre_def_exp'])
main_df['ovr_delta'] = (main_df['curr_exp_margin'] - main_df['pre_exp_margin'])

new_df = main_df[['curr_exp_margin', 'pre_exp_margin', 'ovr_delta', 'curr_off_score', f'curr_{off_metric}',
                  'curr_def_score', f'curr_{def_metric}', 'pre_off_exp', 'curr_off_exp', 'off_delta',
                  'pre_def_exp', 'curr_def_exp', 'def_delta']]

new_df = new_df.apply(lambda x: round(x, 2))

In [32]:
sort_by = 'curr_exp_margin'

new_df.sort_values(by=sort_by, ascending=False)

Unnamed: 0_level_0,curr_exp_margin,pre_exp_margin,ovr_delta,curr_off_score,curr_off_ypp,curr_def_score,curr_def_ypp,pre_off_exp,curr_off_exp,off_delta,pre_def_exp,curr_def_exp,def_delta
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Kansas City,8.58,9.36,-0.77,1.94,7.85,-0.56,6.6,28.59,29.51,0.92,19.23,20.92,1.69
LA Rams,7.12,4.22,2.9,0.31,6.93,-1.96,5.72,24.18,23.46,-0.72,19.96,16.34,-3.62
Tampa Bay,6.11,5.33,0.77,1.21,7.45,-0.72,6.44,26.94,26.6,-0.33,21.6,20.5,-1.11
New Orleans,5.86,5.69,0.17,0.7,7.25,-0.98,6.28,26.22,25.35,-0.87,20.53,19.48,-1.05
San Francisco,5.81,7.35,-1.54,0.74,7.15,-1.09,6.2,25.72,25.12,-0.6,18.37,19.31,0.94
Green Bay,5.4,-2.76,8.15,1.29,7.53,-0.31,6.67,20.12,27.4,7.28,22.87,22.0,-0.87
Baltimore,4.24,4.75,-0.52,-0.14,6.66,-1.46,5.91,21.91,22.18,0.27,17.16,17.95,0.78
Tennessee,4.06,3.98,0.08,1.28,7.47,0.13,7.0,24.58,27.41,2.83,20.6,23.35,2.75
Buffalo,3.89,-0.92,4.81,0.65,7.16,-0.6,6.45,16.37,24.76,8.39,17.28,20.87,3.59
Houston,2.03,-0.75,2.78,1.69,7.85,1.07,7.58,22.32,28.86,6.53,23.08,26.83,3.75


### Predict Score Func

In [33]:
def predict_score(away_team, home_team, df, metric='comb', adj_dict=adj_dict, sos_lr=0.3, home_adv=2.0,
                  dome_factor_dict=dome_factor_dict, weather='std', off_offset=0, def_offset=0,
                  vegas_spread=None, vegas_total=None):
    
    off_col = 'curr_off_' + metric
    def_col = 'curr_def_' + metric
    
    # Calculate Means and STDs
    ppg_mean, oppg_mean = df['curr_off_ppg'].mean(), df['curr_def_ppg'].mean()
    ppg_std = df['curr_off_ppg'].std()
    off_metric_mean, def_metric_mean = df[off_col].mean(), df[def_col].mean()
    metric_std = df[off_col].std()
    
    # Get Offensive/Defensive Stats
    away_team_off, away_team_def = df.loc[away_team, off_col], df.loc[away_team, def_col]
    home_team_off, home_team_def = df.loc[home_team, off_col], df.loc[home_team, def_col]
    
    # Get SOS Adjustments
    away_sos = (df.loc[away_team, 'curr_sos_z_score'] * sos_lr) / 2
    home_sos = (df.loc[home_team, 'curr_sos_z_score'] * sos_lr) / 2
    
    # Calculate Z Scores and Add Adjustments
    off_a_z_score = ((away_team_off - off_metric_mean) / metric_std) + adj_dict[away_team][0] + away_sos
    def_a_z_score = ((away_team_def - def_metric_mean) / metric_std) + adj_dict[away_team][1] - away_sos
    off_b_z_score = ((home_team_off - off_metric_mean) / metric_std) + adj_dict[home_team][0] + home_sos
    def_b_z_score = ((home_team_def - def_metric_mean) / metric_std) + adj_dict[home_team][1] - home_sos
    
    # Norm. for HFA Factors: Surface and Dome
    away_surface = df.loc[away_team, 'home_surface']
    home_surface = df.loc[home_team, 'home_surface']
    away_dome = df.loc[away_team, 'dome']
    home_dome = df.loc[home_team, 'dome']
    
    off_a_z_score += dome_factor_dict['off'][away_dome]
    def_a_z_score += dome_factor_dict['def'][away_dome]
    off_b_z_score += dome_factor_dict['off'][home_dome]
    def_b_z_score += dome_factor_dict['def'][home_dome]
    
    # Add in Dome/Turf Boost
    dome_boost_off_factor = sum([abs(val) for val in dome_factor_dict['off'].values()])
    dome_boost_def_factor = sum([abs(val) for val in dome_factor_dict['def'].values()])
    
    if home_dome == 'dome':
        off_a_z_score += dome_boost_off_factor
        def_a_z_score += dome_boost_def_factor
        off_b_z_score += dome_boost_off_factor
        def_b_z_score += dome_boost_def_factor
    
    # Calculate Points for Each Team
    away_team_exp_pts = ((off_a_z_score + def_b_z_score) * ppg_std) + ppg_mean
    home_team_exp_pts = ((off_b_z_score + def_b_z_score) * ppg_std) + ppg_mean
    
    # Interception Points
    away_pts_lst_ints = df.loc[away_team, 'curr_pts_lst_ints']
    away_pts_from_ints = df.loc[away_team, 'curr_pts_from_ints']
    home_pts_lst_ints = df.loc[home_team, 'curr_pts_lst_ints']
    home_pts_from_ints = df.loc[home_team, 'curr_pts_from_ints']

    away_team_exp_pts += away_pts_from_ints - away_pts_lst_ints
    home_team_exp_pts += home_pts_from_ints - home_pts_lst_ints
    
    # Add in Offset
    offset = off_offset - def_offset
    away_team_exp_pts -= (offset / 2)
    home_team_exp_pts -= (offset / 2)
    
    # Weather Dictionary
    weather_dict = {
                        'std' :      1.01,
                        'cld' :      0.98,
                        'windy' :    0.97,
                        'iffy':      0.94,
                        'bad' :      0.91,
                        'severe' :   0.85
                   }
    
    # Calculate Weather
    weather_factor = (weather_dict[weather] + 1) / 2
    away_team_exp_pts *= weather_factor
    home_team_exp_pts *= weather_factor
    
    first_margin = home_team_exp_pts - away_team_exp_pts # keeps track of HFA change: 1st part
    
    home_adv_factor = home_adv / ppg_mean
    half_hfa_factor = (home_adv_factor / 2) + 1
    
    away_team_exp_pts /= half_hfa_factor
    home_team_exp_pts *= half_hfa_factor
    
    second_margin = home_team_exp_pts - away_team_exp_pts # keeps track of HFA change: 2nd part
    hfa = second_margin - first_margin
    
    func_return_list = spread_total_calc(away_team, home_team, away_team_exp_pts, home_team_exp_pts,
                                         vegas_spread=vegas_spread, vegas_total=vegas_total)
    
    winner, loser, home_margin, spread_winner, spread_margin, total_side, total_margin = func_return_list
    
    # SET ROUNDER FUNCTION
    rounder = lambda x: round(x, 2)
    
    # TRUE SCORE AND TRUE SPREAD
    print(f'\n{away_team} is expected to score {rounder(away_team_exp_pts)}, while ' +
          f'{home_team} is expected to score {rounder(home_team_exp_pts)}.\n')
    print(f'True spread: {winner.upper()} -{rounder(abs(home_margin))}')
    
    # TRUE O/U
    if total_side:
        if total_side == 'OVER':
            true_total = vegas_total + total_margin
        else:
            true_total = vegas_total - total_margin
        print(f'True total: {rounder(true_total)}\n')
    
    # SPREAD MARGIN AND O/U MARGIN
    v_spread = rounder(vegas_spread)
    plus_minus = '+' if v_spread >=0 else '-'
    
    if spread_winner == home_team:
        print(f'Spread margin: {spread_winner.upper()} {plus_minus}{abs(rounder(v_spread))} to ' +
              f'cover by {spread_margin}')
        print(f'O/U margin: {total_side}  by {rounder(total_margin)} pts ({rounder(vegas_total)})')
        
    else:
        if plus_minus == '+':
            plus_minus = '-'
        else:
            plus_minus = '+'
        print(f'Spread margin: {spread_winner.upper()} {plus_minus}{abs(rounder(vegas_spread))} to ' +
              f'cover by {spread_margin}')
        print(f'O/U margin: {total_side} by {rounder(total_margin)} pts ({rounder(vegas_total)})')
    
    # ANCILLARY INFORMATION
    print(f'Adj. HFA: {rounder(hfa)} ({rounder(100 * (second_margin - first_margin)/home_adv)}%)')
    print(f'{away_team} SOS Adj. Pts.: {rounder((away_sos * 2) * ppg_std)}')
    print(f'{home_team} SOS Adj. Pts.: {rounder((home_sos * 2) * ppg_std)}')
    print('\n\n')
    
    
def spread_total_calc(away_team, home_team, away_exp_pts, home_exp_pts, vegas_spread=None, vegas_total=None):
    
    rounder = lambda x: round(x, 2) # rounding function for aesthetic stdout
    
    home_margin = home_exp_pts - away_exp_pts
    
    # Get winner and loser
    if home_margin >= 0:
        winner, loser = home_team, away_team       
    else:
        winner, loser = away_team, home_team
    
    # Get spread winner and margin
    if vegas_spread:
        home_spread_margin = rounder(home_margin + vegas_spread)
        
        if home_spread_margin > 0:
            spread_winner = home_team
            spread_margin = home_spread_margin
            
        elif home_spread_margin == 0:
            spread_winner = 'PUSH'
            spread_margin = 0
            
        else:
            spread_winner = away_team
            spread_margin = abs(home_spread_margin)
    
    else:
        spread_winner, spread_margin = None, None
    
    # Get total side and margin
    if vegas_total:
        
        total_pts = home_exp_pts + away_exp_pts
        over_margin = total_pts - vegas_total
        if over_margin > 0:
            total_side = 'OVER'
            total_margin = over_margin
        elif over_margin == 0:
            total_side = 'PUSH'
            total_margin = 0
        else:
            total_side = 'UNDER'
            total_margin = abs(over_margin)
    
    else:
        total_side, total_margin = None, None
    
    return [winner, loser, home_margin, spread_winner, spread_margin, total_side, total_margin]

#### Team Lookup Dict

In [34]:
team_name_dict = {
            'Arizona'       : 'crd',
            'Atlanta'       : 'atl',
            'Baltimore'     : 'rav',
            'Buffalo'       : 'buf',
            'Carolina'      : 'car',
            'Chicago'       : 'chi',
            'Cincinnati'    : 'cin',
            'Cleveland'     : 'cle',
            'Dallas'        : 'dal',
            'Denver'        : 'den',
            'Detroit'       : 'det',
            'Green Bay'     : 'gnb',
            'Houston'       : 'htx',
            'Indianapolis'  : 'clt',
            'Jacksonville'  : 'jax',
            'Kansas City'   : 'kan',
            'LA Chargers'   : 'sdg',
            'LA Rams'       : 'ram',
            'Las Vegas'     : 'rai',
            'Miami'         : 'mia',
            'Minnesota'     : 'min',
            'NY Giants'     : 'nyg',
            'NY Jets'       : 'nyj',
            'New England'   : 'nwe',
            'New Orleans'   : 'nor',
            'Philadelphia'  : 'phi',
            'Pittsburgh'    : 'pit',
            'San Francisco' : 'sfo',
            'Seattle'       : 'sea',
            'Tampa Bay'     : 'tam',
            'Tennessee'     : 'oti',
            'Washington'    : 'was',
        }

reverse_name_dict = {val: key for key, val in team_name_dict.items()}

### Games of the Week

In [35]:
game_list = [
                'kan tam 0.25 std 3.5 56.5'
]

### Predictions

In [36]:
for game in game_list:
    
    split_game = game.split()
    away_init, home_init = split_game[0], split_game[1]
    away_team, home_team = reverse_name_dict[away_init], reverse_name_dict[home_init]
    home_adv, weather = float(split_game[2]), split_game[3]
    vegas_spread, vegas_total = float(split_game[4]), float(split_game[5])

    # SET PARAMETERS
    metric = 'ypp'
    sos_lr = 0.2
    off_offset = 0
    def_offset = 0
    
    predict_score(away_team, home_team, main_df, metric=metric, adj_dict=adj_dict, sos_lr=sos_lr,
                  home_adv=home_adv, off_offset=off_offset, def_offset=def_offset, weather=weather,
                  vegas_spread=vegas_spread, vegas_total=vegas_total)


Kansas City is expected to score 28.79, while Tampa Bay is expected to score 26.04.

True spread: KANSAS CITY -2.75
True total: 54.84

Spread margin: TAMPA BAY +3.5 to cover by 0.75
O/U margin: UNDER  by 1.66 pts (56.5)
Adj. HFA: 0.28 (113.18%)
Kansas City SOS Adj. Pts.: 1.6
Tampa Bay SOS Adj. Pts.: 0.96



