## Imports

In [1]:
import pandas as pd
import numpy as np
import nfl_data_py as nfl

## Data And Parameters

In [2]:
## modeling input paramters
K = 20  ##max elo change per game
HFA = 50  ##home field advantage
num_simulations = 10000
starting_seed = 1

In [3]:
## game level data
seasons = [2025]
schedule_df = nfl.import_schedules(years=seasons)
schedule_df = schedule_df[schedule_df['game_type']=='REG']
schedule_df=schedule_df[['week','away_team', 'home_team','result']]

In [4]:
## elo data
elo = pd.read_csv('elo_2025.csv')  ## elo pulled from https://www.nfeloapp.com/nfl-power-ratings/
teams_df = elo.copy()
teams_df = teams_df[['Team', 'nfelo']]
#make team names consistent
teams_df['Team'] = teams_df['Team'].replace({'LAR': 'LA', 'OAK': 'LV'})

## Function Definitions

In [5]:
def simulate_full_season(teams_df=teams_df.copy(), schedule_df=schedule_df.copy(), K=K, HFA=HFA, seed=starting_seed):
    """
    Input:
        teams_df: elo and team name
        schedule_df: week, home_team, away_team
        K: elo change per game
        HFA: home field advantage
        seed: for randomization

    Output:
        Dataframe: 1 row for each game throughout the entire season
            week. home_team, away_team, home_elo_before, away_elo_before, home_elo_after, away_elo_after, home_win_prob, away_win_prob, simulated_winner

    Methodology:
        Generates a record for each game.  Generates odds of each team winning, and simulates a winner based on those odds
        Updates ELO for both teams after the game, so they can be used in future games, allowing sull season to be simulated at a time
    """
    np.random.seed(seed)
    elo_ratings = teams_df.set_index('Team')['nfelo'].to_dict()
    all_game_results = []

    for week in sorted(schedule_df['week'].unique()):
        week_games = schedule_df[schedule_df['week'] == week]

        for df_index, df_game in week_games.iterrows():
            home_elo = elo_ratings[df_game['home_team']]
            away_elo = elo_ratings[df_game['away_team']]

            home_prob = 1/(1+10 **((away_elo-home_elo+HFA)/400))

            #simulate a winner
            winner = df_game['home_team'] if np.random.rand() < home_prob else df_game['away_team']

            if winner==df_game['home_team']:
                new_home_elo = home_elo + K * (1-home_prob)
                new_away_elo = away_elo + K * (home_prob-1)
            else:
                new_home_elo = home_elo + K * (0-home_prob)
                new_away_elo = away_elo + K * (home_prob-0)
            
            elo_ratings[df_game['home_team']] = new_home_elo
            elo_ratings[df_game['away_team']] = new_away_elo

            all_game_results.append({
                'week': week,
                'home_team': df_game['home_team'],
                'away_team': df_game['away_team'],
                'home_elo_before': home_elo,
                'away_elo_before': away_elo,
                'home_elo_after': new_home_elo,
                'away_elo_after': new_away_elo,
                'home_win_prob': home_prob,
                'away_win_prob': (1-home_prob),
                'simulated_winner': winner})
            
    return pd.DataFrame(all_game_results)

In [6]:
single_season_simulation = simulate_full_season(teams_df,schedule_df, 20, 50, 5)
single_season_simulation.head(5)

Unnamed: 0,week,home_team,away_team,home_elo_before,away_elo_before,home_elo_after,away_elo_after,home_win_prob,away_win_prob,simulated_winner
0,1,PHI,DAL,1711.34,1434.14,1715.596875,1429.883125,0.787156,0.212844,PHI
1,1,LAC,KC,1624.07,1626.31,1615.562359,1634.817641,0.425382,0.574618,KC
2,1,ATL,TB,1509.63,1581.23,1522.993693,1567.866307,0.331815,0.668185,ATL
3,1,CLE,CIN,1374.49,1617.08,1371.359418,1620.210582,0.156529,0.843471,CIN
4,1,IND,MIA,1459.33,1482.06,1451.393287,1489.996713,0.396836,0.603164,MIA


In [7]:
def global_survivor_path(simulated_season_df, win_metric='home_win_prob'):
    """
    Input:
        simulated_season_df: output of simulate_full_season
        win_metric: what metric you want to use to calculate path
            home_win_prob: maximze win probability, emphasis on home
            home_elo_difference: maximize elo difference between teams, emphasis on home
            home_elo_before: maximize elo before game.  emphasis on home
    
    Output:
        list of teams in order of week selected.  None is first entry for week 0

    Methodology:
        Scans the season looking for the highest value of metric.
        Then that winning team is added to used_teams, and that week is dropped from data
        Then keep iterating through until a team is selected for each week

    """
    survivor_path = [None] * (19)
    used_teams = set()
    remaining_games = simulated_season_df.copy()

    while remaining_games['week'].nunique()>0:
        if win_metric == 'home_win_prob':
            iteration_df = remaining_games.copy()
            iteration_df_1 = iteration_df[iteration_df['simulated_winner'] == iteration_df['home_team']]
            if not iteration_df_1.empty:
                iteration_df_2 = iteration_df_1[iteration_df_1['home_win_prob'] == iteration_df_1['home_win_prob'].max()]
                win_team = iteration_df_2['home_team'].iloc[0]
                win_week = iteration_df_2['week'].iloc[0]
                survivor_path[win_week] = iteration_df_2['simulated_winner'].iloc[0]
                used_teams.add(win_team)
                remaining_games = remaining_games[remaining_games['week']!=win_week]
                remaining_games = remaining_games[remaining_games['simulated_winner']!=win_team]
            else:
                iteration_df_1 = iteration_df[iteration_df['simulated_winner'] == iteration_df['away_team']]
                iteration_df_2 = iteration_df[iteration_df['away_win_prob'] == iteration_df['away_win_prob'].max()]
                win_team = iteration_df_2['away_win_prob'].iloc[0]
                win_week = iteration_df_2['week'].iloc[0]
                survivor_path[win_week] = iteration_df_2['simulated_winner'].iloc[0]
                used_teams.add(win_team)
                remaining_games = remaining_games[remaining_games['week']!=win_week]
                remaining_games = remaining_games[remaining_games['simulated_winner']!=win_team]

        
        if win_metric == 'home_elo_difference':
            iteration_df = remaining_games.copy()
            iteration_df_1 = iteration_df[iteration_df['simulated_winner'] == iteration_df['home_team']]
            if not iteration_df_1.empty:
                iteration_df_2 = iteration_df_1[(iteration_df_1['home_elo_before']-iteration_df_1['away_elo_before']) == (iteration_df_1['home_elo_before'] - iteration_df_1['away_elo_before']).max()]
                win_team = iteration_df_2['home_team'].iloc[0]
                win_week = iteration_df_2['week'].iloc[0]
                survivor_path[win_week] = iteration_df_2['simulated_winner'].iloc[0]
                used_teams.add(win_team)
                remaining_games = remaining_games[remaining_games['week']!=win_week]
                remaining_games = remaining_games[remaining_games['simulated_winner']!=win_team]
            else:
                iteration_df_1 = iteration_df[iteration_df['simulated_winner'] == iteration_df['away_team']]
                iteration_df_2 = iteration_df_1[(iteration_df_1['home_elo_before']-iteration_df_1['away_elo_before']) == (iteration_df_1['home_elo_before'] - iteration_df_1['away_elo_before']).min()]
                win_team = iteration_df_2['away_win_prob'].iloc[0]
                win_week = iteration_df_2['week'].iloc[0]
                survivor_path[win_week] = iteration_df_2['simulated_winner'].iloc[0]
                used_teams.add(win_team)
                remaining_games = remaining_games[remaining_games['week']!=win_week]
                remaining_games = remaining_games[remaining_games['simulated_winner']!=win_team]
        
        if win_metric == 'home_elo_before':
            iteration_df = remaining_games.copy()
            iteration_df_1 = iteration_df[iteration_df['simulated_winner'] == iteration_df['home_team']]
            if not iteration_df_1.empty:
                iteration_df_2 = iteration_df_1[iteration_df_1['home_elo_before'] == iteration_df_1['home_elo_before'].max()]
                win_team = iteration_df_2['home_team'].iloc[0]
                win_week = iteration_df_2['week'].iloc[0]
                survivor_path[win_week] = iteration_df_2['simulated_winner'].iloc[0]
                used_teams.add(win_team)
                remaining_games = remaining_games[remaining_games['week']!=win_week]
                remaining_games = remaining_games[remaining_games['simulated_winner']!=win_team]
            else:
                iteration_df_1 = iteration_df[iteration_df['simulated_winner'] == iteration_df['away_team']]
                iteration_df_2 = iteration_df_1[iteration_df_1['away_elo_before'] == iteration_df_1['away_elo_before'].max()]
                win_team = iteration_df_2['away_win_prob'].iloc[0]
                win_week = iteration_df_2['week'].iloc[0]
                survivor_path[win_week] = iteration_df_2['simulated_winner'].iloc[0]
                used_teams.add(win_team)
                remaining_games = remaining_games[remaining_games['week']!=win_week]
                remaining_games = remaining_games[remaining_games['simulated_winner']!=win_team]

    return survivor_path

In [8]:
single_season_path = global_survivor_path(single_season_simulation, 'home_win_prob')
single_season_path

[None,
 'WAS',
 'CIN',
 'CHI',
 'NE',
 'BUF',
 'PIT',
 'DEN',
 'PHI',
 'GB',
 'HOU',
 'ARI',
 'DET',
 'MIA',
 'TB',
 'SEA',
 'BAL',
 'NYJ',
 'ATL']

In [9]:
def simulate_full_season_multiple(teams_df=teams_df.copy(), schedule_df=schedule_df.copy(), K=K, HFA=HFA, simulations=num_simulations, seed = starting_seed, win_metric='home_win_prob'):
    """
    Input:
        teams_df: elo and team name
        schedule_df: week, home_team, away_team
        K: elo change per game
        HFA: home field advantage
        simulations: number of seasons you want to simulate
        seed: for randomization
            win_metric: what metric you want to use to calculate path
            home_win_prob: maximze win probability, emphasis on home
            home_elo_difference: maximize elo difference between teams, emphasis on home
            home_elo_before: maximize elo before game.  emphasis on home

    Output:
        List for each season simualted: contains all game results and best path
            Dictionary 'sim': get the number of the simulation
            Dictionary 'sim_games': get the game data for each simulated game.  Same output as simulate_full_season
            Dictionary 'sim_path': list of the teams selected by week.  Same output as global_survivor_path
    Methodology:
        Runs the simulate_full_season function and then finds the best path for that season global_survivor_path
        Increase the seed by 1, so each simulation is of a different season
    """
    all_season_results = []
    np.random.seed()

    for sim in range(simulations):
        sim_seed = seed+sim
        sim_games = simulate_full_season(teams_df.copy(), schedule_df.copy(), K, HFA, seed=sim_seed)
        sim_path = global_survivor_path(sim_games.copy(), win_metric=win_metric)

        all_season_results.append({
            'sim': sim+1,
            'sim_games': sim_games,
            'sim_path': sim_path})
    return all_season_results

In [10]:
many_season_simulation = simulate_full_season_multiple(teams_df, schedule_df, 20, 50, 100, 1, 'home_win_prob')

In [11]:
## example single season output
many_season_simulation[0]

{'sim': 1,
 'sim_games':      week home_team away_team  home_elo_before  away_elo_before  \
 0       1       PHI       DAL      1711.340000      1434.140000   
 1       1       LAC        KC      1624.070000      1626.310000   
 2       1       ATL        TB      1509.630000      1581.230000   
 3       1       CLE       CIN      1374.490000      1617.080000   
 4       1       IND       MIA      1459.330000      1482.060000   
 ..    ...       ...       ...              ...              ...   
 267    18       NYG       DAL      1348.342838      1461.049593   
 268    18       PHI       WAS      1737.672307      1583.519082   
 269    18       PIT       BAL      1566.018090      1757.074250   
 270    18        SF       SEA      1485.213707      1543.367708   
 271    18        TB       CAR      1514.865644      1319.272745   
 
      home_elo_after  away_elo_after  home_win_prob  away_win_prob  \
 0       1715.596875     1429.883125       0.787156       0.212844   
 1       1615.5623

In [12]:
## example of simulated season number
many_season_simulation[0]['sim']

1

In [13]:
## example of output of each game in simulated season
many_season_simulation[0]['sim_games']

Unnamed: 0,week,home_team,away_team,home_elo_before,away_elo_before,home_elo_after,away_elo_after,home_win_prob,away_win_prob,simulated_winner
0,1,PHI,DAL,1711.340000,1434.140000,1715.596875,1429.883125,0.787156,0.212844,PHI
1,1,LAC,KC,1624.070000,1626.310000,1615.562359,1634.817641,0.425382,0.574618,KC
2,1,ATL,TB,1509.630000,1581.230000,1522.993693,1567.866307,0.331815,0.668185,ATL
3,1,CLE,CIN,1374.490000,1617.080000,1371.359418,1620.210582,0.156529,0.843471,CIN
4,1,IND,MIA,1459.330000,1482.060000,1471.393287,1469.996713,0.396836,0.603164,IND
...,...,...,...,...,...,...,...,...,...,...
267,18,NYG,DAL,1348.342838,1461.049593,1342.711147,1466.681284,0.281585,0.718415,DAL
268,18,PHI,WAS,1737.672307,1583.519082,1744.761221,1576.430168,0.645554,0.354446,PHI
269,18,PIT,BAL,1566.018090,1757.074250,1562.022365,1761.069975,0.199786,0.800214,BAL
270,18,SF,SEA,1485.213707,1543.367708,1478.229829,1550.351586,0.349194,0.650806,SEA


In [14]:
## example of optimal survivor path for simulated season
many_season_simulation[0]['sim_path']

[None,
 'WAS',
 'CIN',
 'CHI',
 'BUF',
 'ARI',
 'DAL',
 'DEN',
 'PHI',
 'LA',
 'TB',
 'MIN',
 'DET',
 'LAC',
 'GB',
 'KC',
 'BAL',
 'TEN',
 'ATL']

In [15]:
def pull_path_from_sim(seasons_list):
    """
    Inputs:
        seasons_list: output of simulate_full_season_multiple

    Outputs: 
        dataframe: simulation number as index, each week is a column
            so 1 row for each simulation with the team selected for that week ina  column

    Methodology:
        Extract the best paths data.
        Then create dataframe of simulations to make for easier analysis
    """
    all_paths = []
    for i in range(len(seasons_list)):
        sim_path = seasons_list[i]['sim_path']
        all_paths.append(sim_path)
    paths_df = pd.DataFrame(all_paths)
    paths_df.index = [f'sim_{i+1}' for i in range(len(all_paths))]
    paths_df.columns = [f'week_{i}' for i in range(paths_df.shape[1])]
    paths_df.drop('week_0', axis=1, inplace=True)

    return paths_df

In [16]:
many_season_simulation_paths = pull_path_from_sim(many_season_simulation)
many_season_simulation_paths


Unnamed: 0,week_1,week_2,week_3,week_4,week_5,week_6,week_7,week_8,week_9,week_10,week_11,week_12,week_13,week_14,week_15,week_16,week_17,week_18
sim_1,WAS,CIN,CHI,BUF,ARI,DAL,DEN,PHI,LA,TB,MIN,DET,LAC,GB,KC,BAL,TEN,ATL
sim_2,WAS,MIA,SEA,LA,BUF,PIT,DEN,PHI,GB,TB,MIN,DET,NE,LV,KC,BAL,TEN,ATL
sim_3,WAS,CIN,SEA,HOU,BUF,PIT,DEN,PHI,GB,TB,ATL,DET,LAC,KC,CHI,BAL,NYJ,LA
sim_4,WAS,ARI,SEA,LAC,BUF,TB,DEN,PHI,LA,HOU,MIN,DET,MIA,KC,CHI,BAL,NYJ,CIN
sim_5,WAS,CIN,CHI,NE,BUF,PIT,DEN,PHI,GB,HOU,ARI,DET,MIA,TB,SEA,BAL,NYJ,ATL
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
sim_96,WAS,ARI,SEA,BUF,LA,PIT,DEN,PHI,GB,HOU,TEN,DET,LAC,TB,CHI,BAL,NYJ,CIN
sim_97,WAS,ARI,SEA,DET,BUF,PIT,LAC,PHI,GB,TB,LA,CIN,MIA,CLE,HOU,BAL,NYJ,ATL
sim_98,WAS,MIA,SEA,HOU,BUF,PIT,DEN,PHI,GB,TB,ATL,DET,LAC,KC,CHI,BAL,NYJ,CIN
sim_99,WAS,MIA,SEA,KC,BUF,PIT,DEN,PHI,LA,TB,ATL,DET,LAC,GB,SF,BAL,NYJ,MIN


## Running Simulations And Analysis

In [17]:
## week 1 picks over simulations
simulated_data_home_elo_before = simulate_full_season_multiple(teams_df.copy(), schedule_df.copy(), K, HFA, simulations=10000, seed = starting_seed, win_metric='home_elo_before')
simulated_paths_home_elo_before = pull_path_from_sim(simulated_data_home_elo_before)
simulated_paths_home_elo_before.value_counts('week_1')

week_1
DEN    1820
PHI    1778
WAS    1710
SEA     798
LAC     627
LA      576
IND     478
GB      439
ATL     419
JAX     346
NYJ     299
BUF     173
CHI     148
NE       67
NO       61
CLE      52
ARI      44
PIT      41
HOU      38
MIN      31
MIA      14
CIN      13
KC       11
TB       10
SF        4
DAL       2
LV        1
Name: count, dtype: int64

In [18]:
## exmaple simulation
simulated_single_path_home_elo_before = simulated_paths_home_elo_before.iloc[[0]]
simulated_single_path_home_elo_before

Unnamed: 0,week_1,week_2,week_3,week_4,week_5,week_6,week_7,week_8,week_9,week_10,week_11,week_12,week_13,week_14,week_15,week_16,week_17,week_18
sim_1,DEN,GB,LAC,BUF,BAL,LA,CHI,PIT,CIN,TB,MIN,HOU,DET,CLE,KC,SEA,WAS,PHI


In [19]:
## week 1 picks over simulations
simulated_data_home_win_prob = simulate_full_season_multiple(teams_df.copy(), schedule_df.copy(), K, HFA, simulations=10000, seed = 1, win_metric='home_win_prob')
simulated_paths_home_win_prob = pull_path_from_sim(simulated_data_home_win_prob)
simulated_paths_home_win_prob.value_counts('week_1')

week_1
WAS    7301
JAX    1003
PHI     459
DEN     240
IND     195
LA      134
LAC     118
ARI     101
SEA      98
NE       95
NYJ      48
ATL      36
NO       33
LV       27
CHI      27
GB       25
CLE      25
MIN      16
CIN      15
MIA       2
HOU       1
KC        1
Name: count, dtype: int64

In [20]:
## exmaple simulation
simulated_sinlge_path_home_win_prob = simulated_paths_home_win_prob.iloc[[0]]
simulated_sinlge_path_home_win_prob

Unnamed: 0,week_1,week_2,week_3,week_4,week_5,week_6,week_7,week_8,week_9,week_10,week_11,week_12,week_13,week_14,week_15,week_16,week_17,week_18
sim_1,WAS,CIN,CHI,BUF,ARI,DAL,DEN,PHI,LA,TB,MIN,DET,LAC,GB,KC,BAL,TEN,ATL


In [22]:
## week 1 picks over simulations
simulated_data_home_elo_difference = simulate_full_season_multiple(teams_df.copy(), schedule_df.copy(), K, HFA, simulations=10000, seed = 1, win_metric='home_elo_difference')
simulated_paths_home_elo_difference = pull_path_from_sim(simulated_data_home_elo_difference)
simulated_paths_home_elo_difference.value_counts('week_1')

week_1
WAS    7301
JAX    1003
PHI     459
DEN     240
IND     195
LA      134
LAC     118
ARI     101
SEA      98
NE       95
NYJ      48
ATL      36
NO       33
LV       27
CHI      27
GB       25
CLE      25
MIN      16
CIN      15
MIA       2
HOU       1
KC        1
Name: count, dtype: int64

In [23]:
## exmaple simulation
simulated_sinlge_path_home_elo_difference = simulated_paths_home_elo_difference.iloc[[0]]
simulated_sinlge_path_home_elo_difference

Unnamed: 0,week_1,week_2,week_3,week_4,week_5,week_6,week_7,week_8,week_9,week_10,week_11,week_12,week_13,week_14,week_15,week_16,week_17,week_18
sim_1,WAS,CIN,CHI,BUF,ARI,DAL,DEN,PHI,LA,TB,MIN,DET,LAC,GB,KC,BAL,TEN,ATL
