In [2]:
import pandas as pd
import networkx as nx
from collections import defaultdict

In [119]:
def create_graph(game_data, week_num):
    """Build a directed loser -> winner graph weighted by average adjusted margin.

    Every game contributes ``margin_victory * week / week_num``, doubled when
    ``home_team_winner`` is falsy, so games from weeks before ``week_num``
    count proportionally less. Repeated (loser, winner) pairings are averaged
    into a single edge.

    Args:
        game_data: DataFrame with 'winner', 'loser', 'week', 'margin_victory'
            and 'home_team_winner' columns, one row per game.
        week_num: Week number used as the denominator of the linear decay.

    Returns:
        A networkx.DiGraph whose edges run from loser to winner and store the
        average adjusted margin in the 'weight' attribute.
    """
    # (loser, winner) -> [running sum of adjusted margins, number of games]
    pairing_stats = defaultdict(lambda: [0.0, 0])

    for _, game in game_data.iterrows():
        winner, loser = game['winner'], game['loser']

        # Away wins are rewarded with a doubled margin
        if game['home_team_winner']:
            away_bonus = 1
        else:
            away_bonus = 2

        # Linear decay: the older the game relative to week_num, the smaller its margin
        adjusted_margin = game['margin_victory'] * game['week'] / week_num * away_bonus

        stats = pairing_stats[(loser, winner)]
        stats[0] += adjusted_margin
        stats[1] += 1

    # One edge per pairing, weighted by the mean adjusted margin
    graph = nx.DiGraph()
    graph.add_weighted_edges_from(
        (loser, winner, total / games)
        for (loser, winner), (total, games) in pairing_stats.items()
    )
    return graph

In [53]:
def calculate_rankings(graph):
    """Rank the nodes of a weighted graph with PageRank.

    Args:
        graph: networkx graph whose edges carry a 'weight' attribute.

    Returns:
        DataFrame with one row per node: 'Team' holds the node and 'Ranking'
        its PageRank score.
    """
    # Edge weights steer how much rank flows along each edge
    scores_by_team = nx.pagerank(graph, weight='weight')
    return pd.DataFrame(scores_by_team.items(), columns=['Team', 'Ranking'])

In [69]:
# Load the complete dataset of NFL game results (path is relative to the working directory).
# Later cells read the 'season', 'week', 'winner', 'loser', 'margin_victory',
# 'home_team_winner', 'Home Team Abbr' and 'Away Team Abbr' columns from it.
game_results = pd.read_csv('../data/nfl_games.csv')

In [120]:
# One rankings DataFrame is collected per (season, week) snapshot
ranking_dfs = []

for season in game_results['season'].unique():
    # Only the games of the season being processed
    season_data = game_results[game_results['season'] == season]
    last_week = season_data['week'].max()

    # Snapshots start at week 2 and always use every game played so far
    for week in range(2, last_week + 1):
        games_so_far = season_data[season_data['week'] <= week]

        # Build the graph for this snapshot and rank its teams
        snapshot_graph = create_graph(games_so_far, week)
        snapshot = calculate_rankings(snapshot_graph).assign(
            # Rankings are rounded to 5 decimal places
            Ranking=lambda scores: scores['Ranking'].round(5),
            # Zero-padded week so the labels sort chronologically as text
            SeasonWeek=f"{season}_W{week:02d}",
            Season=season,
            Week=week,
        )

        ranking_dfs.append(snapshot)

# Long format: one row per team per snapshot
final_rankings_df = pd.concat(ranking_dfs)

# Wide format: one row per team, one column per SeasonWeek label
pivot_df = final_rankings_df.pivot(index='Team', columns='SeasonWeek', values='Ranking')

# Preview both tables
print(final_rankings_df.head())
print(pivot_df.head())


  Team  Ranking SeasonWeek  Season  Week
0  PHI  0.03934   2024_W02    2024     2
1  ATL  0.04988   2024_W02    2024     2
2  CHI  0.01924   2024_W02    2024     2
3  HOU  0.03630   2024_W02    2024     2
4  DEN  0.01645   2024_W02    2024     2
SeasonWeek  2006_W02  2006_W03  2006_W04  2006_W05  2006_W06  2006_W07  \
Team                                                                     
ARI          0.05767   0.01694   0.00924   0.00897   0.00870   0.00762   
ATL          0.03417   0.02544   0.05557   0.05373   0.05325   0.03257   
BAL          0.03486   0.02559   0.02524   0.02340   0.04358   0.02593   
BUF          0.02926   0.02200   0.02135   0.01775   0.01950   0.03467   
CAR          0.01756   0.01424   0.05788   0.05823   0.05609   0.03767   

SeasonWeek  2006_W08  2006_W09  2006_W10  2006_W11  ...  2023_W11  2023_W12  \
Team                                                ...                       
ARI          0.00841   0.00782   0.00707   0.00591  ...   0.00798   0.00792  

In [14]:
# Write both ranking tables to CSV: the Team x SeasonWeek pivot table first,
# then the long-format rankings (one row per team per season/week).
pivot_df.to_csv('../data/nfl_rankings_pivot.csv')
final_rankings_df.to_csv('../data/nfl_rankings.csv')

In [None]:
# Show the column dtypes of the rankings table and of the game results table
print(final_rankings_df.dtypes)
print(game_results.dtypes)

In [31]:
def get_rank(team, season, week, rankings_df):
    """Look up a team's ranking for one season and week.

    Args:
        team: Value to match against the 'Team' column.
        season: Value to match against the 'Season' column.
        week: Value to match against the 'Week' column.
        rankings_df: DataFrame with 'Team', 'Season', 'Week' and 'Ranking' columns.

    Returns:
        The 'Ranking' value of the first matching row, or None when ``week``
        is 1 or no row matches.
    """
    # Week 1 is never looked up: there is no ranking data for it
    if week == 1:
        return None

    is_match = (
        (rankings_df['Team'] == team)
        & (rankings_df['Season'] == season)
        & (rankings_df['Week'] == week)
    )
    hits = rankings_df[is_match]

    # No ranking recorded for this team/season/week combination
    if hits.empty:
        return None

    return hits['Ranking'].values[0]

def predict_winner(team_1, team_2, rank_1, rank_2):
    """Pick the team with the strictly higher ranking.

    Args:
        team_1: First team.
        team_2: Second team.
        rank_1: Ranking of ``team_1``, or None when unavailable.
        rank_2: Ranking of ``team_2``, or None when unavailable.

    Returns:
        ``team_1`` when ``rank_1 > rank_2``, otherwise ``team_2`` (equal
        rankings therefore go to ``team_2``); None when either ranking is None.
    """
    both_ranked = rank_1 is not None and rank_2 is not None
    if not both_ranked:
        # No prediction is possible without both rankings
        return None

    if rank_1 > rank_2:
        return team_1
    return team_2

In [125]:
# Evaluate the rankings: for every game played after MIN_PREDICTION_WEEK, predict
# the winner from the previous week's rankings and compare with the actual winner.
MIN_PREDICTION_WEEK = 4  # games in this week or earlier are never predicted
MIN_RANK_DIFF = 0.05     # ranking gaps smaller than this are skipped as too close to call

correct_predictions = 0
total_games = 0

# Per-game result columns; they stay None for games that are not evaluated
game_results['rank_1'] = None  # Prior-week ranking of the home team
game_results['rank_2'] = None  # Prior-week ranking of the away team
game_results['rank_diff'] = None  # rank_1 - rank_2, rounded to 5 decimals
game_results['predicted_winner'] = None  # Predicted winner
game_results['correct'] = None  # Whether the prediction matched the actual winner

for index, row in game_results.iterrows():
    if row['week'] > MIN_PREDICTION_WEEK:
        # Look up the rankings of the week before the game
        rank_1 = get_rank(row['Home Team Abbr'], row['season'], row['week'] - 1, final_rankings_df)
        rank_2 = get_rank(row['Away Team Abbr'], row['season'], row['week'] - 1, final_rankings_df)

        # Store the rankings in the DataFrame
        game_results.at[index, 'rank_1'] = rank_1
        game_results.at[index, 'rank_2'] = rank_2

        if rank_1 is None or rank_2 is None:
            continue  # No prediction without both rankings; rank_diff stays None

        rank_diff = round(rank_1 - rank_2, 5)
        game_results.at[index, 'rank_diff'] = rank_diff
        if abs(rank_diff) < MIN_RANK_DIFF:
            continue  # Ranking gap too small to make a prediction

        predicted_winner = predict_winner(row['Home Team Abbr'], row['Away Team Abbr'], rank_1, rank_2)
        game_results.at[index, 'predicted_winner'] = predicted_winner

        if predicted_winner is None:
            continue  # Skip this game if no prediction could be made

        actual_winner = row['winner']
        # Missing values read from CSV are NaN, not None, so test with pd.isna;
        # otherwise a game without a recorded winner is scored as a wrong prediction.
        if pd.isna(actual_winner):
            continue  # Skip this game if there is no actual winner to compare with

        is_correct = bool(predicted_winner == actual_winner)
        game_results.at[index, 'correct'] = is_correct
        if is_correct:
            correct_predictions += 1
        total_games += 1

# Avoid ZeroDivisionError when no game qualified for a prediction
accuracy = correct_predictions / total_games if total_games else float('nan')
print(f"Prediction accuracy: {accuracy:.2%}")
print(f"Games predicted: {total_games}")

Prediction accuracy: 70.29%
Games predicted: 441


In [39]:
# Write the game results to CSV. Once the prediction cell has run, this includes the
# 'rank_1', 'rank_2', 'rank_diff', 'predicted_winner' and 'correct' columns it adds.
game_results.to_csv('../data/nfl_games_predictions.csv')