In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
from bs4 import BeautifulSoup
import requests
from scipy.stats import zscore

class Elo:
    WIN = 1.0
    DRAW = 0.5
    LOSS = 0.0
    K_FACTOR = 10
    INITIAL = 1200
    BETA = 200

    def __init__(self, k_factor=K_FACTOR, initial=INITIAL, beta=BETA):
        self.k_factor = k_factor
        self.initial = initial
        self.beta = beta

    def expect(self, rating, other_rating):
        """The 'E' function in Elo. Calculates the expected score of the first rating by the second rating."""
        diff = float(other_rating) - float(rating)
        f_factor = 2 * self.beta  # rating disparity
        return 1.0 / (1 + 10 ** (diff / f_factor))

    def adjust(self, rating, series):
        """Calculates the adjustment value based on a series of matches."""
        return sum(score - self.expect(rating, other_rating) for score, other_rating in series)

    def rate(self, rating, series):
        """Calculates a new rating based on the match results."""
        new_rating = float(rating) + self.k_factor * self.adjust(rating, series)
        return new_rating

    def rate_1vs1(self, rating1, rating2, drawn=False):
        """Calculates the new ratings for a one-on-one match."""
        scores = (self.DRAW, self.DRAW) if drawn else (self.WIN, self.LOSS)
        new_rating1 = self.rate(rating1, [(scores[0], rating2)])
        new_rating2 = self.rate(rating2, [(scores[1], rating1)])
        return new_rating1, new_rating2

elo_system = Elo()

In [6]:
# Load the CSV data
nfl = pd.read_csv("spreadspoke_scores.csv", parse_dates=['schedule_date'], dayfirst=True)
nfl = nfl.dropna(subset=['over_under_line'])
nfl['over_under_line'] = pd.to_numeric(nfl['over_under_line'], errors='coerce')

# Load team data
teams = pd.read_csv("nfl_teams.csv")
team_names = teams['team_name'].tolist()
team_ids = teams['team_id'].tolist()

# Assign team IDs to home and away teams
nfl['team_home_id'] = nfl['team_home'].apply(lambda x: teams.loc[teams['team_name'] == x, 'team_id'].values[0] if x in team_names else np.nan)
nfl['team_away_id'] = nfl['team_away'].apply(lambda x: teams.loc[teams['team_name'] == x, 'team_id'].values[0] if x in team_names else np.nan)

# Create a unique game ID
nfl['game_id'] = nfl.apply(lambda row: f"{row['schedule_date'].strftime('%Y%m%d')}{row['team_away_id']}{row['team_home_id']}", axis=1)

# Load stadium data
stadiums = pd.read_csv("nfl_stadiums.csv")
stadium_names = stadiums['stadium_name'].tolist()
stadium_types = stadiums['stadium_type'].tolist()

# Assign stadium types
nfl['stadium_type'] = nfl['stadium'].apply(lambda x: stadium_types[stadium_names.index(x)] if x in stadium_names else np.nan)

# Add columns for first and last week of the season
nfl['schedule_week_1'] = nfl['schedule_week'] == 1
nfl['schedule_week_last'] = nfl.apply(lambda row: row['schedule_week'] == (18 if row['schedule_season'] in [1993, 1999] else 17), axis=1)

# Add day of week and month
nfl['schedule_day'] = nfl['schedule_date'].dt.day_name()
nfl['schedule_month'] = nfl['schedule_date'].dt.month_name()
nfl['schedule_sunday'] = nfl['schedule_day'] == 'Sunday'

# Add divisional information
nfl['team_home_division'] = nfl['team_home'].apply(lambda x: teams.loc[teams['team_name'] == x, 'team_division'].values[0] if x in team_names else np.nan)
nfl['team_away_division'] = nfl['team_away'].apply(lambda x: teams.loc[teams['team_name'] == x, 'team_division'].values[0] if x in team_names else np.nan)

nfl['division_matchup'] = nfl.apply(lambda row: row['team_home_division'] == row['team_away_division'], axis=1)

# Spread and over/under analysis
nfl['team_home_favorite'] = nfl['team_favorite_id'] == nfl['team_home_id']
nfl['spread_home'] = np.where(nfl['team_home_favorite'], nfl['spread_favorite'], -nfl['spread_favorite'])
nfl['spread_away'] = -nfl['spread_home']

nfl['spread_type'] = nfl['spread_home'].apply(lambda x: 'Pick' if x == 0 else ('Home Underdog' if x > 0 else 'Home Favorite'))
nfl['spread_outlier'] = nfl['spread_favorite'].apply(lambda x: '2TD+' if abs(x) > 14.1 else ('1TD1FG+' if abs(x) > 10.1 else ('1TD+' if abs(x) > 7.1 else 'No Outlier')))

nfl['over_under_outlier'] = nfl['over_under_line'].apply(lambda x: 'Under 2sd' if x < 33 else ('Under 1sd' if x < 37 else ('Over 2sd' if x > 50 else ('Over 1sd' if x > 46 else 'No Outlier'))))

# Elo calculation
elo_ratings = {}
initial_elo = 1000

def update_elo(winner, loser, tie=False):
    if tie:
        return elo_system.rate_1vs1(elo_ratings[winner], elo_ratings[loser], drawn=True)
    else:
        return elo_system.rate_1vs1(elo_ratings[winner], elo_ratings[loser])

for index, row in nfl.iterrows():
    home_team = row['team_home_id']
    away_team = row['team_away_id']
    
    # Initialize elo if not already set
    if home_team not in elo_ratings:
        elo_ratings[home_team] = initial_elo
    if away_team not in elo_ratings:
        elo_ratings[away_team] = initial_elo
    
    if row['score_home'] > row['score_away']:
        winner, loser = home_team, away_team
    elif row['score_away'] > row['score_home']:
        winner, loser = away_team, home_team
    else:
        winner, loser = home_team, away_team
        tie = True
    
    elo_ratings[winner], elo_ratings[loser] = update_elo(winner, loser, tie=False)

nfl['team_home_elo_pre'] = nfl['team_home_id'].apply(lambda x: elo_ratings.get(x, initial_elo))
nfl['team_away_elo_pre'] = nfl['team_away_id'].apply(lambda x: elo_ratings.get(x, initial_elo))

# Calculate pre-game elo difference
nfl['elo_pre_difference'] = nfl['team_home_elo_pre'] - nfl['team_away_elo_pre']

# Probability of home team win
def win_probability(home_elo, away_elo):
    return 1 / (1 + 10 ** ((away_elo - home_elo + 55) / 400))

nfl['team_home_win_prob'] = nfl.apply(lambda row: win_probability(row['team_home_elo_pre'], row['team_away_elo_pre']), axis=1)
nfl['team_away_win_prob'] = 1 - nfl['team_home_win_prob']

# Game outcomes
nfl['team_home_result'] = nfl.apply(lambda row: 'Win' if row['score_home'] > row['score_away'] else ('Tie' if row['score_home'] == row['score_away'] else 'Loss'), axis=1)
nfl['team_away_result'] = nfl.apply(lambda row: 'Win' if row['score_away'] > row['score_home'] else ('Tie' if row['score_away'] == row['score_home'] else 'Loss'), axis=1)

# Calculate rolling averages and statistics using pandas
nfl = nfl.sort_values(['team_home_id', 'schedule_date'])

print(nfl.head())

nfl['win_pct'] = nfl.groupby('team_home_id')['team_home_result'].apply(lambda x: x.eq('Win').rolling(window=16, min_periods=1).mean())
nfl['cover_pct'] = nfl.groupby('team_home_id')['spread_home_cover_count'].rolling(window=16, min_periods=1).mean().reset_index(0, drop=True)
nfl['over_pct'] = nfl.groupby('team_home_id')['over_under_result_count'].rolling(window=16, min_periods=1).mean().reset_index(0, drop=True)

nfl['score_avg_pts_for'] = nfl.groupby('team_home_id')['score_home'].rolling(window=16, min_periods=1).mean().reset_index(0, drop=True)
nfl['score_avg_pts_against'] = nfl.groupby('team_home_id')['score_away'].rolling(window=16, min_periods=1).mean().reset_index(0, drop=True)

# Write to CSV
nfl.to_csv("nfl_calculated.csv", index=False)

# Create summary for teams
team_summary = nfl.groupby(['team_home_id', 'schedule_season']).agg({
    'team_home_result': ['sum'],
    'spread_home_cover_count': ['mean'],
    'over_under_result_count': ['mean'],
    'score_avg_pts_for': ['mean'],
    'score_avg_pts_against': ['mean']
}).reset_index()

team_summary.columns = ['Team', 'Season', 'Wins', 'Cover %', 'Over %', 'Off Pts/G', 'Def Pts/G']
team_summary.to_csv("team_summary.csv", index=False)

  nfl = pd.read_csv("spreadspoke_scores.csv", parse_dates=['schedule_date'], dayfirst=True)


     schedule_date  schedule_season schedule_week  schedule_playoff  \
2512    1979-02-09             1979             1             False   
2693    1979-02-12             1979            14             False   
2639    1979-04-11             1979            10             False   
2708    1979-09-12             1979            15             False   
2541    1979-09-16             1979             3             False   

                team_home  score_home  score_away            team_away  \
2512  St. Louis Cardinals        21.0        22.0       Dallas Cowboys   
2693  St. Louis Cardinals        13.0        10.0  San Francisco 49ers   
2639  St. Louis Cardinals        37.0         7.0    Minnesota Vikings   
2708  St. Louis Cardinals        29.0        20.0      New York Giants   
2541  St. Louis Cardinals        21.0        24.0  Pittsburgh Steelers   

     team_favorite_id  spread_favorite  ...    spread_type spread_outlier  \
2512              DAL             -4.0  ...  Home U

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  nfl['win_pct'] = nfl.groupby('team_home_id')['team_home_result'].apply(lambda x: x.eq('Win').rolling(window=16, min_periods=1).mean())


KeyError: 'Column not found: spread_home_cover_count'