<a href="https://colab.research.google.com/github/natelove02/NFL-Model/blob/main/NFL_Betting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from datetime import datetime

def convert_time_to_seconds(time_str):
    """Convert a ``MM:SS`` string to a total number of seconds.

    Args:
        time_str: Text of the form ``"MM:SS"`` (two integer fields separated
            by a single colon).

    Returns:
        ``minutes * 60 + seconds`` as an int, or ``0`` when the value cannot
        be parsed (not a string, wrong number of fields, non-integer fields).
    """
    try:
        minutes, seconds = map(int, time_str.split(':'))
    except (AttributeError, TypeError, ValueError):
        # AttributeError/TypeError: value is not a str (e.g. NaN or None).
        # ValueError: non-integer field or not exactly two fields.
        # Anything else (KeyboardInterrupt, SystemExit, ...) must propagate.
        return 0
    return minutes * 60 + seconds

def convert_game_time_to_hours(time_str):
    """Convert a 12-hour clock string to decimal hours on a 24-hour scale.

    Args:
        time_str: Text such as ``"1:00 PM"`` or ``"8:20 pm"``; surrounding
            whitespace and letter case are ignored.

    Returns:
        ``hour + minute / 60`` as a float (e.g. ``13.0`` for ``"1:00 PM"``),
        or ``0`` when the value cannot be parsed.
    """
    try:
        normalized = time_str.strip().upper()
        kickoff = pd.to_datetime(normalized, format='%I:%M %p').time()
    except (AttributeError, TypeError, ValueError):
        # AttributeError/TypeError: value is not a str (e.g. NaN or None).
        # ValueError: text does not match the '%I:%M %p' format.
        # Anything else (KeyboardInterrupt, SystemExit, ...) must propagate.
        return 0
    return kickoff.hour + kickoff.minute / 60.0

class TeamStatsProcessor:
    """Load historical game rows from a CSV and aggregate per-team averages.

    The CSV is expected to provide the columns this class reads: ``date``,
    ``time_et``, ``away``, ``home``, ``score_away``, ``score_home``,
    ``possession_away``/``possession_home`` (``MM:SS`` text) and, for each of
    ``yards``, ``pass_yards``, ``rush_yards``, ``first_downs``,
    ``third_down_comp`` and ``third_down_att``, a ``<stat>_away`` and a
    ``<stat>_home`` column.

    Attributes set on the instance:
        data: The (optionally date-filtered) game DataFrame, with added
            ``game_hour`` and ``time_bracket`` columns and possession
            columns converted to seconds.
        team_stats: ``{team: {stat_name: value}}`` filled by
            :meth:`calculate_team_stats`.
    """

    # Kick-off brackets in decimal hours. Each bracket is closed on the left
    # and open on the right: [0, 14), [14, 17), [17, 20), [20, 24). The same
    # definition is used for the training rows and for prediction-time
    # lookups so a given hour always maps to the same bracket.
    TIME_BRACKET_BINS = [0, 14, 17, 20, 24]
    TIME_BRACKET_LABELS = ['morning', 'afternoon', 'evening', 'night']

    def __init__(self, csv_path, years_of_data=3):
        """Read the CSV, derive time columns, filter by date, build stats.

        Args:
            csv_path: Path handed to ``pandas.read_csv``.
            years_of_data: Keep only games within this many years of the
                most recent ``date`` in the file. A falsy value (``None``,
                ``0``) keeps every row.
        """
        self.data = pd.read_csv(csv_path)

        # Convert date and time columns
        self.data['date'] = pd.to_datetime(self.data['date'])
        self.data['game_hour'] = self.data['time_et'].apply(convert_game_time_to_hours)

        # Create time brackets. right=False makes the intervals left-closed,
        # matching the `<` comparisons used when looking a bracket up for a
        # single game (and keeping hour 0 inside 'morning' instead of NaN).
        self.data['time_bracket'] = pd.cut(
            self.data['game_hour'],
            bins=self.TIME_BRACKET_BINS,
            labels=self.TIME_BRACKET_LABELS,
            right=False,
        )

        # Filter for last N years if specified
        if years_of_data:
            latest_date = self.data['date'].max()
            cutoff_date = latest_date - pd.DateOffset(years=years_of_data)
            # .copy() detaches the filtered rows from the original frame so
            # the column assignments in calculate_team_stats do not operate
            # on a slice (avoids SettingWithCopyWarning).
            self.data = self.data[self.data['date'] >= cutoff_date].copy()
            print(f"Using data from {cutoff_date.strftime('%Y-%m-%d')} to {latest_date.strftime('%Y-%m-%d')}")

        print(f"Total games in dataset: {len(self.data)}")
        self.team_stats = {}
        self.calculate_team_stats()

    @classmethod
    def _bracket_for_hour(cls, game_hour):
        """Return the time-bracket label for a decimal kick-off hour."""
        # Compare against the interior bin edges (14, 17, 20); anything at or
        # beyond the last interior edge falls in the final bracket.
        for upper_edge, label in zip(cls.TIME_BRACKET_BINS[1:-1], cls.TIME_BRACKET_LABELS):
            if game_hour < upper_edge:
                return label
        return cls.TIME_BRACKET_LABELS[-1]

    def calculate_team_stats(self):
        """Calculate average statistics for each team from historical data.

        Populates ``self.team_stats[team]`` with:
            * ``<bracket>_win_pct`` for each time bracket (0.5 when the team
              has no games in that bracket),
            * per-game means of the offensive stats,
            * ``<stat>_allowed`` per-game means of the opponent's stats,
            * ``win_pct`` (0.5 when the team has no games).

        A game counts as a win only when the team's score is strictly
        greater than the opponent's, so ties count as non-wins for both
        sides.
        """
        # Convert possession time to seconds. Skip columns that are already
        # numeric so that calling this method again does not re-parse the
        # converted integers (which would turn every value into 0).
        for side in ('away', 'home'):
            column = f'possession_{side}'
            if not pd.api.types.is_numeric_dtype(self.data[column]):
                self.data[column] = self.data[column].apply(convert_time_to_seconds)

        offensive_stats = [
            'yards', 'pass_yards', 'rush_yards', 'first_downs',
            'third_down_comp', 'third_down_att', 'possession'
        ]

        teams = pd.concat([self.data['away'], self.data['home']]).unique()
        print(f"\nTeams in dataset: {len(teams)}")

        for team in teams:
            self.team_stats[team] = {}

            away_games = self.data[self.data['away'] == team]
            home_games = self.data[self.data['home'] == team]

            # Calculate time-based performance
            for time_bracket in self.TIME_BRACKET_LABELS:
                away_time_games = away_games[away_games['time_bracket'] == time_bracket]
                home_time_games = home_games[home_games['time_bracket'] == time_bracket]

                total_time_games = len(away_time_games) + len(home_time_games)
                if total_time_games > 0:
                    away_time_wins = away_time_games[away_time_games['score_away'] > away_time_games['score_home']].shape[0]
                    home_time_wins = home_time_games[home_time_games['score_home'] > home_time_games['score_away']].shape[0]
                    self.team_stats[team][f'{time_bracket}_win_pct'] = (away_time_wins + home_time_wins) / total_time_games
                else:
                    self.team_stats[team][f'{time_bracket}_win_pct'] = 0.5

            # Offensive stats: pool the team's away and home rows and average
            # once, so every game carries equal weight and a team with games
            # on only one side does not produce NaN.
            for stat in offensive_stats:
                own_values = pd.concat([away_games[f'{stat}_away'], home_games[f'{stat}_home']])
                self.team_stats[team][stat] = own_values.mean()

            # Defensive stats: the opponent's numbers in the same games
            # (home column when this team was away, and vice versa).
            for stat in ['yards', 'pass_yards', 'rush_yards', 'first_downs']:
                opponent_values = pd.concat([away_games[f'{stat}_home'], home_games[f'{stat}_away']])
                self.team_stats[team][f'{stat}_allowed'] = opponent_values.mean()

            # Calculate winning percentage
            away_wins = away_games[away_games['score_away'] > away_games['score_home']].shape[0]
            home_wins = home_games[home_games['score_home'] > home_games['score_away']].shape[0]
            total_games = away_games.shape[0] + home_games.shape[0]

            self.team_stats[team]['win_pct'] = (away_wins + home_wins) / total_games if total_games > 0 else 0.5

    def get_game_features(self, away_team, home_team, game_time):
        """Get features for a specific matchup including game time.

        Args:
            away_team: Team name as it appears in the ``away``/``home``
                columns of the data.
            home_team: Same, for the home side.
            game_time: 12-hour clock text such as ``"1:00 PM"``. Text that
                cannot be parsed is treated as hour 0 (bracket 'morning').

        Returns:
            A ``pandas.Series`` of 17 named features built from the two
            teams' aggregated stats. Possession is expressed in minutes.

        Raises:
            ValueError: If either team has no entry in ``self.team_stats``.
        """
        if away_team not in self.team_stats or home_team not in self.team_stats:
            raise ValueError("One or both teams not found in historical data")

        # Convert game time to decimal hours and determine time bracket
        game_hour = convert_game_time_to_hours(game_time)
        time_bracket = self._bracket_for_hour(game_hour)

        away_stats = self.team_stats[away_team]
        home_stats = self.team_stats[home_team]

        feature_dict = {
            'game_hour': game_hour,
            'away_time_win_pct': away_stats[f'{time_bracket}_win_pct'],
            'home_time_win_pct': home_stats[f'{time_bracket}_win_pct'],
            'away_yards_per_game': away_stats['yards'],
            'home_yards_per_game': home_stats['yards'],
            'away_pass_yards_per_game': away_stats['pass_yards'],
            'home_pass_yards_per_game': home_stats['pass_yards'],
            'away_rush_yards_per_game': away_stats['rush_yards'],
            'home_rush_yards_per_game': home_stats['rush_yards'],
            'away_def_yards_per_game': away_stats['yards_allowed'],
            'home_def_yards_per_game': home_stats['yards_allowed'],
            'away_win_pct': away_stats['win_pct'],
            'home_win_pct': home_stats['win_pct'],
            # Guard against a zero (or NaN) attempt average: fall back to 0.
            'away_third_down_pct': away_stats['third_down_comp'] / away_stats['third_down_att'] if away_stats['third_down_att'] > 0 else 0,
            'home_third_down_pct': home_stats['third_down_comp'] / home_stats['third_down_att'] if home_stats['third_down_att'] > 0 else 0,
            # Stored in seconds; expose as minutes.
            'away_possession': away_stats['possession'] / 60,
            'home_possession': home_stats['possession'] / 60,
        }

        return pd.Series(feature_dict)

class NFLPredictor(nn.Module):
    """Feed-forward binary classifier: input -> 64 -> 32 -> 1 with sigmoid.

    Both hidden layers use ReLU followed by dropout (p=0.2); the single
    output unit is squashed to the (0, 1) range by a sigmoid.
    """

    def __init__(self, input_size):
        """Build the network for feature vectors of length ``input_size``."""
        super().__init__()
        wide, narrow = 64, 32
        # Submodules are registered in this exact order and under these
        # names so parameter initialisation and state_dict keys are stable.
        self.layer1 = nn.Linear(input_size, wide)
        self.layer2 = nn.Linear(wide, narrow)
        self.layer3 = nn.Linear(narrow, 1)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.2)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return one sigmoid-activated output per input row."""
        hidden = x
        for linear in (self.layer1, self.layer2):
            hidden = self.dropout(self.relu(linear(hidden)))
        return self.sigmoid(self.layer3(hidden))

def prepare_training_data(stats_processor):
    """Build a feature matrix and label vector from every stored game.

    For each row of ``stats_processor.data`` the matchup features are
    requested via ``get_game_features``; rows for which that raises
    ``ValueError`` are skipped. The label is 1 when the away score is
    strictly greater than the home score, otherwise 0.

    Returns:
        ``(X, y)`` where ``X`` is a 2-D NumPy array with one row per kept
        game and ``y`` is a 1-D NumPy array of 0/1 labels.
    """
    feature_rows = []
    outcomes = []

    for _, row in stats_processor.data.iterrows():
        try:
            matchup = stats_processor.get_game_features(
                row['away'], row['home'], row['time_et']
            )
            away_won = row['score_away'] > row['score_home']
        except ValueError:
            # Skip games whose features could not be produced.
            continue

        feature_rows.append(matchup)
        outcomes.append(1 if away_won else 0)

    feature_matrix = pd.DataFrame(feature_rows).values
    return feature_matrix, np.array(outcomes)

def train_model(X, y, epochs=1000, learning_rate=0.001):
    """Fit an ``NFLPredictor`` on ``X``/``y`` and report held-out accuracy.

    The data is split 80/20 (``random_state=42``), the features are
    standardised with a scaler fitted on the training part only, and the
    network is trained full-batch with Adam and binary cross-entropy.
    The loss is printed every 10th epoch and the test accuracy once at
    the end.

    Args:
        X: 2-D array-like of features, one row per game.
        y: 1-D array-like of 0/1 labels.
        epochs: Number of full-batch optimisation steps.
        learning_rate: Adam learning rate.

    Returns:
        ``(model, scaler)`` - the trained network and the fitted
        ``StandardScaler`` needed to transform new inputs.
    """
    features = torch.FloatTensor(X)
    targets = torch.FloatTensor(y).reshape(-1, 1)

    X_train, X_test, y_train, y_test = train_test_split(
        features, targets, test_size=0.2, random_state=42
    )

    # Fit the scaler on the training rows only, then reuse it for the test rows.
    scaler = StandardScaler()
    X_train = torch.FloatTensor(scaler.fit_transform(X_train))
    X_test = torch.FloatTensor(scaler.transform(X_test))

    model = NFLPredictor(X_train.shape[1])
    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Nothing inside the loop leaves training mode, so enable it once.
    model.train()
    for step in range(1, epochs + 1):
        optimizer.zero_grad()
        loss = criterion(model(X_train), y_train)
        loss.backward()
        optimizer.step()

        if step % 10 == 0:
            print(f'Epoch [{step}/{epochs}], Loss: {loss.item():.4f}')

    # Evaluate with dropout disabled and without tracking gradients.
    model.eval()
    with torch.no_grad():
        predicted_labels = (model(X_test) >= 0.5).float()
        accuracy = predicted_labels.eq(y_test).float().mean()
        print(f'Test Accuracy: {accuracy.item():.4f}')

    return model, scaler

def predict_game(model, scaler, stats_processor, away_team, home_team, game_time):
    """Predict the outcome of a single matchup.

    The matchup features come from ``stats_processor.get_game_features``,
    are transformed with ``scaler`` and fed to ``model`` in eval mode. The
    model output is interpreted as the away team's win probability.

    Returns:
        A dict with keys ``winner``, ``win_probability`` (the predicted
        winner's probability), ``away_win_probability`` and
        ``home_win_probability``. The away team is chosen when its
        probability is at least 0.5.
    """
    matchup = stats_processor.get_game_features(away_team, home_team, game_time)
    # The scaler expects a 2-D batch, hence the single-element list.
    batch = torch.FloatTensor(scaler.transform([matchup]))

    model.eval()
    with torch.no_grad():
        away_probability = model(batch).item()

    home_probability = 1 - away_probability
    if away_probability >= 0.5:
        favourite, confidence = away_team, away_probability
    else:
        favourite, confidence = home_team, home_probability

    return {
        'winner': favourite,
        'win_probability': confidence,
        'away_win_probability': away_probability,
        'home_win_probability': home_probability,
    }

# Example usage: train on the CSV, then answer matchup queries interactively.
if __name__ == "__main__":
    # Initialize
    csv_path = "NFLstats.csv"
    # Single source of truth for the history window so the printed label
    # below always matches the data the stats were computed from.
    years_of_data = 1
    stats_processor = TeamStatsProcessor(csv_path, years_of_data=years_of_data)

    # Prepare and train model
    X, y = prepare_training_data(stats_processor)
    model, scaler = train_model(X, y, epochs=500)

    stats_window_label = f"Last {years_of_data} Year{'' if years_of_data == 1 else 's'}"

    # Interactive prediction loop; typing 'quit' at any prompt exits.
    while True:
        print("\nEnter team names and game time (or 'quit' to exit):")
        # Strip surrounding whitespace so a stray space does not make a
        # valid team name fail the lookup.
        away_team = input("Away team: ").strip()
        if away_team.lower() == 'quit':
            break

        home_team = input("Home team: ").strip()
        if home_team.lower() == 'quit':
            break

        game_time = input("Game time (e.g., '1:00 PM', '4:25 PM', '8:20 PM'): ").strip()
        if game_time.lower() == 'quit':
            break

        try:
            prediction = predict_game(model, scaler, stats_processor, away_team, home_team, game_time)
            print(f"\nPrediction for {away_team} @ {home_team} ({game_time} ET):")
            print(f"Predicted Winner: {prediction['winner']}")
            print(f"Win Probability: {prediction['win_probability']:.2%}")
            print(f"\nDetailed Probabilities:")
            print(f"{away_team} Win Probability: {prediction['away_win_probability']:.2%}")
            print(f"{home_team} Win Probability: {prediction['home_win_probability']:.2%}")

            # Print team stats
            away_stats = stats_processor.team_stats[away_team]
            home_stats = stats_processor.team_stats[home_team]

            print(f"\nTeam Stats ({stats_window_label}):")
            print(f"{away_team}:")
            print(f"Win %: {away_stats['win_pct']:.1%}")
            print(f"Yards Per Game: {away_stats['yards']:.1f}")

            print(f"\n{home_team}:")
            print(f"Win %: {home_stats['win_pct']:.1%}")
            print(f"Yards Per Game: {home_stats['yards']:.1f}")

        except ValueError as e:
            # Raised by get_game_features when a team name is unknown.
            print(f"Error: {e}")

Using data from 2023-02-11 to 2024-02-11
Total games in dataset: 286

Teams in dataset: 32
Epoch [10/500], Loss: 0.6583
Epoch [20/500], Loss: 0.6243
Epoch [30/500], Loss: 0.5859
Epoch [40/500], Loss: 0.5368
Epoch [50/500], Loss: 0.4969
Epoch [60/500], Loss: 0.4687
Epoch [70/500], Loss: 0.4394
Epoch [80/500], Loss: 0.4122
Epoch [90/500], Loss: 0.4149
Epoch [100/500], Loss: 0.3732
Epoch [110/500], Loss: 0.3479
Epoch [120/500], Loss: 0.3633
Epoch [130/500], Loss: 0.3585
Epoch [140/500], Loss: 0.3292
Epoch [150/500], Loss: 0.3114
Epoch [160/500], Loss: 0.3089
Epoch [170/500], Loss: 0.2834
Epoch [180/500], Loss: 0.2834
Epoch [190/500], Loss: 0.2721
Epoch [200/500], Loss: 0.2688
Epoch [210/500], Loss: 0.2468
Epoch [220/500], Loss: 0.2342
Epoch [230/500], Loss: 0.2145
Epoch [240/500], Loss: 0.2195
Epoch [250/500], Loss: 0.1915
Epoch [260/500], Loss: 0.2262
Epoch [270/500], Loss: 0.1964
Epoch [280/500], Loss: 0.2042
Epoch [290/500], Loss: 0.1890
Epoch [300/500], Loss: 0.1796
Epoch [310/500], L

KeyboardInterrupt: Interrupted by user