This is the script that will actually query data and read in the machine learning model to output predictions. The weekly_run script needs to be run before to pull the most recent boxscores.

In [1]:
import os
from datetime import datetime, timedelta
import time
import pandas as pd
import psycopg2
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

In [2]:
# Set current date and determine how many days to filter the run
season = 2023
filter_days = 200

current_date = datetime.today().strftime("%Y-%m-%d")
start_date = (datetime.today() - timedelta(days=filter_days)).strftime("%Y-%m-%d")

In [3]:
current_date = '2023-12-16'

In [4]:
# Specify the number of games to average
num_games_average = 3

In [5]:
try:
    # Establish a connection to the PostgreSQL database
    connection = psycopg2.connect(
        host = "localhost",
        dbname = "nfl",
        user = "postgres",
        password = "Plenoir2002!", # Include correct password
        port = 5432
    )
    print("Connected to PostgreSQL database.")
except (Exception, psycopg2.Error) as error:
    print("Error connecting to PostgreSQL database: ", error)

# Create a cursor object to interact with the database
cursor = connection.cursor()

Connected to PostgreSQL database.


In [6]:
# Query to fetch column names from the nfl_data table
query = """
    SELECT column_name 
    FROM information_schema.columns 
    WHERE table_name = 'nfl_data'
    ORDER BY ordinal_position;
"""

cursor.execute(query)

column_names = [column[0] for column in cursor.fetchall()]

# Initialize an empty Pandas DataFrame with the fetched column names
nfl_df = pd.DataFrame(columns=column_names)

In [7]:
# Execute your query to fetch data from the nfl_data table for the specified date range
game_query = f"""
    SELECT gameid, tm, home, won, season, date, tm_opp
    FROM nfl_data
    WHERE Date BETWEEN '{start_date}' AND '{current_date}';
"""

cursor.execute(game_query)

rows = cursor.fetchall()

column_names = [desc[0] for desc in cursor.description]

# Convert the fetched data to a Pandas DataFrame
df_filtered = pd.DataFrame(rows, columns=column_names)

# Append the filtered data to the existing nfl_df DataFrame
nfl_df = pd.concat([nfl_df, df_filtered], ignore_index=True)

In [8]:
offensive_stats = nfl_df.loc[:, 'frdwns':'fga']
defensive_stats = nfl_df.loc[:, 'score_opp':'fga_opp']

# Create empty pandas DF
col_names = list(offensive_stats.columns) + list(defensive_stats.columns)
testing_df = pd.DataFrame(columns=col_names)

In [9]:
# Function to calculate the rolling average for a team
def calculate_rolling_average(offensive_team, defensive_opponent, current_date, num_games_avg):
    # SQL query to fetch the previous games for the offensive team
    offense_query = f"""
    SELECT {', '.join(offensive_stats)}
    FROM {offensive_team}
    WHERE date < %s
    ORDER BY date DESC
    LIMIT %s;
    """
    
    cursor.execute(offense_query, (current_date, num_games_avg))
    offense_games = cursor.fetchall()

    # SQL query to fetch the previous games for the defensive opponent
    defense_query = f"""
    SELECT {', '.join(defensive_stats)}
    FROM {defensive_opponent}
    WHERE date < %s
    ORDER BY date DESC
    LIMIT %s;
    """
    cursor.execute(defense_query, (current_date, num_games_avg))
    defense_games = cursor.fetchall()

    # Calculate the average for each defensive stat and convert to tuple
    avg_offense_game = tuple(round(sum(col) / len(col), 2) for col in zip(*offense_games))
    avg_defense_game = tuple(round(sum(col) / len(col), 2) for col in zip(*defense_games))

    # Concatenate the offensive and defensive values from the previous games (rounded to 2 decimal pts)
    game_avgs = avg_offense_game + avg_defense_game

    return game_avgs

In [10]:
# Iterate through your data and populate the new DataFrame from the start date
for index, row in nfl_df.iterrows():
    team = row['tm']
    opponent = row['tm_opp']
    current_date = row['date']
    
    avg_values = calculate_rolling_average(team, opponent, current_date, num_games_average)
    testing_df.loc[index] = avg_values

In [11]:
# Your SQL query
score_query = "SELECT gameid, score FROM nfl_data WHERE gameid IN %s;"

# Loop through each row in the DataFrame
for index, row in nfl_df.iterrows():
    gameid = row['gameid']

    # Execute the SQL query for the current gameid
    with connection.cursor() as cursor:
        cursor.execute(score_query, ((gameid,),))
        result = cursor.fetchone()

        if result:
            # Update the 'score' column in the DataFrame
            nfl_df.at[index, 'score'] = result[1]  # Assuming 'score' is the second column in the query result


In [12]:
# Close the cursor and connection
cursor.close()
connection.close()

In [13]:
nfl_df = nfl_df[['gameid', 'tm', 'home', 'won', 'season', 'date', 'tm_opp', 'score']]
nfl_df = pd.concat([nfl_df, testing_df], axis=1)

In [14]:
# Extract the numerical columns for normalization
numerical_columns = nfl_df.columns.difference(['gameid', 'tm', 'tm_opp', 'date', 'home', 'won', 'season', 'score'])

# Normalize the numerical values using Min-Max scaling
scaler = MinMaxScaler()
nfl_df[numerical_columns] = scaler.fit_transform(nfl_df[numerical_columns])

In [15]:
# Define your custom neural network class
class NFLPredictor(nn.Module):
    def __init__(self, input_size):
        super(NFLPredictor, self).__init__()
        self.fc1 = nn.Linear(input_size, 16)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.2)
        self.fc2 = nn.Linear(16, 1)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return x

In [17]:
# Load the pre-trained PyTorch model
model_path = 'NFL_model.pth'
model = NFLPredictor(input_size=60)  # Replace with the actual input size
model.load_state_dict(torch.load(model_path))
model.eval()

X = nfl_df.drop(['gameid', 'tm', 'tm_opp', 'won', 'season', 'score', 'date'], axis=1).values
X_tensor = torch.FloatTensor(X)

# Make predictions
with torch.no_grad():
    predictions = model(X_tensor).numpy()
    

# Add predictions to the DataFrame
nfl_df['predictions'] = predictions

In [18]:
from sklearn.metrics import mean_absolute_error

# Assuming 'score' and 'predictions' are the column names
mae = mean_absolute_error(nfl_df['score'], nfl_df['predictions'])
print(f'Mean Absolute Error: {mae}')

Mean Absolute Error: 8.934075361611892


In [19]:
check = nfl_df[['score', 'predictions']]
print(check)

    score  predictions
0    21.0    32.080547
1    20.0    32.469166
2    34.0    27.562565
3     0.0    25.015821
4    16.0    29.489244
..    ...          ...
779  13.0    25.570448
780  30.0    16.318514
781  17.0    26.571470
782  27.0    26.390909
783  24.0    26.872770

[784 rows x 2 columns]
