In [2]:
def _rolling_columns(prefix):
    """Return the 29 rolling-average column names for one prefix ('m', 't' or 'o')."""
    context = [
        'days_ago', 'games_ago', 'is_home_team',
        'is_regular_season_matchup', 'is_playoffs_matchup', 'is_pre_season_matchup',
        'wl',
    ]
    box_score = ['pts', 'fg_pct', 'fg3_pct', 'fg3m', 'ft_pct', 'ftm', 'reb', 'ast', 'stl', 'blk', 'tov']
    # Every box-score stat appears twice: first all "_for" columns, then all "_against" columns.
    suffixes = context + [f'{stat}_for' for stat in box_score] + [f'{stat}_against' for stat in box_score]
    return [f'{prefix}_{suffix}' for suffix in suffixes]


# Full column layout, in order:
#   game-level flags,
#   m_* : match up averages from last 5 match ups,
#   t_* : team averages from team's last 10 games,
#   o_* : opponent averages from opponent's last 10 games,
#   wl_home : target.
headers = (
    ['game_id', 'is_regular_season', 'is_playoffs', 'is_pre_season']
    + _rolling_columns('m')
    + _rolling_columns('t')
    + _rolling_columns('o')
    + ['wl_home']
)

# Model inputs: points for/against for each prefix, followed by the win/loss rate for each prefix.
features = [
    *(f'{prefix}_pts_{side}' for prefix in ('m', 't', 'o') for side in ('for', 'against')),
    *(f'{prefix}_wl' for prefix in ('m', 't', 'o')),
]

file_path = "fnn_data.csv"

In [4]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Load the data. `file_path` and `features` are defined once in the configuration
# cell; re-declaring the path here would silently shadow any change made there.
data = pd.read_csv(file_path)

# Ensure game_date is in datetime format
data['game_date'] = pd.to_datetime(data['game_date'])

# Split the data into train and test based on the calendar year of game_date:
# 2001-2019 (inclusive) for training, 2020-2023 (inclusive) for testing.
game_year = data['game_date'].dt.year
train_data = data[(game_year >= 2001) & (game_year <= 2019)]
test_data = data[(game_year >= 2020) & (game_year <= 2023)]

# Extract features and target. The feature frames are copied so that adding
# derived columns later does not trigger pandas' SettingWithCopyWarning.
X_train = train_data[features].copy()
X_test = test_data[features].copy()
y_train = train_data['wl_home']
y_test = test_data['wl_home']

# Print the first training row as a sanity check on the selected columns
print(X_train.head(1))

# TODO create new features by combining existing features, e.g. a points
# differential. Note that the selected columns are prefixed (m_/t_/o_), so a
# derived column has to be built from e.g. 't_pts_for' and 't_pts_against',
# added to both X_train and X_test, and its name appended to `features`.

# Normalize the feature data. The scaler is fitted on the training rows only
# and then applied unchanged to the test rows.
scaler = MinMaxScaler()
X_train_normalized = scaler.fit_transform(X_train)
X_test_normalized = scaler.transform(X_test)

# Convert back to DataFrame for convenience. The original row index is kept so
# the normalized features stay label-aligned with y_train / y_test (which keep
# the filtered index of `data`); without it the frames would get a fresh 0..n-1
# index and any index-aligned pandas operation would pair the wrong rows.
X_train_normalized = pd.DataFrame(X_train_normalized, columns=X_train.columns, index=X_train.index)
X_test_normalized = pd.DataFrame(X_test_normalized, columns=X_test.columns, index=X_test.index)

# Verify results
print("Training data shape:", X_train_normalized.shape, y_train.shape)
print("Testing data shape:", X_test_normalized.shape, y_test.shape)


   m_pts_for  m_pts_against  t_pts_for  t_pts_against  o_pts_for   
0      104.4           88.6       93.4           98.6       98.9  \

   o_pts_against  m_wl  t_wl  o_wl  
0           92.8   1.0   0.4   0.9  
Training data shape: (24188, 9) (24188,)
Testing data shape: (4615, 9) (4615,)


In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score

# Number of input columns the networks must accept
input_size = X_train_normalized.shape[1]

# Feature matrices become float32 tensors with one row per sample
X_train_tensor, X_test_tensor = (
    torch.tensor(frame.values, dtype=torch.float32)
    for frame in (X_train_normalized, X_test_normalized)
)

# Labels become float32 column vectors (one output value per sample)
y_train_tensor, y_test_tensor = (
    torch.tensor(labels.values, dtype=torch.float32).view(-1, 1)
    for labels in (y_train, y_test)
)

# Pair features with labels
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Mini-batches of 32: training batches are reshuffled each epoch, test batches keep their order
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=32)

# Function to train and evaluate a model
def train_and_evaluate_model(model, criterion, optimizer, num_epochs=20, *,
                             train_batches=None, test_batches=None):
    """Train ``model`` and return its accuracy on the test batches.

    Args:
        model: Module mapping a feature batch to one value per row; the
            outputs are thresholded at 0.5 to obtain 0/1 predictions.
        criterion: Loss called as ``criterion(outputs, y_batch)``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        num_epochs: Number of passes over the training batches.
        train_batches: Re-iterable of ``(X_batch, y_batch)`` pairs used for
            training. Defaults to the module-level ``train_loader``.
        test_batches: Iterable of ``(X_batch, y_batch)`` pairs used for
            evaluation. Defaults to the module-level ``test_loader``.

    Returns:
        The fraction of test samples whose thresholded prediction equals the
        label, as computed by ``accuracy_score``.

    Raises:
        ValueError: If the training or test batches are empty.
    """
    # Fall back to the notebook-level loaders so existing calls keep working.
    if train_batches is None:
        train_batches = train_loader
    if test_batches is None:
        test_batches = test_loader

    # Training loop
    for epoch in range(num_epochs):
        model.train()
        loss_sum = 0.0
        batch_count = 0
        for X_batch, y_batch in train_batches:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()

            loss_sum += loss.item()
            batch_count += 1

        if batch_count == 0:
            raise ValueError("train_batches yielded no batches")

        # Reported loss is the mean of the per-batch losses.
        train_loss = loss_sum / batch_count
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {train_loss:.4f}")

    # Evaluate the model
    model.eval()
    batch_outputs = []
    batch_labels = []
    with torch.no_grad():
        for X_batch, y_batch in test_batches:
            # Flatten to 1-D so predictions and labels line up element by element.
            batch_outputs.append(model(X_batch).reshape(-1).cpu())
            batch_labels.append(y_batch.reshape(-1).cpu())

    if not batch_outputs:
        raise ValueError("test_batches yielded no batches")

    y_true = torch.cat(batch_labels).numpy()
    # Convert predictions to binary outcomes
    y_pred_binary = (torch.cat(batch_outputs) > 0.5).int().numpy()

    # Calculate metrics (e.g., accuracy)
    accuracy = accuracy_score(y_true, y_pred_binary)
    print(f"Test Accuracy: {accuracy:.2%}")
    return accuracy

# Define multiple models to test
class Model1(nn.Module):
    """Feed-forward net: input_size -> 6 -> 4 -> 1, ReLU between layers, sigmoid output."""

    def __init__(self, input_size):
        super().__init__()
        layers = [
            nn.Linear(input_size, 6),
            nn.ReLU(),
            nn.Linear(6, 4),
            nn.ReLU(),
            nn.Linear(4, 1),
            nn.Sigmoid(),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return one sigmoid value per row."""
        scores = self.model(x)
        return scores

class Model2(nn.Module):
    """Feed-forward net: input_size -> 64 -> 32 -> 1, ReLU between layers, sigmoid output."""

    def __init__(self, input_size):
        super().__init__()
        first_width, second_width = 64, 32
        self.model = nn.Sequential(
            nn.Linear(input_size, first_width), nn.ReLU(),
            nn.Linear(first_width, second_width), nn.ReLU(),
            nn.Linear(second_width, 1), nn.Sigmoid(),
        )

    def forward(self, x):
        """Return the network's sigmoid output for the batch ``x``."""
        return self.model.forward(x) if False else self.model(x)

class Model3(nn.Module):
    """Feed-forward net: input_size -> 128 -> 64 -> 32 -> 1 with sigmoid output.

    The first two hidden layers are each followed by ReLU and Dropout(0.3);
    the third hidden layer is followed by ReLU only.
    """

    def __init__(self, input_size):
        super().__init__()
        drop_rate = 0.3
        stack = []
        # Two hidden layers regularised with dropout
        stack += [nn.Linear(input_size, 128), nn.ReLU(), nn.Dropout(drop_rate)]
        stack += [nn.Linear(128, 64), nn.ReLU(), nn.Dropout(drop_rate)]
        # Final hidden layer without dropout, then the output head
        stack += [nn.Linear(64, 32), nn.ReLU()]
        stack += [nn.Linear(32, 1), nn.Sigmoid()]
        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Return one sigmoid value per row of ``x``."""
        result = self.model(x)
        return result
    
class Model4(nn.Module):
    """Feed-forward net: input_size -> 256 -> 128 -> 64 -> 1 with sigmoid output.

    The first two hidden layers are each followed by BatchNorm1d and ReLU;
    the third hidden layer is followed by ReLU only.
    """

    def __init__(self, input_size):
        super().__init__()
        wide, medium, narrow = 256, 128, 64
        modules = [
            # Batch-normalised hidden layers
            nn.Linear(input_size, wide), nn.BatchNorm1d(wide), nn.ReLU(),
            nn.Linear(wide, medium), nn.BatchNorm1d(medium), nn.ReLU(),
            # Plain hidden layer
            nn.Linear(medium, narrow), nn.ReLU(),
            # Output head
            nn.Linear(narrow, 1), nn.Sigmoid(),
        ]
        self.model = nn.Sequential(*modules)

    def forward(self, x):
        """Return one sigmoid value per row of ``x``."""
        activations = self.model(x)
        return activations

class Model5(nn.Module):
    """Feed-forward net: input_size -> 16 -> 8 -> 1, ReLU between layers, sigmoid output."""

    def __init__(self, input_size):
        super().__init__()
        stack = []
        fan_in = input_size
        # Hidden layers, each followed by a ReLU
        for width in (16, 8):
            stack.append(nn.Linear(fan_in, width))
            stack.append(nn.ReLU())
            fan_in = width
        # Single-unit output squashed by a sigmoid
        stack.append(nn.Linear(fan_in, 1))
        stack.append(nn.Sigmoid())
        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Return one sigmoid value per row of ``x``."""
        prediction = self.model(x)
        return prediction

class Model6(nn.Module):
    """Feed-forward net: input_size -> 128 -> 64 -> 32 -> 16 -> 1, ReLU between layers, sigmoid output."""

    def __init__(self, input_size):
        super().__init__()
        widths = [input_size, 128, 64, 32, 16]
        blocks = []
        # Consecutive width pairs define each hidden Linear layer; every one gets a ReLU
        for fan_in, fan_out in zip(widths, widths[1:]):
            blocks.extend((nn.Linear(fan_in, fan_out), nn.ReLU()))
        # Single-unit output squashed by a sigmoid
        blocks.extend((nn.Linear(widths[-1], 1), nn.Sigmoid()))
        self.model = nn.Sequential(*blocks)

    def forward(self, x):
        """Return one sigmoid value per row of ``x``."""
        output = self.model(x)
        return output


# Registry of candidate architectures as (display name, zero-argument factory) pairs.
# Add additional model classes to the tuple below as needed; the display name is
# the class name. `input_size` is looked up when the factory is called, while the
# class is bound per entry through the lambda's default argument.
models = [
    (architecture.__name__, lambda architecture=architecture: architecture(input_size))
    for architecture in (Model1, Model2, Model3, Model4, Model5, Model6)
]

# Evaluate all models
SEED = 42  # arbitrary fixed value; change it to check how sensitive the ranking is to randomness

results = {}
for model_name, model_fn in models:
    print(f"Training {model_name}")
    # Re-seed torch before each candidate so that weight initialisation and
    # batch shuffling are reproducible across runs and do not depend on which
    # models were trained earlier in the list.
    torch.manual_seed(SEED)
    model = model_fn()
    criterion = nn.BCELoss()  # models end in a sigmoid, so they emit values in (0, 1)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    accuracy = train_and_evaluate_model(model, criterion, optimizer)
    results[model_name] = accuracy

# Print results
print("\nModel Evaluation Results:")
for model_name, accuracy in results.items():
    print(f"{model_name}: {accuracy:.2%}")


Training Model1
Epoch 1/20, Loss: 0.6654
Epoch 2/20, Loss: 0.6463
Epoch 3/20, Loss: 0.6421
Epoch 4/20, Loss: 0.6411
Epoch 5/20, Loss: 0.6404
Epoch 6/20, Loss: 0.6401
Epoch 7/20, Loss: 0.6397
Epoch 8/20, Loss: 0.6395
Epoch 9/20, Loss: 0.6395
Epoch 10/20, Loss: 0.6391
Epoch 11/20, Loss: 0.6391
Epoch 12/20, Loss: 0.6390
Epoch 13/20, Loss: 0.6390
Epoch 14/20, Loss: 0.6386
Epoch 15/20, Loss: 0.6385
Epoch 16/20, Loss: 0.6384
Epoch 17/20, Loss: 0.6385
Epoch 18/20, Loss: 0.6382
Epoch 19/20, Loss: 0.6382
Epoch 20/20, Loss: 0.6380
Test Accuracy: 60.54%
Training Model2
Epoch 1/20, Loss: 0.6466
Epoch 2/20, Loss: 0.6410
Epoch 3/20, Loss: 0.6395
Epoch 4/20, Loss: 0.6397
Epoch 5/20, Loss: 0.6391
Epoch 6/20, Loss: 0.6388
Epoch 7/20, Loss: 0.6386
Epoch 8/20, Loss: 0.6380
Epoch 9/20, Loss: 0.6379
Epoch 10/20, Loss: 0.6375
Epoch 11/20, Loss: 0.6376
Epoch 12/20, Loss: 0.6376
Epoch 13/20, Loss: 0.6369
Epoch 14/20, Loss: 0.6372
Epoch 15/20, Loss: 0.6363
Epoch 16/20, Loss: 0.6368
Epoch 17/20, Loss: 0.6362
Ep