In [21]:
# %pip install torch
%pip install torchvision


Collecting torchvision
  Downloading torchvision-0.15.1-cp39-cp39-macosx_11_0_arm64.whl (1.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting requests (from torchvision)
  Using cached requests-2.28.2-py3-none-any.whl (62 kB)
Collecting charset-normalizer<4,>=2 (from requests->torchvision)
  Downloading charset_normalizer-3.1.0-cp39-cp39-macosx_11_0_arm64.whl (122 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m123.0/123.0 kB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting idna<4,>=2.5 (from requests->torchvision)
  Using cached idna-3.4-py3-none-any.whl (61 kB)
Collecting urllib3<1.27,>=1.21.1 (from requests->torchvision)
  Using cached urllib3-1.26.15-py2.py3-none-any.whl (140 kB)
Collecting certifi>=2017.4.17 (from requests->torchvision)
  Downloading certifi-2022.12.7-py3-none-any.whl (155 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m 

In [22]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import *
from sklearn.tree import *
from sklearn.ensemble import *
from sklearn.svm import *
from sklearn.linear_model import *
from sklearn.model_selection import *
from sklearn.metrics import *
import pickle
import torchvision
import gc
from tqdm import tqdm
from IPython.display import display_html
from typing import Iterable
import warnings
from sklearn.exceptions import * 

warnings.filterwarnings("ignore", category=ConvergenceWarning)
warnings.filterwarnings("ignore", category=UserWarning)

In [5]:
data_prior = pd.read_csv('./data/00-22/all_seasons_cumu_net_total_prior.csv').drop(columns='Unnamed: 0')
data_true = pd.read_csv('./data/00-22/all_seasons_gamelog_facts.csv').drop(columns='Unnamed: 0')
index = data_prior[data_prior['game_season'] > 10].dropna().sample(10000,random_state=1).index
columns = data_prior.columns[data_prior.columns.str.endswith('net')]

X = data_prior.loc[index,columns]
y = data_true.loc[index,'game_result']
X_train,X_test,y_train,y_test = train_test_split(X,y,stratify=data_prior.loc[index,'month'],random_state=1)

In [24]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.base import BaseEstimator, ClassifierMixin

class NeuralNetwork(BaseEstimator, ClassifierMixin):
    def __init__(self, input_dim, hidden_dim, output_dim, learning_rate=0.001, batch_size=32, dropout=0.5, num_epochs=100, scaler = StandardScaler()):
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.num_epochs = num_epochs
        self.dropout = dropout
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.ReLU(),
            nn.Dropout(p=dropout),
            nn.Linear(hidden_dim, output_dim),
            nn.Sigmoid()
        ).to(self.device)
        self.criterion = nn.BCELoss()  # Binary cross-entropy loss
        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)
        self.scaler = scaler


    def fit(self, X, y):
        X_tensor = torch.tensor(self.scaler.fit_transform(X), dtype=torch.float32).to(self.device)
        # X_tensor = torchvision.transforms.Nor
        y_tensor = torch.tensor(np.array(y), dtype=torch.float32).to(self.device)
        num_samples = X.shape[0]
        num_batches = (num_samples + self.batch_size - 1) // self.batch_size

        epoch_range = tqdm(range(self.num_epochs))
        for epoch in epoch_range:
            running_loss = 0.0
            corr_preds = 0
            for batch in range(num_batches):
                start_idx = batch * self.batch_size
                end_idx = min(start_idx + self.batch_size, num_samples)
                batch_X = X_tensor[start_idx:end_idx,:]
                batch_y = y_tensor[start_idx:end_idx]

                self.optimizer.zero_grad()

                # Forward pass
                outputs = self.model(batch_X)
                loss = self.criterion(outputs.squeeze(), batch_y)

                # Backward pass and optimization
                loss.backward()
                self.optimizer.step()

                running_loss += loss.item()
                corr_preds += sum(torch.round(outputs).cpu().squeeze().detach().numpy() == batch_y.numpy())

            # Print the loss for this epoch
            epoch_range.set_description('Epoch [{}/{}], Train Loss: {:.4f}, Train Acc: {:.4f}'.format(epoch+1, self.num_epochs, running_loss/num_batches, corr_preds/len(y_tensor)))

    def predict(self, X):
        X_tensor = torch.tensor(self.scaler.fit_transform(X), dtype=torch.float32).to(self.device)
        with torch.no_grad():
            outputs = self.model(X_tensor)
            y_pred = torch.round(outputs).cpu().numpy()
        return y_pred

    def predict_proba(self, X):
        X_tensor = torch.tensor(self.scaler.fit_transform(X), dtype=torch.float32).to(self.device)
        with torch.no_grad():
            outputs = self.model(X_tensor)
            proba = outputs.cpu().numpy()
        return proba


In [25]:
model = NeuralNetwork(input_dim=X.values.shape[1], hidden_dim=128, output_dim=1, 
                       learning_rate = 0.001, dropout=0.3, num_epochs=500)
model.fit(X_train,y_train)
y_pred = model.predict(X_test)
print(confusion_matrix(y_test,y_pred))

Epoch [500/500], Train Loss: 0.5064, Train Acc: 0.7552: 100%|██████████| 500/500 [01:31<00:00,  5.45it/s]

[[701 558]
 [420 821]]





In [26]:
print(confusion_matrix(y_test,y_pred))
print(accuracy_score(y_test,y_pred))

[[701 558]
 [420 821]]
0.6088


In [27]:
y_proba = model.predict_proba(X_test)
y_proba

array([[0.7710141 ],
       [0.7144489 ],
       [0.65372354],
       ...,
       [0.47556797],
       [0.6358679 ],
       [0.05204496]], dtype=float32)

In [None]:
confident_preds = (y_proba > 0.7) | (y_proba <  0.3)
print(confusion_matrix(y_test[confident_preds],y_pred[confident_preds]))
print(accuracy_score(y_test[confident_preds],y_pred[confident_preds]))


In [None]:
confident_preds = y_proba.max(1) <= 0.7
print(confusion_matrix(y_test[confident_preds],y_pred[confident_preds]))
print(accuracy_score(y_test[confident_preds],y_pred[confident_preds]))

[[90 71]
 [71 74]]
0.5359477124183006


In [None]:
y_proba.max(1)

array([0.9999622 , 0.9999974 , 0.9978237 , ..., 0.99999714, 0.9999876 ,
       1.        ], dtype=float32)

Unnamed: 0,game_result_net,pts_net,opp_pts_net,fg_net,fga_net,fg_pct_net,fg3_net,fg3a_net,fg3_pct_net,ft_net,...,efg_pct_net,tov_pct_net,orb_pct_net,ft_rate_net,opp_efg_pct_net,opp_tov_pct_net,drb_pct_net,opp_ft_rate_net,net_pts_net,net_rtg_net
11285,0.030769,3.661538,1.492308,1.738462,3.307692,0.004123,2.753846,6.169231,0.019231,-2.569231,...,0.018646,-2.349231,-4.401538,-0.040400,0.011338,-0.470769,1.756923,-0.063062,2.169231,2.563077
40130,-0.039216,3.705882,4.921569,0.529412,1.705882,-0.003451,1.745098,2.352941,0.070980,0.901961,...,0.005784,-0.700000,3.254902,0.006000,0.013510,-1.149020,-3.313725,0.022000,-1.215686,-1.354902
42805,-0.112072,-2.234775,2.155315,-0.418018,1.288108,-0.011710,1.046306,1.471171,0.026527,-2.445045,...,-0.006108,1.320919,1.519063,-0.035483,0.012416,-0.039874,1.023297,0.020154,-4.390090,-4.592865
12090,0.479640,6.736269,-5.585701,1.241004,0.996686,0.010694,0.750000,1.802083,0.015102,3.504261,...,0.015204,-1.881250,4.502036,0.039678,-0.035384,-1.673106,2.087169,-0.002315,12.321970,13.546780
54427,0.131670,7.348579,3.966736,1.765766,3.580042,-0.000622,2.115038,3.637561,0.054407,1.702010,...,0.010295,-2.682536,-5.212405,0.012788,0.013784,0.994387,-4.293001,-0.010658,3.381843,3.266528
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51460,0.343750,8.812500,-3.500000,2.531250,4.312500,0.007094,4.000000,5.968750,0.040344,-0.250000,...,0.026250,-1.668750,7.309375,-0.014156,-0.034562,-2.825000,0.968750,-0.023031,12.312500,12.490625
40738,-0.240260,-5.465368,0.138528,-5.071429,-5.792208,-0.031455,3.965368,10.917749,0.029002,0.712121,...,-0.004855,2.368398,-4.180519,0.023253,-0.030019,-0.857792,-3.537662,0.056831,-5.603896,-6.050000
39254,0.009519,2.409572,3.093337,-0.910894,1.000000,-0.019380,4.664463,11.771285,0.019032,-0.433104,...,0.009855,-0.190349,6.137758,-0.010226,0.033569,-1.271840,3.124564,-0.032308,-0.683765,-0.836753
34428,-0.172414,-16.051724,-7.982759,-7.672414,-9.965517,-0.037431,-6.068966,-14.448276,-0.036638,5.362069,...,-0.069259,0.486207,4.305172,0.092483,0.012155,1.375862,0.700000,0.046845,-8.068966,-8.594828
