In [23]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from sklearn.preprocessing import StandardScaler


In [3]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print(f"Using device: {device}")

Using device: cpu


### Train/Test Split

In [74]:
# Load the raw feature arrays and labels from disk, in a fixed order that
# matches the names on the left-hand side.
away_last10, home_last10, matchups_last3, seasonal, labels = (
    np.load(npy_path)
    for npy_path in (
        "data/data_np/away_last10_X.npy",
        "data/data_np/home_last10_X.npy",
        "data/data_np/matchups_last3_X.npy",
        "data/data_np/seasonal_stats.npy",
        "data/data_np/last10_labels.npy",
    )
)

In [75]:
# Display the dimensions of the loaded away-team array as a quick sanity check.
away_last10.shape

(67131, 10, 117)

In [76]:
# Hold out the last 20% of the samples (in stored order) as the test set;
# no shuffling is applied, so the split is a plain prefix/suffix cut.
n_samples = away_last10.shape[0]
split = int(0.8 * n_samples)

away_last10_train, away_last10_test = away_last10[:split], away_last10[split:]
home_last10_train, home_last10_test = home_last10[:split], home_last10[split:]
matchups_train, matchups_test = matchups_last3[:split], matchups_last3[split:]
seasonal_train, seasonal_test = seasonal[:split], seasonal[split:]

y_train, y_test = labels[:split], labels[split:]

In [77]:
# Apply StandardScaler to the continuous features of every input array.


def _scale_continuous(train, test, cat_ind):
    """Standardize the continuous columns of a train/test array pair.

    The columns listed in ``cat_ind`` (indices along the last axis) are treated
    as categorical and passed through unchanged. All remaining columns are
    flattened to 2-D, standardized with a ``StandardScaler`` that is fitted on
    ``train`` only, and reshaped back to their original layout. Works for both
    3-D ``(n, steps, features)`` and 2-D ``(n, features)`` arrays.

    Args:
        train: Training array; the scaler statistics are computed from it.
        test: Test array; transformed with the statistics learned on ``train``.
        cat_ind: List of last-axis indices that hold categorical features.

    Returns:
        ``(train_scaled, test_scaled, scaler)``. In both arrays the scaled
        continuous columns come first and the untouched categorical columns
        are appended at the end of the last axis.
    """
    train_cat = train[..., cat_ind]
    test_cat = test[..., cat_ind]
    train_con = np.delete(train, cat_ind, axis=-1)
    test_con = np.delete(test, cat_ind, axis=-1)

    # StandardScaler expects 2-D input, so collapse every leading axis.
    n_con = train_con.shape[-1]
    scaler = StandardScaler()
    train_con = scaler.fit_transform(train_con.reshape(-1, n_con)).reshape(train_con.shape)
    # Only transform the test split: refitting here would leak test statistics
    # and put train and test on different scales.
    test_con = scaler.transform(test_con.reshape(-1, n_con)).reshape(test_con.shape)

    train_scaled = np.concatenate((train_con, train_cat), axis=-1)
    test_scaled = np.concatenate((test_con, test_cat), axis=-1)
    return train_scaled, test_scaled, scaler


# Categorical column indices for the per-game sequence arrays and for the
# seasonal array respectively.
SEQ_CAT_IND = [1, 3, 4, 61, 62]
SEASONAL_CAT_IND = [0, 1, 2]

# Each input gets its own scaler; every test array is built from its own
# train-fitted scaler and its own source array.
away_last10_train, away_last10_test, away10_scaler = _scale_continuous(
    away_last10_train, away_last10_test, SEQ_CAT_IND
)
home_last10_train, home_last10_test, home10_scaler = _scale_continuous(
    home_last10_train, home_last10_test, SEQ_CAT_IND
)
matchups_train, matchups_test, matchup_scaler = _scale_continuous(
    matchups_train, matchups_test, SEQ_CAT_IND
)
seasonal_train, seasonal_test, season_scaler = _scale_continuous(
    seasonal_train, seasonal_test, SEASONAL_CAT_IND
)

In [78]:
# Split train/val: the first 80% of the training rows stay as train and the
# remaining 20% become the validation set.
# NOTE(review): the scalers are fitted before this split, so the validation
# rows contribute to the scaling statistics.
val_split = int(0.8 * away_last10_train.shape[0])

# Tuple assignment evaluates both slices before the *_train name is rebound.
away_last10_train, away_last10_val = away_last10_train[:val_split], away_last10_train[val_split:]
home_last10_train, home_last10_val = home_last10_train[:val_split], home_last10_train[val_split:]
matchups_train, matchups_val = matchups_train[:val_split], matchups_train[val_split:]
seasonal_train, seasonal_val = seasonal_train[:val_split], seasonal_train[val_split:]

y_train, y_val = y_train[:val_split], y_train[val_split:]

In [79]:
# Report the shapes of the away-team train and test arrays.
print("train_shape:", away_last10_train.shape, "test_shape", away_last10_test.shape)

train_shape: (42963, 10, 117) test_shape (13427, 10, 117)


In [80]:
# Save datasets: persist every train/val/test split as its own .npy file.
# The mapping preserves insertion order, so files are written in this order.
save_targets = {
    "data/data_np/away_last10_train.npy": away_last10_train,
    "data/data_np/away_last10_val.npy": away_last10_val,
    "data/data_np/away_last10_test.npy": away_last10_test,
    "data/data_np/home_last10_train.npy": home_last10_train,
    "data/data_np/home_last10_val.npy": home_last10_val,
    "data/data_np/home_last10_test.npy": home_last10_test,
    "data/data_np/matchups_train.npy": matchups_train,
    "data/data_np/matchups_val.npy": matchups_val,
    "data/data_np/matchups_test.npy": matchups_test,
    "data/data_np/seasonal_train.npy": seasonal_train,
    "data/data_np/seasonal_val.npy": seasonal_val,
    "data/data_np/seasonal_test.npy": seasonal_test,
    "data/data_np/y_train.npy": y_train,
    "data/data_np/y_val.npy": y_val,
    "data/data_np/y_test.npy": y_test,
}

for out_path, out_array in save_targets.items():
    np.save(out_path, out_array)

In [59]:
# Display the dimensions of the away-team test array.
away_last10_test.shape

(13427, 10, 117)

### Training Loop

In [82]:
# Reload the train/val/test splits from the .npy files.
(
    away_last10_train, away_last10_val, away_last10_test,
    home_last10_train, home_last10_val, home_last10_test,
    matchups_train, matchups_val, matchups_test,
    seasonal_train, seasonal_val, seasonal_test,
    y_train, y_val, y_test,
) = (
    np.load(npy_path)
    for npy_path in (
        "data/data_np/away_last10_train.npy",
        "data/data_np/away_last10_val.npy",
        "data/data_np/away_last10_test.npy",
        "data/data_np/home_last10_train.npy",
        "data/data_np/home_last10_val.npy",
        "data/data_np/home_last10_test.npy",
        "data/data_np/matchups_train.npy",
        "data/data_np/matchups_val.npy",
        "data/data_np/matchups_test.npy",
        "data/data_np/seasonal_train.npy",
        "data/data_np/seasonal_val.npy",
        "data/data_np/seasonal_test.npy",
        "data/data_np/y_train.npy",
        "data/data_np/y_val.npy",
        "data/data_np/y_test.npy",
    )
)


def _features_to_tensor(array):
    """Copy a NumPy array into a float32 tensor placed on ``device``."""
    return torch.tensor(array, dtype=torch.float32).to(device)


def _labels_to_tensor(array):
    """Copy labels into a float32 column tensor of shape (n, 1) on ``device``."""
    return torch.tensor(array, dtype=torch.float32).view(-1, 1).to(device)


# Convert to torch tensors: X1 = away last-10, X2 = home last-10,
# X3 = matchups, X4 = seasonal stats.
X1_train_tensor = _features_to_tensor(away_last10_train)
X1_val_tensor = _features_to_tensor(away_last10_val)
X1_test_tensor = _features_to_tensor(away_last10_test)

X2_train_tensor = _features_to_tensor(home_last10_train)
X2_val_tensor = _features_to_tensor(home_last10_val)
X2_test_tensor = _features_to_tensor(home_last10_test)

X3_train_tensor = _features_to_tensor(matchups_train)
X3_val_tensor = _features_to_tensor(matchups_val)
X3_test_tensor = _features_to_tensor(matchups_test)

X4_train_tensor = _features_to_tensor(seasonal_train)
X4_val_tensor = _features_to_tensor(seasonal_val)
X4_test_tensor = _features_to_tensor(seasonal_test)

y_train_tensor = _labels_to_tensor(y_train)
y_val_tensor = _labels_to_tensor(y_val)
y_test_tensor = _labels_to_tensor(y_test)

# Make dataset
class MultiInputDataset(Dataset):
    """Dataset that serves four aligned inputs and a label per sample.

    Args:
        X1, X2, X3, X4: Indexable inputs whose first axis is the sample axis.
        y: Indexable labels aligned with the inputs along the first axis.

    Raises:
        ValueError: If the inputs and labels do not all have the same length.
            Without this check a mismatch would either raise an IndexError
            midway through an epoch or silently ignore trailing rows.
    """

    def __init__(self, X1, X2, X3, X4, y):
        lengths = [len(part) for part in (X1, X2, X3, X4, y)]
        if len(set(lengths)) != 1:
            raise ValueError(
                f"All inputs must have the same number of samples, got lengths {lengths}"
            )

        self.X1 = X1
        self.X2 = X2
        self.X3 = X3
        self.X4 = X4
        self.y = y

    def __len__(self):
        """Return the number of samples."""
        return len(self.X1)

    def __getitem__(self, idx):
        """Return the ``(X1, X2, X3, X4, y)`` tuple for sample ``idx``."""
        return self.X1[idx], self.X2[idx], self.X3[idx], self.X4[idx], self.y[idx]
	
# Wrap each split in a dataset that yields (X1, X2, X3, X4, y) per sample.
train_dataset = MultiInputDataset(X1_train_tensor, X2_train_tensor, X3_train_tensor, X4_train_tensor, y_train_tensor)
val_dataset = MultiInputDataset(X1_val_tensor, X2_val_tensor, X3_val_tensor, X4_val_tensor, y_val_tensor)
test_dataset = MultiInputDataset(X1_test_tensor, X2_test_tensor, X3_test_tensor, X4_test_tensor, y_test_tensor)

batch_size = 64

# Shuffle the training samples every epoch so mini-batches are not always the
# same stored-order slices; evaluation loaders keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

print(f"Train DataLoader batches: {len(train_loader)}, Test DataLoader batches: {len(test_loader)}")

Train DataLoader batches: 672, Test DataLoader batches: 210


In [85]:
class TimeSeriesTransformer(nn.Module):
    """Transformer encoder that pools a feature sequence into a single vector.

    Each time step is linearly projected to ``d_model`` dimensions, a fixed
    sinusoidal positional encoding is added, the sequence is run through a
    stack of ``nn.TransformerEncoderLayer`` blocks (``batch_first=True``) and
    the outputs are mean-pooled over the time axis.

    Args:
        input_dim: Number of features per time step.
        d_model: Width of the transformer representation.
        num_heads: Number of attention heads in every encoder layer.
        num_layers: Number of stacked encoder layers.
        dropout: Dropout probability used inside the encoder layers.
    """

    def __init__(self, input_dim, d_model, num_heads, num_layers, dropout):
        super().__init__()
        self.d_model = d_model
        self.input = nn.Linear(input_dim, d_model)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model, nhead=num_heads, dropout=dropout, batch_first=True
        )
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

    def sinusoidal_encode(self, sequence_len, d_model, device=None):
        """Build the sinusoidal positional encoding.

        Args:
            sequence_len: Number of positions to encode.
            d_model: Encoding width; may be odd.
            device: Device for the returned tensor. Defaults to the device of
                this module's parameters, so the encoding follows the model
                instead of depending on a notebook-level global.

        Returns:
            A float32 tensor of shape ``(1, sequence_len, d_model)`` with sine
            values in the even columns and cosine values in the odd columns.
        """
        if device is None:
            device = self.input.weight.device

        pos = np.arange(sequence_len)[:, np.newaxis]
        div_term = np.exp(np.arange(0, d_model, 2) * -(np.log(10000.0) / d_model))
        angles = pos * div_term

        pe = np.zeros((sequence_len, d_model))
        pe[:, 0::2] = np.sin(angles)
        # For odd d_model there is one fewer odd column than even columns, so
        # trim the cosine block to fit.
        pe[:, 1::2] = np.cos(angles)[:, : d_model // 2]

        pe_tensor = torch.tensor(pe, dtype=torch.float32, device=device)
        return pe_tensor.unsqueeze(0)

    def forward(self, x):
        """Encode ``x`` of shape ``(batch, seq_len, input_dim)`` to ``(batch, d_model)``."""
        sequence_len = x.shape[1]
        # Create the encoding on the input's device so the addition below never
        # mixes devices.
        encode = self.sinusoidal_encode(sequence_len, self.d_model, device=x.device)

        x = self.input(x)
        x = x + encode
        x = self.encoder(x)
        # Mean-pool over the time axis.
        x = x.mean(dim=1)
        return x

class TransformerPredictor(nn.Module):
    """Binary predictor that fuses three sequence inputs and one flat input.

    Three ``TimeSeriesTransformer`` encoders turn the sequence inputs into
    ``d_model``-wide vectors, a small MLP maps the flat input to the same
    width, and the four vectors are concatenated and reduced to a single
    sigmoid output per sample.

    Args:
        feat_dim1: Feature width of the first and second sequence inputs.
        feat_dim2: Feature width of the third sequence input.
        feat_dim3: Feature width of the flat (non-sequence) input.
        d_model: Width of every branch representation.
        num_heads: Attention heads per encoder layer.
        num_layers: Encoder layers per sequence branch.
        dropout: Dropout probability inside the encoder layers.
    """

    def __init__(self, feat_dim1, feat_dim2, feat_dim3, d_model, num_heads, num_layers, dropout):
        super().__init__()

        self.d_model = d_model

        # All sequence branches share the same encoder hyper-parameters.
        encoder_kwargs = dict(
            d_model=d_model, num_heads=num_heads, num_layers=num_layers, dropout=dropout
        )
        self.encoder1 = TimeSeriesTransformer(feat_dim1, **encoder_kwargs)
        self.encoder2 = TimeSeriesTransformer(feat_dim1, **encoder_kwargs)
        self.encoder3 = TimeSeriesTransformer(feat_dim2, **encoder_kwargs)

        # MLP that lifts the flat input to d_model dimensions.
        seasonal_stack = [
            nn.Linear(feat_dim3, 128),
            nn.ReLU(),
            nn.Linear(128, d_model),
        ]
        self.seasonal_layers = nn.Sequential(*seasonal_stack)

        # Head over the four concatenated branch vectors; ends in a sigmoid.
        head_stack = [
            nn.Linear(d_model * 4, d_model),
            nn.ReLU(),
            nn.Linear(d_model, 1),
            nn.Sigmoid(),
        ]
        self.combined_layers = nn.Sequential(*head_stack)

    def forward(self, x1, x2, x3, x4):
        """Return a ``(batch, 1)`` tensor of sigmoid outputs for the four inputs."""
        branches = (
            self.encoder1(x1),
            self.encoder2(x2),
            self.encoder3(x3),
            self.seasonal_layers(x4),
        )
        fused = torch.cat(branches, dim=-1)
        return self.combined_layers(fused)

In [86]:
# Model hyper-parameters; feature widths are read from the training tensors.
feat_dim1 = X1_train_tensor.shape[2]
feat_dim2 = X3_train_tensor.shape[2]
feat_dim3 = X4_train_tensor.shape[1]
d_model = 256
num_heads = 4
num_layers = 2
dropout = 0.2

# The batches live on `device`, so the model's parameters must be moved there
# as well; otherwise the forward pass fails whenever `device` is not the CPU.
model = TransformerPredictor(feat_dim1, feat_dim2, feat_dim3, d_model, num_heads, num_layers, dropout).to(device)

# The model ends in a sigmoid, so plain binary cross-entropy is used.
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Per-epoch mean losses, one entry per completed epoch.
train_loss = []
val_loss = []
num_epochs = 100
for epoch in range(num_epochs):
    # ---- training pass ----
    epoch_loss = 0.0
    model.train()

    for X1_batch, X2_batch, X3_batch, X4_batch, y_batch in train_loader:
        X1_batch = X1_batch.to(device)
        X2_batch = X2_batch.to(device)
        X3_batch = X3_batch.to(device)
        X4_batch = X4_batch.to(device)
        y_batch = y_batch.to(device)

        optimizer.zero_grad()
        outputs = model(X1_batch, X2_batch, X3_batch, X4_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

    # ---- validation pass (no gradients, dropout disabled) ----
    model.eval()
    epoch_val_loss = 0.0
    with torch.no_grad():
        for X1_val_batch, X2_val_batch, X3_val_batch, X4_val_batch, y_val_batch in val_loader:
            # Same device handling as the training batches.
            X1_val_batch = X1_val_batch.to(device)
            X2_val_batch = X2_val_batch.to(device)
            X3_val_batch = X3_val_batch.to(device)
            X4_val_batch = X4_val_batch.to(device)
            y_val_batch = y_val_batch.to(device)

            val_outputs = model(X1_val_batch, X2_val_batch, X3_val_batch, X4_val_batch)
            val_loss_batch = criterion(val_outputs, y_val_batch)
            epoch_val_loss += val_loss_batch.item()
    epoch_val_loss /= len(val_loader)
    val_loss.append(epoch_val_loss)

    # Average the accumulated batch losses over the number of batches.
    epoch_loss /= len(train_loader)
    train_loss.append(epoch_loss)

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.9f}, Val Loss: {epoch_val_loss:.9f}")

KeyboardInterrupt: 