In [1]:
import numpy as np
import torch
import torch.nn as nn
import json
from sklearn.model_selection import train_test_split
from dotabet.TransformerClassifier import TransformerClassifier
import matplotlib.pyplot as plt
from torch.utils.data import TensorDataset, DataLoader


In [11]:
def preprocess_and_split(data, test_size=0.2, holdout_size=0.1):
    X = np.array([d['radiant_team'] + d['dire_team'] for d in data])
    y = np.array([1 if d['radiant_win'] else 0 for d in data])
    
     # First split: separate out the holdout set
    X_temp, X_holdout, y_temp, y_holdout = train_test_split(X, y, test_size=holdout_size, random_state=42)
    
    adjusted_test_size = test_size / (1 - holdout_size)

    # Second split
    X_train, X_test, y_train, y_test = train_test_split(X_temp, y_temp, test_size=adjusted_test_size, random_state=42)
    
    return X_train, X_test, X_holdout, y_train, y_test, y_holdout


with open('pub_games.json', 'r') as file:
    data = json.load(file)

X_train, X_test, X_holdout, y_train, y_test, y_holdout = preprocess_and_split(data, test_size=0.2, holdout_size=0.1)

X_train_tensor, X_test_tensor, X_holdout_tensor = map(lambda x: torch.tensor(x, dtype=torch.long), (X_train, X_test, X_holdout))
y_train_tensor, y_test_tensor, y_holdout_tensor = map(lambda x: torch.tensor(x, dtype=torch.float32), (y_train, y_test, y_holdout))

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
teset_dataset = TensorDataset(X_test_tensor, y_test_tensor)
holdout_dataset = TensorDataset(X_holdout_tensor, y_holdout_tensor)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(teset_dataset, batch_size=32, shuffle=True)
holdout_loader = DataLoader(holdout_dataset, batch_size=32, shuffle=False)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(f'{len(train_dataset)=} {len(teset_dataset)=} {len(holdout_dataset)=}')

N, seq_len = X_train.shape

In [9]:

vocab_size = 140
emb_dim = 32
num_heads = 4
num_layers = 2
d_ff = 4*emb_dim
batch_size = 32
lr = 1e-3
num_epochs = 10


model = TransformerClassifier(vocab_size, emb_dim, num_heads, num_layers)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

def evaluate_model(model, data_loader, criterion):
    model.eval()  # Set the model to evaluation mode
    total_loss = 0.0
    with torch.no_grad():  # No gradient computation
        for inputs, labels in data_loader:
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            total_loss += loss.item() * inputs.size(0)
    return total_loss / len(data_loader.dataset)

def count_trainable_parameters(model):
    for p in model.parameters():
        
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

trainable_params = count_trainable_parameters(model)
print(f"Trainable Parameters: {trainable_params}")
print(f"Total Model Size: {trainable_params*4/(1024**2):.2f} MB")

Trainable Parameters: 279521
Total Model Size: 1.07 MB


In [12]:
for p in model.parameters():
    print(p.name)
    break

None


In [19]:
model

TransformerClassifier(
  (embedding): Embedding(140, 32)
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-1): 2 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=32, out_features=32, bias=True)
        )
        (linear1): Linear(in_features=32, out_features=2048, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=2048, out_features=32, bias=True)
        (norm1): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (fc): Linear(in_features=32, out_features=1, bias=True)
)

AttributeError: 'Parameter' object has no attribute 'nam'

In [None]:
%%time

train_losses = []
test_losses = []

for epoch in range(num_epochs):
    model.train()
    total_train_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_train_loss += loss.item() * inputs.size(0)
        
     # Calculate average losses
    avg_train_loss = total_train_loss / len(train_loader.dataset)
    avg_test_loss = evaluate_model(model, test_loader, criterion)

    # Append to lists
    train_losses.append(avg_train_loss)
    test_losses.append(avg_test_loss)
    
    print(f'Epoch {epoch+1}, {avg_train_loss=:.4f}, {avg_test_loss=:.4f}')



In [None]:
plt.figure(figsize=(10, 6))
plt.plot(train_losses, label='Train Loss')
plt.plot(test_losses, label='Test Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Learning Curve')
plt.legend()