In [1]:
import torch
import dataset_cleaning
import polars as pl

In [2]:
# Unpack the two values returned by the project's dataset_cleaning module.
# `df` is used below as a polars DataFrame and `len(CHAMP_TO_IDX)` is used as
# the champion vocabulary size.
# NOTE(review): presumably CHAMP_TO_IDX maps champion name -> integer index;
# confirm against dataset_cleaning.generate_dataset.
df, CHAMP_TO_IDX = dataset_cleaning.generate_dataset()

In [3]:
def standardize_matchup(blue, red, keep_teams=False):
    """Build a canonical string identifier for a match from its two line-ups.

    The identifier does not depend on which side is called blue or red, nor
    on the order of champions inside a team.

    Args:
        blue: Iterable of champion identifiers for the blue team.
        red: Iterable of champion identifiers for the red team.
        keep_teams: When False (default), all champions of both teams are
            pooled, sorted and joined with '-'. Two matches with the same
            champions split differently between the teams then share an id.
            When True, each team is sorted and joined on its own and the two
            team strings are emitted in lexicographic order separated by '/',
            so the team composition is preserved.

    Returns:
        The identifier as a string.
    """
    if keep_teams:
        blue_str = '-'.join(map(str, sorted(blue)))
        red_str = '-'.join(map(str, sorted(red)))
        # Order the two team strings so swapping sides yields the same id.
        first, second = sorted((blue_str, red_str))
        return f"{first}/{second}"

    # Unpack instead of `blue + red`: `+` fails for mixed list/tuple inputs
    # and means element-wise addition for array-like containers.
    return '-'.join(map(str, sorted([*blue, *red])))


In [4]:
# Attach a `matchup_id` column: both champion lists are packed into a struct
# so the Python callback sees them together, one row at a time.
df = df.with_columns(
    (
        pl.struct(["Blue_champions", "Red_champions"])
        .apply(
            lambda row: standardize_matchup(
                row["Blue_champions"], row["Red_champions"]
            )
        )
        .alias("matchup_id")
    )
)

In [5]:
# Distinct matchup ids vs. total row count; a gap between the two numbers
# means some matchup ids occur more than once.
df.n_unique('matchup_id'), df.shape[0]

(34871, 34896)

In [35]:
from torch.utils.data import Dataset

class LeagueOfLegendsDataset(Dataset):
    """Dataset backed by three pre-saved objects loaded with ``torch.load``.

    For a split ``type`` the files ``blue_champions_<type>.pt``,
    ``red_champions_<type>.pt`` and ``result_<type>.pt`` are read from
    ``root``. Item ``idx`` is the triple found at position ``idx`` in each of
    the three loaded objects.

    Args:
        type: Split to load, either "train" or "test". The name shadows the
            builtin but is kept because callers pass it by keyword.
        root: Directory holding the ``.pt`` files. Defaults to "dataset".

    Raises:
        AssertionError: If ``type`` is not a known split, or the three loaded
            objects do not have the same length.
    """

    def __init__(self, type="train", root="dataset"):
        # Local import keeps this class self-contained within the notebook cell.
        import os

        assert type in ["train", "test"], f"unknown split: {type!r}"

        def _load(prefix):
            # os.path.join picks the right separator; the previous hard-coded
            # "dataset\\..." only resolved on Windows.
            return torch.load(f=os.path.join(root, f"{prefix}_{type}.pt"))

        self.blue_champions = _load("blue_champions")
        self.red_champions = _load("red_champions")
        self.result = _load("result")

        # Validate once at load time rather than on every __len__ call.
        assert len(self.blue_champions) == len(self.red_champions) == len(self.result), \
            "blue_champions, red_champions and result must have the same length"
        print("Dataset loaded")

    def __len__(self):
        """Return the number of samples in the split."""
        return len(self.blue_champions)

    def __getitem__(self, idx):
        """Return ``(blue_champions[idx], red_champions[idx], result[idx])``."""
        return self.blue_champions[idx], self.red_champions[idx], self.result[idx]


In [36]:
from torch.utils.data import DataLoader

# Shared batch size for both splits.
BATCH_SIZE = 12

dataset_train = LeagueOfLegendsDataset(type="train")
# Training samples are reshuffled every epoch.
data_loader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)

dataset_test = LeagueOfLegendsDataset(type="test")
# Evaluation gains nothing from shuffling; a fixed order keeps every test pass
# repeatable and leaves the RNG stream to the training loader.
data_loader_test = DataLoader(dataset_test, batch_size=BATCH_SIZE, shuffle=False)

Dataset loaded
Dataset loaded


In [17]:
# Number of samples in the train and test splits.
len(dataset_train), len(dataset_test)

(7137, 1784)

In [18]:
import torch.nn as nn
import torch.nn.functional as F

In [19]:
class TeamPredictor(nn.Module):
    """Score two teams from the pairwise dot products of champion embeddings.

    Every champion index owns one learned vector. A team's score is the sum
    of dot products over all ordered pairs of *different* slots in that team
    (the full Gram matrix minus its diagonal).

    Args:
        n_champions: Number of rows in the embedding table.
        n_dim: Size of each embedding vector.
    """

    def __init__(self, n_champions, n_dim):
        super().__init__()

        # One shared table serves both teams.
        self.synergy_embeddings = nn.Embedding(n_champions, n_dim)

    def _team_score(self, team):
        """Return one scalar per batch row for a (batch, team_size) index tensor."""
        vectors = self.synergy_embeddings(team)  # (batch, team_size, n_dim)
        gram = torch.matmul(vectors, vectors.transpose(1, 2))  # (batch, team_size, team_size)

        # Remove each slot's dot product with itself.
        self_terms = torch.diagonal(gram, dim1=1, dim2=2).sum(dim=1)
        return gram.sum(dim=(1, 2)) - self_terms

    def forward(self, blue_team, red_team):
        """Return a (batch, 2) tensor: column 0 is the red score, column 1 the blue score."""
        blue_score = self._team_score(blue_team)
        red_score = self._team_score(red_team)

        return torch.stack((red_score, blue_score), dim=1)


In [None]:
import torch.optim as optim
from torch.nn import CrossEntropyLoss
from torch.optim.lr_scheduler import ReduceLROnPlateau  # Import for the scheduler

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = TeamPredictor(n_champions=len(CHAMP_TO_IDX), n_dim=125).to(device)

optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.01)  # L2 regularization

# Cut the learning rate by 10x once the monitored loss has not improved for 10 epochs.
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10, verbose=True)

# NOTE(review): TeamPredictor returns [red_score, blue_score], so class 1 means
# "blue" here — confirm this matches how the `result` labels are encoded.
criterion = CrossEntropyLoss()

n_epochs = 100

# Lowest test loss seen so far across epochs.
best_test_loss = float('inf')

for epoch in range(n_epochs):
    # ---- training pass ----
    model.train()
    running_loss = 0.0
    train_correct = 0  # correctly classified training samples this epoch
    train_samples = 0  # training samples seen this epoch
    for blue_champions, red_champions, labels in data_loader_train:
        blue_champions = blue_champions.to(device)
        red_champions = red_champions.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        outputs = model(blue_champions, red_champions)
        # argmax is not differentiable, so no detach / `.data` is needed.
        predicted = outputs.argmax(dim=1)
        train_samples += labels.size(0)
        train_correct += (predicted == labels).sum().item()

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Mean of the per-batch losses.
    avg_train_loss = running_loss / len(data_loader_train)
    train_accuracy = 100 * train_correct / train_samples
    print(f"Epoch: {epoch + 1}\nTrain Loss: {avg_train_loss:.2f}, Train Accuracy: {train_accuracy:.2f} %")

    # ---- evaluation pass ----
    model.eval()
    total_correct = 0
    total_samples = 0
    running_test_loss = 0.0
    with torch.no_grad():
        for blue_champions, red_champions, labels in data_loader_test:
            blue_champions = blue_champions.to(device)
            red_champions = red_champions.to(device)
            labels = labels.to(device)

            outputs = model(blue_champions, red_champions)
            predicted = outputs.argmax(dim=1)
            total_samples += labels.size(0)
            total_correct += (predicted == labels).sum().item()
            running_test_loss += criterion(outputs, labels).item()

    avg_test_loss = running_test_loss / len(data_loader_test)
    test_accuracy = 100 * total_correct / total_samples
    # These are test-set metrics; they were previously printed under a "Train" label.
    print(f"Test Loss: {avg_test_loss:.2f}, Test Accuracy: {test_accuracy:.2f} %")

    best_test_loss = min(best_test_loss, avg_test_loss)

    # The scheduler monitors the test loss.
    scheduler.step(avg_test_loss)


In [41]:
import torch.nn as nn
import torch

class TeamPredictor_v2(nn.Module):
    """MLP over the concatenated champion embeddings of both teams.

    Each champion index is embedded, the embeddings of a team are flattened,
    blue and red are concatenated (blue first) and passed through a stack of
    ``Linear -> ReLU -> Dropout`` blocks followed by a final ``Linear`` that
    emits one raw score (logit) per sample.

    Args:
        n_champions: Number of rows in the embedding table.
        n_dim: Size of each embedding vector.
        hidden_layers: Widths of the hidden layers, in order. May be empty,
            in which case the model is a single linear layer on the embeddings.
        dropout: Drop probability applied after every hidden activation.
        team_size: Number of champions per team; fixes the MLP input width
            to ``2 * team_size * n_dim``.
    """

    def __init__(self, n_champions, n_dim, hidden_layers=(256, 128, 64), dropout=0.7, team_size=5):
        super().__init__()

        # One embedding table shared by both teams.
        self.embeddings = nn.Embedding(n_champions, n_dim)
        # A single Dropout module reused after every hidden activation.
        self.dropout = nn.Dropout(dropout)

        layers = []
        input_dim = 2 * team_size * n_dim  # both teams, flattened

        for hidden_dim in hidden_layers:
            layers.append(nn.Linear(input_dim, hidden_dim))
            layers.append(nn.ReLU())
            layers.append(self.dropout)
            input_dim = hidden_dim

        # `input_dim` is the width of the last layer built so far, so this also
        # works when `hidden_layers` is empty (indexing [-1] used to raise).
        layers.append(nn.Linear(input_dim, 1))

        self.layers = nn.Sequential(*layers)

    def forward(self, blue_team, red_team):
        """Return a (batch, 1) tensor of logits for (batch, team_size) index tensors."""
        blue_flat = self.embeddings(blue_team).flatten(start_dim=1)
        red_flat = self.embeddings(red_team).flatten(start_dim=1)

        # (batch, 2 * team_size * n_dim), blue features first.
        features = torch.cat((blue_flat, red_flat), dim=1)

        return self.layers(features)


In [42]:
import torch.optim as optim
from torch.nn import BCEWithLogitsLoss
from torch.optim.lr_scheduler import ReduceLROnPlateau, CosineAnnealingWarmRestarts
from tqdm import tqdm

# This run is pinned to the CPU.
device = torch.device('cpu')

model = TeamPredictor_v2(n_champions=len(CHAMP_TO_IDX), n_dim=64).to(device)

# No weight decay is configured here.
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Cut the learning rate by 10x once the monitored loss has not improved for 10 epochs.
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10, verbose=True)

# Takes raw logits; the sigmoid is applied inside the loss.
criterion = BCEWithLogitsLoss()

n_epochs = 100

for epoch in range(n_epochs):
    # ---- training pass ----
    model.train()
    running_loss = 0.0
    train_correct = 0  # correctly classified training samples this epoch
    train_samples = 0  # training samples seen this epoch

    for blue_champions, red_champions, labels in tqdm(data_loader_train):
        blue_champions = blue_champions.to(device)
        red_champions = red_champions.to(device)
        # assumes labels is 1-D with one entry per sample — TODO confirm
        targets = labels.to(device).float()

        optimizer.zero_grad()

        # Drop only the trailing size-1 dim. A bare .squeeze() would also drop
        # the batch dim for a batch of one and then mismatch the target shape.
        logits = model(blue_champions, red_champions).squeeze(1)
        predicted = (torch.sigmoid(logits) > 0.5).float()  # threshold at 0.5

        train_samples += targets.size(0)
        train_correct += (predicted == targets).sum().item()

        loss = criterion(logits, targets)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Mean of the per-batch losses.
    avg_train_loss = running_loss / len(data_loader_train)
    train_accuracy = 100 * train_correct / train_samples
    print(f"Epoch: {epoch + 1}\nTrain Loss: {avg_train_loss:.2f}, Train Accuracy: {train_accuracy:.2f} %")

    # ---- evaluation pass ----
    model.eval()
    total_correct = 0
    total_samples = 0
    running_test_loss = 0.0

    with torch.no_grad():
        for blue_champions, red_champions, labels in data_loader_test:
            blue_champions = blue_champions.to(device)
            red_champions = red_champions.to(device)
            targets = labels.to(device).float()

            logits = model(blue_champions, red_champions).squeeze(1)
            predicted = (torch.sigmoid(logits) > 0.5).float()  # threshold at 0.5

            total_samples += targets.size(0)
            total_correct += (predicted == targets).sum().item()

            running_test_loss += criterion(logits, targets).item()

    avg_test_loss = running_test_loss / len(data_loader_test)
    test_accuracy = 100 * total_correct / total_samples
    print(f"Test Loss: {avg_test_loss:.2f}, Test Accuracy: {test_accuracy:.2f} %")

    # The scheduler monitors the test loss.
    scheduler.step(avg_test_loss)


  0%|          | 0/595 [00:00<?, ?it/s]

100%|██████████| 595/595 [00:02<00:00, 237.07it/s]


Epoch: 1
Train Loss: 0.70, Train Accuracy: 51.25 %
Test Loss: 0.69, Test Accuracy: 53.03 %


100%|██████████| 595/595 [00:02<00:00, 288.02it/s]


Epoch: 2
Train Loss: 0.69, Train Accuracy: 52.15 %
Test Loss: 0.69, Test Accuracy: 52.97 %


100%|██████████| 595/595 [00:02<00:00, 294.89it/s]


Epoch: 3
Train Loss: 0.69, Train Accuracy: 53.45 %
Test Loss: 0.69, Test Accuracy: 53.53 %


100%|██████████| 595/595 [00:02<00:00, 297.37it/s]


Epoch: 4
Train Loss: 0.69, Train Accuracy: 52.59 %
Test Loss: 0.69, Test Accuracy: 56.39 %


100%|██████████| 595/595 [00:02<00:00, 295.54it/s]


Epoch: 5
Train Loss: 0.69, Train Accuracy: 53.54 %
Test Loss: 0.69, Test Accuracy: 53.48 %


100%|██████████| 595/595 [00:02<00:00, 288.78it/s]


Epoch: 6
Train Loss: 0.69, Train Accuracy: 54.56 %
Test Loss: 0.69, Test Accuracy: 53.70 %


100%|██████████| 595/595 [00:02<00:00, 295.25it/s]


Epoch: 7
Train Loss: 0.69, Train Accuracy: 54.98 %
Test Loss: 0.69, Test Accuracy: 55.04 %


100%|██████████| 595/595 [00:02<00:00, 286.37it/s]


Epoch: 8
Train Loss: 0.69, Train Accuracy: 56.07 %
Test Loss: 0.69, Test Accuracy: 54.88 %


100%|██████████| 595/595 [00:02<00:00, 294.16it/s]


Epoch: 9
Train Loss: 0.68, Train Accuracy: 55.61 %
Test Loss: 0.69, Test Accuracy: 54.37 %


100%|██████████| 595/595 [00:02<00:00, 289.20it/s]


Epoch: 10
Train Loss: 0.68, Train Accuracy: 55.77 %
Test Loss: 0.69, Test Accuracy: 54.26 %


100%|██████████| 595/595 [00:02<00:00, 289.13it/s]


Epoch: 11
Train Loss: 0.68, Train Accuracy: 55.60 %
Test Loss: 0.69, Test Accuracy: 54.43 %


100%|██████████| 595/595 [00:02<00:00, 292.23it/s]


Epoch: 12
Train Loss: 0.68, Train Accuracy: 56.80 %
Test Loss: 0.69, Test Accuracy: 54.93 %


100%|██████████| 595/595 [00:02<00:00, 289.41it/s]


Epoch: 13
Train Loss: 0.68, Train Accuracy: 57.17 %
Test Loss: 0.69, Test Accuracy: 55.55 %


100%|██████████| 595/595 [00:02<00:00, 294.09it/s]


Epoch: 14
Train Loss: 0.68, Train Accuracy: 58.36 %
Test Loss: 0.69, Test Accuracy: 55.94 %


100%|██████████| 595/595 [00:02<00:00, 292.51it/s]


Epoch: 15
Train Loss: 0.67, Train Accuracy: 59.38 %
Test Loss: 0.69, Test Accuracy: 54.37 %


 64%|██████▍   | 382/595 [00:01<00:00, 244.19it/s]


KeyboardInterrupt: 