TODO:
- Make a test set.
- Make more data.
- Look into what methods are good for using a transformer to predict an embedding. (we aren't doing softmax style)

In [1]:
%pip install pandas torch
import torch



Note: you may need to restart the kernel to use updated packages.


In [2]:
import os

data_path = os.path.join(os.getcwd(),"transformer_training_data", "ONE", "train_data.pt")
inputs, targets = torch.load(data_path)
print(f"inputs shape: {inputs.shape}")
print(f"targets shape: {targets.shape}")
print(f"Number of input sequences: {len(inputs)}")
print(f"Number of target vectors: {len(targets)}")

inputs shape: torch.Size([101, 23, 256])
targets shape: torch.Size([101, 256])
Number of input sequences: 101
Number of target vectors: 101


In [3]:
import torch.nn as nn

class BasicTransformer(nn.Module):
    def __init__(self, input_dim, embed_dim=256, seq_len=100, num_heads=4, num_layers=2):
        super().__init__()
        self.embedding = nn.Linear(input_dim, embed_dim)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=embed_dim, nhead=num_heads, batch_first=True
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.pool = nn.AdaptiveAvgPool1d(1)
        self.output = nn.Linear(embed_dim, embed_dim)  # output is a 256-dim embedding

    def forward(self, x):
        # x: (batch, seq_len, input_dim)
        x = self.embedding(x)
        x = self.transformer_encoder(x)
        # Pool over sequence dimension
        x = x.transpose(1, 2)  # (batch, embed_dim, seq_len)
        x = self.pool(x).squeeze(-1)  # (batch, embed_dim)
        x = self.output(x)
        return x

# Example usage:
# model = BasicTransformer(input_dim=inputs.shape[2])
# output = model(inputs[:, :100, :])  # limit sequence length to 100

In [4]:
# Training setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BasicTransformer(input_dim=inputs.shape[2]).to(device)
inputs_train = inputs[:, :100, :].to(device)  # limit sequence length to 100
targets_train = targets.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MSELoss()

num_epochs = 500
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(inputs_train)
    loss = criterion(outputs, targets_train)
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}")

Epoch 1/500, Loss: 0.3103
Epoch 2/500, Loss: 0.4006
Epoch 3/500, Loss: 0.1473
Epoch 4/500, Loss: 0.0683
Epoch 5/500, Loss: 0.0423
Epoch 6/500, Loss: 0.0332
Epoch 7/500, Loss: 0.0332
Epoch 8/500, Loss: 0.0353
Epoch 9/500, Loss: 0.0348
Epoch 10/500, Loss: 0.0312
Epoch 11/500, Loss: 0.0264
Epoch 12/500, Loss: 0.0219
Epoch 13/500, Loss: 0.0184
Epoch 14/500, Loss: 0.0162
Epoch 15/500, Loss: 0.0147
Epoch 16/500, Loss: 0.0141
Epoch 17/500, Loss: 0.0138
Epoch 18/500, Loss: 0.0137
Epoch 19/500, Loss: 0.0138
Epoch 20/500, Loss: 0.0138
Epoch 21/500, Loss: 0.0137
Epoch 22/500, Loss: 0.0131
Epoch 23/500, Loss: 0.0123
Epoch 24/500, Loss: 0.0112
Epoch 25/500, Loss: 0.0104
Epoch 26/500, Loss: 0.0100
Epoch 27/500, Loss: 0.0099
Epoch 28/500, Loss: 0.0100
Epoch 29/500, Loss: 0.0102
Epoch 30/500, Loss: 0.0102
Epoch 31/500, Loss: 0.0102
Epoch 32/500, Loss: 0.0100
Epoch 33/500, Loss: 0.0096
Epoch 34/500, Loss: 0.0094
Epoch 35/500, Loss: 0.0091
Epoch 36/500, Loss: 0.0090
Epoch 37/500, Loss: 0.0090
Epoch 38/5

In [5]:
model.eval()
with torch.no_grad():
    single_input = inputs[0, :100, :].unsqueeze(0).to(device)  # shape: (1, 100, input_dim)
    output = model(single_input)
print("Model output for one datum:", output.cpu().numpy())
print("Output size:", output.size())

Model output for one datum: [[ 1.44911520e-02 -1.84102997e-01  5.74414432e-02 -3.20142359e-02
   2.61188000e-02  3.46162580e-02  2.96733826e-02  5.29220849e-02
   1.43081948e-01 -3.20722871e-02  1.66428193e-01 -5.60479611e-02
  -2.62854341e-02  5.04216813e-02  1.29039630e-01 -3.56406718e-03
  -2.34667771e-02 -1.00048847e-01 -1.09580860e-01 -1.15351416e-01
  -3.05411182e-02 -2.61618137e-01  8.49955678e-02 -5.30369245e-02
   5.85940480e-03 -8.46836120e-02 -7.85928816e-02  1.34819940e-01
   2.49203712e-01  5.35501428e-02 -2.54595205e-02  1.33899093e-01
   1.37564927e-01  2.29550153e-02 -1.12142287e-01  1.03828117e-01
   8.26263428e-03  6.64909780e-02 -2.67749988e-02  8.84292424e-02
   1.47625655e-01  2.58991010e-02  1.31726772e-01  5.48251495e-02
  -9.43736359e-03  7.53351599e-02  7.45906681e-03  1.90244727e-02
   1.06093790e-02 -4.46383730e-02 -1.33795843e-01  8.92596692e-02
   5.15896082e-02  3.05640437e-02  1.00529775e-01  1.87410697e-01
   5.01280092e-02  1.09381601e-03  1.61742419e-0

Convert that output back into a card

In [7]:
import pandas as pd
import torch

# Load embeddings CSV
embeddings_path = 'C:/Users/shwes/Projects/ML/mtg deckbuilding/New/MTG-card2vec/embeddings/ONE/ONE_embeddings.csv'
embeddings_df = pd.read_csv(embeddings_path, index_col=0)

# Create card_embeddings dict: {card_name: torch.tensor(embedding_vector)}
card_embeddings = {
    card: torch.tensor(embeddings_df.loc[card].values, dtype=torch.float32)
    for card in embeddings_df.index
}
# print(len(card_embeddings))
# print(len(next(iter(card_embeddings.values()))))
print(sorted(list(card_embeddings.keys())))

['Adaptive~Sporesinger', 'Against~All~Odds', 'All~Will~Be~One', 'Ambulatory~Edifice', 'Annex~Sentry', 'Annihilating~Glare', 'Anoint~with~Affliction', 'Apostle~of~Invasion', 'Archfiend~of~the~Dross', 'Argentum~Masticore', 'Armored~Scrapgorger', "Aspirant's~Ascent", 'Atmosphere~Surgeon', "Atraxa's~Skitterfang", 'Atraxa,~Grand~Unifier', 'Awaken~the~Sleeper', 'Axiom~Engraver', 'Barbed~Batterfist', 'Basilica~Shepherd', 'Basilica~Skullbomb', 'Bilious~Skulldweller', 'Blackcleave~Cliffs', "Black~Sun's~Twilight", 'Bladed~Ambassador', 'Bladegraft~Aspirant', 'Bladehold~War-Whip', 'Blade~of~Shared~Souls', 'Blazing~Crescendo', 'Blightbelly~Rat', 'Bloated~Contaminator', "Blue~Sun's~Twilight", 'Bonepicker~Skirge', 'Branchblight~Stalker', 'Bring~the~Ending', 'Cacophony~Scamp', 'Cankerbloom', 'Capricious~Hellraiser', 'Carnivorous~Canopy', 'Cephalopod~Sentry', 'Charforger', 'Charge~of~the~Mites', 'Chimney~Rabble', 'Chittering~Skitterling', 'Chrome~Prowler', 'Churning~Reservoir', 'Cinderslash~Ravager', '

In [10]:
import torch

# Find the nearest card embedding to the model output
output_vec = output.squeeze().cpu()  # shape: (256,)
embedding_matrix = torch.tensor(embeddings_df.values, dtype=torch.float32)  # shape: (num_cards, 256)

# Compute cosine similarity
cos_sim = torch.nn.functional.cosine_similarity(output_vec.unsqueeze(0), embedding_matrix)
nearest_idx = torch.argmax(cos_sim).item()
nearest_card = embeddings_df.index[nearest_idx]

print("Nearest card:", nearest_card)
print("Cosine similarity:", cos_sim[nearest_idx].item())

Nearest card: Armored~Scrapgorger
Cosine similarity: 0.9148054122924805
