# Setup

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

In [8]:
# Choose the compute backend in order of preference: CUDA, then MPS, then CPU.
if torch.cuda.is_available():
    device = 'cuda'
elif torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'

# Make Game Dataset

In [2]:
class GamesDataset(Dataset):
    """Dataset of standardized numeric features read from a CSV file.

    Only the numeric columns of the CSV are kept. Missing cells (NaN) are
    replaced by the mean of the observed values in their column, then every
    column is standardized to zero mean and unit standard deviation. Without
    the imputation a single missing cell would make that column's mean and
    std NaN, and with them every standardized value in the column.

    Args:
        csv_file: Location of the CSV; passed straight to ``pandas.read_csv``.
        transform: Optional callable applied to each sample in
            ``__getitem__``.

    Attributes set by ``__init__``:
        data: The full DataFrame as loaded from ``csv_file``.
        features: ``float32`` array of standardized features, one row per
            CSV row and one column per numeric CSV column.
        mean, std: Per-column statistics used for the standardization
            (``std`` already includes the small epsilon).
    """

    def __init__(self, csv_file, transform=None):
        self.data = pd.read_csv(csv_file)
        # Select only numerical columns (you might need to adjust based on your CSV)
        features = self.data.select_dtypes(include=[np.number]).values.astype(np.float32)
        self.transform = transform

        # Impute missing cells with the mean of the observed values in the
        # same column. Data without NaNs skips this branch entirely, so its
        # results are unchanged.
        missing = np.isnan(features)
        if missing.any():
            zeroed = features.copy()
            zeroed[missing] = 0.0
            observed = (~missing).sum(axis=0)
            # np.maximum guards columns with no observed value at all; they
            # are imputed with 0 instead of dividing by zero.
            col_means = (zeroed.sum(axis=0) / np.maximum(observed, 1)).astype(np.float32)
            features = np.where(missing, col_means, features)

        # Normalize features: standardize to mean=0 and std=1
        self.mean = features.mean(axis=0)
        self.std = features.std(axis=0) + 1e-8  # add a small value to avoid division by zero
        self.features = (features - self.mean) / self.std

    def __len__(self):
        """Return the number of rows (samples) in the dataset."""
        return len(self.features)

    def __getitem__(self, idx):
        """Return the standardized feature row at ``idx``, transformed if a transform is set."""
        sample = self.features[idx]
        if self.transform:
            sample = self.transform(sample)
        return sample

In [6]:
# Mini-batch size used when iterating over the games.
batch_size = 32
# CSV holding the games data, relative to the notebook's working directory.
csv_file = "../bgg_data/overall_games_mini.csv"

# Load and standardize the numeric columns, then serve them in shuffled batches.
dataset = GamesDataset(csv_file=csv_file)
dataloader = DataLoader(dataset=dataset, shuffle=True, batch_size=batch_size)

# Make Autoencoder

In [7]:
class Autoencoder(nn.Module):
    """Fully connected autoencoder with one hidden layer on each side of the bottleneck.

    Args:
        input_dim: Number of features in each input sample.
        latent_dim: Size of the bottleneck embedding.
        hidden_dim: Width of the hidden layer used by both the encoder and
            the decoder. Defaults to 128, the value that used to be
            hard-coded.
    """

    def __init__(self, input_dim, latent_dim=32, hidden_dim=128):
        super().__init__()
        # encoder compresses input to latent_dim in bottleneck layer
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, latent_dim),
        )
        # decoder reconstructs the input from embeddings in the bottleneck layer
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim),
        )

    def forward(self, x):
        """Encode ``x`` and decode it again.

        Returns:
            A ``(reconstructed, latent)`` tuple: the decoder output and the
            bottleneck embedding it was computed from.
        """
        latent = self.encoder(x)
        reconstructed = self.decoder(latent)
        return reconstructed, latent

# Define Training Procedure, Loss Function, and Hyperparameters

In [9]:
def train_autoencoder(model, dataloader, criterion, optimizer, num_epochs=20, device='cpu'):
    """Train ``model`` to reconstruct its inputs and return it.

    Args:
        model: Module whose forward pass returns ``(reconstructed, latent)``.
        dataloader: Iterable of input batches; ``dataloader.dataset`` is used
            for the sample count when averaging the loss.
        criterion: Callable ``criterion(reconstructed, batch)`` returning the
            loss for one batch.
        optimizer: Optimizer stepped once per batch.
        num_epochs: Number of passes over ``dataloader``.
        device: Device the model and every batch are moved to.

    Returns:
        The same ``model`` object, after training.
    """
    model.to(device)

    for epoch_number in range(1, num_epochs + 1):
        model.train()
        running_loss = 0.0

        for inputs in dataloader:
            inputs = inputs.to(device)

            optimizer.zero_grad()
            outputs, _ = model(inputs)
            batch_loss = criterion(outputs, inputs)
            batch_loss.backward()
            optimizer.step()

            # Weight each batch loss by its size so the epoch figure is a
            # per-sample average even when the last batch is smaller.
            running_loss += batch_loss.item() * inputs.size(0)

        epoch_loss = running_loss / len(dataloader.dataset)
        print(f"Epoch {epoch_number}/{num_epochs}, Loss: {epoch_loss:.4f}")

    return model

In [None]:

def composite_loss(reconstructed, target, continuous_idx, binary_idx, bce_weight=1.0):
    """Combine MSE on continuous columns with BCE-with-logits on binary columns.

    Args:
        reconstructed: Decoder output, indexed as ``[:, columns]``. The binary
            columns are treated as raw scores (logits).
        target: Ground-truth values with the same layout as ``reconstructed``.
        continuous_idx: Column indices scored with mean squared error.
        binary_idx: Column indices scored with binary cross-entropy.
        bce_weight: Multiplier applied to the BCE term before summing.

    Returns:
        Scalar tensor ``mse + bce_weight * bce``. A group with no columns
        contributes 0 rather than the NaN a mean over zero elements gives.

    NOTE(review): BCE targets are expected to lie in [0, 1], but GamesDataset
    standardizes every numeric column, so binary flags coming from it are no
    longer 0/1. Confirm the binary targets passed here are unstandardized.
    """
    continuous_pred = reconstructed[:, continuous_idx]
    binary_logits = reconstructed[:, binary_idx]

    # Stand-in for an empty group, created on the same device/dtype as the input.
    zero = reconstructed.new_zeros(())

    # MSE for continuous features
    if continuous_pred.numel() > 0:
        mse_loss = F.mse_loss(continuous_pred, target[:, continuous_idx])
    else:
        mse_loss = zero

    # BCE for binary flags
    # Use BCEWithLogitsLoss if your model's decoder outputs raw scores (logits)
    if binary_logits.numel() > 0:
        bce_loss = F.binary_cross_entropy_with_logits(binary_logits, target[:, binary_idx])
    else:
        bce_loss = zero

    # Weighted sum of losses
    total_loss = mse_loss + bce_weight * bce_loss
    return total_loss

In [None]:
# Training hyperparameters.
epochs = 20
learning_rate = .001
latent_dim = 64

# One input unit per feature column held by the dataset.
input_dim = dataset.features.shape[1]

# The model must exist before the optimizer can be given its parameters;
# previously `model` was referenced here without ever being created.
model = Autoencoder(input_dim, latent_dim=latent_dim)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
