In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam
from torchsummary import summary
from torch.cuda.amp import autocast, GradScaler

import numpy as np
import gzip
import pickle
import os
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import gc
import time
import random



import sys
sys.path.append('..')
# from slp_package.slp_functions import create_merged_game_data_df
from slp_package.input_dataset import InputDataSet
import slp_package.pytorch_functions as slp_pytorch_functions

def set_seed(seed=42):
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if you are using CUDA
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)

In [2]:
source_data = ['ranked','public','mango']

general_features = {
    'stage_name': ['FOUNTAIN_OF_DREAMS','FINAL_DESTINATION','BATTLEFIELD','YOSHIS_STORY','POKEMON_STADIUM','DREAMLAND'],
    'num_players': [2],
    'conclusive': [True]
}
player_features = {
    # 'netplay_code': ['MANG#0'],
    # 'character_name': ['FALCO'],
    'character_name': ['FOX', 'FALCO', 'MARTH', 'CAPTAIN_FALCON', 'SHEIK'],
    # 'character_name': ['FOX', 'CAPTAIN_FALCON', 'SHEIK', 'FALCO', 'GAME_AND_WATCH', 'MARTH', 'LINK', 'ICE_CLIMBERS', 'SAMUS', 'GANONDORF', 'BOWSER', 'MEWTWO', 'YOSHI', 'PIKACHU', 'JIGGLYPUFF', 'NESS', 'DR_MARIO', 'MARIO', 'PEACH', 'ROY', 'LUIGI', 'YOUNG_LINK', 'DONKEY_KONG', 'PICHU', 'KIRBY'],
    # 'character_name': ['FOX', 'CAPTAIN_FALCON', 'SHEIK', 'FALCO', 'GAME_AND_WATCH', 'MARTH', 'LINK', 'ICE_CLIMBERS', 'SAMUS', 'GANONDORF', 'BOWSER', 'MEWTWO', 'YOSHI', 'PIKACHU', 'JIGGLYPUFF', 'NESS', 'DR_MARIO', 'PEACH', 'LUIGI', 'DONKEY_KONG'],
    # 'type_name': ['HUMAN']
    
}
opposing_player_features = {
    # 'character_name': ['MARTH'],
    # 'netplay_code': ['KOD#0', 'ZAIN#0']
    'type_name': ['HUMAN']
}
label_info = {
    'source': ['player'], # Can be 'general', 'player
    # 'feature': ['netplay_code']
    'feature': ['character_name']
}

In [3]:
dataset = InputDataSet(source_data, general_features, player_features, opposing_player_features, label_info)

print(dataset.dataset['labels'].value_counts())

  processed_df = pd.concat([player_1_df, player_2_df], ignore_index=True)


FOX               103744
FALCO              90727
MARTH              53731
CAPTAIN_FALCON     38024
SHEIK              27623
Name: labels, dtype: int64


In [4]:
labels_order =  dataset.number_of_segments_per_game(6,1000)
print(labels_order)
labels_order = labels_order['Label'].values


            Label   Count   Shift
0             FOX  103744  970124
1           FALCO   90725  840304
2           MARTH   53731  532644
3  CAPTAIN_FALCON   38024  350581
4           SHEIK   27623  295614


In [5]:
train_df, test_df  = dataset.train_test_split_dataframes(test_ratio = .20, val = False)

In [6]:
train_df.head()

Unnamed: 0,player_inputs_np_sub_path,labels,encoded_labels,segment_start_index,segment_index,segment_length
0,mango\FALCO\b3c63d9d-efb7-4544-bdd6-9da7e221f1...,FALCO,1,0,0,64
1,mango\FALCO\24b523a3-18da-4ba2-a986-d0c99b6228...,FALCO,1,0,0,64
2,mango\FALCO\a24ef3f0-ab56-47e6-af18-5905aa43af...,FALCO,1,0,0,64
3,public\FALCO\b0925bbb-c009-49db-80e6-6985d4756...,FALCO,1,0,0,64
4,ranked\FALCO\757b0c1d-b656-4d85-9fcb-296b2e9fd...,FALCO,1,0,0,64


In [7]:
class Autoencoder2(nn.Module):
    def __init__(self):
        super(Autoencoder2, self).__init__()
        dropout = .2
        self.encoder = nn.Sequential(
            nn.LazyConv1d(9*2, kernel_size=9, stride=1, padding=4),
            nn.ReLU(),
            nn.LazyConv1d(9*2, kernel_size=9, stride=1, padding=4),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size = 4),
            nn.Dropout(dropout),
            
            nn.LazyConv1d(9*4, kernel_size = 9, stride = 1, padding = 4),
            nn.ReLU(),
            nn.LazyConv1d(9*4, kernel_size = 9, stride = 1, padding = 4),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size = 4),
            nn.Dropout(dropout),
            
            nn.LazyConv1d(9*8, kernel_size = 9, stride = 1, padding = 4),
            nn.ReLU(),
            nn.LazyConv1d(9*8, kernel_size = 9, stride = 1, padding = 4),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size = 4),
            nn.Dropout(dropout),
            
            nn.LazyConv1d(9*16, kernel_size = 9, stride = 1, padding = 4),
            nn.ReLU(),
            nn.LazyConv1d(9*16, kernel_size = 9, stride = 1, padding = 4),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size = 4),
            nn.Dropout(dropout),
            
            # nn.LazyConv1d(16, kernel_size = 9, stride = 1, padding = 4),
            # nn.ReLU(),
            # nn.LazyConv1d(16, kernel_size = 9, stride = 1, padding = 4),
            # nn.ReLU(),
            # nn.MaxPool1d(kernel_size = 2),
            # nn.Dropout(dropout),
            
        )
        self.decoder = nn.Sequential(
            # Upsampling and Conv1d to gradually increase dimensions back to original
            
            nn.ConvTranspose1d(9*16, 9*16, kernel_size=9, stride=4, padding=3,output_padding = 1),
            nn.ReLU(),
            nn.Conv1d(9*16, 9*16, kernel_size=9, stride=1, padding=4),
            nn.ReLU(),
            nn.Dropout(dropout),
            
            nn.ConvTranspose1d(9*16,9*8, kernel_size=9, stride=4, padding=3,output_padding = 1),
            nn.ReLU(),
            nn.Conv1d(9*8, 9*8, kernel_size=9, stride=1, padding=4),
            nn.ReLU(),
            nn.Dropout(dropout),

            nn.ConvTranspose1d(9*8, 9*4, kernel_size=9, stride=4, padding=3,output_padding = 1),
            nn.ReLU(),
            nn.Conv1d(9*4, 9*4, kernel_size=9, stride=1, padding=4),
            nn.ReLU(),
            nn.Dropout(dropout),

            nn.ConvTranspose1d(9*4, 9*2, kernel_size=9, stride=4, padding=3,output_padding = 1),
            nn.ReLU(),
            nn.Conv1d(9*2, 9*2, kernel_size=9, stride=1, padding=4),
            nn.ReLU(),
            nn.Dropout(dropout),
            
            nn.Conv1d(9*2, 9, kernel_size=9, stride=1, padding=4)
        )
        
    def forward(self, x):
        """Defines the forward pass of the model."""
        x = self.encoder(x)
        x = self.decoder(x)
        return x

In [8]:
class TrainingDataset(Dataset):
    """
    Custom dataset for loading game segments from compressed numpy files.
    """
    def __init__(self, df, transform=None):
        self.file_paths = df['player_inputs_np_sub_path'].to_numpy()
        self.encoded_labels = df['encoded_labels'].to_numpy()
        self.segment_start_index = df['segment_start_index'].to_numpy()
        # self.segment_index = df['segment_index'].to_numpy()
        self.segment_length = df['segment_length'].to_numpy()
        self.transform = transform

    def __len__(self):
        """Returns the total number of samples in the dataset."""
        return len(self.file_paths)
    

    def __getitem__(self, idx):
        """Loads and returns a sample from the dataset at the specified index."""
        with gzip.open('/workspace/melee_project_data/input_np/' + self.file_paths[idx].replace('\\','/'), 'rb') as f:
            segment = np.load(f)

        if self.transform:
            segment = self.transform(segment)
        
        # Start and end of the segment
        segment_start = self.segment_start_index[idx]
        segment_end = self.segment_start_index[idx] + self.segment_length[idx]
        
        # Convert to PyTorch tensors
        segment_tensor = torch.from_numpy(segment[:,segment_start:segment_end]).float()
        # label_tensor = torch.tensor(self.encoded_labels[idx], dtype=torch.long)
        return segment_tensor#, label_tensor
    
def prepare_data_loaders(train_df, test_df, batch_size, num_workers):
    # Initialize datasets
    train_dataset = TrainingDataset(train_df)
    # val_dataset = TrainingDataset(file_paths_val, labels_val)
    test_dataset = TrainingDataset(test_df)

    # Initialize data loaders
    loaders = {
        'train': DataLoader(train_dataset, batch_size=batch_size, num_workers=num_workers, shuffle=True, pin_memory=True,persistent_workers=True),
        'test': DataLoader(test_dataset, batch_size=batch_size, num_workers=num_workers, shuffle=True, pin_memory=True,persistent_workers=True),
        # 'val': DataLoader(val_dataset, batch_size=2**9, num_workers=num_workers, shuffle=False, pin_memory=True,persistent_workers=True)
    }
    return loaders



# ''' Get a batch of data to see the size if we want that information. ''' 
# data_loader_iterator = iter(loaders['train'])
# first_batch = next(data_loader_iterator)
# print(first_batch.shape)



In [9]:
def train_model(model, criterion, optimizer, loaders, device, num_epochs=1):
    scaler = GradScaler()  # Initialize the gradient scaler

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        train_loader_tqdm = tqdm(loaders['train'], desc=f'Epoch {epoch+1}/{num_epochs}', unit='batch')
        total = 0
        
        for batch_number, target_cpu in enumerate(train_loader_tqdm):
            target_gpu = target_cpu.to(device)
            
            # Resets the optimizer
            optimizer.zero_grad()
            
            # Runs the forward pass with autocasting.
            with autocast():
                output_gpu = model(target_gpu)
                loss = criterion(output_gpu, target_gpu)
            
            # Scales loss and calls backward() to create scaled gradients
            scaler.scale(loss).backward()
            
            # Before calling step(), check for inf or NaN values in the gradients
            if any(torch.isinf(p.grad).any() or torch.isnan(p.grad).any() for p in model.parameters() if p.grad is not None):
                print("Warning: inf or NaN values in gradients!")
                
            # scaler.step() first unscales the gradients of the optimizer's assigned params.
            # If these gradients do not contain infs or NaNs, optimizer.step() is then called,
            # otherwise, optimizer.step() is skipped.
            scaler.step(optimizer)
            
            # Updates the scale for next iteration.
            scaler.update()

            # Update progress
            train_loss += loss.item()
            total += target_gpu.size(0)
            train_loader_tqdm.set_postfix(loss=f'{train_loss / (total):.4f}')


def evaluate_model(model, criterion, loaders, loader, device):
    model.eval()
    eval_loss = 0
    total = 0
    with torch.no_grad():
        eval_loader_tqdm = tqdm(loaders[loader], unit = 'batch')
        
        for batch_number, target_cpu in enumerate(eval_loader_tqdm):
            target_gpu = target_cpu.to(device)
            output_gpu = model(target_gpu)
            
            eval_loss += criterion(output_gpu, target_gpu).item()
            total += target_gpu.size(0)
            eval_loader_tqdm.set_postfix(loss=f'{eval_loss / (total):.4f}') 
            
    print(f'Evaluated Loss: {eval_loss / total:.6f}')
    

In [13]:


# Build model
model = Autoencoder2().to('cuda')

# With the size of an input we can get a model summary.
summary(model, input_size=(9, 64))

# Check that the output shape and target shape match
# training_example = torch.rand(9, 2 ** 12).to('cuda')
# print('Target shape:', training_example.shape)
# model.eval()
# output = model(training_example)
# print('Output shape:', output.shape)

## Optionally compile the model
# import torch_tensorrt
model = torch.compile(model, mode = 'default')
# model = torch.compile(model,mode = 'max-autotune')
# model = torch.compile(model, backend="torch_tensorrt")
# model = torch.compile(model, backend="torch_tensorrt",mode = 'max-autotune')




RuntimeError: max_pool1d() Invalid computed output size: 0

In [12]:
import torch._dynamo
torch._dynamo.config.suppress_errors = True


# Pepare data loaders
batch_size =  32
num_workers = 16
loaders = prepare_data_loaders(train_df, test_df, batch_size, num_workers)

criterion = nn.MSELoss(reduction = 'sum')
optimizer = Adam(model.parameters(), lr=0.001)
num_epochs = 10

# # This seems to sometimes help
# gc.collect()
# torch.cuda.empty_cache()

# Train the model
# start_time = time.time()
train_model(model, criterion, optimizer, loaders, 'cuda', num_epochs)
# print(f'Batch Size: {batch_size}, Training time: {time.time() - start_time:.2f}')

# Again, this sometimes seems to help
# gc.collect()
# torch.cuda.empty_cache()

# Evaluate the trained model
evaluate_model(model, criterion, loaders, 'test', 'cuda')

Epoch 1/10:   0%|          | 0/125 [00:00<?, ?batch/s]


RuntimeError: max_pool1d() Invalid computed output size: 0