In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam
from torchsummary import summary
from torch.cuda.amp import autocast, GradScaler

import numpy as np
import gzip
import pickle
import os
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import gc
import time


import sys
sys.path.append('..')
# from slp_package.slp_functions import create_merged_game_data_df
from slp_package.input_dataset import InputDataSet
import slp_package.pytorch_functions as slp_pytorch_functions


In [2]:
# Replay collections handed to InputDataSet.
source_data = ['ranked', 'public']

# Game-level feature values handed to InputDataSet (presumably used as filters -
# confirm against InputDataSet).
# Disabled alternative:
#   'stage_name': ['FOUNTAIN_OF_DREAMS', 'FINAL_DESTINATION', 'BATTLEFIELD',
#                  'YOSHIS_STORY', 'POKEMON_STADIUM', 'DREAMLAND']
general_features = dict(
    num_players=[2],
    conclusive=[True],
)

# Feature values for the player whose inputs are used.
# Disabled alternatives:
#   'netplay_code': ['MANG#0']
#   'character_name': ['FOX', 'FALCO', 'MARTH', 'CAPTAIN_FALCON', 'SHEIK']
#   'type_name': ['HUMAN']
# The active character list below leaves out MARIO, ROY, YOUNG_LINK, PICHU and
# KIRBY, which appeared in an earlier, longer version of this list.
player_features = dict(
    character_name=[
        'FOX',
        'CAPTAIN_FALCON',
        'SHEIK',
        'FALCO',
        'GAME_AND_WATCH',
        'MARTH',
        'LINK',
        'ICE_CLIMBERS',
        'SAMUS',
        'GANONDORF',
        'BOWSER',
        'MEWTWO',
        'YOSHI',
        'PIKACHU',
        'JIGGLYPUFF',
        'NESS',
        'DR_MARIO',
        'PEACH',
        'LUIGI',
        'DONKEY_KONG',
    ],
)

# Feature values for the opponent.
# Disabled alternatives:
#   'character_name': ['MARTH']
#   'netplay_code': ['KOD#0', 'ZAIN#0']
opposing_player_features = dict(
    type_name=['HUMAN'],
)

# Which feature becomes the label.
#   'source' can be 'general' or 'player'.
#   Disabled alternative for 'feature': ['netplay_code']
label_info = dict(
    source=['player'],
    feature=['character_name'],
)

In [3]:
# Build the dataset from the configuration defined in the previous cell.
dataset = InputDataSet(
    source_data,
    general_features,
    player_features,
    opposing_player_features,
    label_info,
)

# Number of rows per label value.
print(
    dataset.dataset['labels'].value_counts()
)

  processed_df = pd.concat([player_1_df, player_2_df], ignore_index=True)


FOX               101341
FALCO              79412
MARTH              50788
CAPTAIN_FALCON     39858
SHEIK              29972
PEACH              19338
JIGGLYPUFF         17390
SAMUS              10499
ICE_CLIMBERS        7696
GANONDORF           7524
LUIGI               5777
YOSHI               5567
PIKACHU             4488
DR_MARIO            4150
LINK                2693
NESS                2554
GAME_AND_WATCH      2196
DONKEY_KONG         2183
MEWTWO              1883
BOWSER               994
Name: labels, dtype: int64


In [4]:
# Per-label segment table (the printed columns are Label, Count and Shift).
# NOTE(review): 12 and 2000 are passed positionally and their meaning is not
# visible in this notebook - confirm against
# InputDataSet.number_of_segments_per_game before changing them.
segments_per_label = dataset.number_of_segments_per_game(12, 2000)
print(segments_per_label)

# Keep only the label names, in the order the table lists them. The table gets
# its own name above so that `labels_order` never refers to two kinds of object.
labels_order = segments_per_label['Label'].values


             Label   Count   Shift
0              FOX  100351  282403
1            FALCO   78612  219519
2            MARTH   50387  154283
3   CAPTAIN_FALCON   39443  106456
4            SHEIK   29811  102660
5            PEACH   19215   72021
6       JIGGLYPUFF   17103   61146
7            SAMUS   10438   43361
8     ICE_CLIMBERS    7649   28907
9        GANONDORF    7448   21670
10           LUIGI    5737   20492
11           YOSHI    5537   19333
12         PIKACHU    4446   15860
13        DR_MARIO    4114   14173
14            LINK    2678    9791
15            NESS    2536   11125
16  GAME_AND_WATCH    2171    5949
17     DONKEY_KONG    2156    7413
18          MEWTWO    1863    8306
19          BOWSER     987    3891


In [5]:
# Split into a training and a test frame; no validation frame is requested.
train_df, test_df = dataset.train_test_split_dataframes(test_ratio=0.20, val=False)

In [6]:
# Preview the first rows of the training frame (file path, label, encoded label
# and segment bookkeeping columns).
train_df.head()

Unnamed: 0,player_inputs_np_sub_path,labels,encoded_labels,segment_start_index,segment_index,segment_length
0,public\FOX\0080ac84-c059-426a-83b1-3b71efc4881...,FOX,5,0,0,4096
1,public\FOX\61719d70-d256-48c3-9583-293c47ceffb...,FOX,5,0,0,4096
2,public\FOX\88b39d55-8f2e-4c2e-8d7c-8d45c276df9...,FOX,5,0,0,4096
3,public\FOX\6c9db7b1-0207-470a-954a-94d05b5e81c...,FOX,5,0,0,4096
4,public\FOX\2dfd4f0f-2ade-4535-84a5-8f87c5b3e75...,FOX,5,0,0,4096


In [7]:
class Autoencoder(nn.Module):
    """1-D convolutional autoencoder for (batch, channels, length) sequences.

    Encoder: four stages, each made of two width-9 "same" convolutions followed
    by a max-pool that halves the length. Channel widths go 128 -> 64 -> 32 -> 16.
    The encoder convolutions are lazy, so their input channel counts are
    inferred on the first forward pass.

    Decoder: four stages, each made of a stride-2 transposed convolution that
    doubles the length followed by a width-9 "same" convolution. Channel widths
    go 16 -> 16 -> 32 -> 64 -> 128, and a final convolution maps to
    ``num_channels``.

    The length is halved four times and then doubled four times, so the output
    length equals the input length only when the input length is a multiple of 16.

    Args:
        num_channels: Number of channels produced by the last decoder layer.
            For the reconstruction to be comparable with the input this should
            equal the number of input channels. Defaults to 9.
        dropout: Probability used by every dropout layer. Defaults to 0.0,
            which makes the dropout layers no-ops.
    """

    # Output widths of the four encoder stages.
    _ENCODER_WIDTHS = (128, 64, 32, 16)
    # (input width, output width) of the four decoder stages.
    _DECODER_WIDTHS = ((16, 16), (16, 32), (32, 64), (64, 128))

    def __init__(self, num_channels=9, dropout=0.0):
        super().__init__()

        # Layers are appended in the same order as the hand-written version, so
        # module indices (and therefore state_dict keys) are unchanged.
        encoder_layers = []
        for width in self._ENCODER_WIDTHS:
            encoder_layers += [
                nn.LazyConv1d(width, kernel_size=9, stride=1, padding=4),
                nn.ReLU(),
                nn.LazyConv1d(width, kernel_size=9, stride=1, padding=4),
                nn.ReLU(),
                nn.MaxPool1d(kernel_size=2),
                nn.Dropout(dropout),
            ]
        self.encoder = nn.Sequential(*encoder_layers)

        decoder_layers = []
        for in_width, out_width in self._DECODER_WIDTHS:
            decoder_layers += [
                # kernel 4 / stride 2 / padding 1 exactly doubles the length.
                nn.ConvTranspose1d(in_width, out_width, kernel_size=4, stride=2, padding=1),
                nn.ReLU(),
                nn.Conv1d(out_width, out_width, kernel_size=9, stride=1, padding=4),
                nn.ReLU(),
                nn.Dropout(dropout),
            ]
        # Final projection back to the data channels; no activation afterwards.
        decoder_layers.append(
            nn.Conv1d(self._DECODER_WIDTHS[-1][1], num_channels, kernel_size=9, stride=1, padding=4)
        )
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode ``x`` and decode it again, returning the reconstruction."""
        x = self.encoder(x)
        x = self.decoder(x)
        return x
    


In [8]:
class TrainingDataset(Dataset):
    """
    Custom dataset for loading game segments from gzip-compressed numpy files.

    Each row of ``df`` describes one segment: the file that holds the full
    array, where the segment starts and how long it is. Only the input segment
    is returned (no label), which is what the autoencoder needs.

    Args:
        df: Frame with the columns ``player_inputs_np_sub_path``,
            ``encoded_labels``, ``segment_start_index`` and ``segment_length``.
        transform: Optional callable applied to the whole loaded array before
            the segment is cut out of it.
        data_root: Directory that the relative paths in
            ``player_inputs_np_sub_path`` are resolved against. Defaults to
            ``DEFAULT_DATA_ROOT`` so existing callers keep their behaviour.
    """

    # Location of the input arrays on the machine this notebook was written for.
    DEFAULT_DATA_ROOT = '/workspace/melee_project_data/input_np/'

    def __init__(self, df, transform=None, data_root=DEFAULT_DATA_ROOT):
        self.file_paths = df['player_inputs_np_sub_path'].to_numpy()
        # Labels are kept for reference but are not returned by __getitem__.
        self.encoded_labels = df['encoded_labels'].to_numpy()
        self.segment_start_index = df['segment_start_index'].to_numpy()
        self.segment_length = df['segment_length'].to_numpy()
        self.transform = transform
        self.data_root = data_root

    def __len__(self):
        """Returns the total number of samples in the dataset."""
        return len(self.file_paths)

    def __getitem__(self, idx):
        """Loads the file for row ``idx`` and returns its segment as a float32 tensor.

        The loaded array is sliced along its second axis, from the row's start
        index to start index plus segment length.
        """
        # The stored paths use Windows separators; normalise them before joining.
        relative_path = self.file_paths[idx].replace('\\', '/')
        with gzip.open(os.path.join(self.data_root, relative_path), 'rb') as f:
            segment = np.load(f)

        if self.transform:
            segment = self.transform(segment)

        # Start and end of the segment
        segment_start = self.segment_start_index[idx]
        segment_end = segment_start + self.segment_length[idx]

        # Convert to a PyTorch tensor
        return torch.from_numpy(segment[:, segment_start:segment_end]).float()
    
def prepare_data_loaders(train_df, test_df, batch_size, num_workers):
    """Wrap the train and test frames in datasets and build their DataLoaders.

    Args:
        train_df: Frame describing the training segments.
        test_df: Frame describing the test segments.
        batch_size: Batch size of the training loader. The test loader always
            uses batches of 2**9.
        num_workers: Number of worker processes for both loaders. ``0`` loads
            data in the main process.

    Returns:
        dict: ``{'train': DataLoader, 'test': DataLoader}``.
    """
    # Initialize datasets
    train_dataset = TrainingDataset(train_df)
    test_dataset = TrainingDataset(test_df)

    # DataLoader raises ValueError when persistent_workers=True is combined
    # with num_workers=0, so only keep workers alive when there are any.
    shared_options = dict(
        num_workers=num_workers,
        pin_memory=True,
        persistent_workers=num_workers > 0,
    )

    # Initialize data loaders
    # NOTE(review): the test loader is shuffled as well. That is not needed for
    # the final loss, but it is left unchanged here in case the running average
    # shown during evaluation relies on it.
    loaders = {
        'train': DataLoader(train_dataset, batch_size=batch_size, shuffle=True, **shared_options),
        'test': DataLoader(test_dataset, batch_size=2**9, shuffle=True, **shared_options),
    }
    return loaders



# Get a batch of data to see the size if we want that information.
# (Kept as comments: a bare string literal here would be the cell's last
# expression and get echoed as the cell output.)
# data_loader_iterator = iter(loaders['train'])
# first_batch = next(data_loader_iterator)
# print(first_batch.shape)



' Get a batch of data to see the size if we want that information. '

In [9]:
def train_model(model, criterion, optimizer, loaders, device, num_epochs=1):
    """Train ``model`` to reconstruct its own input, using mixed precision on CUDA.

    Every batch from ``loaders['train']`` is used both as the model input and
    as the target of ``criterion``.

    Args:
        model: Module to train; it is put into training mode each epoch.
        criterion: Loss called as ``criterion(output, target)``.
        optimizer: Optimizer over the model's parameters.
        loaders: Mapping that contains the training DataLoader under ``'train'``.
        device: Device the batches are moved to (string or ``torch.device``).
        num_epochs: Number of passes over the training loader.

    Returns:
        list[float]: Mean batch loss of each epoch, in order (``nan`` for an
        epoch whose loader produced no batches).
    """
    # Autocast and gradient scaling are only enabled when training on a CUDA
    # device; on other devices both become pass-throughs instead of being
    # switched on regardless of the `device` argument.
    use_amp = torch.device(device).type == 'cuda'
    scaler = GradScaler(enabled=use_amp)
    epoch_losses = []

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        batches_seen = 0
        train_loader_tqdm = tqdm(loaders['train'], desc=f'Epoch {epoch+1}/{num_epochs}', unit='batch')

        for target_cpu in train_loader_tqdm:
            target_gpu = target_cpu.to(device)

            # Resets the optimizer
            optimizer.zero_grad()

            # Runs the forward pass with autocasting.
            with autocast(enabled=use_amp):
                output_gpu = model(target_gpu)
                loss = criterion(output_gpu, target_gpu)

            # Scales loss and calls backward() to create scaled gradients
            scaler.scale(loss).backward()

            # scaler.step() first unscales the gradients of the optimizer's assigned params.
            # If these gradients do not contain infs or NaNs, optimizer.step() is then called,
            # otherwise, optimizer.step() is skipped.
            scaler.step(optimizer)

            # Updates the scale for next iteration.
            scaler.update()

            # Update progress with the running mean of the batch losses
            train_loss += loss.item()
            batches_seen += 1
            train_loader_tqdm.set_postfix(loss=f'{train_loss / batches_seen:.4f}')

        epoch_losses.append(train_loss / batches_seen if batches_seen else float('nan'))

    return epoch_losses


def evaluate_model(model, criterion, loaders, loader, device):
    """Compute the mean reconstruction loss of ``model`` over one loader.

    Every batch is used both as the model input and as the target of
    ``criterion``. The reported value is the plain average of the per-batch
    losses. Gradients are disabled and the model is left in eval mode.

    Args:
        model: Module to evaluate.
        criterion: Loss called as ``criterion(output, target)``.
        loaders: Mapping of loader names to DataLoaders.
        loader: Key in ``loaders`` selecting the DataLoader to evaluate on.
        device: Device the batches are moved to.

    Returns:
        float: The mean batch loss that is also printed, or ``nan`` when the
        loader yields no batches (instead of raising ZeroDivisionError).
        Note that when this call is the last line of a notebook cell the
        returned value is also displayed as the cell output.
    """
    model.eval()
    eval_loss = 0.0
    batches_seen = 0
    with torch.no_grad():
        eval_loader_tqdm = tqdm(loaders[loader], unit='batch')

        for target_cpu in eval_loader_tqdm:
            target_gpu = target_cpu.to(device)
            output_gpu = model(target_gpu)

            eval_loss += criterion(output_gpu, target_gpu).item()
            batches_seen += 1
            eval_loader_tqdm.set_postfix(loss=f'{eval_loss / batches_seen:.4f}')

    mean_loss = eval_loss / batches_seen if batches_seen else float('nan')
    print(f'Evaluated Loss: {mean_loss:.6f}')
    return mean_loss
    

In [10]:
# Build model
model = Autoencoder().to('cuda')

# With the size of an input we can get a model summary.
summary(model, input_size=(9, 4096))

# Check that the output shape and target shape match
training_example = torch.rand(9, 2 ** 12).to('cuda')
print('Target shape:', training_example.shape)
model.eval()
output = model(training_example)
print('Output shape:', output.shape)

## Optionally compile the model
# import torch_tensorrt
# model = torch.compile(model, mode = 'default')
# model = torch.compile(model,mode = 'max-autotune')
# model = torch.compile(model, backend="torch_tensorrt")
# model = torch.compile(model, backend="torch_tensorrt",mode = 'max-autotune')


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv1d-1            [-1, 128, 4096]          10,496
              ReLU-2            [-1, 128, 4096]               0
            Conv1d-3            [-1, 128, 4096]         147,584
              ReLU-4            [-1, 128, 4096]               0
         MaxPool1d-5            [-1, 128, 2048]               0
           Dropout-6            [-1, 128, 2048]               0
            Conv1d-7             [-1, 64, 2048]          73,792
              ReLU-8             [-1, 64, 2048]               0
            Conv1d-9             [-1, 64, 2048]          36,928
             ReLU-10             [-1, 64, 2048]               0
        MaxPool1d-11             [-1, 64, 1024]               0
          Dropout-12             [-1, 64, 1024]               0
           Conv1d-13             [-1, 32, 1024]          18,464
             ReLU-14             [-1, 3



In [11]:
# Prepare data loaders
batch_size = 16
num_workers = 16
loaders = prepare_data_loaders(train_df, test_df, batch_size, num_workers)

# Reconstruction loss (mean squared error) and optimizer
criterion = nn.MSELoss(reduction='mean')
optimizer = Adam(model.parameters(), lr=1e-3)
num_epochs = 2

# Free cached memory before training; this seems to sometimes help
gc.collect()
torch.cuda.empty_cache()

# Train the model and report the elapsed wall-clock time in seconds
start_time = time.time()
train_model(model, criterion, optimizer, loaders, 'cuda', num_epochs)
print(f'Batch Size: {batch_size}, Training time: {time.time() - start_time:.2f}')

# Free cached memory again before evaluating; this also sometimes seems to help
gc.collect()
torch.cuda.empty_cache()

# Evaluate the trained model on the held-out test loader
evaluate_model(model, criterion, loaders, 'test', 'cuda')

Epoch 1/2:  53%|█████▎    | 1059/2000 [00:06<00:05, 161.14batch/s, loss=0.1100]


KeyboardInterrupt: 