# Revised Autoencoder

## Import


In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam
from torchsummary import summary
from torch.cuda.amp import autocast, GradScaler
from torch.optim.lr_scheduler import ReduceLROnPlateau

import numpy as np
import gzip
import pickle
import os
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import gc
import time
import random
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
from collections import deque
import pandas as pd
import math


import sys
sys.path.append('..')
from slp_package.input_dataset import InputDataSet
import slp_package.pytorch_functions as slp_pytorch_functions

def set_seed(seed=42):
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if you are using CUDA
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)
torch.cuda.is_available()

True

In [2]:
source_data = ['ranked','public','mango']

general_features = {
    'stage_name': ['FOUNTAIN_OF_DREAMS','FINAL_DESTINATION','BATTLEFIELD','YOSHIS_STORY','POKEMON_STADIUM','DREAMLAND'],
    'num_players': [2],
    'conclusive': [True],
}
player_features = {
    'character_name': ['FOX', 'CAPTAIN_FALCON', 'SHEIK', 'FALCO', 'GAME_AND_WATCH', 'MARTH', 'LINK', 'ICE_CLIMBERS', 'SAMUS', 'GANONDORF', 'BOWSER', 'MEWTWO', 'YOSHI', 'PIKACHU', 'JIGGLYPUFF', 'NESS', 'DR_MARIO', 'MARIO', 'PEACH', 'ROY', 'LUIGI', 'YOUNG_LINK', 'DONKEY_KONG', 'PICHU', 'KIRBY'],
    'type_name': ['HUMAN']
}
opposing_player_features = {
    # 'character_name': ['MARTH'],
    # 'netplay_code': ['KOD#0', 'ZAIN#0']
    'type_name': ['HUMAN']
}

# We will not be training with a label.
label_info = {
    'source': ['player'], # Can be 'general', 'player
    'feature': ['character_name']
}

In [3]:
dataset = InputDataSet(source_data, general_features, player_features, opposing_player_features, label_info)
dataset.dataset.head()

  processed_df = pd.concat([player_1_df, player_2_df], ignore_index=True)


Unnamed: 0,stage_name,num_players,conclusive,player_character_name,player_type_name,opposing_player_type_name,player_inputs_np_sub_path,length,labels
0,FINAL_DESTINATION,2,True,FALCO,HUMAN,HUMAN,mango\FALCO\727e819f-8cb3-4c3f-bf0a-ceefa9e41c...,5606,FALCO
1,FINAL_DESTINATION,2,True,FALCO,HUMAN,HUMAN,mango\FALCO\76fe3db5-60de-46bb-8f0d-80d48822a8...,5754,FALCO
2,POKEMON_STADIUM,2,True,MARTH,HUMAN,HUMAN,mango\MARTH\7e6b417f-249d-4629-b6dc-2fe1d95d8f...,6213,MARTH
3,FOUNTAIN_OF_DREAMS,2,True,FOX,HUMAN,HUMAN,mango\FOX\32305eaf-71d8-46e5-a8a1-2c7c890a9baf...,7621,FOX
4,FINAL_DESTINATION,2,True,FALCO,HUMAN,HUMAN,mango\FALCO\a5396c32-6f2c-4b88-8582-f8b875bb55...,7840,FALCO


In [4]:
segment_length = 3600
shift = 1800

train_df, test_df = dataset.all_segments_train_test_split_dataframes(segment_length,shift=shift, proportion_of_segments=1, test_ratio = .1, val = False)
porportion = 1
train_df = train_df.sample(frac=porportion, random_state = 42)
porportion = .5
test_df = test_df.sample(frac=porportion, random_state = 42)
print(train_df.shape)
print(test_df.shape)
train_df.head()


(1472456, 8)
(81775, 8)


Unnamed: 0,player_inputs_np_sub_path,length,num_segments,labels,encoded_labels,segment_index,segment_start_index,segment_length
328970,mango\MARTH\bdcc275f-6c5b-48c1-8a32-d8641335e7...,9817,4,MARTH,14,3,5400,3600
700726,public\FOX\08a81341-be91-4149-a92a-833097df689...,8214,3,FOX,5,2,3600,3600
639275,public\FOX\bec5d2b8-a142-4b88-ab76-0db69e671b8...,8009,3,FOX,5,2,3600,3600
720107,ranked\FOX\cb41809a-778a-4cb0-82dc-f43f28e39f1...,11254,5,FOX,5,1,1800,3600
784662,mango\FOX\37a24d1b-1df5-428d-a938-e6dc0c1fba35...,11049,5,FOX,5,0,0,3600


In [5]:
class TrainingDataset(Dataset):
    """
    Custom dataset for loading and optionally transforming game segments from compressed NumPy files.
    
    Parameters
    ----------
    df : pd.DataFrame
        Must include the following columns:
          - 'player_inputs_np_sub_path': file paths to the compressed NumPy files
          - 'encoded_labels': integer-encoded labels
          - 'segment_start_index': start index for each segment
          - 'segment_length': length of each segment in frames
    transform : bool, default=False
        If True, applies a specific transformation to each loaded segment (e.g., scaling analog inputs).
    """
    def __init__(self, df, transform=False):
        self.file_paths = df['player_inputs_np_sub_path'].to_numpy()
        self.encoded_labels = df['encoded_labels'].to_numpy()
        self.segment_start_index = df['segment_start_index'].to_numpy()
        self.segment_length = df['segment_length'].to_numpy()
        self.transform = transform

        # Optional: you can store a shape attribute to document the shape 
        # of data that __getitem__ will return. 
        # We'll initialize it to None and fill it when the first item is fetched.
        self.sample_shape = None

    def __len__(self):
        """
        Returns the total number of samples in the dataset.
        """
        return len(self.file_paths)

    def __getitem__(self, idx):
        """
        Retrieves the sample (and possibly label) from the dataset at index 'idx'.

        In this custom dataset:
          1. We open the compressed file corresponding to self.file_paths[idx].
          2. We slice out the segment using self.segment_start_index[idx] and
             self.segment_length[idx].
          3. If transform=True, we apply additional transformations (shifting, scaling, etc.).
          4. We return a PyTorch tensor containing the processed segment.

        Parameters
        ----------
        idx : int
            Index of the sample to be fetched.

        Returns
        -------
        torch.Tensor
            A tensor representing the selected segment, after optional transformations.
        """
        # Load the uncompressed file
        file_path = self.file_paths[idx].replace('\\', '/')
        with gzip.open('/workspace/melee_project_data/input_np/' + file_path, 'rb') as f:
            segment = np.load(f)

        # Determine slice boundaries
        start = int(self.segment_start_index[idx])
        end = start + int(self.segment_length[idx])

        # Extract the segment
        segment = segment[:, start:end]

        # Apply transformations if requested
        if self.transform:
            # Example transformation: shape = (9+4, 3600) for some reason
            transformed = np.zeros((9 + 4, int(self.segment_length[idx])))

            # 1) Shift and scale analog inputs to [0, 1]
            analog_transformed = np.copy(segment[0:4])
            analog_transformed[analog_transformed > 0] -= 0.2875 + 0.0125
            analog_transformed[analog_transformed < 0] += 0.2875 - 0.0125
            analog_transformed *= 0.5 / 0.725
            analog_transformed += 0.5
            transformed[0:4] = analog_transformed

            # 2) Mark positions where analog inputs are zero
            transformed[4:8] += (segment[:4] == 0)

            # # Possible additional transformations:
            # # 3) Some custom “transition” measure on last 5 rows
            # prepend = np.expand_dims(segment[-5:, 0], axis=1)
            # transitions = np.abs(np.diff(segment[-5:], axis=1, prepend=prepend))
            # transformed[8:13] += transitions

            # 4) Add button inputs
            transformed[-5:] += segment[-5:]

        else:
            # If not transforming, produce something simpler (9 x 60)
            transformed = np.zeros((9, int(self.segment_length[idx])))

            # 1) Shift and scale analog inputs to [0, 1]
            analog_transformed = np.copy(segment[0:4])
            analog_transformed[analog_transformed > 0] -= 0.2875 + 0.0125
            analog_transformed[analog_transformed < 0] += 0.2875 - 0.0125
            analog_transformed *= 0.5 / 0.725
            analog_transformed += 0.5
            transformed[0:4] = analog_transformed

            # 2) Transform the Trigger to 0/1
            transformed[-5] += (segment[-5] > 0.5)

            # 3) The last 4 rows become button inputs
            transformed[-4:] += segment[-4:]

        # Convert to PyTorch tensor
        segment_tensor = torch.from_numpy(transformed).float()

        # Optionally store the shape of the output the first time __getitem__ is called
        if self.sample_shape is None:
            self.sample_shape = segment_tensor.shape

        return segment_tensor


def prepare_data_loaders(train_df, test_df, batch_size, num_workers,  transform = True):
    """
    Creates DataLoader objects for training and testing sets.

    Parameters
    ----------
    train_df : pd.DataFrame
    test_df : pd.DataFrame
    batch_size : int
    num_workers : int

    Returns
    -------
    dict of DataLoader
        'train' -> training DataLoader
        'test' -> testing DataLoader
    """
    train_dataset = TrainingDataset(train_df, transform=transform)
    test_dataset = TrainingDataset(test_df, transform=transform)

    loaders = {
        'train': DataLoader(
            train_dataset, 
            batch_size=batch_size, 
            shuffle=True, 
            num_workers=num_workers, 
            pin_memory=True,
            persistent_workers=True
        ),
        'test': DataLoader(
            test_dataset, 
            batch_size=batch_size, 
            shuffle=False, 
            num_workers=num_workers, 
            pin_memory=True,
            persistent_workers=True
        )
    }
    return loaders


In [6]:
def train_model_with_virtual_epochs(model, criterion, optimizer, loaders, device, channels, segment_length, num_epochs=1, bce_scale=100):
    scheduler = ReduceLROnPlateau(optimizer, 'min', patience=15, factor=0.1)
    best_loss = float('inf')
    best_model = None
    
    vepoch_total = 0
    vepoch_loss_sum = 0
    best_vepoch_loss = float('inf')
    early_stopping_patience = 0

    for epoch in range(num_epochs):
        model.train()
        train_loader_tqdm = tqdm(loaders['train'], desc=f'Epoch {epoch+1}/{num_epochs}', unit='batch')
        virtual_epoch_start_time = time.time()

        # Initialize variables for tracking gradient and parameter stats
        grad_max = float('-inf')
        grad_min = float('inf')
        param_max = float('-inf')
        param_min = float('inf')

        for batch_number, target_cpu in enumerate(train_loader_tqdm):
            target_gpu = target_cpu.to(device)
            optimizer.zero_grad()
            output_gpu = model(target_gpu)
            loss = criterion(output_gpu, target_gpu) / (channels * segment_length * target_cpu.size(0))
            
            loss.backward()

            # Track max and min of gradients
            batch_grad_max = max((p.grad.max().item() for p in model.parameters() if p.grad is not None), default=grad_max)
            batch_grad_min = min((p.grad.min().item() for p in model.parameters() if p.grad is not None), default=grad_min)
            grad_max = max(grad_max, batch_grad_max)
            grad_min = min(grad_min, batch_grad_min)

            optimizer.step()

            vepoch_total += target_cpu.size(0)
            vepoch_loss_sum += loss.item() * target_cpu.size(0)

            if time.time() - virtual_epoch_start_time > 60:
                vepoch_loss = vepoch_loss_sum / vepoch_total
                if best_vepoch_loss > vepoch_loss:
                    best_vepoch_loss = vepoch_loss
                else:
                    early_stopping_patience += 1

                # Calculate max and min of model parameters at the end of the virtual epoch
                param_max = max(p.data.max().item() for p in model.parameters())
                param_min = min(p.data.min().item() for p in model.parameters())

                train_loader_tqdm.set_postfix(
                    Best=f'{best_vepoch_loss * bce_scale:.10f}',
                    Vepoch=f'{vepoch_loss * bce_scale:.10f}',
                    patience=early_stopping_patience,
                    Grad_Max=grad_max,
                    Grad_Min=grad_min,
                    Param_Max=param_max,
                    Param_Min=param_min
                )
                # print('Grad Max:', grad_max, ' Grad Min:', grad_min)
                virtual_epoch_start_time = time.time()
                vepoch_total = 0
                vepoch_loss_sum = 0
                grad_max = float('-inf')  # Reset for next virtual epoch
                grad_min = float('inf')   # Reset for next virtual epoch

    return best_model

def predict(model, loaders, loader, device):
    model.eval()
    predictions = []
    targets = []
    
    with torch.no_grad():
        eval_loader_tqdm = tqdm(loaders[loader], unit='batch')
        
        for _, target_cpu in enumerate(eval_loader_tqdm):
            target_gpu = target_cpu.to(device)
            output_gpu = model(target_gpu)
            # output_gpu = torch.sigmoid(output_gpu)
            
            predictions.append(torch.sigmoid(output_gpu).cpu().numpy())
            targets.append(target_cpu.numpy())
    
    predictions = np.concatenate(predictions, axis=0)
    targets = np.concatenate(targets, axis=0)
    
    

    return predictions, targets

import torch
import torch.nn as nn

class CustomLoss(nn.Module):
    def __init__(self, bce_scale=100, transform=False, weighted=False, channels=13, segment_length=3600):
        super(CustomLoss, self).__init__()
        
        # Fraction of times each button is pressed in your sample
        buttons_sample_mean = [
            0.16908772957310006,  # TRIGGER_LOGICAL
            0.008974353071937505, # Z
            0.060945588829374495, # A
            0.04591526858731047,  # B
            0.09663690337362206   # X_or_Y
        ]
        # If transform == True, you also have additional ones for jstick/cstick?
        trigger_logical_sample_mean = [
            0.45849791926398437,  # JSTICK_X_LOGICAL
            0.6879025510132348,   # JSTICK_Y_LOGICAL
            0.9726537459234259,   # CSTICK_X_LOGICAL
            0.971675825912117     # CSTICK_Y_LOGICAL
        ]

        # Create pos_weight or bce_weights depending on your logic
        if transform:
            # Merge your two sets if needed
            sample_means = trigger_logical_sample_mean + buttons_sample_mean
        else:
            sample_means = buttons_sample_mean

        # pos_weight for each dimension: (1 - p) / p
        pos_weight_vals = np.zeros((channels-4, segment_length))
        for i, mean in enumerate(sample_means):
            p_pos = mean
            p_neg = 1.0 - mean
            
            pos_weight_vals[i,:] += p_neg / p_pos
        pos_weight_tensor = torch.tensor(pos_weight_vals, dtype=torch.float, device='cuda')

        if weighted:
            # Use pos_weight instead of 'weight'
            self.BCE = nn.BCEWithLogitsLoss(reduction='sum', pos_weight=pos_weight_tensor)
        else:
            self.BCE = nn.BCEWithLogitsLoss(reduction='sum')

        # Save the other components
        self.bce_scale = bce_scale
        self.MSE = nn.MSELoss(reduction='sum')

    def forward(self, pred, target):
        """
        pred, target shape: (B, Channels, T)
        We'll assume:
          - pred[:, 0:4, :] are analog predictions (MSE)
          - pred[:, 4:, :] are button predictions (BCE)
        """
        # 1) MSE for first 4 analog channels
        mse_loss = self.MSE(torch.sigmoid(pred[:, 0:4, :]), target[:, 0:4, :]) 
        # 2) BCE for the rest
        bce_loss = self.BCE(pred[:, 4:, :], target[:, 4:, :])

        # Scale & return combined
        return mse_loss + bce_loss / self.bce_scale


In [7]:
# trasform = True adds binary features corresponding to when the analog inputs are 0.
transform = True
# bce_scale is a tunable parameter that scales the binary cross-entropy loss.
bce_scale = 100
# weighted = True weights the loss function to account for the imbalance of the button being pressed.
weighted = True

loaders = prepare_data_loaders(train_df, test_df, batch_size=16, num_workers=20,  transform=transform)
# Grab one item (segment tensor) from the train dataset
train_dataset = loaders['train'].dataset
first_item = train_dataset[0]
channels = first_item.size(0)
print(channels)

from Convolutional_Autoencoder_Model import ResNet_Autoencoder
# Initialize the model
model = ResNet_Autoencoder(channels)
model = model.cuda()

# With the size of an input we can get a model summary.
summary(model, input_size=(channels, segment_length))

13
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv1d-1             [-1, 64, 3600]             896
       BatchNorm1d-2             [-1, 64, 3600]             128
              ReLU-3             [-1, 64, 3600]               0
            Conv1d-4             [-1, 64, 3600]          12,352
       BatchNorm1d-5             [-1, 64, 3600]             128
              ReLU-6             [-1, 64, 3600]               0
            Conv1d-7            [-1, 256, 3600]          16,640
       BatchNorm1d-8            [-1, 256, 3600]             512
            Conv1d-9            [-1, 256, 3600]           3,584
      BatchNorm1d-10            [-1, 256, 3600]             512
             ReLU-11            [-1, 256, 3600]               0
Encoder_Bottleneck-12            [-1, 256, 3600]               0
           Conv1d-13             [-1, 64, 3600]          16,448
      BatchNorm1d-14             [-

Below are two tables to help you interpret **Binary Cross-Entropy (BCE)** loss in terms of approximate accuracy and what constitutes a "good" score for BCE in different contexts.

---

## **Table 1: Approximate BCE Loss to Accuracy Conversion**
The relationship between BCE and accuracy depends on how well the model differentiates between positive and negative classes. While these values can vary based on data distribution, below is a general mapping:

| **BCE Loss** | **Approximate Accuracy (%)** | **Interpretation**                  |
|--------------|-------------------------------|-------------------------------------|
| 0.69         | 50%                          | Random guessing (e.g., balanced binary classes) |
| 0.50         | 70%                          | Slightly better than random         |
| 0.30         | 85%                          | Good prediction capability          |
| 0.15         | 93%                          | Excellent prediction capability     |
| 0.05         | 98%                          | Almost perfect                      |
| 0.01         | ~99.9%                       | Near flawless prediction            |

### Notes:
1. **Loss and accuracy are not perfectly linear**: BCE measures how confident the model is in its predictions, while accuracy simply measures the fraction of correct predictions. Small BCE loss does not always imply perfect accuracy, especially for imbalanced datasets.
2. **Threshold Assumption**: The accuracy assumes a threshold of 0.5 to classify outputs as positive/negative.

---

## **Table 2: What Is a "Good" BCE Score?**
What qualifies as a "good" BCE loss depends heavily on the context of the task, the dataset, and whether your data is balanced or imbalanced. Here's a guide:

| **BCE Loss Range** | **Context/Interpretation**              | **Example**                    |
|---------------------|-----------------------------------------|---------------------------------|
| **0.69+**           | Random guessing; model not learning.   | Imbalanced dataset without weighting or a naive model. |
| **0.5 - 0.69**      | Slightly better than random.           | Early training or poorly tuned model. |
| **0.3 - 0.5**       | Decent; learning useful patterns.      | Baseline for balanced binary classification tasks. |
| **0.1 - 0.3**       | Good; strong predictive performance.   | Typical for well-trained models on balanced datasets. |
| **< 0.1**           | Excellent; near-perfect classification.| Highly confident predictions on well-modeled data. |
| **< 0.01**          | Overfitting or trivial task.           | Could indicate model memorizing training data. |

### Notes:
1. **Balanced vs. Imbalanced Datasets**:
   - On **balanced datasets**, a BCE of ~0.3 or below is generally good.
   - On **imbalanced datasets**, a low BCE may indicate the model is simply predicting the majority class. In such cases, consider weighted BCE loss or metrics like precision, recall, and F1-score.
2. **Dataset Difficulty**: For harder datasets (e.g., noisy data or highly overlapping classes), a BCE of ~0.4 could still represent excellent performance.

---

## Example Use Case Interpretation
- If your BCE loss stabilizes around **0.2**, you can expect accuracy to be around **90-92%** on a balanced dataset.
- If you see **0.01** BCE loss, double-check for overfitting, especially if the test BCE is much higher than the training BCE.

If you provide specifics about your task (e.g., balanced or imbalanced classes, the dataset type), I can refine the interpretation further!

In [8]:
criterion = CustomLoss(bce_scale=bce_scale, transform=transform, weighted=weighted, channels=channels, segment_length=segment_length)

optimizer = Adam(model.parameters(), lr=0.0001)
num_epochs = 1

# This seems to sometimes help
gc.collect()
torch.cuda.empty_cache()
# Train the model

# start_time = time.time()
# train_model(model, criterion, optimizer, loaders, 'cuda', num_epochs)
# train_model_with_virtual_epochs(model, criterion, optimizer, loaders, 'cuda', num_epochs)
train_model_with_virtual_epochs(model, criterion, optimizer, loaders, 'cuda', channels, segment_length, num_epochs, bce_scale=bce_scale)

Epoch 1/1: 100%|██████████| 92029/92029 [3:26:28<00:00,  7.43batch/s, Best=0.4354037318, Grad_Max=0.00498, Grad_Min=-0.00482, Param_Max=1.59, Param_Min=-1.35, Vepoch=0.4410146753, patience=103]   


In [9]:
torch.save(model.state_dict(), '/workspace/melee_project_data/autoencoder_models/autoencoder_revised_one_epoch.pt')

In [10]:
gc.collect()
torch.cuda.empty_cache()

pred, target = predict(model, loaders, 'test','cuda')

100%|██████████| 5111/5111 [04:15<00:00, 20.02batch/s]


In [11]:
integer_stick_targets = np.round(target[:,0:4] / 0.008620689655172415 ).astype(np.int32)
integer_stick_pred = np.round(pred[:,0:4] / 0.008620689655172415).astype(np.int32)



n = 10

buttons = ['JSTICK_X', 'JSTICK_Y', 'CSTICK_X', 'CSTICK_Y']
# buttons = ['X_or_Y']


stick_accuracy_df = pd.DataFrame(np.arange(n,dtype=np.int16),columns=['How Close'])
# print(summary_df)


for j in range(4):
    unique, counts = np.unique(integer_stick_pred[:,j] - integer_stick_targets[:,j], return_counts=True)
    data = []
    num = np.sum(counts)
    for i in range(n):
        mask = np.abs(unique) <= i
        data += [np.sum(counts[mask]) / num * 100]
    stick_accuracy_df[buttons[j]] = data
        
stick_accuracy_df



Unnamed: 0,How Close,JSTICK_X,JSTICK_Y,CSTICK_X,CSTICK_Y
0,0,23.334191,30.823776,68.872342,70.788014
1,1,45.892614,56.804105,92.195836,92.295496
2,2,55.297101,64.883439,95.826098,95.307862
3,3,61.410927,69.7576,96.897887,96.20517
4,4,65.92601,73.318168,97.198969,96.621605
5,5,69.485562,76.147949,97.315974,96.87592
6,6,72.433136,78.506577,97.382867,97.055283
7,7,74.946807,80.525916,97.431227,97.193124
8,8,77.138498,82.29107,97.469996,97.305327
9,9,79.075976,83.857887,97.502939,97.400375


In [12]:
target_stick_is_zero = (integer_stick_targets == 0)*1
pred_stick_is_zero = (integer_stick_pred == 0)*1

zero_accuracy = []
# find the accuracy of the model when the stick is zero
for j in range(4):
    diff = np.abs(target_stick_is_zero[:,j] - pred_stick_is_zero[:,j])
    data = []
    num_correct = np.sum(diff == 0)
    zero_accuracy.append(num_correct / np.prod(diff.shape) * 100)
print(zero_accuracy)
# zero_accuracy_df = pd.DataFrame(columns=buttons, data=[zero_accuracy])


[95.19907571588709, 97.4271425659839, 99.72547980569992, 99.66317877645301]


In [13]:
# Example data setup (make sure you have your actual data in these variables)
# target = np.random.randint(0, 2, (100, 9))  # Example target array
# pred = np.random.random((100, 9))  # Example predictions array
buttons = [ 'TRIGGER_LOGICAL', 'Z', 'A', 'B', 'X_or_Y']

# Initializing the DataFrame
button_accuracy_df = pd.DataFrame(columns=['Button', 'Accuracy', 'Acc of 0', 'Acc of 1'])

target_buttons = target[:, 4 + 4 * transform:]
pred_buttons = pred[:, 4 + 4 * transform:] > 0.5
total = np.prod(target_buttons.shape)

# Computing accuracies and filling the DataFrame
rows = []  # List to hold row data

for i, button in enumerate(buttons):
    correct_predictions = np.sum(target_buttons[:, i] == pred_buttons[:, i])
    correct_zeros = np.sum((target_buttons[:, i] == 0) & (pred_buttons[:, i] == 0))
    correct_ones = np.sum((target_buttons[:, i] == 1) & (pred_buttons[:, i] == 1))

    accuracy = correct_predictions / total * 100
    acc_of_0 = correct_zeros / np.sum(target_buttons[:, i] == 0) * 100 if np.sum(target_buttons[:, i] == 0) > 0 else 0
    acc_of_1 = correct_ones / np.sum(target_buttons[:, i] == 1) * 100 if np.sum(target_buttons[:, i] == 1) > 0 else 0

    rows.append({
        'Button': button,
        'Accuracy': accuracy,
        'Acc of 0': acc_of_0,
        'Acc of 1': acc_of_1
    })

# Use concat to add all new rows to the DataFrame at once
button_accuracy_df = pd.concat([button_accuracy_df, pd.DataFrame(rows)], ignore_index=True)

# Output the DataFrame
button_accuracy_df

Unnamed: 0,Button,Accuracy,Acc of 0,Acc of 1
0,TRIGGER_LOGICAL,17.151562,87.060622,93.896117
1,Z,19.795249,98.96703,99.990756
2,A,18.49176,92.075718,98.283654
3,B,19.153975,95.610165,99.176081
4,X_or_Y,16.800952,83.082631,92.378738
