In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam
from torchsummary import summary
from torch.cuda.amp import autocast, GradScaler

import numpy as np
import pandas as pd
import gzip
import pickle
import os
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import gc
import time
import random
import matplotlib.pyplot as plt


import sys
sys.path.append('..')
# from slp_package.slp_functions import create_merged_game_data_df
from slp_package.input_dataset import InputDataSet
import slp_package.pytorch_functions as slp_pytorch_functions

def set_seed(seed=42):
    """Seed every random number generator used by this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (default generator and all CUDA devices), and sets the cuDNN backend
    flags ``deterministic=True`` / ``benchmark=False``.

    Args:
        seed: Integer seed handed to every generator. Defaults to 42.
    """
    # The cuDNN flags do not depend on the seeding calls, so set them first.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    # Every generator receives the same seed; the call order is irrelevant.
    seeders = (random.seed, np.random.seed, torch.cuda.manual_seed_all, torch.manual_seed)
    for seed_fn in seeders:
        seed_fn(seed)


# Seed once, directly after the imports, before any other cell runs.
set_seed(42)

In [None]:
# Inspect one gzip-compressed array: the distinct values found in rows 5-8
# and the smallest non-zero magnitude found in row 0.
path = "/workspace/melee_project_data/input_np/mango/FALCO/72a00ff5-b2a2-4afa-9b63-9523feaf9beb.npy.gz"

with gzip.open(path, 'rb') as gz_file:
    segment = np.load(gz_file)

# Distinct values present in each of rows 5 through 8.
for row_idx in (5, 6, 7, 8):
    row_values = segment[row_idx, :]
    print(np.unique(row_values))

# Boolean mask over row 0 that keeps only the non-zero entries.
non_zero = segment[0] != 0
row0_magnitudes = np.abs(segment[0, non_zero])
print(np.min(row0_magnitudes))

In [None]:
# For rows 5-8 of one array, print the distinct values and a ones/zeros tally.
path = "/workspace/melee_project_data/input_np/mango/FALCO/72a00ff5-b2a2-4afa-9b63-9523feaf9beb.npy.gz"

with gzip.open(path, 'rb') as gz_file:
    segment = np.load(gz_file)

row_length = segment.shape[1]
for row_idx in (5, 6, 7, 8):
    row_values = segment[row_idx, :]
    # The "ones" figure is the row sum, so it is a true count of ones only
    # when the row holds nothing but 0s and 1s.
    row_total = np.sum(row_values)
    print(np.unique(row_values))
    print('ones:', row_total, ', zeros:', row_length - row_total)

# print(segment)

In [None]:
# Same ones/zeros tally for rows 5-8, run against a second array file.
path = "/workspace/melee_project_data/input_np/mango/FALCO/e55c959c-533d-4b0e-82b1-4e79decd25e2.npy.gz"

with gzip.open(path, 'rb') as gz_file:
    segment = np.load(gz_file)

row_length = segment.shape[1]
for row_idx in (5, 6, 7, 8):
    row_values = segment[row_idx, :]
    # The "ones" figure is the row sum, so it is a true count of ones only
    # when the row holds nothing but 0s and 1s.
    row_total = np.sum(row_values)
    print(np.unique(row_values))
    print('ones:', row_total, ', zeros:', row_length - row_total)




In [None]:
# The five objects below are the arguments handed to InputDataSet further down.
# NOTE(review): the filter semantics are inferred from the key names — confirm
# against slp_package.input_dataset.
source_data = ['ranked', 'public', 'mango']

general_features = {
    'stage_name': [
        'FOUNTAIN_OF_DREAMS',
        'FINAL_DESTINATION',
        'BATTLEFIELD',
        'YOSHIS_STORY',
        'POKEMON_STADIUM',
        'DREAMLAND',
    ],
    'num_players': [2],
    'conclusive': [True],
}

player_features = {
    # Alternative filters kept for quick toggling:
    # 'netplay_code': ['MANG#0'],
    # 'character_name': ['FALCO'],
    # 'character_name': ['FOX', 'FALCO', 'MARTH', 'CAPTAIN_FALCON', 'SHEIK'],
    # 'character_name': ['FOX', 'CAPTAIN_FALCON', 'SHEIK', 'FALCO', 'GAME_AND_WATCH',
    #                    'MARTH', 'LINK', 'ICE_CLIMBERS', 'SAMUS', 'GANONDORF', 'BOWSER',
    #                    'MEWTWO', 'YOSHI', 'PIKACHU', 'JIGGLYPUFF', 'NESS', 'DR_MARIO',
    #                    'PEACH', 'LUIGI', 'DONKEY_KONG'],
    'character_name': [
        'FOX', 'CAPTAIN_FALCON', 'SHEIK', 'FALCO', 'GAME_AND_WATCH',
        'MARTH', 'LINK', 'ICE_CLIMBERS', 'SAMUS', 'GANONDORF',
        'BOWSER', 'MEWTWO', 'YOSHI', 'PIKACHU', 'JIGGLYPUFF',
        'NESS', 'DR_MARIO', 'MARIO', 'PEACH', 'ROY',
        'LUIGI', 'YOUNG_LINK', 'DONKEY_KONG', 'PICHU', 'KIRBY',
    ],
    'type_name': ['HUMAN'],
}

opposing_player_features = {
    # Alternative filters kept for quick toggling:
    # 'character_name': ['MARTH'],
    # 'netplay_code': ['KOD#0', 'ZAIN#0']
    'type_name': ['HUMAN'],
}

label_info = {
    # 'source' can be 'general' or 'player'.
    'source': ['player'],
    # Alternative label: 'feature': ['netplay_code']
    'feature': ['character_name'],
}

In [None]:
# Build the dataset object from the source list, the three filter dicts and the
# label spec defined earlier in the notebook.
dataset = InputDataSet(source_data, general_features, player_features, opposing_player_features, label_info)

# `dataset.dataset` is used below like a pandas DataFrame (it is sampled and
# described) — NOTE(review): confirm the type against InputDataSet.
df = dataset.dataset

# Last expression of the cell: preview the first rows.
df.head()

In [None]:
# Draw a reproducible random subset of rows for the exploratory statistics
# computed in the following cells. The request is capped at the number of
# available rows: DataFrame.sample raises ValueError when n is larger than
# the frame and replace=False (the default), which would happen as soon as
# the filters above leave fewer than 10000 rows.
sample_size = min(10000, len(df))
sample_df = df.sample(n=sample_size, random_state=42)
print(sample_df.shape)
print(sample_df.describe())
sample_df.head()

In [None]:
# Accumulator template: one empty list per named input row. The key order
# matters because load_and_process pairs keys with array rows by position.
data_key = {
    name: []
    for name in (
        'JSTICK_X',
        'JSTICK_Y',
        'CSTICK_X',
        'CSTICK_Y',
        'TRIGGER_LOGICAL',
        'Z',
        'A',
        'B',
        'X_or_Y',
    )
}

def load_and_process(path, length, row_sums_dict):
    """Append one array's length-normalized absolute row sums to an accumulator.

    Loads the gzip-compressed ``.npy`` array at ``path``, sums the absolute
    values along axis 1, divides each sum by ``length`` and appends the i-th
    result to the list stored under the i-th key of ``row_sums_dict``
    (keys are taken in dict insertion order). Rows beyond the number of keys
    are ignored.

    Note: a later cell of this notebook defines another function with the
    same name; whichever cell ran last is the one in effect.

    Args:
        path: Path to a gzip-compressed NumPy ``.npy`` file.
        length: Divisor applied to every row sum.
        row_sums_dict: Dict mapping key -> list; mutated in place.

    Returns:
        None.

    Raises:
        IndexError: If the array has fewer rows than ``row_sums_dict`` has
            keys. The check happens before anything is appended, so the
            accumulator lists never end up with unequal lengths.
    """
    with gzip.open(path, 'rb') as f:
        inputs = np.load(f)

    row_means = np.sum(np.abs(inputs), axis=1) / length

    keys = list(row_sums_dict)
    # Validate up front: appending key by key and failing halfway would leave
    # some lists one element longer than others.
    if len(row_means) < len(keys):
        raise IndexError(
            f"Expected at least {len(keys)} rows, but got {len(row_means)}"
        )

    for key, row_mean in zip(keys, row_means):
        row_sums_dict[key].append(row_mean)

# Start from fresh lists. `data_key.copy()` is a shallow copy, so its lists
# would be the very same objects held by `data_key`, and filling the
# accumulator would silently fill the template as well.
row_sums_dict = {key: [] for key in data_key}

# Look the two columns up once instead of on every iteration.
input_root = '/workspace/melee_project_data/input_np/'
sub_paths = sample_df['player_inputs_np_sub_path']
lengths = sample_df['length']
for i in range(len(sample_df)):
    # Stored sub-paths may contain backslashes; convert them to forward slashes.
    load_and_process(input_root + sub_paths.iloc[i].replace('\\', '/'), lengths.iloc[i], row_sums_dict)

# One row per sampled game, one column per key; summarise the distribution.
row_sums_df = pd.DataFrame(row_sums_dict)
row_sums_df.describe()
    
    

In [None]:
import gzip
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Names of the array rows, in row order: row i is histogrammed under input_keys[i].
input_keys = [
    'JSTICK_X', 'JSTICK_Y', 'CSTICK_X', 'CSTICK_Y',
    'TRIGGER_LOGICAL', 'Z', 'A', 'B', 'X_or_Y',
]
# Keys that get the two-bin edges [0, 1, 2] instead of the 20-bin [-1, 1] grid.
binary_keys = ['TRIGGER_LOGICAL', 'Z', 'A', 'B', 'X_or_Y']

# Bin edges per key: 21 evenly spaced edges on [-1, 1] unless the key is binary.
bin_edges = {}
for key in input_keys:
    if key in binary_keys:
        bin_edges[key] = np.array([0, 1, 2])
    else:
        bin_edges[key] = np.linspace(-1, 1, 21)

# Running totals, one zero-filled slot per bin (number of edges minus one).
global_histograms = {key: np.zeros(bin_edges[key].size - 1) for key in input_keys}

def load_and_process(path, length, histograms):
    """Add one array's per-row, length-normalized histograms to an accumulator.

    Loads the gzip-compressed ``.npy`` array at ``path`` and, for every key in
    the module-level ``input_keys``, histograms the matching row with the
    edges in the module-level ``bin_edges[key]``, divides the bin counts by
    ``length`` and adds the result to ``histograms[key]`` in place.

    Values that fall outside the edge range of a key are not counted by
    ``np.histogram``.

    Note: this redefines the ``load_and_process`` from an earlier cell of the
    notebook, which takes a dict of lists as its third argument instead.

    Args:
        path: Path to a gzip-compressed NumPy ``.npy`` file.
        length: Divisor applied to every bin count.
        histograms: Dict mapping key -> NumPy array of running totals, one
            slot per bin; mutated in place.

    Raises:
        ValueError: If the array's first dimension (row count) differs from
            ``len(input_keys)``.
    """
    with gzip.open(path, 'rb') as f:
        inputs = np.load(f)

    # Rows (axis 0) are what must line up with input_keys; the previous
    # message called them "columns".
    if inputs.shape[0] != len(input_keys):
        raise ValueError(f"Expected number of rows: {len(input_keys)}, but got {inputs.shape[0]}")

    # Histogram each row with its own edges and fold it into the running total.
    for index, key in enumerate(input_keys):
        row_histogram, _ = np.histogram(inputs[index, :], bins=bin_edges[key])
        histograms[key] += row_histogram / length

# Fold every sampled row's input file into the running histograms.
n_games = len(sample_df)
for game_idx in range(n_games):
    sub_path = sample_df['player_inputs_np_sub_path'].iloc[game_idx]
    # Stored sub-paths may contain backslashes; convert them to forward slashes.
    path = '/workspace/melee_project_data/input_np/' + sub_path.replace('\\', '/')
    game_length = sample_df['length'].iloc[game_idx]
    load_and_process(path, game_length, global_histograms)

# Draw one bar chart per key from the accumulated histograms, averaged over
# the sampled rows.
n_games = sample_df.shape[0]
for key, counts in global_histograms.items():
    # Edges produced by np.linspace can carry floating-point noise, which made
    # the tick labels long and unreadable; round them for display only.
    left_edges = np.round(bin_edges[key][:-1], 2)

    plt.figure(figsize=(10,5))
    plt.title(f'Histogram for {key}')
    plt.bar(range(len(counts)), counts / n_games, tick_label=left_edges)
    plt.xlabel('Bins')
    # Each file's bin counts are divided by its length before being accumulated,
    # and the total is divided by the number of sampled rows here, so the bars
    # are not raw counts.
    plt.ylabel('Mean length-normalized count per game')
    plt.show()
