<h2> Outline</h2>
In this notebook we extract controller input data from Slippi replays of games featuring Fox, Falco, Marth, Sheik, or Jigglypuff. We split each game into fixed-length segments (a set number of frames each) to get more training examples.

In [None]:
import os as os
import sys
import numpy as np
import pandas as pd
import tqdm
import slippi as slp
import gzip
from joblib import Parallel, delayed
from multiprocessing import Manager
from numba import njit, prange
# import cupy as cp
import gzip

<h2> Initialize some variables</h2>
We can change the number of frames per segment (60 frames correspond to one second of game time).

In [None]:

# Number of frames in each training segment. At 60 frames per second, 1024 frames
# is roughly 17 seconds. Alternatives that were tried previously:
#   seconds_per_segment = 16; frames_per_segment = seconds_per_segment * 60
#   frames_per_segment = 900
frames_per_segment = 1024

game_data_columns = ['slp_file', 'game_segment', 'character', 'input_data']
dataset_path = '../../Slippi_Public_Dataset_v3/'

# Only replays whose file name mentions at least one of these characters are considered.
character_name_tags = ('Fox', 'Falco', 'Marth', 'Sheik', 'Jigglypuff')

# os.listdir returns entries in arbitrary order; sorting keeps the selection of
# replays reproducible when the list is later truncated or capped per character.
slp_files = sorted(
    file for file in os.listdir(dataset_path)
    if file.endswith('.slp') and any(tag in file for tag in character_name_tags)
)

# dtype of the extracted input arrays (np.half was tried as an alternative).
data_type_inputs = np.single
# dtype of the one-hot character encoding.
data_type_character_encode = np.bool_

# Maximum number of segments kept per character.
# NOTE(review): this was previously assigned 19762 and then immediately overwritten
# with 1000; only the effective value is kept here. Restore 19762 for a full run.
num_data_points_per_character = 1000

<h2> Preliminary Functions </h2>
We use these functions to one-hot encode the button bitmask and get the frame input data for a given port number and frames object.

In [None]:
# Takes encoded buttons as bitmask and returns binary array of buttons pressed
def one_hot_encode_buttons(bitmask):
    """Expand a button bitmask into a list of twelve 0/1 flags.

    The flags are ordered DPAD_LEFT, DPAD_RIGHT, DPAD_DOWN, DPAD_UP, Z, R, L,
    A, B, X, Y, START. A flag is 1 when the corresponding bit is set in
    ``bitmask`` and 0 otherwise.
    """
    # (label, bit value) pairs in output order; note that bit value 128 is not used.
    button_bits = (
        ('DPAD_LEFT', 1), ('DPAD_RIGHT', 2), ('DPAD_DOWN', 4), ('DPAD_UP', 8),
        ('Z', 16), ('R', 32), ('L', 64),
        ('A', 256), ('B', 512), ('X', 1024), ('Y', 2048), ('START', 4096),
    )
    return [1 if bitmask & bit else 0 for _label, bit in button_bits]

def one_hot_encode_characters(character):
    """Return a length-5 one-hot array for a character name, or None if unknown.

    Positions are, in order: FOX, FALCO, MARTH, SHEIK, JIGGLYPUFF. The array
    uses the ``data_type_character_encode`` dtype.
    """
    character_order = ('FOX', 'FALCO', 'MARTH', 'SHEIK', 'JIGGLYPUFF')
    if character not in character_order:
        return None

    encoding = np.zeros(len(character_order), dtype=data_type_character_encode)
    encoding[character_order.index(character)] = 1
    return encoding

# Create a numpy list that is the correct size and fill it with a loop
def get_frame_data(frames, port):
    """Collect one player's controller inputs for a run of frames.

    Parameters
    ----------
    frames : iterable of frame objects
        Each frame must expose ``frame.ports[port].leader.pre`` with the
        ``buttons.physical.value``, ``joystick``, ``cstick`` and
        ``triggers.physical`` attributes read below.
    port : int
        Index into ``frame.ports`` of the player to read.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(frames_per_segment, 18)`` and dtype
        ``data_type_inputs``. Columns are the 12 button flags produced by
        ``one_hot_encode_buttons`` followed by joystick x, joystick y,
        c-stick x, c-stick y, left trigger and right trigger.

    Raises
    ------
    IndexError
        If more than ``frames_per_segment`` frames are supplied.
    """
    # Zero-initialise rather than np.empty: if fewer than frames_per_segment
    # frames are supplied, the unfilled rows are zeros instead of arbitrary
    # uninitialised memory.
    inputs = np.zeros((frames_per_segment, 18), dtype=data_type_inputs)

    for i, frame in enumerate(frames):
        pre = frame.ports[port].leader.pre  # looked up once per frame
        buttons = one_hot_encode_buttons(pre.buttons.physical.value)
        analog = [
            pre.joystick.x, pre.joystick.y,
            pre.cstick.x, pre.cstick.y,
            pre.triggers.physical.l, pre.triggers.physical.r,
        ]
        inputs[i] = buttons + analog

    return inputs

def process_input_data(frame_data):
    """Reduce a ``(n_frames, 18)`` input array to 9 channels, channel-first.

    Input columns are expected in this order:
    DPAD_LEFT, DPAD_RIGHT, DPAD_DOWN, DPAD_UP, Z, R, L, A, B, X, Y, START,
    J_X, J_Y, C_X, C_Y, T_L, T_R.

    The returned array has shape ``(9, n_frames)`` with rows, in order:
    max(X, Y), max(T_L, T_R), Z, A, B, J_X, J_Y, C_X, C_Y.
    The input array is not modified.

    Raises
    ------
    ValueError
        If ``frame_data`` is not two-dimensional with 18 columns.
    """
    frame_data = np.asarray(frame_data)
    if frame_data.ndim != 2 or frame_data.shape[1] != 18:
        raise ValueError(
            f"expected frame data of shape (n_frames, 18), got {frame_data.shape}"
        )

    # Transpose to channel-first. A plain reshape(18, n_frames) would keep the
    # flat memory order and mix frames and channels together, so the row
    # indices below would no longer correspond to individual inputs.
    channels = frame_data.T

    # Combine X and Y button
    xy_button = np.maximum(channels[9], channels[10])
    # Combine Triggers
    trigger = np.maximum(channels[16], channels[17])

    # Keep only the inputs we use; the d-pad, digital R/L, START and the
    # separate X/Y/trigger channels are dropped.
    kept_rows = [
        xy_button,
        trigger,
        channels[4],   # Z
        channels[7],   # A
        channels[8],   # B
        channels[12],  # J_X
        channels[13],  # J_Y
        channels[14],  # C_X
        channels[15],  # C_Y
    ]
    return np.stack(kept_rows, axis=0)
    


<h2>Process SLP function</h2>
The function that will be called for each SLP file we are interested in.

In [None]:
# Each iteration of this creates a row of the dataframe and appends it to the dataframe.
def process_slp_file(slp_file, dataset_path, fox_game_data_list, falco_game_data_list, marth_game_data_list, sheik_game_data_list, jigglypuff_game_data_list):
    """Extract fixed-length input segments from one replay file.

    The replay is split into consecutive windows of ``frames_per_segment``
    frames. For every occupied port whose character is FOX, FALCO, MARTH,
    SHEIK or JIGGLYPUFF, the processed inputs of each window are appended to
    that character's list, as long as the list holds fewer than
    ``num_data_points_per_character`` entries.

    Replays are skipped when no character named in the file name still needs
    data, when the game is too short for a single segment, when it does not
    have exactly two players, or when either player is not human.

    Parameters
    ----------
    slp_file : str
        File name of the replay inside ``dataset_path``.
    dataset_path : str
        Directory containing the replay.
    fox_game_data_list, falco_game_data_list, marth_game_data_list,
    sheik_game_data_list, jigglypuff_game_data_list : list-like
        Per-character output lists; must support ``len`` and ``append``.

    Returns
    -------
    None
        Results are appended to the lists. Any exception is caught and
        reported with ``print`` so one bad replay does not stop a batch run.
    """
    # TODO: Create a package and define constants like this one with meaningful names.
    # Index of the first frame used; everything before it is skipped
    # (presumably the pre-"GO" countdown frames -- confirm against the replay spec).
    first_used_frame = 123

    # In-game character name -> (tag expected in the file name, destination list).
    destinations = {
        'FOX': ('Fox', fox_game_data_list),
        'FALCO': ('Falco', falco_game_data_list),
        'MARTH': ('Marth', marth_game_data_list),
        'SHEIK': ('Sheik', sheik_game_data_list),
        'JIGGLYPUFF': ('Jigglypuff', jigglypuff_game_data_list),
    }

    def needs_more(data_list):
        # Strictly below the cap, so a list never grows past the cap because of this check.
        return len(data_list) < num_data_points_per_character

    try:
        # Skip the game if the played characters already have enough data points
        if not any(tag in slp_file and needs_more(data_list)
                   for tag, data_list in destinations.values()):
            return

        file_path = os.path.join(dataset_path, slp_file)
        game = slp.Game(file_path)
        frames = game.frames

        # Check game is long enough for at least one full segment
        game_length = game.metadata.duration
        if game_length < first_used_frame + frames_per_segment:
            return

        # Find the ports the players are using
        players = game.start.players
        occupied_ports = [i for i, player in enumerate(players) if player is not None]
        # Ignore games that aren't singles (exactly two players). Checking
        # "!= 2" also rejects one-player files, which would otherwise fail below.
        if len(occupied_ports) != 2:
            return
        # Ignore games with CPUs (a truthy player type value marks a non-human player)
        if any(players[port].type.value for port in occupied_ports):
            return

        # Only count segments that fit entirely after the skipped leading
        # frames, so the last window can never run past the end of the replay.
        usable_frames = min(game_length, len(frames)) - first_used_frame
        num_game_segments = usable_frames // frames_per_segment

        for i in range(num_game_segments):
            segment_start = first_used_frame + i * frames_per_segment
            segment_frames = frames[segment_start:segment_start + frames_per_segment]

            for port in occupied_ports:
                character = players[port].character.name
                if character not in destinations:
                    continue
                data_list = destinations[character][1]
                if needs_more(data_list):
                    frame_data = get_frame_data(segment_frames, port)
                    data_list.append(process_input_data(frame_data))

    except Exception as e:
        print(f"Error processing {slp_file}: {str(e)}")

<h2> Multiprocess data extraction </h2>
We use joblib to speed up the extraction of data by processing replay files in parallel.

In [None]:
# How many files we want to extract data from; None means every file in slp_files.
# (A value of -1 would make slp_files[:num_files] silently drop the last file.)
num_files = None

# Create shared lists to store results; the worker processes append to these
# through the manager.
manager = Manager()
fox_game_data_list = manager.list()
falco_game_data_list = manager.list()
marth_game_data_list = manager.list()
sheik_game_data_list = manager.list()
jigglypuff_game_data_list = manager.list()

# Use joblib to parallelize processing of SLP files
Parallel(n_jobs=-1, verbose=1)(
    delayed(process_slp_file)(
        slp_file,
        dataset_path,
        fox_game_data_list,
        falco_game_data_list,
        marth_game_data_list,
        sheik_game_data_list,
        jigglypuff_game_data_list,
    )
    for slp_file in tqdm.tqdm(slp_files[:num_files])
)

In [None]:

# Per-character result lists, in the order used for stacking and labelling.
per_character_lists = (
    fox_game_data_list,
    falco_game_data_list,
    marth_game_data_list,
    sheik_game_data_list,
    jigglypuff_game_data_list,
)

# Number of segments actually kept per character (never more than the cap).
(num_fox_games,
 num_falco_games,
 num_marth_games,
 num_sheik_games,
 num_puff_games) = (
    min(len(data_list), num_data_points_per_character)
    for data_list in per_character_lists
)
kept_counts = (num_fox_games, num_falco_games, num_marth_games, num_sheik_games, num_puff_games)
for kept in kept_counts:
    print(kept)

# The check to see if we have enough data points happens at the same time in
# several workers, so each list can hold more entries than we need; truncate
# each one to its kept count before stacking.
kept_segments = []
for data_list, kept in zip(per_character_lists, kept_counts):
    kept_segments.extend(data_list[:kept])

game_data_array = np.stack(kept_segments, axis=0, dtype=data_type_inputs)
print(game_data_array.shape)

In [None]:
# One-hot labels, one row per stacked segment. Columns follow the stacking
# order: Fox, Falco, Marth, Sheik, Jigglypuff.
character_counts = (
    num_fox_games,
    num_falco_games,
    num_marth_games,
    num_sheik_games,
    num_puff_games,
)
labels = np.zeros((game_data_array.shape[0], 5), dtype=np.bool_)

# Each character occupies a contiguous run of rows; walk through the runs in order.
row_start = 0
for class_index, count in enumerate(character_counts):
    labels[row_start:row_start + count, class_index] = 1
    row_start += count

print(labels.shape)

In [None]:
# Save the input array and its labels as .npy files.
# NOTE(review): these are absolute, machine-specific paths -- consider a
# configurable data directory.
# NOTE(review): the file names mention 19762 per character; confirm that this
# matches the num_data_points_per_character actually used for this run.
for path, array_to_save in (
    ('C:/Users/jaspa/Grant ML/slp/data/classify_5_data_19762_per_character.npy', game_data_array),
    ('C:/Users/jaspa/Grant ML/slp/data/classify_5_labels_19762_per_character.npy', labels),
):
    np.save(path, array_to_save)


In [None]:
# Re-print the number of Jigglypuff segments kept (the same count is printed
# earlier, when the per-character counts are computed).
print(num_puff_games)

<h1> Save Data </h1>