<h2> Outline</h2>
In this notebook we extract the controller input data from games whose replay file names mention Fox, Falco, Marth, Sheik, or Jigglypuff. We split each game into fixed-length segments of n frames to get more training examples.

In [None]:
import os as os
import numpy as np
import pandas as pd
import tqdm
import slippi as slp
from joblib import Parallel, delayed
from multiprocessing import Manager

<h2> Initialize some variables</h2>
We can change the number of frames per segment (60 frames correspond to one second of game time).

In [None]:
# Length of one training segment, in frames.
# At 60 frames per second, 1024 frames is roughly 17 seconds of play.
frames_per_segment = 1024

# Column names of the per-segment records collected during extraction.
game_data_columns = ['slp_file', 'game_segment', 'character', 'input_data']

# Directory that holds the replay files.
dataset_path = '../../Slippi_Public_Dataset_v3/'

# Keep only .slp replays whose file name mentions at least one character of interest.
slp_files = [
    file_name
    for file_name in os.listdir(dataset_path)
    if file_name.endswith('.slp')
    and any(
        character_name in file_name
        for character_name in ('Fox', 'Falco', 'Marth', 'Sheik', 'Jigglypuff')
    )
]

<h2> Preliminary Functions </h2>
We use these functions to turn the button bitmask into a binary vector, one-hot encode the character, and collect the per-frame input data for a given port number and list of frames.

In [None]:
def one_hot_encode_buttons(bitmask):
    """Expand a button bitmask into a list of 0/1 flags.

    The returned list has one entry per button, in this order:
    DPAD_LEFT, DPAD_RIGHT, DPAD_DOWN, DPAD_UP, Z, R, L, A, B, X, Y, START.

    Args:
        bitmask: Integer whose set bits mark the pressed buttons.

    Returns:
        A list of 12 ints; entry k is 1 when the bit belonging to the k-th
        button is set in ``bitmask`` and 0 otherwise.
    """
    # Bit value of every button, listed in output order.
    button_bits = (
        1,     # DPAD_LEFT
        2,     # DPAD_RIGHT
        4,     # DPAD_DOWN
        8,     # DPAD_UP
        16,    # Z
        32,    # R
        64,    # L
        256,   # A
        512,   # B
        1024,  # X
        2048,  # Y
        4096,  # START
    )
    return [1 if bitmask & bit else 0 for bit in button_bits]

def one_hot_encode_characters(character):
    """Return the one-hot label vector for a character name.

    Args:
        character: Upper-case character name, e.g. ``'FOX'``.

    Returns:
        A length-5 NumPy array holding a single 1 at the character's position
        (FOX, FALCO, MARTH, SHEIK, JIGGLYPUFF, in that order), or ``None`` when
        the name is not one of those five.
    """
    known_characters = ('FOX', 'FALCO', 'MARTH', 'SHEIK', 'JIGGLYPUFF')
    if character not in known_characters:
        return None

    # Build a fresh array on every call so that labels never share storage.
    label = [0] * len(known_characters)
    label[known_characters.index(character)] = 1
    return np.array(label)

def get_frame_data(frames, port):
    """Collect the controller inputs of one port over a segment of frames.

    Each row of the result describes one frame: the 12 button flags produced
    by ``one_hot_encode_buttons`` followed by joystick x/y, c-stick x/y and the
    physical L/R trigger values (18 columns in total).

    Args:
        frames: Iterable of at most ``frames_per_segment`` frame objects.
        port: Index of the port whose inputs are read.

    Returns:
        A float NumPy array of shape ``(frames_per_segment, 18)``. Rows past
        the number of frames supplied are all zeros.

    Raises:
        IndexError: If more than ``frames_per_segment`` frames are supplied.
    """
    # Zero-initialise the buffer: with np.empty, a segment shorter than
    # frames_per_segment would leave uninitialised memory in the unused rows.
    inputs = np.zeros((frames_per_segment, 18))
    for i, frame in enumerate(frames):
        # Every input value is read from the same record, so look it up once.
        pre = frame.ports[port].leader.pre
        buttons = one_hot_encode_buttons(pre.buttons.physical.value)
        inputs[i] = buttons + [
            pre.joystick.x,
            pre.joystick.y,
            pre.cstick.x,
            pre.cstick.y,
            pre.triggers.physical.l,
            pre.triggers.physical.r,
        ]

    return inputs


<h2>Process SLP function</h2>
The function that will be called for each SLP file we are interested in.

In [None]:
def process_slp_file(slp_file, dataset_path, game_data):
    """Extract per-segment controller inputs from one replay into ``game_data``.

    The replay contributes nothing when it is too short to hold one full
    segment after the skipped leading frames, when it does not have exactly
    two occupied ports, or when either player has a non-zero type value
    (games with CPUs). Otherwise, for every full segment and every occupied
    port whose character is recognised by ``one_hot_encode_characters``, the
    row ``[slp_file, segment_index, character_label, frame_data]`` is appended.

    Args:
        slp_file: File name of the replay inside ``dataset_path``.
        dataset_path: Directory that contains the replay.
        game_data: Collector with an ``append`` method that receives the rows.

    Returns:
        None. Any exception raised while processing is caught and reported
        with ``print`` so that one bad file does not abort a batch run.
    """
    # Number of leading frames skipped in every game.
    # NOTE(review): presumably the pre-game countdown (Slippi frame indices
    # start at -123) -- confirm, and move to a shared named constant.
    skipped_frames = 123

    try:
        game = slp.Game(os.path.join(dataset_path, slp_file))
        frames = game.frames

        # Check the game is long enough for at least one full segment.
        game_length = game.metadata.duration
        if game_length < skipped_frames + frames_per_segment:
            return

        # Find the ports the players are using.
        players = game.start.players
        occupied_ports = [i for i, player in enumerate(players) if player is not None]
        # Keep singles only. Requiring exactly two ports also rejects games with
        # fewer than two players, which would otherwise fail on the CPU check.
        if len(occupied_ports) != 2:
            return
        # Ignore games with CPUs (any player whose type value is non-zero).
        if any(players[p].type.value for p in occupied_ports):
            return

        # Count only segments that fit entirely after the skipped frames, so
        # that every slice below holds exactly frames_per_segment frames.
        # The frame list length is taken into account in case it is shorter
        # than the duration reported by the metadata.
        usable_frames = min(game_length, len(frames)) - skipped_frames
        num_game_segments = usable_frames // frames_per_segment

        for i in range(num_game_segments):
            start = skipped_frames + i * frames_per_segment
            segment_frames = frames[start:start + frames_per_segment]
            for j in occupied_ports:
                character = one_hot_encode_characters(players[j].character.name)
                # Players using a character outside the label set are skipped.
                if character is not None:
                    frame_data = get_frame_data(segment_frames, j)
                    game_data.append([slp_file, i, character, frame_data])

    except Exception as e:
        print(f"Error processing {slp_file}: {str(e)}")

<h2> Multiprocess data extraction </h2>
We use joblib to speed up the extraction of data by processing the replay files in parallel.

In [None]:
# How many files to extract data from; None means all of them.
# Do not use -1 for "all": slp_files[:-1] silently drops the last file.
num_files = None

# Shared list that the joblib workers append their rows to.
manager = Manager()
game_data_list = manager.list()

# Use joblib to parallelize processing of SLP files
Parallel(n_jobs=-1, verbose=0)(
    delayed(process_slp_file)(slp_file, dataset_path, game_data_list)
    for slp_file in tqdm.tqdm(slp_files[:num_files])
)

# Make the data frame
game_data_df = pd.DataFrame(list(game_data_list), columns=game_data_columns)
game_data_df  # Displayed to check that rows were actually extracted

<h2>Save the extracted input data</h2>
Stack the per-segment inputs and character labels into NumPy arrays and save them as binary `.npy` files.

In [None]:
print(game_data_df.shape)

# Fail early with a clear message instead of np.stack's generic error on an empty input.
if game_data_df.empty:
    raise ValueError('No game segments were extracted; nothing to save.')

# Stack the per-segment input arrays: (segments, frames, features).
X = np.stack(game_data_df['input_data'].tolist(), axis=0)
# Swap the last two axes: (segments, features, frames).
X = np.transpose(X, (0, 2, 1))
print(X.shape)

# Stack the one-hot character labels: (segments, 5).
y = np.stack(game_data_df['character'].tolist(), axis=0)
print(y.shape)

# Every input segment must have exactly one label.
assert X.shape[0] == y.shape[0], 'inputs and labels are misaligned'

# Save as binary .npy files. The segment length is taken from the configuration
# so that the file names stay correct when frames_per_segment is changed.
output_dir = '../../data'
os.makedirs(output_dir, exist_ok=True)
np.save(f'{output_dir}/classify5_{frames_per_segment}_data.npy', X)
np.save(f'{output_dir}/classify5_{frames_per_segment}_labels.npy', y)