<h1> Outline </h1>
We try to find a quick and memory efficient way to extract all the input data from every game.

In [7]:
import os as os
import sys
import numpy as np
import pandas as pd
import tqdm
import slippi as slp
import gzip
from joblib import Parallel, delayed
from multiprocessing import Manager
# from numba import njit, prange
# import cupy as cp


<h2> Initialize Useful Variables </h2>

In [8]:
frames_per_segment = 1024
dataset_path = 'C:/Users/jaspa/Grant ML/slp/Slippi_Public_Dataset_v3'
slp_files = [file for file in os.listdir(dataset_path) if file.endswith('.slp')] 
data_type_inputs = np.single
full_game_save_path = 'C:/Users/jaspa/Grant ML/slp/data/full_2_player_game_inputs_test'
segment_save_path = 'C:/Users/jaspa/Grant ML/slp/data/segment_2_player_game_inputs_test'


<h2> Preliminary Functions </h2>
We use these functions to one-hot encode the button bitmask and get the frame input data for a given port number and frames object.

In [9]:
# Takes encoded buttons as bitmask and returns binary array of buttons pressed
def one_hot_encode_buttons(bitmask):
    # Define labels and their encoded values
    labels = ['Z', 'A', 'B', 'X_or_Y']
    encoded_values = {
        'Z': 16,
        'A': 256,
        'B': 512,
        'X': 1024,
        'Y': 2048
    }

    # Initialize the one-hot encoded list with zeros
    one_hot_encoded = [0] * len(labels)

    # Check each label and set the corresponding one-hot encoded value
    if bitmask & encoded_values['Z']:
        one_hot_encoded[labels.index('Z')] = 1
    if bitmask & encoded_values['A']:
        one_hot_encoded[labels.index('A')] = 1
    if bitmask & encoded_values['B']:
        one_hot_encoded[labels.index('B')] = 1
    if bitmask & encoded_values['X'] or bitmask & encoded_values['Y']:
        one_hot_encoded[labels.index('X_or_Y')] = 1

    return one_hot_encoded


# Create a numpy list that is the correct size and fill it with a loop
def get_frame_data(frames, port):
    inputs = np.empty((9,len(frames)),dtype=data_type_inputs)  # Initialize an empty Numpy array
    for i, frame in enumerate(frames):  
        # buttons = one_hot_encode_buttons(frame.ports[port].leader.pre.buttons.physical.value)
        # j_x = frame.ports[port].leader.pre.joystick.x
        # j_y = frame.ports[port].leader.pre.joystick.y
        # c_x = frame.ports[port].leader.pre.cstick.x
        # c_y = frame.ports[port].leader.pre.cstick.y
        # t_max = max(frame.ports[port].leader.pre.triggers.physical.l,frame.ports[port].leader.pre.triggers.physical.r)
        
        # inputs[:,i] = buttons + [j_x, j_y, c_x, c_y, t_max]
        
         # Directly assign the encoded button values to the first 4 rows of column `i`
        inputs[:4, i] = one_hot_encode_buttons(frame.ports[port].leader.pre.buttons.physical.value)
        
        # Directly assign joystick and c-stick values to the respective rows of column `i`
        inputs[4, i] = frame.ports[port].leader.pre.joystick.x
        inputs[5, i] = frame.ports[port].leader.pre.joystick.y
        inputs[6, i] = frame.ports[port].leader.pre.cstick.x
        inputs[7, i] = frame.ports[port].leader.pre.cstick.y  

        # Directly assign the maximum trigger value to the last row of column `i`
        inputs[8, i] = max(frame.ports[port].leader.pre.triggers.physical.l, frame.ports[port].leader.pre.triggers.physical.r)

    return inputs



<h2>Process SLP function</h2>
The function that will be called for each SLP file we are interested in.

In [10]:
# Each iteration of this creates a row of the dataframe and appends it to the dataframe.
def process_slp_file(slp_file,dataset_path,full_game_save_path,segment_save_path):
    try:
        file_path = os.path.join(dataset_path, slp_file)
        slp_file_name = slp_file.removesuffix('.slp')
        
        game = slp.Game(file_path)
        frames = game.frames
         
        # Check game is long enough
        game_length = game.metadata.duration
        if game_length < 123 + frames_per_segment:          # TODO: Create a package and define constants like 123 with meaningful names
            return
        
        # Find the ports the players are using
        occupied_ports = [i for i, port in enumerate(game.start.players) if port is not None]
        
        # Ignore games that aren't singles
        if len(occupied_ports) > 2:  
            return
        # Ignore games with CPUs
        if game.start.players[occupied_ports[0]].type.value or game.start.players[occupied_ports[1]].type.value:
            return
        
       

        for j in occupied_ports:
            # Get the character
            character = game.start.players[j].character.name
            # Get all the frame data
            frame_data = get_frame_data(frames[123:], j)
            
            filename = f"{character}_port_{j}_from_{slp_file_name}.npy.gz"
            full_path = os.path.join(full_game_save_path, filename)
            with gzip.open(full_path, 'wb') as f:
                np.save(f, frame_data)
            
            game_length = frame_data.shape[1]
            # Calculate the total number of full segments
            num_segments = game_length // frames_per_segment
            
            for i in range(num_segments):
                # Extract the segment
                start_index = i * frames_per_segment
                end_index = start_index + frames_per_segment
                segment = frame_data[:, start_index:end_index]

                # Construct the filename for the segment
                filename = f"{character}_port_{j}_from_{slp_file_name}_1024_frames_segment_{i + 1}_of_{num_segments}.npy.gz"
                full_path = os.path.join(segment_save_path, filename)

                # Save the segment
                with gzip.open(full_path, 'wb') as f:
                    np.save(f, segment)
                
                # Construct the filename for the segment
                # filename = f"{character}_port_{j}_from_{slp_file_name}_1024_frames_segment_{i + 1}_of_{num_segments}.npy"
                # full_path = os.path.join(segment_save_path, filename)
                # # Save the array
                # np.save(full_path,segment)
             
    except Exception as e:
        print(f"Error processing {slp_file}: {str(e)}")

In [11]:
num_files = 1000

# Use joblib to parallelize processing of SLP files
Parallel(n_jobs=-1, verbose=0)(delayed(process_slp_file)(slp_file,dataset_path,full_game_save_path,segment_save_path) for slp_file in tqdm.tqdm(slp_files[:num_files]))


100%|██████████| 1000/1000 [00:29<00:00, 33.41it/s]


[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,