<h1> Outline </h1>
We try to find a quick and memory efficient way to extract all the input data from every game.

In [1]:
import os as os
import sys
import numpy as np
import pandas as pd
import tqdm
import slippi as slp
import gzip
from joblib import Parallel, delayed
from multiprocessing import Manager
# from numba import njit, prange
# import cupy as cp


<h2> Initialize Useful Variables </h2>

In [2]:
frames_per_segment = 1024
dataset_path ='D:\\ranked\\ranked-anonymized-1-116248\\ranked-anonymized'
slp_files = [file for file in os.listdir(dataset_path) if file.endswith('.slp')] 
print(slp_files[0])
data_type_inputs = np.single
full_game_save_path = 'C:/Users/jaspa/Grant ML/ranked_full_subfolders'
segment_save_path_subfolder = 'C:/Users/jaspa/Grant ML/ranked_segments_subfolders'
segment_save_path_bulk = 'C:/Users/jaspa/Grant ML/ranked_segments_bulk'


diamond-diamond-00018278aa8bb1c30871ab4c.slp


In [3]:
# Function to create necessary directories
def create_directories(path):
    if not os.path.exists(path):
        os.makedirs(path)
        
create_directories(segment_save_path_subfolder)
# create_directories(full_game_save_path)
create_directories(segment_save_path_bulk)


<h2> Preliminary Functions </h2>
We use these functions to one-hot encode the button bitmask and get the frame input data for a given port number and frames object.

In [4]:
# Takes encoded buttons as bitmask and returns binary array of buttons pressed
def one_hot_encode_buttons(bitmask):
    # Define labels and their encoded values
    labels = ['Z', 'A', 'B', 'X_or_Y']
    encoded_values = {
        'Z': 16,
        'A': 256,
        'B': 512,
        'X': 1024,
        'Y': 2048
    }

    # Initialize the one-hot encoded list with zeros
    one_hot_encoded = [0] * len(labels)

    # Check each label and set the corresponding one-hot encoded value
    if bitmask & encoded_values['Z']:
        one_hot_encoded[labels.index('Z')] = 1
    if bitmask & encoded_values['A']:
        one_hot_encoded[labels.index('A')] = 1
    if bitmask & encoded_values['B']:
        one_hot_encoded[labels.index('B')] = 1
    if bitmask & encoded_values['X'] or bitmask & encoded_values['Y']:
        one_hot_encoded[labels.index('X_or_Y')] = 1

    return one_hot_encoded


# Create a numpy list that is the correct size and fill it with a loop
def get_frame_data(frames, port):
    inputs = np.empty((9,len(frames)),dtype=data_type_inputs)  # Initialize an empty Numpy array
    for i, frame in enumerate(frames):  
        # buttons = one_hot_encode_buttons(frame.ports[port].leader.pre.buttons.physical.value)
        # j_x = frame.ports[port].leader.pre.joystick.x
        # j_y = frame.ports[port].leader.pre.joystick.y
        # c_x = frame.ports[port].leader.pre.cstick.x
        # c_y = frame.ports[port].leader.pre.cstick.y
        # t_max = max(frame.ports[port].leader.pre.triggers.physical.l,frame.ports[port].leader.pre.triggers.physical.r)
        
        # inputs[:,i] = buttons + [j_x, j_y, c_x, c_y, t_max]
        
         # Directly assign the encoded button values to the first 4 rows of column `i`
        inputs[:4, i] = one_hot_encode_buttons(frame.ports[port].leader.pre.buttons.physical.value)
        
        # Directly assign joystick and c-stick values to the respective rows of column `i`
        inputs[4, i] = frame.ports[port].leader.pre.joystick.x
        inputs[5, i] = frame.ports[port].leader.pre.joystick.y
        inputs[6, i] = frame.ports[port].leader.pre.cstick.x
        inputs[7, i] = frame.ports[port].leader.pre.cstick.y  

        # Directly assign the maximum trigger value to the last row of column `i`
        inputs[8, i] = max(frame.ports[port].leader.pre.triggers.physical.l, frame.ports[port].leader.pre.triggers.physical.r)

    return inputs



<h2>Process SLP function</h2>
The function that will be called for each SLP file we are interested in.

In [5]:
# Each iteration of this creates a row of the dataframe and appends it to the dataframe.
def process_slp_file(file_number,slp_file,dataset_path,segment_save_path_subfolder,segment_save_path_bulk,full_game_save_path):
    try:
        file_path = os.path.join(dataset_path, slp_file)
        # slp_file_name = slp_file.removesuffix('.slp')
        
        game = slp.Game(file_path)
        frames = game.frames
         
        # Check game is long enough
        game_length = len(frames)
        if game_length < 123 + frames_per_segment:          # TODO: Create a package and define constants like 123 with meaningful names
            return
        
        # Find the ports the players are using
        occupied_ports = [i for i, port in enumerate(game.start.players) if port is not None]
        
        # Ignore games that aren't singles
        if len(occupied_ports) > 2:  
            return
        # Ignore games with CPUs
        if game.start.players[occupied_ports[0]].type.value or game.start.players[occupied_ports[1]].type.value:
            return
        
        port_1 = occupied_ports[0]
        port_2 = occupied_ports[1]
        
        # Get the character
        character_1 = game.start.players[port_1].character.name
        character_2 = game.start.players[port_2].character.name
        # Get all the frame data
        
        frame_data = get_frame_data(frames[123:], port_1)
        
        port_1_full_sub = os.path.join(full_game_save_path, f'{character_1}/{character_2}')
        create_directories(port_1_full_sub)
        
        file_name = f"{file_number}__{port_1}__{character_1}__{character_2}.npy.gz"
        full_path = os.path.join(port_1_full_sub, file_name)
        with gzip.open(full_path, 'wb') as f:
            np.save(f, frame_data)
        
        game_length = frame_data.shape[1]
        # Calculate the total number of full segments
        num_segments = game_length // frames_per_segment
        
        port_1_segment_path = os.path.join(segment_save_path_subfolder, f'{character_1}/{character_2}')
        port_2_segment_path  = os.path.join(segment_save_path_subfolder, f'{character_2}/{character_1}')
        create_directories(port_1_segment_path)
        create_directories(port_2_segment_path)
        
        for i in range(num_segments):
            # Extract the segment
            start_index = i * frames_per_segment
            end_index = start_index + frames_per_segment
            segment = frame_data[:, start_index:end_index]

            # Construct the filename for the segment
            filename = f"{file_number}__{i + 1}__{port_1}__{character_1}__{character_2}__1024.npy.gz"
            full_path_subfolder = os.path.join(port_1_segment_path, filename)

            # Save the segment
            with gzip.open(full_path_subfolder, 'wb') as f:
                np.save(f, segment)
                
            full_path_bulk = os.path.join(segment_save_path_bulk, filename)
            # Save the segment
            with gzip.open(full_path_bulk, 'wb') as f:
                np.save(f, segment)
        
        frame_data = get_frame_data(frames[123:], port_2)
        
        port_2_full_path = os.path.join(full_game_save_path, f'{character_2}/{character_1}')
        create_directories(port_2_full_path)
        filename = f"{file_number}__{port_2}__{character_1}__{character_2}.npy.gz"
        full_path_sub = os.path.join(port_2_full_path, filename)
        with gzip.open(full_path_sub, 'wb') as f:
            np.save(f, frame_data)
        
        game_length = frame_data.shape[1]
        # Calculate the total number of full segments
        num_segments = game_length // frames_per_segment
            
        for i in range(num_segments):
            # Extract the segment
            start_index = i * frames_per_segment
            end_index = start_index + frames_per_segment
            segment = frame_data[:, start_index:end_index]

            # Construct the filename for the segment
            filename = f"{file_number}__{i + 1}__{port_2}__{character_2}__{character_1}__1024.npy.gz"
            full_path_subfolder = os.path.join(port_2_segment_path, filename)

            # Save the segment
            with gzip.open(full_path_subfolder, 'wb') as f:
                np.save(f, segment)
                
            full_path_bulk = os.path.join(segment_save_path_bulk, filename)
            # Save the segment
            with gzip.open(full_path_bulk, 'wb') as f:
                np.save(f, segment)
        
             
    except Exception as e:
        print(f"Error processing {slp_file}: {str(e)}")

In [6]:
num_files = -1

# Use joblib to parallelize processing of SLP files
Parallel(n_jobs=-1, verbose=1)(delayed(process_slp_file)(i,slp_file,dataset_path,segment_save_path_subfolder,segment_save_path_bulk,full_game_save_path) for i,slp_file in tqdm.tqdm(enumerate(slp_files[:num_files])))


0it [00:00, ?it/s][Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
48it [00:01, 33.46it/s] [Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    1.2s
192it [00:08, 21.30it/s][Parallel(n_jobs=-1)]: Done 152 tasks      | elapsed:    8.5s
432it [00:19, 20.52it/s][Parallel(n_jobs=-1)]: Done 402 tasks      | elapsed:   20.3s
792it [00:36, 21.12it/s][Parallel(n_jobs=-1)]: Done 752 tasks      | elapsed:   36.7s
1248it [00:56, 22.88it/s][Parallel(n_jobs=-1)]: Done 1202 tasks      | elapsed:   56.6s
1776it [01:19, 22.04it/s][Parallel(n_jobs=-1)]: Done 1752 tasks      | elapsed:  1.3min
2448it [01:49, 21.78it/s][Parallel(n_jobs=-1)]: Done 2402 tasks      | elapsed:  1.8min
3192it [02:22, 20.57it/s][Parallel(n_jobs=-1)]: Done 3152 tasks      | elapsed:  2.4min
4032it [03:00, 22.22it/s][Parallel(n_jobs=-1)]: Done 4002 tasks      | elapsed:  3.0min
4992it [03:43, 21.77it/s][Parallel(n_jobs=-1)]: Done 4952 tasks      | elapsed:  3.7min
6048it [04:30, 22.14it/s][Paralle

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,