In [None]:
import os as os
import numpy as np
import pandas as pd
import polars as pl
import tqdm
import slippi as slp
from joblib import Parallel, delayed
from multiprocessing import Manager
import pyarrow as pa
import pyarrow.parquet as pq


<h2> Preliminary Functions </h2>

The functions below one-hot encode the controller button bitmask and extract the per-frame controller inputs for a given port from a game's frames.

In [None]:
# Length of one model input window, in frames (time steps):
# 12 seconds of gameplay at 60 frames per second.
frames_per_input = 12 * 60

# Expand a controller button bitmask into one 0/1 flag per button
def one_hot_encode(bitmask):
    """Convert a button bitmask into a list of twelve 0/1 flags.

    Parameters
    ----------
    bitmask : int
        Integer whose set bits mark which buttons are pressed.

    Returns
    -------
    list of int
        Twelve entries, each 0 or 1, in the order DPAD_LEFT, DPAD_RIGHT,
        DPAD_DOWN, DPAD_UP, Z, R, L, A, B, X, Y, START.
    """
    # (button label, bit value) pairs listed in output order.
    # Bit 128 is not mapped to any button here, so it never sets a flag.
    button_bits = (
        ('DPAD_LEFT', 1),
        ('DPAD_RIGHT', 2),
        ('DPAD_DOWN', 4),
        ('DPAD_UP', 8),
        ('Z', 16),
        ('R', 32),
        ('L', 64),
        ('A', 256),
        ('B', 512),
        ('X', 1024),
        ('Y', 2048),
        ('START', 4096),
    )

    # A flag is 1 exactly when the button's bit is set in the mask.
    return [1 if bitmask & bit else 0 for _label, bit in button_bits]

# Build the per-frame controller input matrix for one port
def get_frame_inputs(frames, port):
    """Collect one port's pre-frame controller inputs into a 2-D array.

    Parameters
    ----------
    frames : sequence
        Frame objects; each one is read through
        ``frame.ports[port].leader.pre``.
    port : int
        Index into ``frame.ports`` selecting the player to read.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(frames), 18)``. Columns 0-11 are the button
        flags returned by ``one_hot_encode``; columns 12-17 are joystick x,
        joystick y, c-stick x, c-stick y, physical left trigger and
        physical right trigger, in that order.
    """
    # One row per frame that was passed in; no frames are skipped here.
    controller_rows = np.empty((len(frames), 18))

    for row_idx, frame in enumerate(frames):
        # Look the pre-frame state up once instead of once per field.
        pre = frame.ports[port].leader.pre

        button_flags = one_hot_encode(pre.buttons.physical.value)
        analog_values = [
            pre.joystick.x,
            pre.joystick.y,
            pre.cstick.x,
            pre.cstick.y,
            pre.triggers.physical.l,
            pre.triggers.physical.r,
        ]

        # 12 button flags followed by 6 analog readings fill the 18 columns.
        controller_rows[row_idx] = button_flags + analog_values

    return controller_rows





In [None]:




# # Function to process a single SLP file and append to shared polars df
# def process_slp_file(slp_file, dataset_path,game_data_list):#,df):#, time_series_list, label_list, ids):
#     try:
#         file_path = os.path.join(dataset_path, slp_file)
#         game = slp.Game(file_path)
#         frames = game.frames

#         if len(frames) <  + frames_per_input:  # Ignore games that are <3600 frames (i.e. <60 seconds)
#             return

#         # List occupied ports
#         occupied_ports = [i for i, port in enumerate(game.start.players) if port is not None]
#         port_1 = occupied_ports[0]
#         port_2 = occupied_ports[1]

#         if len(occupied_ports) > 2:  # Ignore games that aren't singles
#             return
        
#         # port_1_frame_
#         port_1_frame_inputs = get_frame_inputs(frames, port_1)
#         port_2_frame_inputs = get_frame_inputs(frames, port_2)
        
#         game_data_dict = {
#             # Start (Game Data)
#             'file': slp_file,
#             # 'is_teams': game.start.stage.is_teams, # bool
#             # 'players': game.start.stage.players, #tuple
#             # 'random_seed':game.start.stage.random_seed # int
#             # 'slippi': game.start.stage.slippi #Slippi
#             'stage_name': game.start.stage.name,
#             'stage_value': game.start.stage.value,
#             'is_pal': game.start.is_pal,
#             'is_frozen_ps': game.start.is_frozen_ps,
#             # Player 1 Data
#             'port_1': port_1,
#             'port_1_character_name': game.start.players[port_1].character.name,
#             'port_1_character_value': game.start.players[port_1].character.value,
#             'port_1_type_name': game.start.players[port_1].type.name,
#             'port_1_type_value': game.start.players[port_1].type.value,
#             'port_1_stocks': game.start.players[port_1].stocks,
#             'port_1_costume': game.start.players[port_1].costume,
#             # 'port_1_team': game.start.players[port_1].team # team|None
#             'port_1_ucf_dash_back_name': game.start.players[port_1].ucf.dash_back.name,
#             'port_1_ucf_dash_back_value': game.start.players[port_1].ucf.dash_back.value,
#             'port_1_ucf_shield_drop_name': game.start.players[port_1].ucf.shield_drop.name,
#             'port_1_ucf_shield_drop_value': game.start.players[port_1].ucf.shield_drop.value,
#             'port_1_tag': game.start.players[port_1].tag,
#             # Player 2 Data
#             'port_2': port_2,
#             'port_2_character_name': game.start.players[port_2].character.name,
#             'port_2_character_value': game.start.players[port_2].character.value,
#             'port_2_type_name': game.start.players[port_2].type.name,
#             'port_2_type_value': game.start.players[port_2].type.value,
#             'port_2_stocks': game.start.players[port_2].stocks,
#             'port_2_costume': game.start.players[port_2].costume,
#             # 'port_2_team': game.start.players[port_2].team # team|None
#             'port_2_ucf_dash_back_name': game.start.players[port_2].ucf.dash_back.name,
#             'port_2_ucf_dash_back_value': game.start.players[port_2].ucf.dash_back.value,
#             'port_2_ucf_shield_drop_name': game.start.players[port_2].ucf.shield_drop.name,
#             'port_2_ucf_shield_drop_value': game.start.players[port_2].ucf.shield_drop.value,
#             'port_2_tag': game.start.players[port_2].tag,
#             # End
#             'method_how_game_ended': game.end.method, #string
#              # Player 1 Pre Frame data
#             'port_1_pre_state_value': [np.array([frame.ports[port_1].leader.pre.state.value if frame.ports[port_1].leader.pre.state else None for frame in frames[:]])], #object
#             'port_1_pre_state_name': [np.array([frame.ports[port_1].leader.pre.state.name if frame.ports[port_1].leader.pre.state else None for frame in frames[:]])], #object
#             'port_1_pre_position_x': [np.array([frame.ports[port_1].leader.pre.position.x for frame in frames[:]])], #object
#             'port_1_pre_position_y': [np.array([frame.ports[port_1].leader.pre.position.y for frame in frames[:]])], #object
#             'port_1_pre_direction': [np.array([frame.ports[port_1].leader.pre.direction for frame in frames[:]])], #object
#             'port_1_pre_damage':[np.array([frame.ports[port_1].leader.pre.damage for frame in frames[:]])], #object
#             # Player 1 Frame pre Inputs
#             'port_1_DPAD_LEFT': [port_1_frame_inputs[:, 0]],
#             'port_1_DPAD_RIGHT': [port_1_frame_inputs[:, 1]],
#             'port_1_DPAD_DOWN': [port_1_frame_inputs[:, 2]],
#             'port_1_DPAD_UP': [port_1_frame_inputs[:, 3]],
#             'port_1_Z': [port_1_frame_inputs[:, 4]],
#             'port_1_R': [port_1_frame_inputs[:, 5]],
#             'port_1_L': [port_1_frame_inputs[:, 6]],
#             'port_1_A': [port_1_frame_inputs[:, 7]],
#             'port_1_B': [port_1_frame_inputs[:, 8]],
#             'port_1_X': [port_1_frame_inputs[:, 9]],
#             'port_1_Y': [port_1_frame_inputs[:, 10]],
#             'port_1_START': [port_1_frame_inputs[:, 11]],
#             'port_1_J_X': [port_1_frame_inputs[:, 12]],
#             'port_1_J_Y': [port_1_frame_inputs[:, 13]],
#             'port_1_C_X': [port_1_frame_inputs[:, 14]],
#             'port_1_C_Y': [port_1_frame_inputs[:, 15]],
#             'port_1_T_L': [port_1_frame_inputs[:, 16]],
#             'port_1_T_R': [port_1_frame_inputs[:, 17]],
#             # Player 1 post frame data
#             'port_1_post_character_value': [np.array([frame.ports[port_1].leader.post.character.value if frame.ports[port_1].leader.post.character else None for frame in frames[:]])], #object
#             'port_1_post_character_name': [np.array([frame.ports[port_1].leader.post.character.name if frame.ports[port_1].leader.post.character else None for frame in frames[:]])], #object
#             'port_1_post_state_value': [np.array([frame.ports[port_1].leader.post.state.value if frame.ports[port_1].leader.post.state else None for frame in frames[:]])], #object
#             'port_1_post_state_name': [np.array([frame.ports[port_1].leader.post.state.name if frame.ports[port_1].leader.post.state else None for frame in frames[:]])], #object
#             'port_1_post_position_x': [np.array([frame.ports[port_1].leader.post.position.x for frame in frames[:]])], #object
#             'port_1_post_position_y': [np.array([frame.ports[port_1].leader.post.position.y for frame in frames[:]])], #object
#             'port_1_post_direction': [np.array([frame.ports[port_1].leader.post.direction for frame in frames[:]])], #object
#             'port_1_post_damage': [np.array([frame.ports[port_1].leader.post.damage for frame in frames[:]])], #object
#             'port_1_post_shield': [np.array([frame.ports[port_1].leader.post.shield for frame in frames[:]])], #object
#             'port_1_post_stocks': [np.array([frame.ports[port_1].leader.post.stocks for frame in frames[:]])], #object
#             'port_1_post_last_attack_landed_value': [np.array([frame.ports[port_1].leader.post.last_attack_landed.value if frame.ports[port_1].leader.post.last_attack_landed else None for frame in frames[:]])], #object
#             'port_1_post_last_attack_landed_name': [np.array([frame.ports[port_1].leader.post.last_attack_landed.name if frame.ports[port_1].leader.post.last_attack_landed else None for frame in frames[:]])], #object
#             # 'port_1_post_last_hit_by': [np.array([frame.ports[port_1].leader.post.last_hit_by for frame in frames[:]])], #object
#             'port_1_post_combo_count': [np.array([frame.ports[port_1].leader.post.combo_count for frame in frames[:]])], #object
#             # 'port_1_post_state_age': [np.array([frame.ports[port_1].leader.post.state_age for frame in frames[:]])], #object
#             'port_1_post_flags_value': [np.array([frame.ports[port_1].leader.post.flags.value for frame in frames[:]])], #object
#             'port_1_post_hit_stun': [np.array([frame.ports[port_1].leader.post.hit_stun for frame in frames[:]])], #object ??????
#             'port_1_post_airborne': [np.array([frame.ports[port_1].leader.post.airborne for frame in frames[:]])], #object
#             'port_1_post_ground': [np.array([frame.ports[port_1].leader.post.ground for frame in frames[:]])], #object
#             'port_1_post_jumps': [np.array([frame.ports[port_1].leader.post.jumps for frame in frames[:]])], #object
#             'port_1_post_l_cancel': [np.array([frame.ports[port_1].leader.post.l_cancel for frame in frames[:]])], #object
#              # Player 2 Pre Frame data
#             'port_2_pre_state_value': [np.array([frame.ports[port_2].leader.pre.state.value if frame.ports[port_2].leader.pre.state else None for frame in frames[:]])], #object
#             'port_2_pre_state_name': [np.array([frame.ports[port_2].leader.pre.state.name if frame.ports[port_2].leader.pre.state else None for frame in frames[:]])], #object
#             'port_2_pre_position_x': [np.array([frame.ports[port_2].leader.pre.position.x for frame in frames[:]])], #object
#             'port_2_pre_position_y': [np.array([frame.ports[port_2].leader.pre.position.y for frame in frames[:]])], #object
#             'port_2_pre_direction': [np.array([frame.ports[port_2].leader.pre.direction for frame in frames[:]])], #object
#             'port_2_pre_damage':[np.array([frame.ports[port_2].leader.pre.damage for frame in frames[:]])], #object
#             # Player 2 Frame pre Inputs
#             'port_2_DPAD_LEFT': [port_2_frame_inputs[:, 0]],
#             'port_2_DPAD_RIGHT': [port_2_frame_inputs[:, 1]],
#             'port_2_DPAD_DOWN': [port_2_frame_inputs[:, 2]],
#             'port_2_DPAD_UP': [port_2_frame_inputs[:, 3]],
#             'port_2_Z': [port_2_frame_inputs[:, 4]],
#             'port_2_R': [port_2_frame_inputs[:, 5]],
#             'port_2_L': [port_2_frame_inputs[:, 6]],
#             'port_2_A': [port_2_frame_inputs[:, 7]],
#             'port_2_B': [port_2_frame_inputs[:, 8]],
#             'port_2_X': [port_2_frame_inputs[:, 9]],
#             'port_2_Y': [port_2_frame_inputs[:, 10]],
#             'port_2_START': [port_2_frame_inputs[:, 11]],
#             'port_2_J_X': [port_2_frame_inputs[:, 12]],
#             'port_2_J_Y': [port_2_frame_inputs[:, 13]],
#             'port_2_C_X': [port_2_frame_inputs[:, 14]],
#             'port_2_C_Y': [port_2_frame_inputs[:, 15]],
#             'port_2_T_L': [port_2_frame_inputs[:, 16]],
#             'port_2_T_R': [port_2_frame_inputs[:, 17]],
#             # Player 2 post frame data
#             'port_2_post_character_value': [np.array([frame.ports[port_2].leader.post.character.value if frame.ports[port_2].leader.post.character else None for frame in frames[:]])], #object
#             'port_2_post_character_name': [np.array([frame.ports[port_2].leader.post.character.name if frame.ports[port_2].leader.post.character else None for frame in frames[:]])], #object
#             'port_2_post_state_value': [np.array([frame.ports[port_2].leader.post.state.value if frame.ports[port_2].leader.post.state else None for frame in frames[:]])], #object
#             'port_2_post_state_name': [np.array([frame.ports[port_2].leader.post.state.name if frame.ports[port_2].leader.post.state else None for frame in frames[:]])], #object
#             'port_2_post_position_x': [np.array([frame.ports[port_2].leader.post.position.x for frame in frames[:]])], #object
#             'port_2_post_position_y': [np.array([frame.ports[port_1].leader.post.position.y for frame in frames[:]])], #object
#             'port_2_post_direction': [np.array([frame.ports[port_2].leader.post.direction for frame in frames[:]])], #object
#             'port_2_post_damage': [np.array([frame.ports[port_2].leader.post.damage for frame in frames[:]])], #object
#             'port_2_post_shield': [np.array([frame.ports[port_2].leader.post.shield for frame in frames[:]])], #object
#             'port_2_post_stocks': [np.array([frame.ports[port_2].leader.post.stocks for frame in frames[:]])], #object
#             'port_2_post_last_attack_landed_value': [np.array([frame.ports[port_2].leader.post.last_attack_landed.value if frame.ports[port_2].leader.post.last_attack_landed else None for frame in frames[:]])], #object
#             'port_2_post_last_attack_landed_name': [np.array([frame.ports[port_2].leader.post.last_attack_landed.name if frame.ports[port_2].leader.post.last_attack_landed else None for frame in frames[:]])], #object
#             # 'port_2_post_last_hit_by': [np.array([frame.ports[port_2].leader.post.last_hit_by for frame in frames[:]])], #object
#             'port_2_post_combo_count': [np.array([frame.ports[port_2].leader.post.combo_count for frame in frames[:]])], #object
#             # 'port_2_post_state_age': [np.array([frame.ports[port_2].leader.post.state_age for frame in frames[:]])], #object
#             'port_2_post_flags_value': [np.array([frame.ports[port_2].leader.post.flags.value for frame in frames[:]])], #object
#             'port_2_post_hit_stun': [np.array([frame.ports[port_2].leader.post.hit_stun for frame in frames[:]])], #object ??????
#             'port_2_post_airborne': [np.array([frame.ports[port_2].leader.post.airborne for frame in frames[:]])], #object
#             'port_2_post_ground': [np.array([frame.ports[port_2].leader.post.ground for frame in frames[:]])], #object
#             'port_2_post_jumps': [np.array([frame.ports[port_2].leader.post.jumps for frame in frames[:]])], #object
#             'port_2_post_l_cancel': [np.array([frame.ports[port_2].leader.post.l_cancel for frame in frames[:]])], #object
#             #Item
#             # 'item_':[np.array([frame.Item.type.value for frame in frames[:]])]# I CANT FIGURE THIS ONE OUT
#             # 'item_state':[np.array([frame.Item.state for frame in frames[:]])],# object
#             # 'item_direction':[np.array([frame.Item.direction for frame in frames[:]])],# object
#             # 'item_velocity_x':[np.array([frame.Item.velocity.x for frame in frames[:]])],# object
#             # 'item_velocity_y':[np.array([frame.Item.velocity.y for frame in frames[:]])],# object
#             # 'item_position_x':[np.array([frame.Item.position.x for frame in frames[:]])],# object
#             # 'item_position_y':[np.array([frame.Item.position.y for frame in frames[:]])],# object
#             # 'item_damage':[np.array([frame.Item.damage for frame in frames[:]])],# object
#             # 'item_timer':[np.array([frame.Item.timer for frame in frames[:]])],# object
#             # 'item_spawn_id':[np.array([frame.Item.spawn_id for frame in frames[:]])],# object
            
#         }
#         game_data_list.append(game_data_dict.values())
#         return game_data_dict
#     except Exception as e:
#         print(f"Error processing {slp_file}: {str(e)}")
        


    
# # Create an empty Polars DataFrame with the desired schema
# schema =    [
#     ('file', pl.String),
#     ('stage_name', pl.String),
#     ('stage_value', pl.Int8),
#     ('is_pal', pl.Boolean),
#     ('is_frozen_ps', pl.Boolean),
#     # Player 1 Data
#     ('port_1', pl.Int32),
#     ('port_1_character_name', pl.String),
#     ('port_1_character_value', pl.Int32),
#     ('port_1_type_name', pl.String),
#     ('port_1_type_value', pl.Boolean),
#     ('port_1_stocks', pl.Int32),
#     ('port_1_costume', pl.Int32),
#     ('port_1_ucf_dash_back_name', pl.String),
#     ('port_1_ucf_dash_back_value', pl.Int32),
#     ('port_1_ucf_shield_drop_name', pl.String),
#     ('port_1_ucf_shield_drop_value', pl.Int32),
#     ('port_1_tag', pl.String),
#     # Player 2 Data
#     ('port_2', pl.Int32),
#     ('port_2_character_name', pl.String),
#     ('port_2_character_value', pl.Int32),
#     ('port_2_type_name', pl.String),
#     ('port_2_type_value', pl.Boolean),
#     ('port_2_stocks', pl.Int32),
#     ('port_2_costume', pl.Int32),
#     ('port_2_ucf_dash_back_name', pl.String),
#     ('port_2_ucf_dash_back_value', pl.Int32),
#     ('port_2_ucf_shield_drop_name', pl.String),
#     ('port_2_ucf_shield_drop_value', pl.Int32),
#     ('port_2_tag', pl.String),
#      # End
#     ('method_how_game_ended',pl.String),
#     #  # Player 1 Pre Frame data
#     ('port_1_pre_state_value',pl.List),
#     ('port_1_pre_state_name',pl.List),
#     ('port_1_pre_position_x',pl.List),
#     ('port_1_pre_position_y',pl.List),
#     ('port_1_pre_direction',pl.List),
#     ('port_1_pre_damage',pl.List),
#         # pre Input Data for Player 1
#     ('port_1_DPAD_LEFT', pl.List),
#     ('port_1_DPAD_RIGHT', pl.List),
#     ('port_1_DPAD_DOWN', pl.List),
#     ('port_1_DPAD_UP', pl.List),
#     ('port_1_Z', pl.List),
#     ('port_1_R', pl.List),
#     ('port_1_L', pl.List),
#     ('port_1_A', pl.List),
#     ('port_1_B', pl.List),
#     ('port_1_X', pl.List),
#     ('port_1_Y', pl.List),
#     ('port_1_START', pl.List),
#     ('port_1_J_X', pl.List),
#     ('port_1_J_Y', pl.List),
#     ('port_1_C_X', pl.List),
#     ('port_1_C_Y', pl.List),
#     ('port_1_T_L', pl.List),
#     ('port_1_T_R', pl.List),
#     # Player 1 post frame data
#     ('port_1_post_character_value', pl.List),
#     ('port_1_post_character_name', pl.List),
#     ('port_1_post_state_value', pl.List),
#     ('port_1_post_state_name', pl.List),
#     ('port_1_post_position_x', pl.List),
#     ('port_1_post_position_y', pl.List),
#     ('port_1_post_direction', pl.List),
#     ('port_1_post_damage', pl.List),
#     ('port_1_post_shield', pl.List),
#     ('port_1_post_stocks', pl.List),
#     ('port_1_post_last_attack_landed_value', pl.List),
#     ('port_1_post_last_attack_landed_name', pl.List),
#     ('port_1_post_combo_count', pl.List),
#     ('port_1_post_flags_value', pl.List),
#     ('port_1_post_hit_stun', pl.List),
#     ('port_1_post_airborne', pl.List),
#     ('port_1_post_ground', pl.List),
#     ('port_1_post_jumps', pl.List),
#     ('port_1_post_l_cancel', pl.List),
#     # Player 2 Pre Frame data
#     ('port_2_pre_state_value',pl.List),
#     ('port_2_pre_state_name',pl.List),
#     ('port_2_pre_position_x',pl.List),
#     ('port_2_pre_position_y',pl.List),
#     ('port_2_pre_direction',pl.List),
#     ('port_2_pre_damage',pl.List),
#     # pre Input Data for Player 2
#     ('port_2_DPAD_LEFT', pl.List),
#     ('port_2_DPAD_RIGHT', pl.List),
#     ('port_2_DPAD_DOWN', pl.List),
#     ('port_2_DPAD_UP', pl.List),
#     ('port_2_Z', pl.List),
#     ('port_2_R', pl.List),
#     ('port_2_L', pl.List),
#     ('port_2_A', pl.List),
#     ('port_2_B', pl.List),
#     ('port_2_X', pl.List),
#     ('port_2_Y', pl.List),
#     ('port_2_START', pl.List),
#     ('port_2_J_X', pl.List),
#     ('port_2_J_Y', pl.List),
#     ('port_2_C_X', pl.List),
#     ('port_2_C_Y', pl.List),
#     ('port_2_T_L', pl.List),
#     ('port_2_T_R', pl.List),
#     # Player 2 post frame data
#     ('port_2_post_character_value', pl.List),
#     ('port_2_post_character_name', pl.List),
#     ('port_2_post_state_value', pl.List),
#     ('port_2_post_state_name', pl.List),
#     ('port_2_post_position_x', pl.List),
#     ('port_2_post_position_y', pl.List),
#     ('port_2_post_direction', pl.List),
#     ('port_2_post_damage', pl.List),
#     ('port_2_post_shield', pl.List),
#     ('port_2_post_stocks', pl.List),
#     ('port_2_post_last_attack_landed_value', pl.List),
#     ('port_2_post_last_attack_landed_name', pl.List),
#     ('port_2_post_combo_count', pl.List),
#     ('port_2_post_flags_value', pl.List),
#     ('port_2_post_hit_stun', pl.List),
#     ('port_2_post_airborne', pl.List),
#     ('port_2_post_ground', pl.List),
#     ('port_2_post_jumps', pl.List),
#     ('port_2_post_l_cancel', pl.List),

#     # Item (You can uncomment and add these fields if needed)
#     # ('item_type_value', pl.Object),
#     # ('item_state', pl.Object),
#     # ('item_direction', pl.Object),
#     # ('item_velocity_x', pl.Object),
#     # ('item_velocity_y', pl.Object),
#     # ('item_position_x', pl.Object),
#     # ('item_position_y', pl.Object),
#     # ('item_damage', pl.Object),
#     # ('item_timer', pl.Object),
#     # ('item_spawn_id', pl.Object),
# ]



In [None]:




# Function to process a single SLP file and append to shared polars df
def process_slp_file(slp_file, dataset_path,game_data_list):#,df):#, time_series_list, label_list, ids):
    try:
        file_path = os.path.join(dataset_path, slp_file)
        game = slp.Game(file_path)
        frames = game.frames

        if len(frames) <  + frames_per_input:  # Ignore games that are <3600 frames (i.e. <60 seconds)
            return

        # List occupied ports
        occupied_ports = [i for i, port in enumerate(game.start.players) if port is not None]
        port_1 = occupied_ports[0]
        port_2 = occupied_ports[1]

        if len(occupied_ports) > 2:  # Ignore games that aren't singles
            return
        
        # port_1_frame_
        port_1_frame_inputs = get_frame_inputs(frames, port_1)
        port_2_frame_inputs = get_frame_inputs(frames, port_2)
        
        game_data_dict = {
            # Start (Game Data)
            'file': slp_file,
            # 'is_teams': game.start.stage.is_teams, # bool
            # 'players': game.start.stage.players, #tuple
            # 'random_seed':game.start.stage.random_seed # int
            # 'slippi': game.start.stage.slippi #Slippi
            'stage_name': game.start.stage.name,
            'stage_value': game.start.stage.value,
            'is_pal': game.start.is_pal,
            'is_frozen_ps': game.start.is_frozen_ps,
            # Player 1 Data
            'port_1': port_1,
            'port_1_character_name': game.start.players[port_1].character.name,
            'port_1_character_value': game.start.players[port_1].character.value,
            'port_1_type_name': game.start.players[port_1].type.name,
            'port_1_type_value': game.start.players[port_1].type.value,
            'port_1_stocks': game.start.players[port_1].stocks,
            'port_1_costume': game.start.players[port_1].costume,
            # 'port_1_team': game.start.players[port_1].team # team|None
            'port_1_ucf_dash_back_name': game.start.players[port_1].ucf.dash_back.name,
            'port_1_ucf_dash_back_value': game.start.players[port_1].ucf.dash_back.value,
            'port_1_ucf_shield_drop_name': game.start.players[port_1].ucf.shield_drop.name,
            'port_1_ucf_shield_drop_value': game.start.players[port_1].ucf.shield_drop.value,
            'port_1_tag': game.start.players[port_1].tag,
            # Player 2 Data
            'port_2': port_2,
            'port_2_character_name': game.start.players[port_2].character.name,
            'port_2_character_value': game.start.players[port_2].character.value,
            'port_2_type_name': game.start.players[port_2].type.name,
            'port_2_type_value': game.start.players[port_2].type.value,
            'port_2_stocks': game.start.players[port_2].stocks,
            'port_2_costume': game.start.players[port_2].costume,
            # 'port_2_team': game.start.players[port_2].team # team|None
            'port_2_ucf_dash_back_name': game.start.players[port_2].ucf.dash_back.name,
            'port_2_ucf_dash_back_value': game.start.players[port_2].ucf.dash_back.value,
            'port_2_ucf_shield_drop_name': game.start.players[port_2].ucf.shield_drop.name,
            'port_2_ucf_shield_drop_value': game.start.players[port_2].ucf.shield_drop.value,
            'port_2_tag': game.start.players[port_2].tag,
            # End
            'method_how_game_ended': game.end.method, #string
             # Player 1 Pre Frame data
            'port_1_pre_state_value': [[frame.ports[port_1].leader.pre.state.value if frame.ports[port_1].leader.pre.state else None for frame in frames[:]]], #object
            'port_1_pre_state_name': [[frame.ports[port_1].leader.pre.state.name if frame.ports[port_1].leader.pre.state else None for frame in frames[:]]], #object
            'port_1_pre_position_x': [[frame.ports[port_1].leader.pre.position.x for frame in frames[:]]], #object
            'port_1_pre_position_y': [[frame.ports[port_1].leader.pre.position.y for frame in frames[:]]], #object
            'port_1_pre_direction': [[frame.ports[port_1].leader.pre.direction for frame in frames[:]]], #object
            'port_1_pre_damage':[[frame.ports[port_1].leader.pre.damage for frame in frames[:]]], #object
            # Player 1 Frame pre Inputs
            'port_1_DPAD_LEFT': [port_1_frame_inputs[:, 0].tolist()],
            'port_1_DPAD_RIGHT': [port_1_frame_inputs[:, 1].tolist()],
            'port_1_DPAD_DOWN': [port_1_frame_inputs[:, 2].tolist()],
            'port_1_DPAD_UP': [port_1_frame_inputs[:, 3].tolist()],
            'port_1_Z': [port_1_frame_inputs[:, 4].tolist()],
            'port_1_R': [port_1_frame_inputs[:, 5].tolist()],
            'port_1_L': [port_1_frame_inputs[:, 6].tolist()],
            'port_1_A': [port_1_frame_inputs[:, 7].tolist()],
            'port_1_B': [port_1_frame_inputs[:, 8].tolist()],
            'port_1_X': [port_1_frame_inputs[:, 9].tolist()],
            'port_1_Y': [port_1_frame_inputs[:, 10].tolist()],
            'port_1_START': [port_1_frame_inputs[:, 11].tolist()],
            'port_1_J_X': [port_1_frame_inputs[:, 12].tolist()],
            'port_1_J_Y': [port_1_frame_inputs[:, 13].tolist()],
            'port_1_C_X': [port_1_frame_inputs[:, 14].tolist()],
            'port_1_C_Y': [port_1_frame_inputs[:, 15].tolist()],
            'port_1_T_L': [port_1_frame_inputs[:, 16].tolist()],
            'port_1_T_R': [port_1_frame_inputs[:, 17].tolist()],
            # Player 1 post frame data
            'port_1_post_character_value': [[frame.ports[port_1].leader.post.character.value if frame.ports[port_1].leader.post.character else None for frame in frames[:]]], #object
            'port_1_post_character_name': [[frame.ports[port_1].leader.post.character.name if frame.ports[port_1].leader.post.character else None for frame in frames[:]]], #object
            'port_1_post_state_value': [[frame.ports[port_1].leader.post.state.value if frame.ports[port_1].leader.post.state else None for frame in frames[:]]], #object
            'port_1_post_state_name': [[frame.ports[port_1].leader.post.state.name if frame.ports[port_1].leader.post.state else None for frame in frames[:]]], #object
            'port_1_post_position_x': [[frame.ports[port_1].leader.post.position.x for frame in frames[:]]], #object
            'port_1_post_position_y': [[frame.ports[port_1].leader.post.position.y for frame in frames[:]]], #object
            'port_1_post_direction': [[frame.ports[port_1].leader.post.direction for frame in frames[:]]], #object
            'port_1_post_damage': [[frame.ports[port_1].leader.post.damage for frame in frames[:]]], #object
            'port_1_post_shield': [[frame.ports[port_1].leader.post.shield for frame in frames[:]]], #object
            'port_1_post_stocks': [[frame.ports[port_1].leader.post.stocks for frame in frames[:]]], #object
            'port_1_post_last_attack_landed_value': [[frame.ports[port_1].leader.post.last_attack_landed.value if frame.ports[port_1].leader.post.last_attack_landed else None for frame in frames[:]]], #object
            'port_1_post_last_attack_landed_name': [[frame.ports[port_1].leader.post.last_attack_landed.name if frame.ports[port_1].leader.post.last_attack_landed else None for frame in frames[:]]], #object
            # 'port_1_post_last_hit_by': [[frame.ports[port_1].leader.post.last_hit_by for frame in frames[:]]], #object
            'port_1_post_combo_count': [[frame.ports[port_1].leader.post.combo_count for frame in frames[:]]], #object
            # 'port_1_post_state_age': [[frame.ports[port_1].leader.post.state_age for frame in frames[:]]], #object
            'port_1_post_flags_value': [[frame.ports[port_1].leader.post.flags.value for frame in frames[:]]], #object
            'port_1_post_hit_stun': [[frame.ports[port_1].leader.post.hit_stun for frame in frames[:]]], #object ??????
            'port_1_post_airborne': [[frame.ports[port_1].leader.post.airborne for frame in frames[:]]], #object
            'port_1_post_ground': [[frame.ports[port_1].leader.post.ground for frame in frames[:]]], #object
            'port_1_post_jumps': [[frame.ports[port_1].leader.post.jumps for frame in frames[:]]], #object
            'port_1_post_l_cancel': [[frame.ports[port_1].leader.post.l_cancel for frame in frames[:]]], #object
             # Player 2 Pre Frame data
            'port_2_pre_state_value': [[frame.ports[port_2].leader.pre.state.value if frame.ports[port_2].leader.pre.state else None for frame in frames[:]]], #object
            'port_2_pre_state_name': [[frame.ports[port_2].leader.pre.state.name if frame.ports[port_2].leader.pre.state else None for frame in frames[:]]], #object
            'port_2_pre_position_x': [[frame.ports[port_2].leader.pre.position.x for frame in frames[:]]], #object
            'port_2_pre_position_y': [[frame.ports[port_2].leader.pre.position.y for frame in frames[:]]], #object
            'port_2_pre_direction': [[frame.ports[port_2].leader.pre.direction for frame in frames[:]]], #object
            'port_2_pre_damage':[[frame.ports[port_2].leader.pre.damage for frame in frames[:]]], #object
            # Player 2 Frame pre Inputs
            'port_2_DPAD_LEFT': [port_2_frame_inputs[:, 0].tolist()],
            'port_2_DPAD_RIGHT': [port_2_frame_inputs[:, 1].tolist()],
            'port_2_DPAD_DOWN': [port_2_frame_inputs[:, 2].tolist()],
            'port_2_DPAD_UP': [port_2_frame_inputs[:, 3].tolist()],
            'port_2_Z': [port_2_frame_inputs[:, 4].tolist()],
            'port_2_R': [port_2_frame_inputs[:, 5].tolist()],
            'port_2_L': [port_2_frame_inputs[:, 6].tolist()],
            'port_2_A': [port_2_frame_inputs[:, 7].tolist()],
            'port_2_B': [port_2_frame_inputs[:, 8].tolist()],
            'port_2_X': [port_2_frame_inputs[:, 9].tolist()],
            'port_2_Y': [port_2_frame_inputs[:, 10].tolist()],
            'port_2_START': [port_2_frame_inputs[:, 11].tolist()],
            'port_2_J_X': [port_2_frame_inputs[:, 12].tolist()],
            'port_2_J_Y': [port_2_frame_inputs[:, 13].tolist()],
            'port_2_C_X': [port_2_frame_inputs[:, 14].tolist()],
            'port_2_C_Y': [port_2_frame_inputs[:, 15].tolist()],
            'port_2_T_L': [port_2_frame_inputs[:, 16].tolist()],
            'port_2_T_R': [port_2_frame_inputs[:, 17].tolist()],
            # Player 2 post frame data
            'port_2_post_character_value': [[frame.ports[port_2].leader.post.character.value if frame.ports[port_2].leader.post.character else None for frame in frames[:]]], #object
            'port_2_post_character_name': [[frame.ports[port_2].leader.post.character.name if frame.ports[port_2].leader.post.character else None for frame in frames[:]]], #object
            'port_2_post_state_value': [[frame.ports[port_2].leader.post.state.value if frame.ports[port_2].leader.post.state else None for frame in frames[:]]], #object
            'port_2_post_state_name': [[frame.ports[port_2].leader.post.state.name if frame.ports[port_2].leader.post.state else None for frame in frames[:]]], #object
            'port_2_post_position_x': [[frame.ports[port_2].leader.post.position.x for frame in frames[:]]], #object
            'port_2_post_position_y': [[frame.ports[port_1].leader.post.position.y for frame in frames[:]]], #object
            'port_2_post_direction': [[frame.ports[port_2].leader.post.direction for frame in frames[:]]], #object
            'port_2_post_damage': [[frame.ports[port_2].leader.post.damage for frame in frames[:]]], #object
            'port_2_post_shield': [[frame.ports[port_2].leader.post.shield for frame in frames[:]]], #object
            'port_2_post_stocks': [[frame.ports[port_2].leader.post.stocks for frame in frames[:]]], #object
            'port_2_post_last_attack_landed_value': [[frame.ports[port_2].leader.post.last_attack_landed.value if frame.ports[port_2].leader.post.last_attack_landed else None for frame in frames[:]]], #object
            'port_2_post_last_attack_landed_name': [[frame.ports[port_2].leader.post.last_attack_landed.name if frame.ports[port_2].leader.post.last_attack_landed else None for frame in frames[:]]], #object
            # 'port_2_post_last_hit_by': [[frame.ports[port_2].leader.post.last_hit_by for frame in frames[:]]], #object
            'port_2_post_combo_count': [[frame.ports[port_2].leader.post.combo_count for frame in frames[:]]], #object
            # 'port_2_post_state_age': [[frame.ports[port_2].leader.post.state_age for frame in frames[:]]], #object
            'port_2_post_flags_value': [[frame.ports[port_2].leader.post.flags.value for frame in frames[:]]], #object
            'port_2_post_hit_stun': [[frame.ports[port_2].leader.post.hit_stun for frame in frames[:]]], #object ??????
            'port_2_post_airborne': [[frame.ports[port_2].leader.post.airborne for frame in frames[:]]], #object
            'port_2_post_ground': [[frame.ports[port_2].leader.post.ground for frame in frames[:]]], #object
            'port_2_post_jumps': [[frame.ports[port_2].leader.post.jumps for frame in frames[:]]], #object
            'port_2_post_l_cancel': [[frame.ports[port_2].leader.post.l_cancel for frame in frames[:]]], #object
            #Item
            # 'item_':[np.array([frame.Item.type.value for frame in frames[:]])]# I CANT FIGURE THIS ONE OUT
            # 'item_state':[np.array([frame.Item.state for frame in frames[:]])],# object
            # 'item_direction':[np.array([frame.Item.direction for frame in frames[:]])],# object
            # 'item_velocity_x':[np.array([frame.Item.velocity.x for frame in frames[:]])],# object
            # 'item_velocity_y':[np.array([frame.Item.velocity.y for frame in frames[:]])],# object
            # 'item_position_x':[np.array([frame.Item.position.x for frame in frames[:]])],# object
            # 'item_position_y':[np.array([frame.Item.position.y for frame in frames[:]])],# object
            # 'item_damage':[np.array([frame.Item.damage for frame in frames[:]])],# object
            # 'item_timer':[np.array([frame.Item.timer for frame in frames[:]])],# object
            # 'item_spawn_id':[np.array([frame.Item.spawn_id for frame in frames[:]])],# object
            
        }
        game_data_list.append(game_data_dict.values())
        return game_data_dict
    except Exception as e:
        print(f"Error processing {slp_file}: {str(e)}")

            

        


    
# Column layout for the per-game table: an ordered list of
# (column name, Polars dtype) pairs that is passed as `schema=` when the
# collected rows are turned into a pl.DataFrame.
def _build_schema():
    """Assemble the ordered (column name, dtype) pairs for one game row.

    Order of the result:
      1. game-level fields,
      2. start-of-game fields for port_1, then the same fields for port_2,
      3. the end-of-game method,
      4. per-frame list columns (pre-frame state, controller inputs,
         post-frame state) for port_1, then the same columns for port_2.
    """
    # Game-level fields (one value per game).
    game_fields = [
        ('file', pl.String),
        ('stage_name', pl.String),
        ('stage_value', pl.Int8),
        ('is_pal', pl.Boolean),
        ('is_frozen_ps', pl.Boolean),
    ]

    # Start-of-game fields, given as suffixes appended to 'port_1' / 'port_2'.
    # The empty suffix is the bare port column itself.
    # NOTE(review): `_type_value` is declared Boolean — confirm this matches
    # the values stored under that column by the row builder.
    player_start_fields = [
        ('', pl.Int32),
        ('_character_name', pl.String),
        ('_character_value', pl.Int32),
        ('_type_name', pl.String),
        ('_type_value', pl.Boolean),
        ('_stocks', pl.Int32),
        ('_costume', pl.Int32),
        ('_ucf_dash_back_name', pl.String),
        ('_ucf_dash_back_value', pl.Int32),
        ('_ucf_shield_drop_name', pl.String),
        ('_ucf_shield_drop_value', pl.Int32),
        ('_tag', pl.String),
    ]

    # Pre-frame columns (prefixed with 'pre_').
    pre_fields = (
        'state_value', 'state_name',
        'position_x', 'position_y',
        'direction', 'damage',
    )

    # Controller input columns (no extra prefix).
    input_fields = (
        'DPAD_LEFT', 'DPAD_RIGHT', 'DPAD_DOWN', 'DPAD_UP',
        'Z', 'R', 'L', 'A', 'B', 'X', 'Y', 'START',
        'J_X', 'J_Y', 'C_X', 'C_Y', 'T_L', 'T_R',
    )

    # Post-frame columns (prefixed with 'post_').
    post_fields = (
        'character_value', 'character_name',
        'state_value', 'state_name',
        'position_x', 'position_y',
        'direction', 'damage', 'shield', 'stocks',
        'last_attack_landed_value', 'last_attack_landed_name',
        'combo_count', 'flags_value', 'hit_stun',
        'airborne', 'ground', 'jumps', 'l_cancel',
    )

    ports = ('port_1', 'port_2')

    columns = list(game_fields)

    # Player 1 start data, then Player 2 start data.
    for port in ports:
        columns += [(f'{port}{suffix}', dtype) for suffix, dtype in player_start_fields]

    # End
    columns.append(('method_how_game_ended', pl.String))

    # Player 1 per-frame columns, then Player 2 per-frame columns; all are pl.List.
    for port in ports:
        per_frame = (
            [f'pre_{name}' for name in pre_fields]
            + list(input_fields)
            + [f'post_{name}' for name in post_fields]
        )
        columns += [(f'{port}_{name}', pl.List) for name in per_frame]

    # Item columns are intentionally not part of the schema. Candidates, should
    # item extraction be added later (each as pl.Object): item_type_value,
    # item_state, item_direction, item_velocity_x, item_velocity_y,
    # item_position_x, item_position_y, item_damage, item_timer, item_spawn_id.
    return columns


schema = _build_schema()



In [None]:
# # Set the number of time steps in the model inputs
# frames_per_input = 60 * 12  # 12 seconds of gameplay

# Directory that holds the replay files.
dataset_path = './Slippi_Public_Dataset_v3/'

# Names (not full paths) of every entry in the dataset directory.
slp_files = list(os.listdir(dataset_path))

# # slp_file = slp_files[10]

# # A single file path
# # file_path = os.path.join(dataset_path, slp_file)
# num_files = 1

# # game_data_df = pl.DataFrame([], schema=schema)
# manager = Manager()
# game_data_list = manager.list()
# # Use joblib to parallelize processing of SLP files
# Parallel(n_jobs=-1, verbose=10)(delayed(process_slp_file)(slp_file, dataset_path, game_data_list) for slp_file in tqdm.tqdm(slp_files[:num_files]))

# # game_data_df = pl.DataFrame(game_data_list, schema=schema)# Convert the ListProxy to a regular list
# game_data_list_converted = list(game_data_list)
# # print(game_data_list_converted)
# # Now you can create the Polars DataFrame from the converted list
# game_data_df = pl.DataFrame(game_data_list_converted, schema=schema)


# # print(game_data_dict)
# # game_data_new_row = pl.DataFrame(game_data_dict,schema=schema)
# # print(type(game_data_new_row))

# print(game_data_df)
# print()

In [None]:
# game_data_df

In [None]:
# # Assuming `game_data_df` is your Polars DataFrame
# game_data_df.write_parquet('./data/dataframe.parquet')

In [None]:
# # Create a DataFrame with mixed column types
# df = pl.DataFrame({
#     'id': [1, 2, 3],  # Single values
#     'name': ['Alice', 'Bob', 'Charlie'],  # Single values
#     'scores': [np.array([95, 85]), np.array([88, 92]), np.array([70, 75])],  # Lists
#     'tags': [['fast', 'reliable'], ['newbie'], ['experienced', 'consistent']]  # Lists
# })

# print(df)
# df.write_parquet('./data/polars_df.parquet')

In [None]:
# Number of time steps in the model inputs: 60 * 12 frames, i.e. 12 seconds of gameplay.
frames_per_input = 12 * 60

# Directory that holds the replay files.
dataset_path = './Slippi_Public_Dataset_v3/'

# Names (not full paths) of every entry in the dataset directory.
slp_files = list(os.listdir(dataset_path))

# Positions within slp_files that are dropped before batching.
# NOTE(review): these are positional, so they only point at the intended files
# if os.listdir returns the same ordering as when this list was compiled — confirm.
indices_to_remove = [405, 411, 3494, 8650, 8763, 9725, 14783, 15039, 15238, 15396, 15572, 15573, 15824, 16165, 16333, 16505, 16681, 17046, 17346, 17608, 17927, 18707, 18985, 19273, 20116, 20461, 20929, 21145, 21206, 21382, 21498, 21962, 21967, 22411, 22864, 22983, 23278, 23641, 23745, 24072, 24989, 25259, 40004, 40806, 41068, 41388, 41734, 41966, 42294, 43252, 44974, 46381, 46568, 47153, 47375, 47770, 47817, 47912, 48074, 48077, 48116, 48453, 48578, 48621, 48628, 48638, 48693, 48802, 48841, 49988, 51728, 52841, 52842, 54466, 54467, 54468, 55687, 55688, 55689, 56391, 56392, 56393, 94229, 94230]

# Work from the highest position downwards so that a deletion never shifts
# the position of an entry that still has to be removed.
indices_to_remove.sort(reverse=True)

for index in indices_to_remove:
    if index >= len(slp_files):
        continue  # position lies beyond the end of the current list; nothing to drop
    slp_files.pop(index)
                
# Helper that splits a sequence into consecutive fixed-size batches
def create_batches(slp_files, batch_size):
    """Lazily yield consecutive slices of ``slp_files``.

    Each slice holds ``batch_size`` items, except possibly the last one,
    which holds whatever remains. Nothing is yielded for an empty input.
    """
    offsets = range(0, len(slp_files), batch_size)
    yield from (slp_files[lo:lo + batch_size] for lo in offsets)
        
# Number of replay files written into each Parquet output file.
batch_size = 100

# Create batches of slp_files
slp_batches = list(create_batches(slp_files, batch_size))

# How many batches to process. A negative value (or None) means "all of them".
# The value used to be applied directly as a slice end (slp_batches[:num_batches]),
# so the -1 sentinel silently skipped the final batch.
num_batches = -1
if num_batches is None or num_batches < 0:
    batches_to_process = slp_batches
else:
    batches_to_process = slp_batches[:num_batches]

# Make sure the output directory exists before the first batch is written.
output_dir = './data/all_game_data'
os.makedirs(output_dir, exist_ok=True)

for batch_index, slp_batch in enumerate(batches_to_process):
    print(batch_index)

    # One shared list per batch. The context manager shuts the manager's
    # server process down as soon as the rows have been copied out, instead
    # of leaving one manager alive per batch.
    with Manager() as manager:
        game_data_list = manager.list()

        # Use joblib to parallelize processing of SLP files; the shared list
        # is handed to every process_slp_file call to collect the rows.
        Parallel(n_jobs=-1, verbose=0)(
            delayed(process_slp_file)(slp_file, dataset_path, game_data_list)
            for slp_file in slp_batch
        )

        # Convert the ListProxy to a regular list while the manager is still running
        game_data_list_converted = list(game_data_list)

    # Build the Polars DataFrame from the collected rows
    game_data_df = pl.DataFrame(game_data_list_converted, schema=schema)

    # Convert Polars DataFrame to PyArrow Table
    arrow_table = game_data_df.to_arrow()

    # Define file path for the batch
    batch_file_path = os.path.join(output_dir, f'batch_{batch_index}.parquet')

    # Write the table to a new Parquet file within the directory
    pq.write_table(arrow_table, batch_file_path)



In [None]:
# # Set the number of time steps in the model inputs
# frames_per_input = 60 * 12  # 12 seconds of gameplay

# dataset_path = './Slippi_Public_Dataset_v3/'

# # List of file names
# slp_files = [file for file in os.listdir(dataset_path)]

# # slp_file = slp_files[10]

# # A single file path
# # file_path = os.path.join(dataset_path, slp_file)
# start_at = 405
# start_at = 8650
# start_at = 21967
# start_at = 22411
# start_at = 47770
# # for i in range(10):
#     # start_at = 400
# num_files =1
# print(slp_files[start_at:start_at+num_files])
# print(start_at)

# # game_data_df = pl.DataFrame([], schema=schema)
# manager = Manager()
# game_data_list = manager.list()
# # Use joblib to parallelize processing of SLP files
# Parallel(n_jobs=-1, verbose=10)(delayed(process_slp_file)(slp_file, dataset_path, game_data_list) for slp_file in tqdm.tqdm(slp_files[start_at:start_at+num_files]))

# # game_data_df = pl.DataFrame(game_data_list, schema=schema)# Convert the ListProxy to a regular list
# game_data_list_converted = list(game_data_list)
# # print
# # Now you can create the Polars DataFrame from the converted list
# game_data_df = pl.DataFrame(game_data_list_converted, schema=schema)
# # print(game_data_list_converted)
#     # start_at += 1
    

# # print(game_data_dict)
# # game_data_new_row = pl.DataFrame(game_data_dict,schema=schema)
# # print(type(game_data_new_row))



In [None]:
# Assuming `game_data_df` is your Polars DataFrame
# game_data_df.write_parquet('./data/dataframe.parquet')

In [None]:
# load_df = pl.read_parquet('./data/dataframe.parquet')

In [None]:
# load_df

In [None]:
# slp_files = [file for file in os.listdir(dataset_path)]
# count = 0
# for i, slp_file in enumerate(slp_files):
#     if "Mr. Game & Watch + Mario" in slp_file:
#         count +=1
#         print(i)
# print(count)

In [None]:
# Re-list the dataset directory and report which positions hold a file
# whose name contains an ampersand.
slp_files = list(os.listdir(dataset_path))

index = []  # positions of the matching file names
for i, slp_file in enumerate(slp_files):
    if "&" not in slp_file:
        continue
    index.append(i)

count = len(index)  # number of matching file names
print(count)
print(index)

In [None]:




# # Function to process a single SLP file and append to shared polars df
# def process_slp_file(slp_file, dataset_path,game_data_list):#,df):#, time_series_list, label_list, ids):
#     try:
#         file_path = os.path.join(dataset_path, slp_file)
#         game = slp.Game(file_path)
#         frames = game.frames

#         if len(frames) <  + frames_per_input:  # Ignore games that are <3600 frames (i.e. <60 seconds)
#             return

#         # List occupied ports
#         occupied_ports = [i for i, port in enumerate(game.start.players) if port is not None]
#         port_1 = occupied_ports[0]
#         port_2 = occupied_ports[1]

#         if len(occupied_ports) > 2:  # Ignore games that aren't singles
#             return
        
#         # port_1_frame_
#         port_1_frame_inputs = get_frame_inputs(frames, port_1)
#         port_2_frame_inputs = get_frame_inputs(frames, port_2)
        
#         game_data_dict = {
#             # Start (Game Data)
#             'file': slp_file,
#             # 'is_teams': game.start.stage.is_teams, # bool
#             # 'players': game.start.stage.players, #tuple
#             # 'random_seed':game.start.stage.random_seed # int
#             # 'slippi': game.start.stage.slippi #Slippi
#             'stage_name': game.start.stage.name,
#             'stage_value': game.start.stage.value,
#             'is_pal': game.start.is_pal,
#             'is_frozen_ps': game.start.is_frozen_ps,
#             # Player 1 Data
#             'port_1': port_1,
#             'port_1_character_name': game.start.players[port_1].character.name,
#             'port_1_character_value': game.start.players[port_1].character.value,
#             'port_1_type_name': game.start.players[port_1].type.name,
#             'port_1_type_value': game.start.players[port_1].type.value,
#             'port_1_stocks': game.start.players[port_1].stocks,
#             'port_1_costume': game.start.players[port_1].costume,
#             # 'port_1_team': game.start.players[port_1].team # team|None
#             'port_1_ucf_dash_back_name': game.start.players[port_1].ucf.dash_back.name,
#             'port_1_ucf_dash_back_value': game.start.players[port_1].ucf.dash_back.value,
#             'port_1_ucf_shield_drop_name': game.start.players[port_1].ucf.shield_drop.name,
#             'port_1_ucf_shield_drop_value': game.start.players[port_1].ucf.shield_drop.value,
#             'port_1_tag': game.start.players[port_1].tag,
#             # Player 2 Data
#             'port_2': port_2,
#             'port_2_character_name': game.start.players[port_2].character.name,
#             'port_2_character_value': game.start.players[port_2].character.value,
#             'port_2_type_name': game.start.players[port_2].type.name,
#             'port_2_type_value': game.start.players[port_2].type.value,
#             'port_2_stocks': game.start.players[port_2].stocks,
#             'port_2_costume': game.start.players[port_2].costume,
#             # 'port_2_team': game.start.players[port_2].team # team|None
#             'port_2_ucf_dash_back_name': game.start.players[port_2].ucf.dash_back.name,
#             'port_2_ucf_dash_back_value': game.start.players[port_2].ucf.dash_back.value,
#             'port_2_ucf_shield_drop_name': game.start.players[port_2].ucf.shield_drop.name,
#             'port_2_ucf_shield_drop_value': game.start.players[port_2].ucf.shield_drop.value,
#             'port_2_tag': game.start.players[port_2].tag,
#             # End
#             'method_how_game_ended': game.end.method, #string
#              # Player 1 Pre Frame data
#             'port_1_pre_state_value': [[frame.ports[port_1].leader.pre.state.value if frame.ports[port_1].leader.pre.state else None for frame in frames[:]]], #object
#             'port_1_pre_state_name': [[frame.ports[port_1].leader.pre.state.name if frame.ports[port_1].leader.pre.state else None for frame in frames[:]]], #object
#             'port_1_pre_position_x': [[frame.ports[port_1].leader.pre.position.x for frame in frames[:]]], #object
#             'port_1_pre_position_y': [[frame.ports[port_1].leader.pre.position.y for frame in frames[:]]], #object
#             'port_1_pre_direction': [[frame.ports[port_1].leader.pre.direction for frame in frames[:]]], #object
#             'port_1_pre_damage':[[frame.ports[port_1].leader.pre.damage for frame in frames[:]]], #object
#             # Player 1 Frame pre Inputs
#             'port_1_DPAD_LEFT': [port_1_frame_inputs[:, 0].tolist()],
#             'port_1_DPAD_RIGHT': [port_1_frame_inputs[:, 1].tolist()],
#             'port_1_DPAD_DOWN': [port_1_frame_inputs[:, 2].tolist()],
#             'port_1_DPAD_UP': [port_1_frame_inputs[:, 3].tolist()],
#             'port_1_Z': [port_1_frame_inputs[:, 4].tolist()],
#             'port_1_R': [port_1_frame_inputs[:, 5].tolist()],
#             'port_1_L': [port_1_frame_inputs[:, 6].tolist()],
#             'port_1_A': [port_1_frame_inputs[:, 7].tolist()],
#             'port_1_B': [port_1_frame_inputs[:, 8].tolist()],
#             'port_1_X': [port_1_frame_inputs[:, 9].tolist()],
#             'port_1_Y': [port_1_frame_inputs[:, 10].tolist()],
#             'port_1_START': [port_1_frame_inputs[:, 11].tolist()],
#             'port_1_J_X': [port_1_frame_inputs[:, 12].tolist()],
#             'port_1_J_Y': [port_1_frame_inputs[:, 13].tolist()],
#             'port_1_C_X': [port_1_frame_inputs[:, 14].tolist()],
#             'port_1_C_Y': [port_1_frame_inputs[:, 15].tolist()],
#             'port_1_T_L': [port_1_frame_inputs[:, 16].tolist()],
#             'port_1_T_R': [port_1_frame_inputs[:, 17].tolist()],
#             # Player 1 post frame data
#             'port_1_post_character_value': [[frame.ports[port_1].leader.post.character.value if frame.ports[port_1].leader.post.character else None for frame in frames[:]]], #object
#             'port_1_post_character_name': [[frame.ports[port_1].leader.post.character.name if frame.ports[port_1].leader.post.character else None for frame in frames[:]]], #object
#             'port_1_post_state_value': [[frame.ports[port_1].leader.post.state.value if frame.ports[port_1].leader.post.state else None for frame in frames[:]]], #object
#             'port_1_post_state_name': [[frame.ports[port_1].leader.post.state.name if frame.ports[port_1].leader.post.state else None for frame in frames[:]]], #object
#             'port_1_post_position_x': [[frame.ports[port_1].leader.post.position.x for frame in frames[:]]], #object
#             'port_1_post_position_y': [[frame.ports[port_1].leader.post.position.y for frame in frames[:]]], #object
#             'port_1_post_direction': [[frame.ports[port_1].leader.post.direction for frame in frames[:]]], #object
#             'port_1_post_damage': [[frame.ports[port_1].leader.post.damage for frame in frames[:]]], #object
#             'port_1_post_shield': [[frame.ports[port_1].leader.post.shield for frame in frames[:]]], #object
#             'port_1_post_stocks': [[frame.ports[port_1].leader.post.stocks for frame in frames[:]]], #object
#             'port_1_post_last_attack_landed_value': [[frame.ports[port_1].leader.post.last_attack_landed.value if frame.ports[port_1].leader.post.last_attack_landed else None for frame in frames[:]]], #object
#             'port_1_post_last_attack_landed_name': [[frame.ports[port_1].leader.post.last_attack_landed.name if frame.ports[port_1].leader.post.last_attack_landed else None for frame in frames[:]]], #object
#             # 'port_1_post_last_hit_by': [[frame.ports[port_1].leader.post.last_hit_by for frame in frames[:]]], #object
#             'port_1_post_combo_count': [[frame.ports[port_1].leader.post.combo_count for frame in frames[:]]], #object
#             # 'port_1_post_state_age': [[frame.ports[port_1].leader.post.state_age for frame in frames[:]]], #object
#             'port_1_post_flags_value': [[frame.ports[port_1].leader.post.flags.value for frame in frames[:]]], #object
#             'port_1_post_hit_stun': [[frame.ports[port_1].leader.post.hit_stun for frame in frames[:]]], #object ??????
#             'port_1_post_airborne': [[frame.ports[port_1].leader.post.airborne for frame in frames[:]]], #object
#             'port_1_post_ground': [[frame.ports[port_1].leader.post.ground for frame in frames[:]]], #object
#             'port_1_post_jumps': [[frame.ports[port_1].leader.post.jumps for frame in frames[:]]], #object
#             'port_1_post_l_cancel': [[frame.ports[port_1].leader.post.l_cancel for frame in frames[:]]], #object
#              # Player 2 Pre Frame data
#             'port_2_pre_state_value': [[frame.ports[port_2].leader.pre.state.value if frame.ports[port_2].leader.pre.state else None for frame in frames[:]]], #object
#             'port_2_pre_state_name': [[frame.ports[port_2].leader.pre.state.name if frame.ports[port_2].leader.pre.state else None for frame in frames[:]]], #object
#             'port_2_pre_position_x': [[frame.ports[port_2].leader.pre.position.x for frame in frames[:]]], #object
#             'port_2_pre_position_y': [[frame.ports[port_2].leader.pre.position.y for frame in frames[:]]], #object
#             'port_2_pre_direction': [[frame.ports[port_2].leader.pre.direction for frame in frames[:]]], #object
#             'port_2_pre_damage':[[frame.ports[port_2].leader.pre.damage for frame in frames[:]]], #object
#             # Player 2 Frame pre Inputs
#             'port_2_DPAD_LEFT': [port_2_frame_inputs[:, 0].tolist()],
#             'port_2_DPAD_RIGHT': [port_2_frame_inputs[:, 1].tolist()],
#             'port_2_DPAD_DOWN': [port_2_frame_inputs[:, 2].tolist()],
#             'port_2_DPAD_UP': [port_2_frame_inputs[:, 3].tolist()],
#             'port_2_Z': [port_2_frame_inputs[:, 4].tolist()],
#             'port_2_R': [port_2_frame_inputs[:, 5].tolist()],
#             'port_2_L': [port_2_frame_inputs[:, 6].tolist()],
#             'port_2_A': [port_2_frame_inputs[:, 7].tolist()],
#             'port_2_B': [port_2_frame_inputs[:, 8].tolist()],
#             'port_2_X': [port_2_frame_inputs[:, 9].tolist()],
#             'port_2_Y': [port_2_frame_inputs[:, 10].tolist()],
#             'port_2_START': [port_2_frame_inputs[:, 11].tolist()],
#             'port_2_J_X': [port_2_frame_inputs[:, 12].tolist()],
#             'port_2_J_Y': [port_2_frame_inputs[:, 13].tolist()],
#             'port_2_C_X': [port_2_frame_inputs[:, 14].tolist()],
#             'port_2_C_Y': [port_2_frame_inputs[:, 15].tolist()],
#             'port_2_T_L': [port_2_frame_inputs[:, 16].tolist()],
#             'port_2_T_R': [port_2_frame_inputs[:, 17].tolist()],
#             # Player 2 post frame data
#             'port_2_post_character_value': [[frame.ports[port_2].leader.post.character.value if frame.ports[port_2].leader.post.character else None for frame in frames[:]]], #object
#             'port_2_post_character_name': [[frame.ports[port_2].leader.post.character.name if frame.ports[port_2].leader.post.character else None for frame in frames[:]]], #object
#             'port_2_post_state_value': [[frame.ports[port_2].leader.post.state.value if frame.ports[port_2].leader.post.state else None for frame in frames[:]]], #object
#             'port_2_post_state_name': [[frame.ports[port_2].leader.post.state.name if frame.ports[port_2].leader.post.state else None for frame in frames[:]]], #object
#             'port_2_post_position_x': [[frame.ports[port_2].leader.post.position.x for frame in frames[:]]], #object
#             'port_2_post_position_y': [[frame.ports[port_1].leader.post.position.y for frame in frames[:]]], #object
#             'port_2_post_direction': [[frame.ports[port_2].leader.post.direction for frame in frames[:]]], #object
#             'port_2_post_damage': [[frame.ports[port_2].leader.post.damage for frame in frames[:]]], #object
#             'port_2_post_shield': [[frame.ports[port_2].leader.post.shield for frame in frames[:]]], #object
#             'port_2_post_stocks': [[frame.ports[port_2].leader.post.stocks for frame in frames[:]]], #object
#             'port_2_post_last_attack_landed_value': [[frame.ports[port_2].leader.post.last_attack_landed.value if frame.ports[port_2].leader.post.last_attack_landed else None for frame in frames[:]]], #object
#             'port_2_post_last_attack_landed_name': [[frame.ports[port_2].leader.post.last_attack_landed.name if frame.ports[port_2].leader.post.last_attack_landed else None for frame in frames[:]]], #object
#             # 'port_2_post_last_hit_by': [[frame.ports[port_2].leader.post.last_hit_by for frame in frames[:]]], #object
#             'port_2_post_combo_count': [[frame.ports[port_2].leader.post.combo_count for frame in frames[:]]], #object
#             # 'port_2_post_state_age': [[frame.ports[port_2].leader.post.state_age for frame in frames[:]]], #object
#             'port_2_post_flags_value': [[frame.ports[port_2].leader.post.flags.value for frame in frames[:]]], #object
#             'port_2_post_hit_stun': [[frame.ports[port_2].leader.post.hit_stun for frame in frames[:]]], #object ??????
#             'port_2_post_airborne': [[frame.ports[port_2].leader.post.airborne for frame in frames[:]]], #object
#             'port_2_post_ground': [[frame.ports[port_2].leader.post.ground for frame in frames[:]]], #object
#             'port_2_post_jumps': [[frame.ports[port_2].leader.post.jumps for frame in frames[:]]], #object
#             'port_2_post_l_cancel': [[frame.ports[port_2].leader.post.l_cancel for frame in frames[:]]], #object
#             #Item
#             # 'item_':[np.array([frame.Item.type.value for frame in frames[:]])]# I CANT FIGURE THIS ONE OUT
#             # 'item_state':[np.array([frame.Item.state for frame in frames[:]])],# object
#             # 'item_direction':[np.array([frame.Item.direction for frame in frames[:]])],# object
#             # 'item_velocity_x':[np.array([frame.Item.velocity.x for frame in frames[:]])],# object
#             # 'item_velocity_y':[np.array([frame.Item.velocity.y for frame in frames[:]])],# object
#             # 'item_position_x':[np.array([frame.Item.position.x for frame in frames[:]])],# object
#             # 'item_position_y':[np.array([frame.Item.position.y for frame in frames[:]])],# object
#             # 'item_damage':[np.array([frame.Item.damage for frame in frames[:]])],# object
#             # 'item_timer':[np.array([frame.Item.timer for frame in frames[:]])],# object
#             # 'item_spawn_id':[np.array([frame.Item.spawn_id for frame in frames[:]])],# object
            
#         }
#         game_data_list.append(game_data_dict.values())
#         return game_data_dict
#     except Exception as e:
#         print(f"Error processing {slp_file}: {str(e)}")

            

        


    
# # Create an empty Polars DataFrame with the desired schema
# schema =    [
#     ('file', pl.String),
#     ('stage_name', pl.String),
#     ('stage_value', pl.Int8),
#     ('is_pal', pl.Boolean),
#     ('is_frozen_ps', pl.Boolean),
#     # Player 1 Data
#     ('port_1', pl.Int32),
#     ('port_1_character_name', pl.String),
#     ('port_1_character_value', pl.Int32),
#     ('port_1_type_name', pl.String),
#     ('port_1_type_value', pl.Boolean),
#     ('port_1_stocks', pl.Int32),
#     ('port_1_costume', pl.Int32),
#     ('port_1_ucf_dash_back_name', pl.String),
#     ('port_1_ucf_dash_back_value', pl.Int32),
#     ('port_1_ucf_shield_drop_name', pl.String),
#     ('port_1_ucf_shield_drop_value', pl.Int32),
#     ('port_1_tag', pl.String),
#     # Player 2 Data
#     ('port_2', pl.Int32),
#     ('port_2_character_name', pl.String),
#     ('port_2_character_value', pl.Int32),
#     ('port_2_type_name', pl.String),
#     ('port_2_type_value', pl.Boolean),
#     ('port_2_stocks', pl.Int32),
#     ('port_2_costume', pl.Int32),
#     ('port_2_ucf_dash_back_name', pl.String),
#     ('port_2_ucf_dash_back_value', pl.Int32),
#     ('port_2_ucf_shield_drop_name', pl.String),
#     ('port_2_ucf_shield_drop_value', pl.Int32),
#     ('port_2_tag', pl.String),
#      # End
#     ('method_how_game_ended',pl.String),
#     # Player 1 Pre Frame data
#     ('port_1_pre_state_value',pl.List),
#     ('port_1_pre_state_name',pl.List),
#     ('port_1_pre_position_x',pl.List),
#     ('port_1_pre_position_y',pl.List),
#     ('port_1_pre_direction',pl.List),
#     ('port_1_pre_damage',pl.List),
#         # pre Input Data for Player 1
#     ('port_1_DPAD_LEFT', pl.List),
#     ('port_1_DPAD_RIGHT', pl.List),
#     ('port_1_DPAD_DOWN', pl.List),
#     ('port_1_DPAD_UP', pl.List),
#     ('port_1_Z', pl.List),
#     ('port_1_R', pl.List),
#     ('port_1_L', pl.List),
#     ('port_1_A', pl.List),
#     ('port_1_B', pl.List),
#     ('port_1_X', pl.List),
#     ('port_1_Y', pl.List),
#     ('port_1_START', pl.List),
#     ('port_1_J_X', pl.List),
#     ('port_1_J_Y', pl.List),
#     ('port_1_C_X', pl.List),
#     ('port_1_C_Y', pl.List),
#     ('port_1_T_L', pl.List),
#     ('port_1_T_R', pl.List),
#     # Player 1 post frame data
#     ('port_1_post_character_value', pl.List),
#     ('port_1_post_character_name', pl.List),
#     ('port_1_post_state_value', pl.List),
#     ('port_1_post_state_name', pl.List),
#     ('port_1_post_position_x', pl.List),
#     ('port_1_post_position_y', pl.List),
#     ('port_1_post_direction', pl.List),
#     ('port_1_post_damage', pl.List),
#     ('port_1_post_shield', pl.List),
#     ('port_1_post_stocks', pl.List),
#     ('port_1_post_last_attack_landed_value', pl.List),
#     ('port_1_post_last_attack_landed_name', pl.List),
#     ('port_1_post_combo_count', pl.List),
#     ('port_1_post_flags_value', pl.List),
#     ('port_1_post_hit_stun', pl.List),
#     ('port_1_post_airborne', pl.List),
#     ('port_1_post_ground', pl.List),
#     ('port_1_post_jumps', pl.List),
#     ('port_1_post_l_cancel', pl.List),
#     # Player 2 Pre Frame data
#     ('port_2_pre_state_value',pl.List),
#     ('port_2_pre_state_name',pl.List),
#     ('port_2_pre_position_x',pl.List),
#     ('port_2_pre_position_y',pl.List),
#     ('port_2_pre_direction',pl.List),
#     ('port_2_pre_damage',pl.List),
#     # Pre Input Data for Player 2
#     ('port_2_DPAD_LEFT', pl.List),
#     ('port_2_DPAD_RIGHT', pl.List),
#     ('port_2_DPAD_DOWN', pl.List),
#     ('port_2_DPAD_UP', pl.List),
#     ('port_2_Z', pl.List),
#     ('port_2_R', pl.List),
#     ('port_2_L', pl.List),
#     ('port_2_A', pl.List),
#     ('port_2_B', pl.List),
#     ('port_2_X', pl.List),
#     ('port_2_Y', pl.List),
#     ('port_2_START', pl.List),
#     ('port_2_J_X', pl.List),
#     ('port_2_J_Y', pl.List),
#     ('port_2_C_X', pl.List),
#     ('port_2_C_Y', pl.List),
#     ('port_2_T_L', pl.List),
#     ('port_2_T_R', pl.List),
#     # Player 2 post frame data
#     ('port_2_post_character_value', pl.List),
#     ('port_2_post_character_name', pl.List),
#     ('port_2_post_state_value', pl.List),
#     ('port_2_post_state_name', pl.List),
#     ('port_2_post_position_x', pl.List),
#     ('port_2_post_position_y', pl.List),
#     ('port_2_post_direction', pl.List),
#     ('port_2_post_damage', pl.List),
#     ('port_2_post_shield', pl.List),
#     ('port_2_post_stocks', pl.List),
#     ('port_2_post_last_attack_landed_value', pl.List),
#     ('port_2_post_last_attack_landed_name', pl.List),
#     ('port_2_post_combo_count', pl.List),
#     ('port_2_post_flags_value', pl.List),
#     ('port_2_post_hit_stun', pl.List),
#     ('port_2_post_airborne', pl.List),
#     ('port_2_post_ground', pl.List),
#     ('port_2_post_jumps', pl.List),
#     ('port_2_post_l_cancel', pl.List),

#     # Item (You can uncomment and add these fields if needed)
#     # ('item_type_value', pl.Object),
#     # ('item_state', pl.Object),
#     # ('item_direction', pl.Object),
#     # ('item_velocity_x', pl.Object),
#     # ('item_velocity_y', pl.Object),
#     # ('item_position_x', pl.Object),
#     # ('item_position_y', pl.Object),
#     # ('item_damage', pl.Object),
#     # ('item_timer', pl.Object),
#     # ('item_spawn_id', pl.Object),
# ]



In [None]:
# # Set the number of time steps in the model inputs
# frames_per_input = 60 * 12  # 12 seconds of gameplay

# dataset_path = './Slippi_Public_Dataset_v3/'

# # List of file names
# slp_files = [file for file in os.listdir(dataset_path)]

# # slp_file = slp_files[10]

# # A single file path
# # file_path = os.path.join(dataset_path, slp_file)
# num_files = 1

# # game_data_df = pl.DataFrame([], schema=schema)
# manager = Manager()
# game_data_list = manager.list()
# # Use joblib to parallelize processing of SLP files
# Parallel(n_jobs=-1, verbose=10)(delayed(process_slp_file)(slp_file, dataset_path, game_data_list) for slp_file in tqdm.tqdm(slp_files[:num_files]))

# # game_data_df = pl.DataFrame(game_data_list, schema=schema)
# # Convert the ListProxy to a regular list
# game_data_list_converted = list(game_data_list)
# # print(game_data_list_converted)
# # Now you can create the Polars DataFrame from the converted list
# game_data_df = pl.DataFrame(game_data_list_converted, schema=schema)


# # print(game_data_dict)
# # game_data_new_row = pl.DataFrame(game_data_dict,schema=schema)
# # print(type(game_data_new_row))

# print(game_data_df)
# # print()

In [None]:
# # Assuming `game_data_df` is your Polars DataFrame
# game_data_df.write_parquet('./data/dataframe.parquet')