<h1> Data Extraction </h1>
We extract all the data from all the replay files and store it in a sensible way. We use a modified version of py-slippi that handles the lack of metadata in the ranked dataset.

In [1]:
import os as os
import numpy as np
import pandas as pd
# import polars as pl
import tqdm
import slippi as slp
from joblib import Parallel, delayed
from multiprocessing import Manager
# import pyarrow as pa
# import pyarrow.parquet as pq
import gzip
import pickle
import feather

<h2> Define paths </h2>
We define paths to our datasets.

In [2]:
ranked_path = 'D:\\ranked\\ranked-anonymized-1-116248\\ranked-anonymized'
public_path = 'C:\\Users\\jaspa\\Grant ML\\Slippi_Public_Dataset_v3'


<h2> Extract Simple Data </h2>
Extract the data that will be stored in the data frame. Need to handle the cases where the data is none or not there. Esspecially metadata.

In [1]:
#get the overall metadata from the start of the game
def get_event_start_metadata(game):
    ################
    # Start Metadata
    ################
    start_metadata_dict = {
        'is_teams': game.start.is_teams, #bool # We only take two player games
        # 'players': game.start.players, #tuple that contains players by port.
        'random_seed': game.start.random_seed, #int
        'slippi': game.start.slippi, #slippi
        'stage_name': game.start.stage.name, #string
        # 'stage_value': game.start.stage.value, #int
        'is_pal': game.start.is_pal, #bool
        'is_frozen_ps': game.start.is_frozen_ps, #bool
    }
    return start_metadata_dict

# get the player data from a single port
def get_event_start_player_data(game, port, player_number):
    start_player_data_dict = {
        f'player_{player_number}_port': port, #int
        f'player_{player_number}_character_name': game.start.players[port].character.name, #string
        # f'player_{player_number}_character_value': game.start.players[port_1].character.value, #int
        f'player_{player_number}_type_name': game.start.players[port].type.name, #string
        f'player_{player_number}_type': game.start.players[port].type.name, #string
        # f'player_{player_number}_type_value': game.start.players[port].type.value, #int
        f'player_{player_number}_stocks': game.start.players[port].stocks, #int
        f'player_{player_number}_costume': game.start.players[port].costume, #int
        f'player_{player_number}_costume': game.start.players[port].team.value, #int
        f'player_{player_number}_ucf_shield_drop_name': game.start.players[port].ucf.shield_drop.name, #string
        # f'player_{player_number}_ucf_shield_drop_value': game.start.players[port_1].ucf.shield_drop.value, #int
        f'player_{player_number}_tag': game.start.players[port].tag, #string
        f'player_{player_number}_display_name': game.start.player[port].display_name
    }
    return start_player_data_dict

# Extract all the data from the slippi.event.end module
def get_event_end_data(game):
    end_data_dict = {
        'end_method_name' : game.end.method, #string
        # 'end_method_value' : game.end.method.value, #int
        'lras_initiator' : game.end.lras_initiator #int
    }
    return end_data_dict

# Function to determine who won the game
# 0 if inconclusive
# 1 or 2 if conclusive
def determine_winner(game,occupied_ports):
    #To do
    return 

# Some games won't have metadata
def get_metadata(game, occupied_ports):
   #To do
   return


<h2> Frame Data Functions </h2>
These are the functions that we will call to extract all the frame data and one-hot encode some of it. The outputs are numpy arrays. I am not removing the first 123 frames.

In [None]:
# # Function to extract frames.pre data
# def get_frames_pre_data(frames, port):
#     # self.state = state #: :py:class:`slippi.id.ActionState` | int: Character's action state
#     # self.position = position #: :py:class:`Position`: Character's position
#     # self.direction = direction #: :py:class:`Direction`: Direction the character is facing
#     # self.joystick = joystick #: :py:class:`Position`: Processed analog joystick position
#     # self.cstick = cstick #: :py:class:`Position`: Processed analog c-stick position
#     # self.triggers = triggers #: :py:class:`Triggers`: Trigger state
#     # self.buttons = buttons #: :py:class:`Buttons`: Button state
#     # self.random_seed = random_seed #: int: Random seed at this point
#     # self.raw_analog_x = raw_analog_x #: int | None: `added(1.2.0)` Raw x analog controller input (for UCF)
#     # self.damage = damage #: float | None: `added(1.4.0)` Current damage percent
    
#     pre_data = np.empty((4,len(frames)),dtype = np.float)
#     # self.state = state #: :py:class:`slippi.id.ActionState` | int: Character's action state
#     # self.position = position #: :py:class:`Position`: Character's position
#     # self.direction = direction #: :py:class:`Direction`: Direction the character is facing
#     # self.damage = damage #: float | None: `added(1.4.0)` Current damage percent
    
#     pre_input_data = np.empty((4,len(frames)),dtype = np.float)
    
#     for i, frame in enumerate(frames):
#         integer_data[0,i] = frame.ports[port].leader.pre.state.value
#         integer_data[1,i] = frame.ports[port].leader.pre.direction.value
#         integer_data[2,i] = frame.ports[port].leader.pre.buttons.logical.value
#         integer_data[3,i] = frame.ports[port].leader.pre.buttons.physical.value
        
#         float_data[0,i] = frame.ports[port].leader.pre.position.x
#         float_data[1,i] = frame.ports[port].leader.pre.position.y
#         float_data[2,i] = frame.ports[port].leader.pre.joystick.x
#         float_data[3,i] = frame.ports[port].leader.pre.joystick.y
#         float_data[4,i] = frame.ports[port].leader.pre.cstick.x
#         float_data[5,i] = frame.ports[port].leader.pre.cstick.y  
#         float_data[6,i] = frame.ports[port].leader.pre.trigger.logical
#         float_data[7,i] = frame.ports[port].leader.pre.trigger.physical.l
#         float_data[8,i] = frame.ports[port].leader.pre.trigger.physical.r
    
#     return integer_data, float_data

# # return np containing floats and np containing int.
# def get_frames_post_data(frames,port):
#     class Post(
#         character, #int
#         state, #slippi.id.ActionState: int 0-382
#         position_x, #float
#         position_y,#float
#         direction, #int
#         damage, #float
#         shield, #float
#         stocks, #int
#         last_attack_landed, #int
#         last_hit_by, #int
#         combo_count, #int
#         state_age=None, #float | none
#         flags=None, #int 16-549755813888 int64
#         hit_stun=None,#float
#         airborne=None,#bool
#         ground=None,#int
#         jumps=None, #int
#         l_cancel=None # success = 1, failure = 2
#         )

        

<h2> Get Frames </h2>
A function to put all the frame data from a single port into a pandas df.

In [26]:
# Function to extract frame data
def get_frames_df(frames, port):
    # Pre
    # self.state = state #: :py:class:`slippi.id.ActionState` | int: Character's action state
    # self.position = position #: :py:class:`Position`: Character's position
    # self.direction = direction #: :py:class:`Direction`: Direction the character is facing
    # self.joystick = joystick #: :py:class:`Position`: Processed analog joystick position
    # self.cstick = cstick #: :py:class:`Position`: Processed analog c-stick position
    # self.triggers = triggers #: :py:class:`Triggers`: Trigger state
    # self.buttons = buttons #: :py:class:`Buttons`: Button state
    # self.random_seed = random_seed #: int: Random seed at this point
    # self.raw_analog_x = raw_analog_x #: int | None: `added(1.2.0)` Raw x analog controller input (for UCF)
    # self.damage = damage #: float | None: `added(1.4.0)` Current damage percent
    
    # Post
    # character: sid.InGameCharacter #: In-game character (can only change for Zelda/Sheik). Check on first frame to determine if Zelda started as Sheik
    # state: Union[sid.ActionState, int] #: Character's action state
    # position: Position #: Character's position
    # direction: Direction #: Direction the character is facing
    # damage: float #: Current damage percent
    # shield: float #: Current size of shield
    # stocks: int #: Number of stocks remaining
    # last_attack_landed: Union[Attack, int] #: Last attack that this character landed
    # last_hit_by: Optional[int] #: Port of character that last hit this character
    # combo_count: int #: Combo count as defined by the game
    # state_age: Optional[float] #: `added(0.2.0)` Number of frames action state has been active. Can have a fractional component for certain actions
    # flags: Optional[StateFlags] #: `added(2.0.0)` State flags
    # hit_stun: Optional[float] #: `added(2.0.0)` Number of hitstun frames remaining
    # airborne: Optional[bool] #: `added(2.0.0)` True if character is airborne
    # ground: Optional[int] #: `added(2.0.0)` ID of ground character is standing on, if any
    # jumps: Optional[int] #: `added(2.0.0)` Jumps remaining
    # l_cancel: Optional[LCancel] #: `added(2.0.0)` L-cancel status, if any
    column_names = ['frame_index',
                    # 
                    'pre_state', 'pre_position_x','pre_position_y','pre_direction',
                    'pre_joystick_x','pre_joystick_y', 'pre_cstick_x', 'pre_cstick_y',
                    'pre_triggers_logical','pre_triggers_physical_l','pre_triggers_physical_r',
                    'pre_buttons_logical','pre_buttons_physical',
                    'pre_random_seed','pre_raw_analog_x', 'pre_damage',
                    # 
                    'post_character',
                    'post_state','post_position_x','post_position_y','post_direction',
                    'post_damage','post_sheild','post_stocks',
                    'post_last_attack_landed','post_last_hit_by','post_combo_count',
                    'post_state_age','post_flags','post_hit_stun',
                    'post_airbourn', 'post_ground','post_jumps','post_l_cancel'
                    ]
    
    frame_data = []
    
    for frame in frames:
        frame_data.append([
            frame.index, # To remind us that it starts at -123
            # Pre
            frame.ports[port].leader.pre.state.value,
            frame.ports[port].leader.pre.position.x,
            frame.ports[port].leader.pre.position.y,
            frame.ports[port].leader.pre.direction.value,
            #
            frame.ports[port].leader.pre.joystick.x,
            frame.ports[port].leader.pre.joystick.y,
            frame.ports[port].leader.pre.cstick.x,
            frame.ports[port].leader.pre.cstick.y,
            #
            frame.ports[port].leader.pre.triggers.logical,
            frame.ports[port].leader.pre.triggers.physical.l,
            frame.ports[port].leader.pre.triggers.physical.r,
            #
            frame.ports[port].leader.pre.buttons.logical.value,
            frame.ports[port].leader.pre.buttons.physical.value,
            #
            frame.ports[port].leader.pre.random_seed,
            frame.ports[port].leader.pre.raw_analog_x,
            frame.ports[port].leader.pre.damage,
            # Post
            frame.ports[port].leader.post.character.value,
            #
            frame.ports[port].leader.post.state.value,
            frame.ports[port].leader.post.position.x,
            frame.ports[port].leader.post.position.y,
            frame.ports[port].leader.post.direction.value,
            #
            frame.ports[port].leader.post.damage,
            frame.ports[port].leader.post.shield,
            frame.ports[port].leader.post.stocks,
            #
            frame.ports[port].leader.post.last_attack_landed.value if frame.ports[port].leader.post.last_attack_landed else None,
            frame.ports[port].leader.post.last_hit_by if frame.ports[port].leader.post.last_hit_by else None,
            frame.ports[port].leader.post.combo_count,
            #
            frame.ports[port].leader.post.state_age if frame.ports[port].leader.post.state_age else None,
            frame.ports[port].leader.post.flags.value if frame.ports[port].leader.post.flags else None,
            frame.ports[port].leader.post.hit_stun if frame.ports[port].leader.post.hit_stun else None,
            #
            frame.ports[port].leader.post.airborne if frame.ports[port].leader.post.airborne else None,
            frame.ports[port].leader.post.ground if frame.ports[port].leader.post.ground else None,
            frame.ports[port].leader.post.jumps if frame.ports[port].leader.post.jumps else None,
            frame.ports[port].leader.post.l_cancel.value if frame.ports[port].leader.post.l_cancel else None
            ])

    return pd.DataFrame(frame_data, columns=column_names)
    

<h2> Process Fuction </h2>
A function to process a .slp file path. Save the frame data in character subfolder of public, ranked, and mango depending on which dataset the game comes from. We want each frame data file to be saved with a unique name that is relatively short (I think there is a package to generate unique codes and then we would put underscore followed by the port.)  We will save all the data, even if we don't think will use it.

In [None]:


# Each iteration of this creates a row of the dataframe and appends it to the dataframe.
def process_slp_file(slp_file, dataset_path, save_path, no_teams_2_players,no_teams_3_players,no_teams_4_players,teams_3_players,teams_4_players):
    try:
        file_path = os.path.join(dataset_path, slp_file)
        # slp_file_name = slp_file.removesuffix('.slp')
        
        game = slp.Game(file_path)
        
        # We do this for every game
        game_data_dict = get_event_start_metadata(game)
        
        occupied_ports = [i for i, port in enumerate(game.start.players) if port is not None]
        
        # add player data to the game dictionary
        for i, port in enumerate(occupied_ports): 
            game_data_dict.update(get_event_start_player_data(game,port, i+1))
        
        game_data_dict.update(get_event_end_data(game))
        
        game_data_dict.update(determine_winner(game,occupied_ports))
        
        # handle the case that there is no metadata
        if game.metadata is not None:
            game_data_dict.update(get_metadata(game, occupied_ports))
            
            
        # Get the frame data and save it
        frames = game.frames
        
        for port in occupied_ports:
            frame_df = get_frames_df(frames, port)
            #save the data frame as either parquet or feather depending on which is faster to open
            #append the path to the frame data for this port to game_data_dict
            
        #update the game_data_dict to the right list initilized with the manager
        #no_teams_2_players.append(game_data_dict)
        #no_teams_3_players.append(game_data_dict)
        #no_teams_4_players.append(game_data_dict)
        #teams_3_players.append(game_data_dict)
        #teams_4_players.append(game_data_dict)
        
        return
             
    except Exception as e:
        print(f"Error processing {slp_file}: {str(e)}")

In [27]:
slp_file = "D:\\Mango\\Slippi-20240312T153007Z-001\\Slippi\\Game_20220505T172728.slp"
game = slp.Game(slp_file)
frames = game.frames

print(len(frames))
# List occupied ports
occupied_ports = [i for i, port in enumerate(game.start.players) if port is not None]
print(occupied_ports)
port_1 = occupied_ports[0]
port_2 = occupied_ports[1]


131
[0, 1]


In [28]:
port_1_frame_df = get_frames_df(frames,port_1)

In [29]:
port_1_frame_df.head()

Unnamed: 0,frame_index,pre_state,pre_position_x,pre_position_y,pre_direction,pre_joystick_x,pre_joystick_y,pre_cstick_x,pre_cstick_y,pre_triggers_logical,...,post_last_attack_landed,post_last_hit_by,post_combo_count,post_state_age,post_flags,post_hit_stun,post_airbourn,post_ground,post_jumps,post_l_cancel
0,-123,322,-60.0,10.0,1,0.0,0.0,0.0,0.0,0.0,...,,,0,-1.0,274877900000.0,5.605194e-45,True,,1,
1,-122,322,-60.0,10.0,1,0.0,0.0,0.0,0.0,0.0,...,,,0,-1.0,274877900000.0,4.203895e-45,True,,1,
2,-121,322,-60.0,10.0,1,0.0,0.0,0.0,0.0,0.0,...,,,0,-1.0,274877900000.0,2.802597e-45,True,,1,
3,-120,322,-60.0,10.0,1,0.0,0.0,0.0,0.0,0.0,...,,,0,-1.0,274877900000.0,1.401298e-45,True,,1,
4,-119,322,-60.0,10.0,1,0.0,0.0,0.0,0.0,0.0,...,,,0,-1.0,274877900000.0,,True,,1,


In [31]:
port_1_frame_df.columns

Index(['frame_index', 'pre_state', 'pre_position_x', 'pre_position_y',
       'pre_direction', 'pre_joystick_x', 'pre_joystick_y', 'pre_cstick_x',
       'pre_cstick_y', 'pre_triggers_logical', 'pre_triggers_physical_l',
       'pre_triggers_physical_r', 'pre_buttons_logical',
       'pre_buttons_physical', 'pre_random_seed', 'pre_raw_analog_x',
       'pre_damage', 'post_character', 'post_state', 'post_position_x',
       'post_position_y', 'post_direction', 'post_damage', 'post_sheild',
       'post_stocks', 'post_last_attack_landed', 'post_last_hit_by',
       'post_combo_count', 'post_state_age', 'post_flags', 'post_hit_stun',
       'post_airbourn', 'post_ground', 'post_jumps', 'post_l_cancel'],
      dtype='object')