In [1]:
from carball import analyze_replay_file
from carball.analysis.analysis_manager import AnalysisManager
import logging

def parse(replay_path: str) -> AnalysisManager:
    """Analyze a single replay file with carball.

    Args:
        replay_path: Path of the replay file handed to ``analyze_replay_file``.

    Returns:
        Whatever ``analyze_replay_file`` returns for that path (annotated here
        as an ``AnalysisManager``).
    """
    # logging.FATAL is forwarded as the logging level of the analysis call.
    analysis = analyze_replay_file(replay_path, logging_level=logging.FATAL)
    return analysis

In [3]:
import os

import pandas as pd

from carball.generated.api.metadata.game_metadata_pb2 import Playlist
from carball.generated.api.metadata import game_metadata_pb2

from DataCleaning import formatFrames

# Replays are read from a "replays" folder under the current working directory.
replay_directory = os.path.join(os.getcwd(), "replays")

# Playlists accepted by the parsing cell; a replay whose playlist is not
# listed here is rejected there.
valid_playlists = [
    game_metadata_pb2.UNRANKED_DUELS,
    game_metadata_pb2.UNRANKED_DOUBLES,
    game_metadata_pb2.UNRANKED_STANDARD,
    game_metadata_pb2.CUSTOM_LOBBY,
    game_metadata_pb2.RANKED_DUELS,
    game_metadata_pb2.RANKED_DOUBLES,
    game_metadata_pb2.RANKED_SOLO_STANDARD,
    game_metadata_pb2.RANKED_STANDARD,
    game_metadata_pb2.TOURNAMENT,
]

# Second-level column names repeated under every player slot.
player_columns = [
    "pos_x", "pos_y", "pos_z",
    "vel_x", "vel_y", "vel_z",
    "ang_vel_x", "ang_vel_y", "ang_vel_z",
    "rot_x", "rot_y", "rot_z",
    "boost", "has_left",
]


def _player_slot_columns(team_size):
    """Return (slot, column) tuples for ``team_size`` players per team, blue first."""
    slots = [
        f"{team}_player_{index}"
        for team in ("blue", "orange")
        for index in range(team_size)
    ]
    return [(slot, column) for slot in slots for column in player_columns]


player_columns_duels = _player_slot_columns(1)
player_columns_doubles = _player_slot_columns(2)
player_columns_standard = _player_slot_columns(3)

ball_columns = [
    ("ball", sub_column)
    for sub_column in ("pos_x", "pos_y", "pos_z", "vel_x", "vel_y", "vel_z")
]
game_columns = [
    ("game", sub_column)
    for sub_column in ("seconds_remaining", "is_overtime", "goal_differential")
]
# NOTE: the name is spelled "targe_columns" in the original notebook; it is kept
# unchanged because other cells may refer to it.
targe_columns = [("target", "winner")]


def _empty_dataset(player_slot_columns):
    """Build an empty frame with MultiIndex columns: players, ball, game, target."""
    all_columns = player_slot_columns + ball_columns + game_columns + targe_columns
    return pd.DataFrame(columns=pd.MultiIndex.from_tuples(all_columns))


# One empty, correctly-shaped frame per game mode; the parsing cell matches
# replays to a mode by comparing column counts against these frames.
duels_df = _empty_dataset(player_columns_duels)
doubles_df = _empty_dataset(player_columns_doubles)
standard_df = _empty_dataset(player_columns_standard)

In [3]:
# Parse every ".replay" file in `replay_directory`, keep a random 10% of its
# frames, and append them to the dataset of the matching game mode.
#
# Each bucket holds: a label for the log line, the template frame whose column
# count identifies the mode, and the samples collected for it. Samples are
# gathered in lists and concatenated once after the loop; calling pd.concat
# inside the loop re-copies every accumulated row on each replay.
mode_buckets = [
    ("duels", duels_df, []),
    ("doubles", doubles_df, []),
    ("standard", standard_df, []),
]

# os.listdir returns entries in arbitrary order; sorting makes the processing
# (and therefore row) order stable across runs.
for replay in sorted(os.listdir(replay_directory)):
    if not replay.endswith(".replay"):
        continue

    try:
        am = parse(os.path.join(replay_directory, replay))

        playlist = am.protobuf_game.game_metadata.playlist
        if playlist not in valid_playlists:
            raise Exception(f"Invalid playlist: {Playlist.Name(playlist)}\nID: {playlist}")

        # Only mutator indices 0 and -1 are accepted.
        mutator_index = am.protobuf_game.mutators.game_mutator_index
        if mutator_index not in (0, -1):
            raise Exception(f"Invalid mutator index: {mutator_index}")

        try:
            frames = formatFrames(am)
        except Exception as e:
            # Chain the original error so its traceback is not lost.
            raise Exception(f"Failed to format frames: {e}") from e

        sampled_frames = frames.sample(frac=.1)

        # The game mode is inferred from the number of columns; buckets are
        # checked in order (duels, doubles, standard) and the first match wins.
        for mode_name, mode_template, mode_samples in mode_buckets:
            if len(frames.columns) == len(mode_template.columns):
                print(f"Parsed {replay} as {mode_name}")
                mode_samples.append(sampled_frames)
                break
        else:
            raise Exception(f"Invalid number of columns: {len(frames.columns)}")
    except Exception as e:
        # Best effort: report the failing replay and continue with the next one.
        print(f"Failed to parse {replay}")
        print(e)

# Single concatenation per mode; a mode with no new samples keeps its frame as is.
duels_df, doubles_df, standard_df = (
    pd.concat([mode_template, *mode_samples], ignore_index=True) if mode_samples else mode_template
    for _, mode_template, mode_samples in mode_buckets
)


Failed to parse 00762cfb-4535-46ae-986d-b78015a9a11d.replay
Invalid playlist: RANKED_HOOPS
ID: 27


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['time_till_power_up'] = math.nan
  header = _wrap_header_guess_version(header)


Failed to parse 009ddea4-874a-4eba-80b9-46cfbbf2f9f5.replay
Invalid playlist: RANKED_RUMBLE
ID: 28
Failed to parse 031ac835-bf0b-427d-bb9c-5cca12d423a6.replay
No objects to concatenate
Parsed 03aef3ad-55a8-494d-a866-a77a89354c0b.replay as standard
Parsed 0463c19c-2a70-41e0-8ff7-464a8376da5d.replay as duels
Parsed 06759dde-0ef6-4193-b555-cec1f60efa03.replay as duels
Parsed 06fb04da-8a1e-4f6c-989e-e3ddcb16c01f.replay as standard
Parsed 078bf541-23b7-4a51-95a7-d3ef4ef95773.replay as duels
Parsed 0970dcff-af43-4b0e-9548-6ba613067f25.replay as doubles
Failed to parse 0a58a944-6fe6-4a0f-91ef-8f01c286294c.replay
Invalid playlist: UNKNOWN
ID: 0
Parsed 0b24cdff-b58d-45c0-8a71-ee546627dca3.replay as standard
Parsed 0b498b41-049e-4c19-ab3f-112155918dcf.replay as doubles
Parsed 0c339d88-b9f6-4ac5-9a48-063d07abbf71.replay as standard
Parsed 0d9d635b-08af-4353-89a7-9cff00904fef.replay as doubles
Failed to parse 0f774585-3c98-40b8-af39-77e56a78c137.replay
Failed to format frames: Bots are not support

In [4]:
# Write each per-mode dataset to "<cwd>/datasets/<mode>.csv" with its rows shuffled.
datasets_directory = os.path.join(os.getcwd(), "datasets")
# to_csv does not create missing parent directories, so make sure the target
# folder exists before writing (no error if it is already there).
os.makedirs(datasets_directory, exist_ok=True)

for dataset_file, dataset_df in (
    ("duels.csv", duels_df),
    ("doubles.csv", doubles_df),
    ("standard.csv", standard_df),
):
    # sample(frac=1) returns all rows in random order; index=False omits the row index.
    dataset_df.sample(frac=1).to_csv(os.path.join(datasets_directory, dataset_file), index=False)