In [11]:
import pandas as pd
import xml.etree.ElementTree as ET
from collections import defaultdict
import zipfile


# Function to extract and parse XML from a zip file
def extract_and_parse_xml(zip_path, file_name):
    """Parse one XML member of a zip archive and return its root element.

    Args:
        zip_path: Path of the zip archive, opened read-only.
        file_name: Name of the archive member that holds the XML document.

    Returns:
        The root ``xml.etree.ElementTree.Element`` of the parsed document.
    """
    with zipfile.ZipFile(zip_path, 'r') as archive:
        # Pull the member's raw bytes out of the archive in a single call.
        raw_xml = archive.read(file_name)
    # Parsing is done by the standard-library ElementTree parser.
    return ET.fromstring(raw_xml)

# Load the XML file
root = extract_and_parse_xml('C:/source/MicroRTS/tournament_5/traces/0-vs-0-0-0.zip', 'game.xml')

# One feature dict per TraceEntry; each becomes one DataFrame row.
data = []


# Iterate through each TraceEntry to extract timestep data
for entry in root.findall('.//rts.TraceEntry'):
    time = int(entry.get('time'))

    # Per-player resource totals, keyed by the player's ID attribute.
    players = entry.findall('.//rts.Player')
    player_data = {f"player_{player.get('ID')}_resources": int(player.get('resources')) for player in players}

    # Count unit types per player and, in the same pass, remember which
    # player owns each unit ID so actions can be attributed without
    # re-scanning the unit list for every action.
    unit_counts = defaultdict(int)
    unit_owner = {}
    units = entry.findall('.//rts.units.Unit')
    for unit in units:
        unit_type = unit.get('type')
        player_id = unit.get('player')
        # setdefault keeps the first unit seen for an ID, matching a
        # first-match linear search.
        unit_owner.setdefault(unit.get('ID'), player_id)
        if player_id != '-1':  # Exclude neutral units like resources
            unit_counts[f"player_{player_id}_{unit_type}_units"] += 1

    # Count action types and parameters per owning player.
    action_counts = defaultdict(int)
    actions = entry.findall('.//action')
    for action in actions:
        unit_action = action.find('UnitAction')
        if unit_action is None:
            continue
        unit_id = action.get('unitID')
        if unit_id not in unit_owner:
            # The acting unit is not present in this entry's unit list.
            # Skip it rather than crediting the action to whichever
            # player_id happened to be left over from an earlier iteration.
            continue
        player_id = unit_owner[unit_id]
        action_type = unit_action.get('type')
        parameter = unit_action.get('parameter')
        action_counts[f"player_{player_id}_action_type_{action_type}_count"] += 1
        action_counts[f"player_{player_id}_action_parameter_{parameter}_count"] += 1

    # Combine data for this timestep; insertion order fixes column order.
    timestep_data = {'time': time}
    timestep_data.update(player_data)
    timestep_data.update(unit_counts)
    timestep_data.update(action_counts)

    # Add to data list
    data.append(timestep_data)

# Create DataFrame
df = pd.DataFrame(data)

# Fill NaN values with 0 (a column is missing from a row whenever that
# unit type / action type / parameter did not occur in that timestep)
df = df.fillna(0)

# Display the DataFrame
df.head()


Unnamed: 0,time,player_0_resources,player_1_resources,player_0_Base_units,player_1_Base_units,player_0_Worker_units,player_1_Worker_units,player_0_action_type_4_count,player_0_action_parameter_0_count,player_0_action_type_2_count,...,player_1_action_parameter_0_count,player_1_action_parameter_2_count,player_0_action_type_5_count,player_0_action_parameter_None_count,player_1_action_type_5_count,player_1_action_parameter_None_count,player_0_action_type_0_count,player_0_action_parameter_10_count,player_1_action_type_0_count,player_1_action_parameter_10_count
0,0,5,5,1,1.0,1,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0,5,5,1,1.0,1,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,20,5,5,1,1.0,1,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,30,5,5,1,1.0,1,1.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,40,6,6,1,1.0,1,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
