In [13]:
import pandas as pd
import os
import zipfile
import xml.etree.ElementTree as ET

In [23]:
# HELPER FUNCTIONS

def parse_unit_type_table(unit_type_table):
    """Convert a unit-type-table XML element into a list of plain dicts.

    Every child element of ``unit_type_table`` yields one dict holding a copy
    of that child's XML attributes. Grandchildren tagged ``produces`` or
    ``producedBy`` are collected under a key of the same name as a list of
    their attribute dicts; grandchildren with any other tag are ignored.

    Args:
        unit_type_table: An ``xml.etree.ElementTree.Element`` (or any iterable
            of elements) whose children each describe one unit type.

    Returns:
        list[dict]: One dict per child element, in document order. The
        ``produces`` / ``producedBy`` keys are present only when at least one
        such grandchild exists.
    """
    relation_tags = ('produces', 'producedBy')
    unit_types = []
    for unit_type in unit_type_table:
        # Copy the attributes: Element.attrib is the element's live dict, and
        # writing the relation lists into it would mutate the parsed tree
        # (and make a second parse of the same element accumulate duplicates).
        unit_data = dict(unit_type.attrib)
        for child in unit_type:
            if child.tag in relation_tags:
                unit_data.setdefault(child.tag, []).append(dict(child.attrib))
        unit_types.append(unit_data)
    return unit_types

def parse_trace_entry(trace_entry):
    """Flatten one trace-entry XML element into a plain dict.

    The result always contains these keys:

    * ``time`` - the entry's ``time`` attribute.
    * every attribute of the ``rts.PhysicalGameState`` child, if present.
    * ``terrain`` - text of the game state's ``terrain`` child, or ``None``.
    * ``players`` / ``units`` - lists of attribute dicts, one per child of the
      game state's ``players`` / ``units`` element.
    * ``actions`` - one dict per child of the entry's ``actions`` element,
      made of that child's attributes merged with the attributes of each of
      its own children (later children win on key clashes).

    Missing ``rts.PhysicalGameState``, ``terrain``, ``players``, ``units`` or
    ``actions`` elements are treated as empty rather than raising.

    Args:
        trace_entry: An ``xml.etree.ElementTree.Element`` for one trace entry.

    Returns:
        dict: The flattened entry; all attribute values remain strings.

    Raises:
        KeyError: If ``trace_entry`` has no ``time`` attribute.
    """
    def child_elements(parent, tag):
        # Children of parent/<tag>, or nothing when either is absent.
        # Explicit None checks: an Element with no children is falsy.
        if parent is None:
            return []
        container = parent.find(tag)
        return [] if container is None else list(container)

    entry_data = {'time': trace_entry.attrib['time']}

    physical_game_state = trace_entry.find('rts.PhysicalGameState')
    terrain_element = None
    if physical_game_state is not None:
        entry_data.update(physical_game_state.attrib)
        terrain_element = physical_game_state.find('terrain')

    entry_data['terrain'] = (
        terrain_element.text if terrain_element is not None else None
    )

    # dict(...) copies keep the returned data independent of the XML tree.
    entry_data['players'] = [
        dict(player.attrib)
        for player in child_elements(physical_game_state, 'players')
    ]
    entry_data['units'] = [
        dict(unit.attrib)
        for unit in child_elements(physical_game_state, 'units')
    ]

    actions = []
    for action in child_elements(trace_entry, 'actions'):
        # Copy before merging: updating action.attrib directly would write the
        # nested attributes back into the <action> element itself.
        action_data = dict(action.attrib)
        for ua in action:
            action_data.update(ua.attrib)
        actions.append(action_data)
    entry_data['actions'] = actions

    return entry_data

In [27]:
# Define the directory containing the zip files
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
directory = 'C:/source/MicroRTS/tournament_5/traces'

# Upper bound on how many archives are parsed in one run
max_zip_files = 100

# Initialize lists to hold all data
all_unit_types = []
all_trace_entries = []

# os.listdir returns names in arbitrary order, so sort before slicing to make
# "the first N archives" the same set on every run.
zip_files = sorted(f for f in os.listdir(directory) if f.endswith('.zip'))
zip_files = zip_files[:max_zip_files]

# Parse every XML member of every selected archive
for filename in zip_files:
    file_path = os.path.join(directory, filename)

    with zipfile.ZipFile(file_path, 'r') as zip_ref:
        for xml_filename in zip_ref.namelist():
            if not xml_filename.endswith('.xml'):
                continue

            # The tree is fully built by ET.parse, so the archive member can
            # be closed before the elements are walked.
            with zip_ref.open(xml_filename) as xml_file:
                tree = ET.parse(xml_file)
            root = tree.getroot()

            # Parse unit type table (one is collected per XML file, so the
            # combined list repeats the table once per parsed file)
            unit_type_table = root.find('rts.units.UnitTypeTable')
            if unit_type_table is not None:
                all_unit_types.extend(parse_unit_type_table(unit_type_table))

            # Parse trace entries
            trace_entries = root.findall('entries/rts.TraceEntry')
            for entry in trace_entries:
                all_trace_entries.append(parse_trace_entry(entry))

# Convert parsed data to DataFrames (built once from the record lists)
unit_types_df = pd.DataFrame(all_unit_types)
trace_entries_df = pd.DataFrame(all_trace_entries)

# Display the DataFrames
print("Unit Types DataFrame:")
print(unit_types_df)
print("\nTrace Entries DataFrame:")
print(trace_entries_df)

Unit Types DataFrame:
    ID      name cost  hp minDamage maxDamage attackRange produceTime  \
0    0  Resource    1   1         1         1           1          10   
1    1      Base   10  10         1         1           1         250   
2    2  Barracks    5   4         1         1           1         200   
3    3    Worker    1   1         1         1           1          50   
4    4     Light    2   4         2         2           1          80   
..  ..       ...  ...  ..       ...       ...         ...         ...   
695  2  Barracks    5   4         1         1           1         200   
696  3    Worker    1   1         1         1           1          50   
697  4     Light    2   4         2         2           1          80   
698  5     Heavy    2   4         4         4           1         120   
699  6    Ranged    2   1         1         1           3         100   

    moveTime attackTime  ... returnTime harvestAmount sightRadius isResource  \
0         10         