### import libraries

In [176]:
import pandas as pd
import xml.etree.ElementTree as ET
from collections import defaultdict
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import zipfile
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import time

### helper functions and classes

In [18]:
# Function to extract and parse XML from a zip file
def extract_and_parse_xml(zip_path, file_name):
    # use the xipfile library to get the content => https://docs.python.org/3/library/zipfile.html
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        with zip_ref.open(file_name) as xml_file:
            # Read the content of the file
            xml_content = xml_file.read()
            # Parse the XML to a python object => https://docs.python.org/3/library/xml.etree.elementtree.html
            root = ET.fromstring(xml_content)
    return root

In [237]:
# helper to find a specific row in the tournament summary file
def find_row(filepath, content):
    with open(filepath, 'r') as file:
        for i, line in enumerate(file):
            if content in line:
                return i

# define the header content globally, it is standard for all tournaments
header_content = 'iteration\tmap\tai1\tai2\ttime\twinner\tcrashed\ttimedout'

# define the header content globally, it is standard for all tournaments
maps_header_content = 'maps'

In [217]:
# class to define an object containing tournament traces
class TournamentAnalysis:

    #### CONSTRUCTOR ####
    # loads games and traces of the of the tournament
    def __init__(self, name):
        self.name = name
        
        # the path to the tournament summary file
        tournament_summary_file = 'C:/source/MicroRTS/' + name + '/tournament.csv'
   
        # Find the number of models used in this tournament
        n_models = find_row(tournament_summary_file, maps_header_content) - 2
        
        # read in the model names
        ai_models = pd.read_table(tournament_summary_file, usecols=[1], sep='\t', skiprows=1, nrows=n_models)
        ai_models_list = ai_models.iloc[:,0].tolist()
        # create a dict so we can use map()
        model_names_dict = {i: name for i, name in enumerate(ai_models_list)}
        
        # Find the row number of the header
        header_row = find_row(tournament_summary_file, header_content)

        # load the tournament summary into a dataframe
        footer_length = (n_models * 7) + 7
        df_games = pd.read_table(tournament_summary_file, sep='\t', skiprows=header_row, skipfooter=footer_length, engine='python')

        # add the model names to the dataframe
        df_games['ai1_name'] = df_games['ai1'].map(model_names_dict)
        df_games['ai2_name'] = df_games['ai2'].map(model_names_dict)
        
        # winner columns, so we know which column won
        df_games['winning_model'] = np.where(df_games['winner'] == 0, df_games['ai1'], np.where(df_games['winner'] == 1, df_games['ai2'], -1))
        df_games['winning_model_name'] = df_games['winning_model'].map(model_names_dict)
        # loser columns, so we know which model was beaten
        df_games['losing_model'] = np.where(df_games['winner'] == 0, df_games['ai2'], np.where(df_games['winner'] == 1, df_games['ai2'], -1))
        df_games['losing_model_name'] = df_games['losing_model'].map(model_names_dict)

        # Initialize list to hold trace data
        trace_data = []
        # the path where traces are stored
        traces_path = 'C:/source/MicroRTS/' + self.name + '/traces/'
        
        # loop through all games in the tournament summary
        for index, row in df_games.iterrows():
        
            # compose the filename where the traces for this game will be stored
            filename = "%d-vs-%d-%d-%d.zip" % (row['ai1'], row['ai2'], row['map'], row['iteration']) 
            # Load the XML file
            root = extract_and_parse_xml(traces_path + filename, 'game.xml')
            
            # Iterate through each TraceEntry to extract timestep data
            for entry in root.findall('.//rts.TraceEntry'):
                # timestep ID if this TraceEntry in the game
                time = int(entry.get('time'))
                
                # Extract players' data
                players = entry.findall('.//rts.Player')
                player_data = {f"player_{player.get('ID')}_resources": int(player.get('resources')) for player in players}
                
                # Initialize counters for unit types
                unit_counts = defaultdict(int)
                
                # Extract units - count unit types for each player
                units = entry.findall('.//rts.units.Unit')
                for unit in units:
                    unit_type = unit.get('type')
                    player_id = unit.get('player')
                    if player_id != '-1':  # Exclude neutral units like resources
                        unit_counts[f"player_{player_id}_{unit_type}_units"] += 1
                
                # Extract actions' data
                action_counts = defaultdict(int)
                # get an array of action tags
                actions = entry.findall('.//action')
                # loop through each action
                for action in actions:
                    # the unit that the action was performed on
                    unit_id = action.get('unitID')
                    # the action that was performed
                    unit_action = action.find('UnitAction')
                    # if an action was defined
                    if unit_action is not None:
                        # action details
                        action_type = unit_action.get('type')
                        parameter = unit_action.get('parameter')
                        for unit in units:
                            if unit.get('ID') == unit_id:
                                player_id = unit.get('player')
                                break
                        # create columns for player and action type, or player and parameter
                        action_counts[f"player_{player_id}_action_type_{action_type}_count"] += 1
                        action_counts[f"player_{player_id}_action_parameter_{parameter}_count"] += 1
                
                # Combine data for this timestep
                timestep_data = {'time': time}
                timestep_data.update(player_data)
                timestep_data.update(unit_counts)
                timestep_data.update(action_counts)
        
                # add label columns
                timestep_data['opponent'] = row['ai2_name']
                timestep_data['game_id'] = filename
                
                # Add to data list of traces
                trace_data.append(timestep_data)
        
        # Create DataFrame from the traces
        df = pd.DataFrame(trace_data)
        
        # Fill NaN values with 0 (in case there are no actions of certain types in some timesteps)
        df = df.fillna(0)

        # Set properties on this object
        self.games = df_games
        self.models = model_names_dict
        self.traces = df  
        self.n_models = n_models
    
    #### PREPROCESSING ####
    # preprocesses data into sliding windows and train-test split
    def preprocess(self, window_size = 5, test_size=0.2):
        X, y = [], []
        i = 0
        df = self.traces
        # loop through the timestep data
        while i < len(df) - window_size:
            # make sure the next N traces are within the same game (don't want to mix games in a window)
            if df.iloc[i]['game_id'] == df.iloc[i + window_size]['game_id']:
                # Take take N traces at a time
                window = df.iloc[i:i + window_size]
                X.append(window.drop(columns=['time', 'game_id', 'opponent']).values)
                # Use the opponent label at the end of the window
                y.append(window['opponent'].iloc[-1])
                # increment i to make a sliding window starting with the next
                i += 1
            else:
                # we need to skip to start a new window at the next different value
                i = i + window_size - 1

        X = np.array(X)
        self.y = np.array(y)

        # Encode the opponent labels
        self.label_encoder = LabelEncoder()
        y_encoded = self.label_encoder.fit_transform(y)

        # We need to flatten the X array for Scikit-Learn
        num_samples, window_size, num_features = X.shape
        # Flatten each window
        self.X = np.array([window.flatten() for window in X])
        
        # Split the data into training and testing sets
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(self.X, self.y, test_size=test_size, random_state=42, stratify=self.y)

    #### TRAIN ####
    def train(self):
        # Initialize the classifier
        self.classifier = RandomForestClassifier(n_estimators=100, random_state=42)
        
        # Train the classifier
        self.classifier.fit(self.X_train, self.y_train)

    #### EVALUATE ####
    def evaluate(self, X_test, y_test):
        # Make predictions on the test set
        y_pred = self.classifier.predict(X_test)
        # Evaluate performance
        print("Overall accuracy:", accuracy_score(y_test, y_pred))
        print("Report:")
        print(classification_report(y_test, y_pred, target_names=self.label_encoder.classes_))
        return y_pred
        

In [219]:
%%time
t7 = TournamentAnalysis("tournament_7")

CPU times: total: 1 s
Wall time: 3.28 s


In [221]:
%%time
t7.preprocess()

CPU times: total: 1.66 s
Wall time: 7.78 s


In [207]:
%%time
t7.train()

CPU times: total: 93.8 ms
Wall time: 975 ms


In [209]:
t7.evaluate(t7.X_test, t7.y_test)

Overall accuracy: 0.9326837909654562
Report:
                              precision    recall  f1-score   support

 HeavyRush(AStarPathFinding)       0.92      0.90      0.91      1113
 LightRush(AStarPathFinding)       0.93      0.94      0.94      1672
RangedRush(AStarPathFinding)       0.91      0.91      0.91      1111
WorkerRush(AStarPathFinding)       1.00      1.00      1.00       620

                    accuracy                           0.93      4516
                   macro avg       0.94      0.94      0.94      4516
                weighted avg       0.93      0.93      0.93      4516

CPU times: total: 78.1 ms
Wall time: 107 ms


array(['LightRush(AStarPathFinding)', 'RangedRush(AStarPathFinding)',
       'LightRush(AStarPathFinding)', ..., 'WorkerRush(AStarPathFinding)',
       'LightRush(AStarPathFinding)', 'HeavyRush(AStarPathFinding)'],
      dtype='<U28')

In [223]:
%%time
t5 = TournamentAnalysis("tournament_5")

CPU times: total: 2min 45s
Wall time: 10min 6s


In [225]:
%%time
t5.preprocess()

CPU times: total: 4min 13s
Wall time: 20min 58s


In [227]:
%%time
t5.train()

CPU times: total: 3min 6s
Wall time: 10min 3s


In [233]:
%%time
t5.evaluate(t5.X_test, t5.y_test)

Overall accuracy: 0.8096149140493195
Report:
                                 precision    recall  f1-score   support

 HeavyDefense(AStarPathFinding)       0.81      0.79      0.80    120919
    HeavyRush(AStarPathFinding)       0.79      0.78      0.78    116624
 LightDefense(AStarPathFinding)       0.83      0.92      0.87     87741
    LightRush(AStarPathFinding)       0.88      0.87      0.87     71384
RangedDefense(AStarPathFinding)       0.70      0.68      0.69     95019
   RangedRush(AStarPathFinding)       0.68      0.67      0.68     92311
WorkerDefense(AStarPathFinding)       0.98      0.98      0.98     87140
   WorkerRush(AStarPathFinding)       0.93      0.93      0.93     27693

                       accuracy                           0.81    698831
                      macro avg       0.82      0.83      0.82    698831
                   weighted avg       0.81      0.81      0.81    698831

CPU times: total: 7.16 s
Wall time: 27.5 s


array(['LightDefense(AStarPathFinding)', 'LightDefense(AStarPathFinding)',
       'WorkerDefense(AStarPathFinding)', ...,
       'RangedRush(AStarPathFinding)', 'WorkerDefense(AStarPathFinding)',
       'LightDefense(AStarPathFinding)'], dtype='<U31')

In [235]:
t5.models

{0: 'WorkerRush(AStarPathFinding)',
 1: 'LightRush(AStarPathFinding)',
 2: 'HeavyRush(AStarPathFinding)',
 3: 'RangedRush(AStarPathFinding)',
 4: 'WorkerDefense(AStarPathFinding)',
 5: 'LightDefense(AStarPathFinding)',
 6: 'HeavyDefense(AStarPathFinding)',
 7: 'RangedDefense(AStarPathFinding)',
 8: nan,
 9: 'C:\\source\\MicroRTS\\maps\\16x16\\basesWorkers16x16.xml',
 10: '100',
 11: '5000',
 12: '100',
 13: '-1',
 14: '1000',
 15: 'TRUE',
 16: 'TRUE',
 17: 'TRUE',
 18: 'FALSE'}

### Tournament 8 - 8 models, 25 iterations

In [242]:
%%time
# load tournament info
tournament_8 = TournamentAnalysis("tournament_8")

CPU times: total: 17.5 s
Wall time: 1min 55s


In [244]:
# view the models in the tournament
tournament_8.models

{0: 'WorkerRush(AStarPathFinding)',
 1: 'LightRush(AStarPathFinding)',
 2: 'HeavyRush(AStarPathFinding)',
 3: 'RangedRush(AStarPathFinding)',
 4: 'WorkerDefense(AStarPathFinding)',
 5: 'LightDefense(AStarPathFinding)',
 6: 'HeavyDefense(AStarPathFinding)',
 7: 'RangedDefense(AStarPathFinding)'}

In [246]:
%%time
# preprocess the data
tournament_8.preprocess()

CPU times: total: 1min 32s
Wall time: 3min 50s


In [248]:
%%time
tournament_8.train()

CPU times: total: 20.2 s
Wall time: 1min 28s


In [254]:
%%time
# evaluate the model trained on this tournament
tournament_8.evaluate(tournament_8.X_test, tournament_8.y_test)

Overall accuracy: 0.782607333615275
Report:
                                 precision    recall  f1-score   support

 HeavyDefense(AStarPathFinding)       0.77      0.75      0.76     20320
    HeavyRush(AStarPathFinding)       0.74      0.71      0.72     17820
 LightDefense(AStarPathFinding)       0.77      0.90      0.83     17885
    LightRush(AStarPathFinding)       0.88      0.86      0.87     18035
RangedDefense(AStarPathFinding)       0.63      0.57      0.60     14920
   RangedRush(AStarPathFinding)       0.66      0.67      0.67     17000
WorkerDefense(AStarPathFinding)       0.98      0.96      0.97     16150
   WorkerRush(AStarPathFinding)       0.89      0.93      0.91      5556

                       accuracy                           0.78    127686
                      macro avg       0.79      0.79      0.79    127686
                   weighted avg       0.78      0.78      0.78    127686

CPU times: total: 1.09 s
Wall time: 5.1 s


array(['WorkerDefense(AStarPathFinding)',
       'LightDefense(AStarPathFinding)', 'RangedRush(AStarPathFinding)',
       ..., 'LightRush(AStarPathFinding)',
       'HeavyDefense(AStarPathFinding)', 'LightDefense(AStarPathFinding)'],
      dtype='<U31')

### Tournament 10 - 5 models, 25 iterations

In [260]:
%%time
# load tournament info
tournament_10 = TournamentAnalysis("tournament_10")

CPU times: total: 7.48 s
Wall time: 49.5 s


In [262]:
# view the models in the tournament
tournament_10.models

{0: 'CoacAI(AStarPathFinding)',
 1: 'WorkerRush(AStarPathFinding)',
 2: 'LightRush(AStarPathFinding)',
 3: 'HeavyRush(AStarPathFinding)',
 4: 'RangedRush(AStarPathFinding)'}

In [264]:
%%time
# preprocess the data
tournament_10.preprocess()

CPU times: total: 31.1 s
Wall time: 2min 11s


In [266]:
%%time
tournament_10.train()

CPU times: total: 11.4 s
Wall time: 24 s


In [268]:
%%time
# evaluate the model trained on this tournament
tournament_10.evaluate(tournament_10.X_test, tournament_10.y_test)

Overall accuracy: 0.9321015917476905
Report:
                              precision    recall  f1-score   support

    CoacAI(AStarPathFinding)       1.00      1.00      1.00     34603
 HeavyRush(AStarPathFinding)       0.80      0.84      0.82      9595
 LightRush(AStarPathFinding)       0.89      0.87      0.88     13470
RangedRush(AStarPathFinding)       0.87      0.85      0.86     12015
WorkerRush(AStarPathFinding)       1.00      1.00      1.00      4575

                    accuracy                           0.93     74258
                   macro avg       0.91      0.91      0.91     74258
                weighted avg       0.93      0.93      0.93     74258

CPU times: total: 953 ms
Wall time: 1.99 s


array(['CoacAI(AStarPathFinding)', 'LightRush(AStarPathFinding)',
       'HeavyRush(AStarPathFinding)', ..., 'LightRush(AStarPathFinding)',
       'CoacAI(AStarPathFinding)', 'CoacAI(AStarPathFinding)'],
      dtype='<U28')

In [296]:
# loop throuh the CoacAI games from tournament 10 and predict them
for i in range (10):
    if(tournament_10.X[i][7] == 0):
        print(tournament_8.classifier.predict(tournament_10.X))

['WorkerRush(AStarPathFinding)' 'WorkerRush(AStarPathFinding)'
 'WorkerRush(AStarPathFinding)' ... 'HeavyRush(AStarPathFinding)'
 'HeavyRush(AStarPathFinding)' 'HeavyRush(AStarPathFinding)']
['WorkerRush(AStarPathFinding)' 'WorkerRush(AStarPathFinding)'
 'WorkerRush(AStarPathFinding)' ... 'HeavyRush(AStarPathFinding)'
 'HeavyRush(AStarPathFinding)' 'HeavyRush(AStarPathFinding)']
['WorkerRush(AStarPathFinding)' 'WorkerRush(AStarPathFinding)'
 'WorkerRush(AStarPathFinding)' ... 'HeavyRush(AStarPathFinding)'
 'HeavyRush(AStarPathFinding)' 'HeavyRush(AStarPathFinding)']
['WorkerRush(AStarPathFinding)' 'WorkerRush(AStarPathFinding)'
 'WorkerRush(AStarPathFinding)' ... 'HeavyRush(AStarPathFinding)'
 'HeavyRush(AStarPathFinding)' 'HeavyRush(AStarPathFinding)']
['WorkerRush(AStarPathFinding)' 'WorkerRush(AStarPathFinding)'
 'WorkerRush(AStarPathFinding)' ... 'HeavyRush(AStarPathFinding)'
 'HeavyRush(AStarPathFinding)' 'HeavyRush(AStarPathFinding)']
['WorkerRush(AStarPathFinding)' 'WorkerRush(A

In [290]:
tournament_10.X[0]

array([5., 5., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 5., 5., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 5., 5., 1., 1., 1., 1., 0., 0., 0.,
       1., 0., 0., 0., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 5., 5., 1., 1., 1.,
       1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 1., 1., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 6.,
       6., 1., 1., 1., 1., 0., 0., 1., 1., 0., 0., 1., 0., 0., 0., 0., 0.,
       0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0.])

In [292]:
tournament_10.traces

Unnamed: 0,time,player_0_resources,player_1_resources,player_0_Base_units,player_1_Base_units,player_0_Worker_units,player_1_Worker_units,opponent,game_id,player_0_action_type_2_count,...,player_1_Ranged_units,player_0_Ranged_units,player_1_action_type_5_count,player_1_action_parameter_None_count,player_0_action_type_5_count,player_0_action_parameter_None_count,player_1_action_parameter_10_count,player_1_Light_units,player_0_action_parameter_10_count,player_0_Light_units
0,0,5,5,1.0,1.0,1.0,1.0,CoacAI(AStarPathFinding),0-vs-0-0-0.zip,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0,5,5,1.0,1.0,1.0,1.0,CoacAI(AStarPathFinding),0-vs-0-0-0.zip,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,20,5,5,1.0,1.0,1.0,1.0,CoacAI(AStarPathFinding),0-vs-0-0-0.zip,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,30,5,5,1.0,1.0,1.0,1.0,CoacAI(AStarPathFinding),0-vs-0-0-0.zip,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,40,6,6,1.0,1.0,1.0,1.0,CoacAI(AStarPathFinding),0-vs-0-0-0.zip,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
376280,2960,2,2,1.0,1.0,1.0,1.0,RangedRush(AStarPathFinding),4-vs-4-0-24.zip,0.0,...,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
376281,2970,2,2,1.0,1.0,1.0,1.0,RangedRush(AStarPathFinding),4-vs-4-0-24.zip,0.0,...,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
376282,2980,3,3,1.0,1.0,1.0,1.0,RangedRush(AStarPathFinding),4-vs-4-0-24.zip,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0
376283,2990,3,3,1.0,1.0,1.0,1.0,RangedRush(AStarPathFinding),4-vs-4-0-24.zip,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
