### Dashboard for Exploration and Statistical Learning
#### Starcraft 2 Professional Games
###### Jeffrey Kwarsick, Ph.D.
###### September 2020

#### Introduction
This is the space where I will flesh out an introduction.  Something about the richness and complexity of Starcraft played at the highest level.

### References
1. https://miguelgondu.github.io/python/ai/video%20games/2018/09/04/a-tutorial-on-sc2reader-events-and-units.html
2. https://lauler.github.io/blog/replay/
3. https://sc2reader.readthedocs.io/en/latest/
4. https://lotv.spawningtool.com/replaypacks/
5. https://github.com/GraylinKim/sc2reader

The most useful resource from the above list of references is the GitHub repository containing all code for `sc2reader`.  The documentation is not complete, so if there is information you are interested in extracting, the best place to find it is in the source code.

In [78]:
# import relevant libraries
import os
import numpy as np
import scipy as sci
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import matplotlib.animation as animation
import pandas as pd
from scipy.optimize import curve_fit
import sklearn
import re
import seaborn as sns
import math
from datetime import datetime
import glob
from IPython.display import clear_output
import ipywidgets as widgets
from ipywidgets import interact, interact_manual, Layout, interactive
####################################
import sc2reader

Just checking in on what my current working directory is.

In [5]:
# Capture the notebook's current working directory and echo it; the replay
# search in the next cell is rooted at this path.
cwd = os.getcwd()
print(cwd)

C:\Users\stark\Documents\Coding\SC2


This next cell uses `os.walk()` to create a list of all Starcraft 2 replays that I have in my SC2_Replays folder.

In [19]:
# Recursively collect the full path of every '.SC2Replay' file found under
# <cwd>/SC2_Replays.  Paths are assembled with os.path.join rather than
# hard-coded '\\' separators so the search is not tied to Windows.
replays = []
for root, dirs, files in os.walk(os.path.join(cwd, 'SC2_Replays')):
    for file_name in files:
        if file_name.endswith('.SC2Replay'):
            replays.append(os.path.join(root, file_name))

In [48]:
def split_events(replay):
    """
    Group the events of a loaded sc2reader replay by event type.

    Walks replay.events once and files every event under its .name.

    :param replay: loaded replay exposing a list of events as .events
    :RETURN Dictionary: KEY   = Event Type (event.name)
                        VALUE = List(Events of that type, in replay order)
    """
    grouped = {}
    for evt in replay.events:
        # setdefault creates the bucket the first time an event type is seen
        grouped.setdefault(evt.name, []).append(evt)
    return grouped

In [39]:
def extract_PlayerInfo(players, player_id):
    """
    Pull a player's name and race out of an sc2reader .player entry.

    `players` is a dictionary:
        Key   = Player_ID (int)
        Value = player object whose string form is
                'Player # - PlayerGamerTag (Race)'
    Only the string form of the entry is inspected.

    The race is printed after the gamer tag, so the LAST race word in the
    string is used.  Taking the first one would report the wrong race for a
    tag that itself contains a race word (e.g. 'ZergSlayer (Terran)').

    :param players: mapping of Player_ID -> player object
    :param player_id: key of the player to describe
    :RETURN [PlayerName, PlayerRace]
    :raises IndexError: if the string form holds no name or no race
    """
    # A capture group (rather than a fixed-width lookbehind) lets the player
    # number have more than one digit.
    re_plyr_name = r"Player\s\d+\s-\s(\S*)"
    re_plyr_race = r"(Terran|Zerg|Protoss)"
    description = str(players[player_id])
    plyr_name = re.findall(re_plyr_name, description)
    plyr_race = re.findall(re_plyr_race, description)
    return [plyr_name[0], plyr_race[-1]]

In [45]:
def determineWinner(replay):
    """
    Work out who won a two-player game and how long it lasted.

    Generally, the loser is the first person to leave a game.  You could look
    for a ChatEvent containing 'gg', but not all players do this (for one
    reason or another), so the first leave event of a competing player
    identifies the loser.

    The events are scanned in order for the first 'PlayerLeaveEvent' whose
    player is replay.player[1] or replay.player[2]; leave events of anyone
    else are ignored.  If that player is Player 1 the winner is Player 2, and
    vice versa.  The winner's name and race are parsed from the player's
    string form, 'Player # - PlayerGamerTag (Race)'; the LAST race word is
    used so a gamer tag containing a race word cannot be mistaken for the race.

    :param replay: loaded replay exposing .events and a .player mapping with
                   keys 1 and 2
    :RETURN [game_length, [Player Name, Player Race]]
            game_length is the frame of that first leave event // 24
    :raises IndexError: if neither competing player has a PlayerLeaveEvent,
                        or the winner's string form cannot be parsed
    """
    competitors = (replay.player[1], replay.player[2])
    first_leave = None
    for event in replay.events:
        if event.name == 'PlayerLeaveEvent' and event.player in competitors:
            first_leave = event
            break  # only the first competing leave event matters
    if first_leave is None:
        raise IndexError("replay contains no PlayerLeaveEvent for player 1 or player 2")

    if first_leave.player == replay.player[1]:
        winner = replay.player[2]
    else:
        winner = replay.player[1]

    description = str(winner)
    plyr_name = re.findall(r"Player\s\d+\s-\s(\S*)", description)
    plyr_race = re.findall(r"(Terran|Zerg|Protoss)", description)
    # NOTE(review): assumes 24 replay frames per second - confirm against the
    # frame rate sc2reader reports for the replay.
    game_length = int(first_leave.frame) // 24
    return [game_length, [plyr_name[0], plyr_race[-1]]]

In [47]:
# (DataFrame column, PlayerStatsEvent attribute) pairs, in output column order.
_STAT_EVENT_COLUMNS = (
    ('Frames', 'frame'),
    # Economy
    ('WorkersActive', 'workers_active_count'),
    ('CurrentMinerals', 'minerals_current'),
    ('MineralCollectionRate', 'minerals_collection_rate'),
    ('CurrentVespene', 'vespene_current'),
    ('VespeneCollectionRate', 'vespene_collection_rate'),
    # Minerals and Vespene used, in progress
    ('MinUsedProgArmy', 'minerals_used_in_progress_army'),
    ('VesUsedProgArmy', 'vespene_used_in_progress_army'),
    ('MinUsedProgEcon', 'minerals_used_in_progress_economy'),
    ('VesUsedProgEcon', 'vespene_used_in_progress_economy'),
    ('MinUsedProgTech', 'minerals_used_in_progress_technology'),
    ('VesUsedProgTech', 'vespene_used_in_progress_technology'),
    # Minerals and Vespene used, current
    ('CurrArmy_Min', 'minerals_used_current_army'),
    ('CurrArmy_Ves', 'vespene_used_current_army'),
    ('CurrEcon_Min', 'minerals_used_current_economy'),
    ('CurrEcon_Ves', 'vespene_used_current_economy'),
    ('CurrTech_Min', 'minerals_used_current_technology'),
    ('CurrTech_Ves', 'vespene_used_current_technology'),
    # Minerals and Vespene lost
    ('LostArmy_Min', 'minerals_lost_army'),
    ('LostArmy_Ves', 'vespene_lost_army'),
    ('LostEcon_Min', 'minerals_lost_economy'),
    ('LostEcon_Ves', 'vespene_lost_economy'),
    ('LostTech_Min', 'minerals_lost_technology'),
    ('LostTech_Ves', 'vespene_lost_technology'),
    # Minerals and Vespene killed
    ('KilledArmy_Min', 'minerals_killed_army'),
    ('KilledArmy_Ves', 'vespene_killed_army'),
    ('KilledEcon_Min', 'minerals_killed_economy'),
    ('KilledEcon_Ves', 'vespene_killed_economy'),
    ('KilledTech_Min', 'minerals_killed_technology'),
    ('KilledTech_Ves', 'vespene_killed_technology'),
    # Food
    ('FoodUsed', 'food_used'),
    ('FoodAvailable', 'food_made'),
    # Friendly-fire losses
    ('FriendlyFire_lossArmy_Min', 'ff_minerals_lost_army'),
    ('FriendlyFire_lossArmy_Ves', 'ff_vespene_lost_army'),
    ('FriendlyFire_lossEcon_Min', 'ff_minerals_lost_economy'),
    ('FriendlyFire_lossEcon_Ves', 'ff_vespene_lost_economy'),
    ('FriendlyFire_lossTech_Min', 'ff_minerals_lost_technology'),
    ('FriendlyFire_lossTech_Ves', 'ff_vespene_lost_technology'),
)


def extract_StatEvents(PlayerStatsEventList, player_id):
    """
    extract all stats for one player and RETURN them as a DataFrame
    stats are collected in the replay every 10 seconds
    :param = PlayerStatsEventList  (events of both players; each exposes .pid)
    :param = player_id             (only events whose .pid equals this are kept)

    :return = single DataFrame() for Single Player in Game, one row per kept
              event (in list order) and one column per entry of
              _STAT_EVENT_COLUMNS:
    - Frames
    - Economy = [Active Workers, Current Minerals, Minerals Collection Rate,
                 Current Vespene, Vespene Collection Rate]
    - Minerals and Vespene Used In-Progress = [Army, Econ, Tech] x [Min, Ves]
    - Minerals and Vespene Current = [Army, Econ, Tech] x [Min, Ves]
    - Minerals and Vespene Lost = [Army, Econ, Tech] x [Min, Ves]
    - Minerals and Vespene Killed = [Army, Econ, Tech] x [Min, Ves]
    - Food Used
    - Food Available
    - Friendly-Fire Losses = [Army, Econ, Tech] x [Min, Ves]

    When no event belongs to player_id the result is an empty DataFrame that
    still carries every column.
    """
    rows = [
        [getattr(event, attribute) for _, attribute in _STAT_EVENT_COLUMNS]
        for event in PlayerStatsEventList
        if event.pid == player_id
    ]
    # Passing columns= (instead of assigning .columns afterwards) keeps the
    # header valid even when rows is empty.
    return pd.DataFrame(rows, columns=[column for column, _ in _STAT_EVENT_COLUMNS])

In [60]:
def plot_stats(p1_stats, p2_stats, replay):
    """
    Draw a six-panel comparison of both players' stats and show the figure.

    Panels (4 x 2 grid; the last two span the full width):
        mineral collection rate | vespene collection rate
        economy losses          | technology losses
        current army value (minerals + vespene)
        army losses (minerals + vespene)

    Player 1 is drawn in blue and player 2 in red.  Every panel uses
    Frames // 24 on the x-axis, so every x-axis is labelled 'Seconds'.

    :param p1_stats: stats DataFrame of player 1 (columns as produced by
                     extract_StatEvents)
    :param p2_stats: stats DataFrame of player 2
    :param replay: loaded replay; replay.player[1] / replay.player[2] supply
                   the legend labels
    :return: None (the figure is displayed with plt.show())
    """
    # NOTE(review): assumes 24 replay frames per second - confirm against the
    # frame rate sc2reader reports for the replay.
    frames_per_second = 24

    plt.style.use('ggplot')
    fig = plt.figure(figsize=(15, 15))
    gs = gridspec.GridSpec(nrows=4, ncols=2, figure=fig)

    # (grid cell, columns added together for y, y-axis label, marker, legend suffix)
    panels = [
        (gs[0, 0], ['MineralCollectionRate'], 'MineralCollectionRate', 'o', ''),
        (gs[0, 1], ['VespeneCollectionRate'], 'VespeneCollectionRate', 'o', ''),
        (gs[1, 0], ['LostEcon_Min', 'LostEcon_Ves'], 'Econ Losses', 'o', ' Econ'),
        (gs[1, 1], ['LostTech_Min', 'LostTech_Ves'], 'Tech Losses', 's', ' Tech'),
        (gs[2, :], ['CurrArmy_Min', 'CurrArmy_Ves'], 'Current Army Value (Min + Ves)', 'o', ' Army'),
        (gs[3, :], ['LostArmy_Min', 'LostArmy_Ves'], 'ArmyLosses', 'o', ' Army'),
    ]
    # (stats, player shown in the legend, line colour)
    players = [
        (p1_stats, replay.player[1], 'b'),
        (p2_stats, replay.player[2], 'r'),
    ]

    for cell, columns, ylabel, marker, suffix in panels:
        ax = fig.add_subplot(cell)
        for stats, player, colour in players:
            values = stats[columns[0]]
            for column in columns[1:]:
                values = values + stats[column]
            ax.plot(stats['Frames'] // frames_per_second, values,
                    colour + marker + '-', label=str(player) + suffix)
        ax.set_ylabel(ylabel)
        ax.set_xlabel('Seconds')
        ax.legend(loc='best')
    plt.show()

In [61]:
layout = widgets.Layout(width='auto', height='40px')


def load_replay(replay):
    """
    Load one replay file, print a short summary and plot both players' stats.

    Prints the matchup, the map name, the winner and the game length, then
    hands the per-player stats DataFrames to plot_stats.

    :param replay: path of the .SC2Replay file to load
    :return: the replay object returned by sc2reader.load_replay
    """
    game = sc2reader.load_replay(replay, load_map=True)
    first_player = extract_PlayerInfo(game.player, 1)
    second_player = extract_PlayerInfo(game.player, 2)
    # determineWinner returns [game_length, [winner name, winner race]]
    game_length, victor = determineWinner(game)
    stat_events = split_events(game)['PlayerStatsEvent']
    stats_one = extract_StatEvents(stat_events, 1)
    stats_two = extract_StatEvents(stat_events, 2)
    print("{} ({}) vs. {} ({})".format(*first_player, *second_player))
    print("Map: {}".format(game.map.name))
    print("Winner: {} ({})".format(*victor))
    print("Game Length: {}s".format(game_length))
    plot_stats(stats_one, stats_two, game)
    return game


# Dropdown of every collected replay path; load_replay runs when the button is pressed.
# NOTE(review): load_replay takes no `layout` argument - confirm that
# interact_manual actually applies this Layout rather than ignoring it.
interact_manual(load_replay, replay=replays, layout=layout)

interactive(children=(Dropdown(description='replay', options=('C:\\Users\\stark\\Documents\\Coding\\SC2\\SC2_R…

<function __main__.load_replay(replay)>

In [57]:
def AnalyzeGames(directory, numfiles):
    """
    Summarise every replay found under a directory tree.

    Each file below `directory` is loaded with sc2reader; for every replay
    that loads and parses, one row is recorded with both players' info, the
    map name, the game length and the winner.  Progress is shown as
    '<file name>' and '<rows so far>/<numfiles>' after each recorded replay.

    Files that fail to load or parse are skipped (best effort) and listed
    once the walk is finished.  A row is only recorded after ALL of its
    fields were extracted, so a failure part-way through a replay can never
    shift the columns against each other.

    :param directory: root folder that is walked recursively
    :param numfiles: total shown in the progress line (display only)
    :return: DataFrame with columns
             ['P1_Info', 'P2_Info', 'Map', 'GameLength', 'Winner'];
             empty (but with these columns) when nothing could be analysed
    """
    columns = ['P1_Info', 'P2_Info', 'Map', 'GameLength', 'Winner']
    rows = []
    skipped = []
    for root, dirs, files in os.walk(directory):
        for name in files:
            try:
                replay = sc2reader.load_replay(os.path.join(root, name), load_map=True)
                game_length, winner = determineWinner(replay)
                row = (
                    extract_PlayerInfo(replay.player, 1),
                    extract_PlayerInfo(replay.player, 2),
                    replay.map.name,
                    game_length,
                    winner,
                )
            except Exception as err:
                # Best effort: remember the failure and move on.  Unlike a
                # bare except, this lets KeyboardInterrupt stop a long run.
                skipped.append((name, err))
                continue
            rows.append(row)
            clear_output()
            print(name)
            print("{}/{}".format(len(rows), numfiles))
    for name, err in skipped:
        print("Skipped {}: {!r}".format(name, err))
    return pd.DataFrame(rows, columns=columns)

In [None]:
direct = 'C:\\Users\\stark\\Documents\\Coding\\SC2\\SC2_Replays\\'
# AnalyzeGames uses numfiles only for its "done/total" progress line.  Count
# every file under the directory, because AnalyzeGames attempts to load each
# file it finds there.
numfiles = sum(len(files) for _, _, files in os.walk(direct))
out = AnalyzeGames(direct, numfiles)
out