In [None]:
import json
import sc2reader
import os
import uuid

# Account name; same value as the default `personal_account` argument of
# extract_and_save_replay_info.
# NOTE(review): not referenced anywhere in the visible cells — confirm it is still needed.
player_db = "TheLeengwist"

import sc2reader
import psycopg2
from psycopg2 import sql

# Function to insert replay data into the database
def insert_replay_data(conn, replay_info):
    """Insert one row into the ``replays`` table and commit it.

    Args:
        conn: Open database connection exposing ``cursor()``, ``commit()``
            and ``rollback()`` (a psycopg2 connection in this notebook).
        replay_info: Mapping holding the keys ``game_uuid``, ``start_time``,
            ``game_length``, ``player1_name``, ``player1_race``,
            ``player2_name``, ``player2_race``, ``winner_name``,
            ``match_up``, ``win`` and ``category``.

    Raises:
        KeyError: If ``replay_info`` lacks one of the keys above; raised
            before the database is touched.
        Exception: Any error from the database driver is re-raised after the
            transaction has been rolled back.
    """
    columns = (
        'game_uuid', 'start_time', 'game_length',
        'player1_name', 'player1_race',
        'player2_name', 'player2_race',
        'winner_name', 'match_up',
        'win', 'category',
    )
    # Build the parameter tuple first so a malformed dict fails fast.
    params = tuple(replay_info[column] for column in columns)

    try:
        with conn.cursor() as cursor:
            cursor.execute(sql.SQL("""
            INSERT INTO replays (game_uuid, start_time, game_length, player1_name, player1_race, player2_name, player2_race, winner_name, match_up, win, category)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """), params)
        conn.commit()
    except Exception:
        # A failed statement leaves the transaction aborted; without a
        # rollback every later statement on this connection fails as well.
        conn.rollback()
        raise

def extract_and_save_replay_info(conn, replay_file, uuid, personal_account="TheLeengwist"):
    """Load a replay, store its summary row and return that summary.

    Only replays with exactly two players are processed.

    Args:
        conn: Database connection handed through to ``insert_replay_data``.
        replay_file: Path of the replay, passed to ``sc2reader.load_replay``.
        uuid: Identifier stored as ``game_uuid`` for this replay. The name
            shadows the ``uuid`` module inside this function only.
        personal_account: Player name regarded as "me"; drives the ``win``
            and ``category`` fields.

    Returns:
        The dict that was inserted, or ``None`` when the replay does not have
        exactly two players (nothing is inserted in that case).
    """
    replay = sc2reader.load_replay(replay_file)
    players = replay.players
    if len(players) != 2:
        return None  # Only process 1v1 replays

    # Assume player 1 is always the first player listed
    first, second = players

    # A replay may carry no 'Win' result at all; record the winner as None
    # instead of failing with IndexError.
    winner_name = next((player.name for player in players if player.result == 'Win'), None)

    replay_info = {
        "game_uuid": uuid,
        "start_time": replay.start_time.strftime("%Y-%m-%d %H:%M:%S"),
        "game_length": replay.frames / replay.game_fps / 60,  # frames -> minutes
        "player1_name": first.name,
        "player1_race": first.play_race,
        "player2_name": second.name,
        "player2_race": second.play_race,
        "winner_name": winner_name,
        "match_up": f"{first.play_race[0]}v{second.play_race[0]}",
        # Exact comparison, consistent with the membership test used for
        # `category`; a substring test would also match longer names that
        # merely contain the account name.
        "win": winner_name == personal_account,
        "category": "personal" if personal_account in (first.name, second.name) else "public",
    }
    insert_replay_data(conn, replay_info)
    return replay_info

# Function to insert unit composition data into the database
def insert_unit_composition_data(conn, replay_id, player_name, unit_type, count, percentage):
    """Insert one row into the ``unit_compositions`` table and commit it.

    Args:
        conn: Open database connection exposing ``cursor()``, ``commit()``
            and ``rollback()`` (a psycopg2 connection in this notebook).
        replay_id: Identifier of the replay the row belongs to.
        player_name: Name of the player who owns the units.
        unit_type: Unit type name.
        count: Value stored in the ``unit_count`` column.
        percentage: Value stored in the ``unit_percentage`` column.

    Raises:
        Exception: Any error from the database driver is re-raised after the
            transaction has been rolled back.
    """
    row = (replay_id, player_name, unit_type, count, percentage)
    try:
        with conn.cursor() as cursor:
            cursor.execute(sql.SQL("""
            INSERT INTO unit_compositions (replay_id, player_name, unit_type, unit_count, unit_percentage)
            VALUES (%s, %s, %s, %s, %s)
        """), row)
        conn.commit()
    except Exception:
        # A failed statement leaves the transaction aborted; roll back so the
        # connection can be reused for the next insert.
        conn.rollback()
        raise


def extract_and_save_unit_compositions(conn, replay_file, replay_id=None):
    """Count the units each player produced in a replay and store the counts.

    Every ``UnitBornEvent`` whose unit belongs to one of the replay's players
    is tallied per unit type, except for the types in ``invalid_units``. One
    row per (player, unit type) is written via ``insert_unit_composition_data``.

    Args:
        conn: Database connection handed through to
            ``insert_unit_composition_data``.
        replay_file: Path of the replay, passed to ``sc2reader.load_replay``.
        replay_id: Identifier stored with every row. Pass the id used for the
            replay's summary row so both tables can be joined; when omitted a
            fresh UUID string is generated.

    Returns:
        ``{player_name: {unit_type: count}}`` for all players of the replay.
    """
    replay = sc2reader.load_replay(replay_file)
    if replay_id is None:
        replay_id = str(uuid.uuid4())  # Generate a unique ID for each replay
    unit_compositions = {player.name: {} for player in replay.players}

    # Unit type names that are never counted.
    # NOTE(review): Corruptor, Infestor and SwarmHostMP are listed here as
    # well — confirm that excluding them is intended.
    invalid_units = {
        "BeaconArmy", "BeaconDefend", "BeaconAttack", "BeaconHarass", "BeaconIdle",
        "BeaconAuto", "BeaconDetect", "BeaconScout", "BeaconClaim", "BeaconExpand",
        "BeaconRally", "BeaconCustom1", "BeaconCustom2", "BeaconCustom3", "BeaconCustom4",
        "CommandCenter", "KD8Charge", "Hatchery", "Lair", "Hive", "Extractor", "Nexus",
        "OrbitalCommand", "AutoTurret", "MULE", "Larva", "Interceptor",
        "InvisibleTargetDummy", "Corruptor", "SwarmHostMP", "Infestor",
        "ChangelingMarineShield", "BroodlingEscort", "AdeptPhaseShift",
    }

    for event in replay.events:
        if not isinstance(event, sc2reader.events.tracker.UnitBornEvent):
            continue
        owner = event.unit.owner
        # Skip units without an owner and owners that are not players of this replay.
        if not owner or owner.name not in unit_compositions:
            continue
        unit_type = event.unit_type_name
        if unit_type in invalid_units:
            continue
        counts = unit_compositions[owner.name]
        counts[unit_type] = counts.get(unit_type, 0) + 1

    # Insert data into the database
    for player_name, units in unit_compositions.items():
        # Every stored count is >= 1, so the total is positive whenever the
        # inner loop has anything to iterate over.
        total_units = sum(units.values())
        for unit_type, count in units.items():
            percentage = round(count / total_units, 2)
            insert_unit_composition_data(conn, replay_id, player_name, unit_type, count, percentage)

    return unit_compositions


def process_replays(directory, conn=None):
    """Process every ``.SC2Replay`` file in a directory.

    For each replay a UUID is generated and used for both the summary row and
    the unit-composition rows, so the two tables share the same identifier.
    A replay that fails is reported on stdout and skipped.

    Args:
        directory: Folder that is scanned (non-recursively) for replay files.
        conn: Open database connection. When omitted, a connection is opened
            with ``psycopg2.connect("")`` — i.e. configured through libpq's
            ``PG*`` environment variables — and closed again before returning.

    Returns:
        A list with one dict per successfully processed replay, holding the
        keys ``replay_file``, ``replay_info`` (return value of
        ``extract_and_save_replay_info``) and ``unit_compositions``.
    """
    owns_conn = conn is None
    if owns_conn:
        conn = psycopg2.connect("")

    all_data = []
    try:
        # Sorted so the processing order does not depend on the file system.
        for filename in sorted(os.listdir(directory)):
            if not filename.endswith(".SC2Replay"):
                continue
            filepath = os.path.join(directory, filename)
            game_uuid = str(uuid.uuid4())
            try:
                replay_info = extract_and_save_replay_info(conn, filepath, uuid=game_uuid)
                unit_compositions = extract_and_save_unit_compositions(
                    conn, filepath, replay_id=game_uuid
                )
            except Exception as e:
                # Clear a possibly aborted transaction so the next replay
                # can still be written on the same connection.
                conn.rollback()
                print(f"Failed to process {filename}: {e}")
                continue

            all_data.append({
                "replay_file": filename,
                "replay_info": replay_info,
                "unit_compositions": unit_compositions,
            })
    finally:
        if owns_conn:
            conn.close()

    return all_data

# Example usage
# NOTE(review): absolute, machine-specific path (appears to be a Windows drive
# mounted under /mnt/c); adjust it before running on another machine.
directory = "/mnt/c/Users/matth/Documents/StarCraft II/Accounts/86722028/1-S2-1-3925175/Replays/Multiplayer/"
# Runs when the cell executes; process_replays prints one line for every
# replay it fails to process and returns the list of collected entries.
all_replay_data = process_replays(directory)


: 

In [None]:
import json
import pandas as pd
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# Load JSON data from file
input_file = "data/replays_data.json"
# Read the file as UTF-8 explicitly so decoding does not depend on the
# platform's locale default encoding.
with open(input_file, "r", encoding="utf-8") as f:
    replay_data = json.load(f)

: 

In [None]:
# Keep only the games whose recorded opponent race is Terran.
# NOTE(review): the replay_info dict built in the first cell has no
# 'opponent_race' key — confirm the schema of the loaded JSON file.
terran_games = [
    entry
    for entry in replay_data
    if entry['replay_info']['opponent_race'] == 'Terran'
]