In [None]:
import lzma
import json

# Function to read .xz archives from ESTA
def read_parsed_demo(filename):
  """Load one parsed demo from an .xz-compressed JSON file.

  Args:
      filename (string): Path of the .xz archive to read.

  Returns:
      The decoded JSON document.
  """
  with lzma.open(filename, "rb") as archive:
    return json.load(archive)

def _add_team_totals(game_state, prefix, players):
    """Accumulate the features shared by both teams into ``game_state``.

    Args:
        game_state (dict): State being built; the ``prefix``-ed counters must
            already be initialised to 0.
        prefix (string): Key prefix of the team, "ct" or "t".
        players (list or None): Player dicts of one team. None is treated as
            an empty team.

    Returns:
        The list of alive players, so the caller can add team-only features.
    """
    alive_players = []
    for p in players or ():
        # The freeze-time-end equipment value is summed over every player,
        # dead or alive; all other features only count alive players.
        game_state[prefix + "EqValStart"] += p["equipmentValueFreezetimeEnd"]
        if not p["isAlive"]:
            continue
        alive_players.append(p)
        game_state[prefix + "Alive"] += 1
        game_state[prefix + "Hp"] += p["hp"]
        game_state[prefix + "Armor"] += p["armor"]
        game_state[prefix + "Helmet"] += p["hasHelmet"]
        game_state[prefix + "Eq"] += p["equipmentValue"]
        game_state[prefix + "Utility"] += p["totalUtility"]
        if p["isInBombZone"]:
            game_state[prefix + "BombZone"] += 1
    return alive_players


def generate_vector_state(frame, map_name):
    """Returns a game state in a dictionary format.

    The keys are always inserted in the same order, so a DataFrame built from
    these dicts has a stable column order (the CatBoost fit in this notebook
    addresses its categorical features by column position).

    Args:
        frame (dict) : Dict output of a frame generated from the DemoParser class
        map_name (string): String indicating the map name

    Returns:
        A dict with keys for each feature. A team whose "players" entry is
        None contributes zeros.
    """
    game_state = {}
    game_state["mapName"] = map_name
    game_state["secondsSincePhaseStart"] = frame["seconds"]
    game_state["bombPlanted"] = frame["bombPlanted"]
    game_state["bombsite"] = frame["bombsite"]
    game_state["totalSmokes"] = len(frame["smokes"])
    game_state["totalFires"] = len(frame["fires"])

    # Team specific info (CT)
    for key in ("ctAlive", "ctHp", "ctArmor", "ctHelmet", "ctEq", "ctUtility",
                "ctEqValStart", "ctBombZone", "defusers"):
        game_state[key] = 0
    for p in _add_team_totals(game_state, "ct", frame["ct"]["players"]):
        game_state["defusers"] += p["hasDefuse"]

    # Team specific info (T)
    for key in ("tAlive", "tHp", "tArmor", "tHelmet", "tEq", "tUtility",
                "tEqValStart", "tHoldingBomb", "tBombZone"):
        game_state[key] = 0
    for p in _add_team_totals(game_state, "t", frame["t"]["players"]):
        # Binary flag: 1 as soon as any alive T carries the bomb.
        if p["hasBomb"]:
            game_state["tHoldingBomb"] = 1

    return game_state

In [None]:
import os

from tqdm import tqdm

# Folder holding the compressed, already-parsed online demos.
online_dir = "C:\\Users\\Matias\\esta\\data\\online"
online_files = [os.path.join(online_dir, f) for f in os.listdir(online_dir)]

#demo_files = online_files + lan_files
demo_files = online_files

# Round end reasons that are kept, grouped by the side that won the round.
# Rounds ending for any other reason contribute no frames.
ct_win_reasons = ("CTWin", "TargetSaved", "BombDefused")
t_win_reasons = ("TargetBombed", "TerroristsWin")

# demoId -> list of rounds, each round being a list of per-frame feature dicts.
parsed_demos = {}

for f in tqdm(demo_files):
  demo = read_parsed_demo(f)

  parsed_demos[demo["demoId"]] = []

  for r in demo["gameRounds"]:
    parsed_frames_df_round = []
    reason = r["roundEndReason"]

    if reason in ct_win_reasons or reason in t_win_reasons:
      ct_win = 1 if reason in ct_win_reasons else 0
      for fr in r["frames"]:
        ct_players = fr["ct"]["players"]
        t_players = fr["t"]["players"]
        # `and` short-circuits, so the later checks only run on frames that
        # actually carry player data (the original used bitwise `&`).
        usable = (
          ct_players is not None
          and t_players is not None
          and fr["clockTime"] != "00:00"
          and fr["t"]["alivePlayers"] >= 0
          and fr["ct"]["alivePlayers"] >= 1
          and len(ct_players) == 5
          and len(t_players) == 5
        )
        if usable:
          # generate_vector_state already stores mapName; only the label is added.
          frame_row = generate_vector_state(fr, demo["mapName"])
          frame_row["ctWin"] = ct_win
          parsed_frames_df_round.append(frame_row)

    # Rounds without usable frames are kept as empty lists.
    parsed_demos[demo["demoId"]].append(parsed_frames_df_round)


In [None]:
import random

import pandas as pd
from sklearn.model_selection import train_test_split

# One seed drives both the frame sampling and the dataset splits. Seeding the
# `random` module alone does not make train_test_split deterministic, so the
# seed is also passed explicitly as random_state below.
SEED = 0
random.seed(SEED)

# Keep exactly one randomly chosen frame per round; rounds with no usable
# frames are skipped.
all_states = []

for demo in tqdm(parsed_demos.keys()):
  for r in parsed_demos[demo]:
    if len(r) > 0:
      total_frames = len(r)
      indices = random.sample(range(0, total_frames), 1)
      all_states.append(r[indices[0]])

all_df = pd.DataFrame(all_states)

X = all_df.drop("ctWin", axis=1).copy()
y = all_df["ctWin"]

# In the first step we will split the data in training and remaining dataset
train_size = 0.7
X_train, X_rem, y_train, y_rem = train_test_split(
    X, y, train_size=train_size, random_state=SEED
)

# The remaining rows are then split evenly into validation and test sets.
test_size = 0.5
X_val, X_test, y_val, y_test = train_test_split(
    X_rem, y_rem, test_size=test_size, random_state=SEED
)

In [None]:
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier

# Cast the three non-numeric features to pandas categoricals in every split,
# in the same order for each split.
categorical_columns = ("bombPlanted", "bombsite", "mapName")
for split in (X_train, X_val, X_test):
    for column in categorical_columns:
        split[column] = split[column].astype("category")

# Every model is fitted on the training split and early-stopped on the
# validation split.
validation_pair = (X_val, y_val)

lgbm = LGBMClassifier()
lgbm.fit(
    X_train,
    y_train,
    eval_set=[validation_pair],
    early_stopping_rounds=50,
    verbose=False,
)

xgb = XGBClassifier(tree_method="gpu_hist", enable_categorical=True)
xgb.fit(
    X_train,
    y_train,
    eval_set=[validation_pair],
    early_stopping_rounds=50,
    verbose=False,
)

# cat_features are column positions: 0, 2 and 3.
cb = CatBoostClassifier(task_type="GPU", devices='0:1', verbose=False)
cb.fit(
    X_train,
    y_train,
    eval_set=[validation_pair],
    cat_features=[0, 2, 3],
    early_stopping_rounds=50,
)

In [None]:
import pickle
# Save the trained models. Each file is opened in a `with` block so the handle
# is flushed and closed as soon as the dump finishes.
for model, filename in ((lgbm, 'lgbm.pkl'), (xgb, "xgb.pkl"), (cb, "cb.pkl")):
    with open(filename, 'wb') as model_file:
        pickle.dump(model, model_file)

In [None]:
from sklearn.metrics import log_loss

# Class probabilities of each model on the held-out test split.
preds_lgbm, preds_xgb, preds_cb = (
    model.predict_proba(X_test) for model in (lgbm, xgb, cb)
)

# Log loss computed from the second probability column of each model.
lgbm_ll, xgb_ll, cb_ll = (
    log_loss(y_test, preds[:, 1]) for preds in (preds_lgbm, preds_xgb, preds_cb)
)

print("{} LGBM, {} XGB, {} CB".format(lgbm_ll, xgb_ll, cb_ll))

# Also store the CatBoost model with CatBoost's own save_model.
cb.save_model("win_prob.cb")

# One line per training column with its CatBoost feature importance.
for feature_name, importance in zip(X_train.columns, cb.feature_importances_):
  print("{}, {}".format(feature_name, importance))

A partir de acá se corre directamente para calcular la probabilidad de ganar cada ronda

In [None]:
# Switch the process working directory to the project folder.
path = "C:\\Users\\Matias\\Documents\\UDESA\\Tesis_maestria\\Atpt"
os.chdir(path)

In [2]:
from openpyxl import Workbook
from awpy.parser import DemoParser
import pandas as pd
import os
import patoolib
import pickle

# Load the trained model.
# NOTE: `lgbm`, `xgb` and `cb` must already be in scope when this cell runs;
# the load below is left disabled, as in the original cell.
#lgbm  = pickle.load(open('lgbm.pkl','rb'))

# Folder with the demos to process
path = "C:\\Users\\Matias\\Documents\\UDESA\\Tesis_maestria\\Atpt\\winProbDemos"
os.chdir(path)

# Collect the .rar archives and delete any .dem/.json file already present in
# the folder.
carpetas = []
for file in os.listdir():
    if ".rar" in file:
        carpetas.append(file)
    elif ".dem" in file or ".json" in file:
        os.remove(file)

# Extract each archive, then rename the demos that have not been renamed yet
# (renamed demos start with "x").
for folder in carpetas:
    patoolib.extract_archive(folder, outdir=path)
    # Move it to parsedDemos
    #shutil.move(path + "\\" + folder, "C:\\Users\\Matias\\Documents\\UDESA\\Tesis_maestria\\Atpt\\parsedDemos")

    for demo in os.listdir():
        if ".dem" in demo and demo[0] != "x":
            os.rename(demo, "x" + folder.split("vs")[0] + "vs" + demo.split("vs")[1])

# Names of the demos to parse
demos = [file for file in os.listdir() if ".dem" in file]

### Run the parser on each demo
round_states = []
for demo in demos:
    demo_parser = DemoParser(
        demofile=demo,
        parse_rate=128,
        buy_style="hltv"
    )

    # Parse the demofile, output results to dictionary
    df = demo_parser.parse(return_type="json")
    mapa = df["mapName"]

    # For every round keep the first frame whose "seconds" equals the round's
    # maximum. The original compared against max(f["seconds"]), i.e. max() of
    # a single number, which raises TypeError.
    # NOTE(review): the original comment said "frames from the start of each
    # round"; confirm that the max-seconds frame is the one intended.
    frames = []
    for ronda in df["gameRounds"]:
        round_frames = ronda["frames"] or []
        if not round_frames:
            continue
        max_seconds = max(f["seconds"] for f in round_frames)
        frames.append(next(f for f in round_frames if f["seconds"] == max_seconds))

    # Build one feature vector per selected frame and tag it with the match
    # (file name minus its last four characters).
    states = pd.DataFrame([generate_vector_state(f, mapa) for f in frames])
    states["matchID"] = (demo[:-4])
    #states.index += 1
    round_states.append(states)

# Concatenate once instead of growing the frame inside the loop.
round_state_df = pd.concat(round_states)

# Model input: every feature column, without the match identifier, and with
# the same three columns cast to category as at training time.
round_state_df_2 = round_state_df.drop(columns="matchID").astype(
    {"bombPlanted": "category", "bombsite": "category", "mapName": "category"}
)

# The per-demo row position becomes an "index" column, and the new 0..n-1
# index lines up with the probability frames assigned below.
round_state_df = round_state_df.reset_index()

dfWinProbLgbm = pd.DataFrame(lgbm.predict_proba(round_state_df_2), columns = ["tWinProbLgbm", "ctWinProbLgbm"])
dfWinProbXgb = pd.DataFrame(xgb.predict_proba(round_state_df_2), columns = ["tWinProbXgb", "ctWinProbXgb"])
dfWinProbCb = pd.DataFrame(cb.predict_proba(round_state_df_2), columns = ["tWinProbCb", "ctWinProbCb"])

round_state_df[["tWinProbLgbm", "ctWinProbLgbm"]] = dfWinProbLgbm
round_state_df[["tWinProbXgb", "ctWinProbXgb"]] = dfWinProbXgb
round_state_df[["tWinProbCb", "ctWinProbCb"]] = dfWinProbCb

# Append the rows below the existing content of Sheet1, without a header row.
with pd.ExcelWriter(
    "C:\\Users\\Matias\\Documents\\UDESA\\Tesis_maestria\\Atpt\\winProb.xlsx",
    mode = "a",
    if_sheet_exists = "overlay"
) as writer:
    round_state_df.to_excel(writer, index = False, startrow = writer.sheets['Sheet1'].max_row, header = False)


patool: Extracting DreamHack-Open-Fall-2020-oct24-astralis-vs-heroic-bo3-2.rar ...
patool: running "C:\Program Files\WinRAR\rar.EXE" x -- C:\Users\Matias\Documents\UDESA\Tesis_maestria\Atpt\winProbDemos\DreamHack-Open-Fall-2020-oct24-astralis-vs-heroic-bo3-2.rar
patool:     with cwd=C:\Users\Matias\Documents\UDESA\Tesis_maestria\Atpt\winProbDemos


Error calling Go. Check if Go is installed using 'go version'. Need at least v1.17.0.


patool: ... DreamHack-Open-Fall-2020-oct24-astralis-vs-heroic-bo3-2.rar extracted to `C:\Users\Matias\Documents\UDESA\Tesis_maestria\Atpt\winProbDemos'.
[WinError 2] The system cannot find the file specified


ValueError: Error calling Go. Check if Go is installed using 'go version'. Need at least v1.17.0.

In [None]:
# Viejo

In [None]:
from openpyxl import Workbook
from awpy.parser import DemoParser
import pandas as pd
import os
import patoolib
import pickle

# Load the trained model. The file is opened in a `with` block so the handle
# is closed after loading.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open('lgbm.pkl', 'rb') as model_file:
    lgbm = pickle.load(model_file)

# Folder with the demos to process
path = "C:\\Users\\Matias\\Documents\\UDESA\\Tesis_maestria\\Atpt\\winProbDemos"
os.chdir(path)

# Collect the .rar archives and delete any .dem/.json file already present in
# the folder.
carpetas = []
for file in os.listdir():
    if ".rar" in file:
        carpetas.append(file)
    elif ".dem" in file or ".json" in file:
        os.remove(file)

# Extract each archive, then rename the demos that have not been renamed yet
# (renamed demos start with "x").
for folder in carpetas:
    patoolib.extract_archive(folder, outdir=path)
    # Move it to parsedDemos
    #shutil.move(path + "\\" + folder, "C:\\Users\\Matias\\Documents\\UDESA\\Tesis_maestria\\Atpt\\parsedDemos")

    for demo in os.listdir():
        if ".dem" in demo and demo[0] != "x":
            os.rename(demo, "x" + folder.split("vs")[0] + "vs" + demo.split("vs")[1])

# Names of the demos to parse
demos = [file for file in os.listdir() if ".dem" in file]

### Run the parser on each demo
round_states = []
for demo in demos:
    demo_parser = DemoParser(
        demofile=demo,
        parse_rate=128,
        buy_style="hltv"
    )

    # Parse the demofile, output results to dictionary
    df = demo_parser.parse(return_type="json")
    mapa = df["mapName"]

    # For every round keep the first frame whose "seconds" lies in [9.5, 10.5];
    # rounds without such a frame contribute nothing.
    frames = []
    for ronda in df["gameRounds"]:
        for f in ronda["frames"] or []:
            if 9.5 <= f["seconds"] <= 10.5:
                frames.append(f)
                break

    # Build one feature vector per selected frame and tag it with the match
    # (file name minus its last four characters).
    states = pd.DataFrame([generate_vector_state(f, mapa) for f in frames])
    states["matchID"] = (demo[:-4])
    #states.index += 1
    round_states.append(states)

# Concatenate once instead of growing the frame inside the loop.
round_state_df = pd.concat(round_states)

# The round win probability is no longer computed in this version, so neither
# the feature-only frame nor the predict_proba columns are built here.

# The per-demo row position becomes an "index" column.
round_state_df = round_state_df.reset_index()

# Append the rows below the existing content of Sheet1, without a header row.
with pd.ExcelWriter(
    "C:\\Users\\Matias\\Documents\\UDESA\\Tesis_maestria\\Atpt\\winProb.xlsx",
    mode = "a",
    if_sheet_exists = "overlay"
) as writer:
    round_state_df.to_excel(writer, index = False, startrow = writer.sheets['Sheet1'].max_row, header = False)


In [None]:
# LightGBM class probabilities for every row of the feature-only frame,
# labelled as T / CT win probability.
pd.DataFrame(
    lgbm.predict_proba(round_state_df_2),
    columns=["tWinProb", "ctWinProb"],
)

In [None]:
# Display the accumulated per-round state table.
round_state_df