In [1]:
from awpy import Demo

import torch
from torch_geometric_temporal.signal import DynamicHeteroGraphTemporalSignal

import pandas as pd
import polars as pl
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler

import time
import sys
import os

# Let pandas render up to 100 columns when a DataFrame is displayed
pd.set_option('display.max_columns', 100)
# Opt in to the pandas 'future.no_silent_downcasting' option
pd.set_option('future.no_silent_downcasting', True)

# Show 10 rows when polars renders a table
pl.Config.set_tbl_rows(10)

# Put the local package folder on the import path; this must run before the
# CS2 imports below (presumably the CS2 package lives there - TODO confirm)
sys.path.append(os.path.abspath('../../package'))

from CS2.graph import TabularGraphSnapshot, HeteroGraphSnapshot, TemporalHeteroGraphSnapshot
from CS2.token import Tokenizer
from CS2.preprocess import Dictionary, NormalizePosition, NormalizeTabularGraphSnapshot, ImputeTabularGraphSnapshot
from CS2.visualize import HeteroGraphVisualizer

### 1. Tabular dataset creation

#### 2024 Matches

Parse date: 2024. 09. 17.

In [6]:
# --------------------------------------------------
# PREPARATION
# --------------------------------------------------
#
# Parses every Inferno demo found in MATCH_FOLDER_PATH into tabular snapshot
# CSV files under SAVE_PATH. Progress is persisted in small text files in the
# current working directory so that an interrupted run can be resumed:
#   completed_matches.txt - file names that were parsed and saved
#   error_matches.txt     - file names whose parsing raised an exception
#   process_times.txt     - processing duration (seconds) per parsed match
#   match_index.txt       - running index used to build numerical_match_id
# NOTE: the state file names are fixed and relative, so every run started from
# the same working directory resumes from the same state.

MATCH_FOLDER_PATH = '../../data/matches-raw/cs2/2024/'
SAVE_PATH = '../../data/matches-processed/cs2/tabular/2024/'

# Create the output folder up front, so that a multi-minute parse is not lost
# to a failing to_csv() call at the end of the first iteration.
os.makedirs(SAVE_PATH, exist_ok=True)

# Get inferno matches; file names containing any of these markers are skipped
# (presumably split/partial demo files - TODO confirm).
EXCLUDED_MARKERS = ('-p1', '-p2', '-p3', '-p4', '-p5')
match_list = os.listdir(MATCH_FOLDER_PATH)
inferno_match_list = [
    file for file in match_list
    if 'inferno' in file and not any(marker in file for marker in EXCLUDED_MARKERS)
]

# Save complete match list and process time
parsed_matches_list = []
error_matches_list = []
process_time_list = []

# Match index
match_index = 0



# Read the files if they exist
if os.path.exists('completed_matches.txt'):
    with open('completed_matches.txt', 'r') as file:
        parsed_matches_list = [line.strip() for line in file]

if os.path.exists('error_matches.txt'):
    with open('error_matches.txt', 'r') as file:
        error_matches_list = [line.strip() for line in file]

if os.path.exists('process_times.txt'):
    with open('process_times.txt', 'r') as file:
        # Parse the stored durations back to float, so the list holds one
        # type only (new entries appended below are floats as well).
        process_time_list = [float(line) for line in file if line.strip()]

if os.path.exists('match_index.txt'):
    with open('match_index.txt', 'r') as file:
        match_index = int(file.read())


# Map nodes dataset
nodes = pd.read_csv('../../data/map_graph_model/de_inferno/nodes.csv')



# --------------------------------------------------
# PARSE MATCHES
# --------------------------------------------------

# Parse Inferno matches
for match in inferno_match_list:

    # If the match has already been parsed or is corrupted, skip it
    if match in parsed_matches_list or match in error_matches_list:
        continue

    # Start timer
    start_time = time.time()

    # Match path
    match_path = os.path.join(MATCH_FOLDER_PATH, match)

    # Create tabular snapshot object
    tg = TabularGraphSnapshot()

    print('-----------------------------------------------------------------------------------------------------------------------------')
    print('                                                        PARSING MATCH                                                        \n')
    print('Match: ' + match)

    try:
        # Create tabular snapshot database
        df, df_dict, active_infernos, active_smokes, active_he_smokes = tg.process_match(
            match_path=match_path,
            player_stats_data_path='../../data/player-stats/scraped-in-2024/2023/norm_player_stats_2023.csv',
            missing_player_stats_data_path='../../data/player-stats/missing_players_df_2023.csv',
            weapon_data_path='../../data/weapon_info/ammo_info.csv',

            ticks_per_second=4,
            # Offset keeps the id in the 100000+ range; the index only advances
            # for successfully parsed matches.
            numerical_match_id=100000 + match_index,
            num_permutations_per_round=1,
            build_dictionary=True,

            package='pandas'
        )

    except Exception as e:
        # Print info and save error matches. The exception itself is reported
        # too, otherwise the reason for skipping the match would be lost.
        print('Error occured while parsing the match. Skipping match.')
        print(f'Reason: {type(e).__name__}: {e}')

        error_matches_list.append(match)
        with open('error_matches.txt', 'w') as file:
            file.writelines(f"{item}\n" for item in error_matches_list)

        continue



    # Impute missing values
    its = ImputeTabularGraphSnapshot()
    df = its.impute(df)

    # Tokenize match
    tokenizer = Tokenizer()
    df = tokenizer.tokenize_match(df, 'de_inferno', nodes)

    # Save dataframes (one CSV per returned table, prefixed with the demo file name)
    output_prefix = os.path.join(SAVE_PATH, match)
    df.to_csv(output_prefix + '_df.csv', index=False)
    df_dict.to_csv(output_prefix + '_df_dict.csv', index=False)
    active_infernos.to_csv(output_prefix + '_active_infernos.csv', index=False)
    active_smokes.to_csv(output_prefix + '_active_smokes.csv', index=False)
    active_he_smokes.to_csv(output_prefix + '_active_he_smokes.csv', index=False)

    # Post-process
    match_index += 1
    parsed_matches_list.append(match)



    # --------------------------------------------------
    # LOG AND SAVE TIME
    # --------------------------------------------------

    # Time
    end_time = time.time()
    process_time = end_time - start_time
    process_time_list.append(process_time)

    # Write results to file (state is rewritten after every match, so the run
    # can be resumed from the last completed match)
    with open('completed_matches.txt', 'w') as file:
        file.writelines(f"{item}\n" for item in parsed_matches_list)

    with open('process_times.txt', 'w') as file:
        file.writelines(f"{item}\n" for item in process_time_list)

    with open('match_index.txt', 'w') as file:
        file.write(str(match_index))

    print('Parse completed. Duration: ' + str(process_time) + ' seconds.')

-----------------------------------------------------------------------------------------------------------------------------
                                                        PARSING MATCH                                                        

Match: _iem-dallas-2024-virtuspro-vs-big-bo3-iNJygOnxyHMLcZ0aB_CRymvirtus-pro-vs-big-m1-inferno.dem
Parse completed. Duration: 95.31853127479553 seconds.
-----------------------------------------------------------------------------------------------------------------------------
                                                        PARSING MATCH                                                        

Match: _iem-katowice-2024-eternal-fire-vs-faze-bo3-NSvOwra3ZJLAlQou2jhAKveternal-fire-vs-faze-m1-inferno.dem
Parse completed. Duration: 199.3240532875061 seconds.
-----------------------------------------------------------------------------------------------------------------------------
                                                   

#### 2023 Matches

Parse date: 2024. 09. 18.

In [None]:
# --------------------------------------------------
# PREPARATION
# --------------------------------------------------
#
# Parses every Inferno demo found in MATCH_FOLDER_PATH into tabular snapshot
# CSV files under SAVE_PATH. Progress is persisted in small text files in the
# current working directory so that an interrupted run can be resumed:
#   completed_matches.txt - file names that were parsed and saved
#   error_matches.txt     - file names whose parsing raised an exception
#   process_times.txt     - processing duration (seconds) per parsed match
#   match_index.txt       - running index used to build numerical_match_id
# NOTE: the state file names are fixed and relative, so every run started from
# the same working directory resumes from the same state.

MATCH_FOLDER_PATH = '../../data/matches-raw/cs2/2023/'
SAVE_PATH = '../../data/matches-processed/cs2/tabular/2023/'

# Create the output folder up front, so that a multi-minute parse is not lost
# to a failing to_csv() call at the end of the first iteration.
os.makedirs(SAVE_PATH, exist_ok=True)

# Get inferno matches; file names containing any of these markers are skipped
# (presumably split/partial demo files - TODO confirm).
EXCLUDED_MARKERS = ('-p1', '-p2', '-p3', '-p4', '-p5')
match_list = os.listdir(MATCH_FOLDER_PATH)
inferno_match_list = [
    file for file in match_list
    if 'inferno' in file and not any(marker in file for marker in EXCLUDED_MARKERS)
]

# Save complete match list and process time
parsed_matches_list = []
error_matches_list = []
process_time_list = []

# Match index
match_index = 0



# Read the files if they exist
if os.path.exists('completed_matches.txt'):
    with open('completed_matches.txt', 'r') as file:
        parsed_matches_list = [line.strip() for line in file]

if os.path.exists('error_matches.txt'):
    with open('error_matches.txt', 'r') as file:
        error_matches_list = [line.strip() for line in file]

if os.path.exists('process_times.txt'):
    with open('process_times.txt', 'r') as file:
        # Parse the stored durations back to float, so the list holds one
        # type only (new entries appended below are floats as well).
        process_time_list = [float(line) for line in file if line.strip()]

if os.path.exists('match_index.txt'):
    with open('match_index.txt', 'r') as file:
        match_index = int(file.read())


# Map nodes dataset
nodes = pd.read_csv('../../data/map_graph_model/de_inferno/nodes.csv')



# --------------------------------------------------
# PARSE MATCHES
# --------------------------------------------------

# Parse Inferno matches
for match in inferno_match_list:

    # If the match has already been parsed or is corrupted, skip it
    if match in parsed_matches_list or match in error_matches_list:
        continue

    # Start timer
    start_time = time.time()

    # Match path
    match_path = os.path.join(MATCH_FOLDER_PATH, match)

    # Create tabular snapshot object
    tg = TabularGraphSnapshot()

    print('-----------------------------------------------------------------------------------------------------------------------------')
    print('                                                        PARSING MATCH                                                        \n')
    print('Match: ' + match)

    try:
        # Create tabular snapshot database
        # (the 2023 demos are paired with the 2022 player statistics files)
        df, df_dict, active_infernos, active_smokes, active_he_smokes = tg.process_match(
            match_path=match_path,
            player_stats_data_path='../../data/player-stats/scraped-in-2024/2022/norm_player_stats_2022.csv',
            missing_player_stats_data_path='../../data/player-stats/missing_players_df_2022.csv',
            weapon_data_path='../../data/weapon_info/ammo_info.csv',

            ticks_per_second=4,
            # Offset keeps the id in the 100000+ range; the index only advances
            # for successfully parsed matches.
            numerical_match_id=100000 + match_index,
            num_permutations_per_round=1,
            build_dictionary=True,

            package='pandas'
        )

    except Exception as e:
        # Print info and save error matches. The exception itself is reported
        # too, otherwise the reason for skipping the match would be lost.
        print('Error occured while parsing the match. Skipping match.')
        print(f'Reason: {type(e).__name__}: {e}')

        error_matches_list.append(match)
        with open('error_matches.txt', 'w') as file:
            file.writelines(f"{item}\n" for item in error_matches_list)

        continue



    # Impute missing values
    its = ImputeTabularGraphSnapshot()
    df = its.impute(df)

    # Tokenize match
    tokenizer = Tokenizer()
    df = tokenizer.tokenize_match(df, 'de_inferno', nodes)

    # Save dataframes (one CSV per returned table, prefixed with the demo file name)
    output_prefix = os.path.join(SAVE_PATH, match)
    df.to_csv(output_prefix + '_df.csv', index=False)
    df_dict.to_csv(output_prefix + '_df_dict.csv', index=False)
    active_infernos.to_csv(output_prefix + '_active_infernos.csv', index=False)
    active_smokes.to_csv(output_prefix + '_active_smokes.csv', index=False)
    active_he_smokes.to_csv(output_prefix + '_active_he_smokes.csv', index=False)

    # Post-process
    match_index += 1
    parsed_matches_list.append(match)



    # --------------------------------------------------
    # LOG AND SAVE TIME
    # --------------------------------------------------

    # Time
    end_time = time.time()
    process_time = end_time - start_time
    process_time_list.append(process_time)

    # Write results to file (state is rewritten after every match, so the run
    # can be resumed from the last completed match)
    with open('completed_matches.txt', 'w') as file:
        file.writelines(f"{item}\n" for item in parsed_matches_list)

    with open('process_times.txt', 'w') as file:
        file.writelines(f"{item}\n" for item in process_time_list)

    with open('match_index.txt', 'w') as file:
        file.write(str(match_index))

    print('Parse completed. Duration: ' + str(process_time) + ' seconds.')