In [9]:
from awpy import Demo

import torch
from torch_geometric_temporal.signal import DynamicHeteroGraphTemporalSignal

import pandas as pd
import polars as pl
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt

from termcolor import colored
import time
import json
import pickle
import sys
import os

# Display up to 100 columns when rendering pandas DataFrames.
pd.set_option('display.max_columns', 100)
# Enable the pandas 'future.no_silent_downcasting' option.
pd.set_option('future.no_silent_downcasting', True)

# Limit rendered polars tables to 10 rows.
pl.Config.set_tbl_rows(10)

# Add the local package directory (resolved relative to the current working
# directory) to the import search path. This must run before the `CS2` imports
# below — presumably that is where the CS2 package lives; TODO confirm.
sys.path.append(os.path.abspath('../../package'))

from CS2.graph import TabularGraphSnapshot, HeteroGraphSnapshot, TemporalHeteroGraphSnapshot
from CS2.token import Tokenizer
from CS2.preprocess import Dictionary, NormalizePosition, NormalizeTabularGraphSnapshot, ImputeTabularGraphSnapshot
from CS2.visualize import HeteroGraphVisualizer

# Input directory: its files are listed and loaded with torch.load by the
# dataset-creation cell.
DATA_PATH = '../../data/matches-processed/cs2/hetero-graph/'
# Output directory: one torch-saved result per input file, under the same file name.
DATA_SAVE_PATH = '../../data/matches-processed/cs2/temporal-hetero-graph/'
# Directory holding 'process.txt', the log of file names already processed.
PROCESS_SAVE_PATH = './parses/temp-hetero-parse-2024.10.10/'

# Create Temporal dataset

In [26]:
# Convert every match file in DATA_PATH into a temporal graph dataset and save
# it under the same file name in DATA_SAVE_PATH.
#
# Progress is journaled to <PROCESS_SAVE_PATH>/process.txt (one file name per
# line) so an interrupted run can be resumed: files already listed there are
# skipped.
#
# NOTE: `dataset_lengths` and `overall_length` only cover matches processed in
# THIS run; matches skipped because they are already in process.txt are not
# counted.

process_log_path = os.path.join(PROCESS_SAVE_PATH, 'process.txt')

# Create the output directories up front. Otherwise torch.save / open(..., 'a')
# would raise on a fresh checkout — the latter only after an entire match has
# already been processed.
os.makedirs(DATA_SAVE_PATH, exist_ok=True)
os.makedirs(PROCESS_SAVE_PATH, exist_ok=True)

# os.listdir returns entries in arbitrary order; sort so that the processing
# order (and therefore the order of `dataset_lengths`) is reproducible.
hetero_graph_matches = sorted(
    f for f in os.listdir(DATA_PATH)
    if os.path.isfile(os.path.join(DATA_PATH, f)) and f not in ('train.pt', 'val.pt')
)

dataset_lengths = []
overall_length = 0

# Load the names of already-processed matches, stripping trailing newlines.
processed_matches = []
if os.path.exists(process_log_path):
    with open(process_log_path, 'r') as f:
        processed_matches = [line.strip() for line in f]

# Set for O(1) membership checks inside the loop.
already_processed = set(processed_matches)

for file in hetero_graph_matches:

    if file in already_processed:
        print(colored(f'{file} already processed. Skipping...', 'yellow'))
        continue

    print(colored(f'Processing {file}...', 'light_blue'))

    # SECURITY: weights_only=False unpickles arbitrary Python objects. Only
    # load files from a trusted source (here: files expected to be produced by
    # this project's own preprocessing — verify before pointing DATA_PATH at
    # anything else).
    match = torch.load(os.path.join(DATA_PATH, file), weights_only=False)
    thgs = TemporalHeteroGraphSnapshot()
    dyn_graphs = thgs.process_match(match, interval=20, shifted_intervals=True)

    dataset_lengths.append(len(dyn_graphs))
    overall_length += len(dyn_graphs)

    print('DTDG Dataset Length:', len(dyn_graphs))
    print(colored(f'{file} processed.', 'green'))

    torch.save(dyn_graphs, os.path.join(DATA_SAVE_PATH, file))

    # Journal the file only after its output has been saved, so a crash
    # mid-match causes that match to be reprocessed on the next run.
    with open(process_log_path, 'a') as f:
        f.write(f'{file}\n')

[94mProcessing 100000.pt...[0m
DTDG Dataset Length: 708
[32m100000.pt processed.[0m
[94mProcessing 100001.pt...[0m
DTDG Dataset Length: 792
[32m100001.pt processed.[0m
[94mProcessing 100002.pt...[0m
DTDG Dataset Length: 990
[32m100002.pt processed.[0m
[94mProcessing 100003.pt...[0m
DTDG Dataset Length: 761
[32m100003.pt processed.[0m
[94mProcessing 100004.pt...[0m
[1m[31mError:[0mError: There are missing ticks in the graph sequence. The error occured while parsing match 100004.0 at round                                 0.1666666716337204 between ticks 35158.0-35463.0. Skipping the sequence.
DTDG Dataset Length: 875
[32m100004.pt processed.[0m
[94mProcessing 100005.pt...[0m
DTDG Dataset Length: 862
[32m100005.pt processed.[0m
[94mProcessing 100006.pt...[0m
DTDG Dataset Length: 804
[32m100006.pt processed.[0m
[94mProcessing 100007.pt...[0m
DTDG Dataset Length: 756
[32m100007.pt processed.[0m
[94mProcessing 100008.pt...[0m
DTDG Dataset Length: 1061
[3

In [27]:
# Sum of len(dyn_graphs) over the matches processed by the cell above in this
# run; matches skipped as already processed are not included in this total.
print(overall_length)

98098
