### Preprocessing of Dag2023 data using existing methods
---

*Last updated: 17 June 2024*


In [None]:
# @title Imports

import os
import subprocess

from omegaconf import OmegaConf
from utils import init_random_seeds
from torch_geometric.data import download_url
from sklearn.preprocessing import StandardScaler
from _utils import Dag2023Preprocessor
from utils import ROOT_DIR, EXPERIMENT_DATASETS

# Initialize the random seeds with a fixed value (42) so repeated runs of this
# notebook start from the same seed.
# NOTE(review): which RNGs `init_random_seeds` actually seeds is defined in
# `utils` and is not visible here -- confirm there.
init_random_seeds(42)

In [None]:
# @title Setup: Download the dataset

# Download the open-source archive (if the dataset folder is not already
# present) and extract only the `dataset_name` folder from it.
# The URL and archive file name are read from the preprocess submodule config.
dataset_name = "Dag2023"
assert dataset_name in EXPERIMENT_DATASETS, f"{dataset_name} is not a valid dataset."
config = OmegaConf.load("../configs/submodule/preprocess.yaml")
url = config.preprocess.opensource_url
zipfile = config.preprocess.opensource_zipfile
zip_path = os.path.join(ROOT_DIR, zipfile)

# Remove only a trailing ".zip" from the archive name. `str.strip(".zip")`
# would instead strip any of the characters ".", "z", "i", "p" from BOTH ends
# of the name, corrupting names that start or end with one of those letters.
archive_stem = zipfile[: -len(".zip")] if zipfile.endswith(".zip") else zipfile
source_path = os.path.join(ROOT_DIR, archive_stem)

# Final location of the extracted dataset; also used as the "already done" marker.
DATA_DIR = os.path.join(source_path, dataset_name)

if not os.path.exists(DATA_DIR):
    download_url(url=url, folder=ROOT_DIR, filename=zipfile)
    # Extract only the members under "<dataset_name>/" into `source_path`.
    bash_command = [
        "unzip",
        zip_path,
        f"{dataset_name}/*",
        "-d",
        source_path,
    ]
    std_out = subprocess.run(bash_command, text=True)  # Run the bash command
    print(std_out, end="\n\n")
    # Info-ZIP `unzip` exits with 0 on success and 1 when only warnings were
    # raised; anything else (or a missing output folder) means the extraction
    # did not produce the dataset. Keep the archive in that case so the
    # download is not lost, and fail loudly instead of continuing without data.
    if std_out.returncode not in (0, 1) or not os.path.exists(DATA_DIR):
        raise RuntimeError(
            f"Failed to extract {dataset_name} from {zip_path} "
            f"(unzip exit code {std_out.returncode}); the archive was kept."
        )
    # Extraction succeeded: the archive is no longer needed.
    os.unlink(zip_path)
else:
    print(f"{dataset_name} dataset already downloaded and unzipped.")

In [None]:
# Preprocessing settings handed to the Dag2023 preprocessor.
transform = StandardScaler()  # scikit-learn scaler object passed as the transform
smooth_method, interpolate_method = "exponential", "linear"
resample_dt = 0.333  # NOTE(review): presumably the resampling time step -- confirm units

# Build the preprocessor (arguments are positional, in this order) and run it.
preprocessor = Dag2023Preprocessor(
    transform,
    smooth_method,
    interpolate_method,
    resample_dt,
)
preprocessor.preprocess()