In [1]:
from importables.preprocessing_managers import DatasetDictionary, PreprocessingManagers
import importables.constants
import importables.utilities
import json
import sklearn.model_selection
import os
import pandas

In [2]:
# Root folders of the pre-generated datasets. Each root can be overridden by an
# environment variable of the same name so the notebook is runnable on machines
# that do not share this directory layout; the defaults are the original paths.
NTU_ARD_PREGEN_ROOT = os.environ.get("NTU_ARD_PREGEN_ROOT", "/mnt/c/Skripsi/dataset-pregen")
UCF101_ARD_PREGEN_ROOT = os.environ.get("UCF101_ARD_PREGEN_ROOT", "/mnt/c/Skripsi/UCF-101-pregen")
HMDB51_ARD_PREGEN_ROOT = os.environ.get("HMDB51_ARD_PREGEN_ROOT", "/mnt/c/Skripsi/HMDB51-pregen")

# Fraction of samples assigned to the test side by the NTU train/test splits.
DEFAULT_SPLIT = 0.3

# NTU ARD Train Test Split

In [24]:
# One entry per NTU split variant; each starts with its own empty train/test
# index lists, which are filled in by the cells below.
split_dict = {
    variant: {"train": [], "test": []}
    for variant in ("all", "single")
}

In [25]:
# Column mapping for the NTU dataset, then the generated dictionary CSV loaded
# through DatasetDictionary (presumably yielding a pandas DataFrame — confirm
# against DatasetDictionary.as_DataFrame).
curr_mapping = DatasetDictionary.Mappings.NTU_ACTION_RECOGNITION_DATASET
pd_pregen = DatasetDictionary(
    os.path.join(NTU_ARD_PREGEN_ROOT, "generated_dictionary.csv"),
    curr_mapping,
).as_DataFrame()

## ALL

In [31]:
# Seed first (presumably this seeds the RNG that train_test_split draws from —
# confirm in Utilities.set_all_seed), then split every row, stratified on the
# column that curr_mapping["A"] names.
importables.utilities.Utilities.set_all_seed(importables.constants.RANDOM_SEED_BYTES)
train_pd, test_pd = sklearn.model_selection.train_test_split(
    pd_pregen,
    test_size=DEFAULT_SPLIT,
    shuffle=True,
    stratify=pd_pregen[curr_mapping["A"]],
)

In [32]:
# Record the row labels of each side of the "all" split in ascending order.
split_dict["all"].update(
    train=train_pd.sort_index().index.tolist(),
    test=test_pd.sort_index().index.tolist(),
)

## Single Person

In [33]:
# Keep only the rows whose curr_mapping["A"] value is in
# NTU_ACTION_DAILY_ACTIONS_SET, then repeat the seeded stratified split on
# that subset.
pd_pregen_filter = pd_pregen.loc[
    pd_pregen[curr_mapping["A"]].isin(importables.constants.NTU_ACTION_DAILY_ACTIONS_SET)
]
importables.utilities.Utilities.set_all_seed(importables.constants.RANDOM_SEED_BYTES)
train_pd, test_pd = sklearn.model_selection.train_test_split(
    pd_pregen_filter,
    test_size=DEFAULT_SPLIT,
    shuffle=True,
    stratify=pd_pregen_filter[curr_mapping["A"]],
)

In [34]:
# Record the row labels of each side of the "single" split in ascending order.
split_dict["single"].update(
    train=train_pd.sort_index().index.tolist(),
    test=test_pd.sort_index().index.tolist(),
)

In [38]:
# Persist the NTU splits next to the generated dictionary as indented JSON.
filepath = os.path.join(NTU_ARD_PREGEN_ROOT, "train_test_split.json")
with open(filepath, "w") as f:
    f.write(json.dumps(split_dict, indent=4))

# UCF101 ARD Train Test Split

In [53]:
# One entry per UCF101 preset (1-3); each starts with its own empty train/test
# index lists, which are filled in by the loop below.
split_dict = {
    f"preset{number}": {"train": [], "test": []}
    for number in range(1, 4)
}

In [54]:
# Column mapping, generated dictionary and file-name adapter for UCF101.
# name_adapter is called below on a file stem and its result is suffixed with
# ".h5" to match the GeneratedFileName column.
curr_mapping = DatasetDictionary.Mappings.UCF101_ACTION_RECOGNITION_DATASET
pd_pregen = DatasetDictionary(
    os.path.join(UCF101_ARD_PREGEN_ROOT, "generated_dictionary.csv"),
    curr_mapping,
).as_DataFrame()
name_adapter = DatasetDictionary.NameAdapters.UCF_101_ACTION_RECOGNITION_DATASET()

In [55]:
def _ucf101_rows_for_list(list_filename):
    """Select the rows of ``pd_pregen`` named by one UCF101 split list.

    Parameters
    ----------
    list_filename : str
        File name inside ``ucfTrainTestlist`` (e.g. ``"trainlist01.txt"``).

    Returns
    -------
    The subset of ``pd_pregen`` whose ``GeneratedFileName`` matches an entry
    of the list after name adaptation.
    """
    listed = pandas.read_csv(
        os.path.join(UCF101_ARD_PREGEN_ROOT, f"ucfTrainTestlist/{list_filename}"),
        sep=" ",
        header=None,
        names=["file", "class"],
    )
    # Each listed entry is reduced to its base name without anything after the
    # first ".", passed through name_adapter, and given the ".h5" suffix used
    # by the GeneratedFileName column.
    generated_names = listed["file"].apply(
        lambda x: name_adapter(str(x).split("/")[-1].split(".")[0]) + ".h5"
    )
    return pd_pregen.loc[pd_pregen["GeneratedFileName"].isin(generated_names)]


# Build the three presets from the trainlist0N / testlist0N files.
for preset_index in range(1, 4):
    train_pd = _ucf101_rows_for_list(f"trainlist0{preset_index}.txt")
    test_pd = _ucf101_rows_for_list(f"testlist0{preset_index}.txt")

    # A generated file must never appear on both sides of the same preset.
    assert set(train_pd["GeneratedFileName"]).isdisjoint(test_pd["GeneratedFileName"]), (
        f"preset{preset_index}: train and test share generated files"
    )

    split_dict[f"preset{preset_index}"]["train"] = train_pd.sort_index().index.tolist()
    split_dict[f"preset{preset_index}"]["test"] = test_pd.sort_index().index.tolist()

In [None]:
# Persist the UCF101 presets next to the generated dictionary as indented JSON.
filepath = os.path.join(UCF101_ARD_PREGEN_ROOT, "train_test_split.json")
with open(filepath, "w") as f:
    f.write(json.dumps(split_dict, indent=4))

# HMDB51 ARD Train Test Split

In [3]:
# One entry per HMDB51 preset (1-3); each starts with its own empty
# train/test/extra index lists, which are filled in by the loop below.
split_dict = {
    f"preset{number}": {"train": [], "test": [], "extra": []}
    for number in range(1, 4)
}

In [4]:
# Column mapping, generated dictionary and file-name adapter for HMDB51.
# name_adapter is called below on a listed file name with its last four
# characters removed, and its result is suffixed with ".h5".
curr_mapping = DatasetDictionary.Mappings.HMDB51_ACTION_RECOGNITION_DATASET
pd_pregen = DatasetDictionary(
    os.path.join(HMDB51_ARD_PREGEN_ROOT, "generated_dictionary.csv"),
    curr_mapping,
).as_DataFrame()
name_adapter = DatasetDictionary.NameAdapters.HMDB_51_ACTION_RECOGNITION_DATASET()

In [7]:
def _hmdb51_rows_for_code(idx_pd, split_code):
    """Select the rows of ``pd_pregen`` listed under one split code.

    Parameters
    ----------
    idx_pd : pandas.DataFrame
        One per-class split file, with columns ``file`` and ``split``.
    split_code : int
        Value of the ``split`` column to keep.

    Returns
    -------
    The subset of ``pd_pregen`` whose ``GeneratedFileName`` matches a listed
    file after name adaptation.
    """
    listed_files = idx_pd.loc[idx_pd["split"] == split_code, "file"]
    # Drop the last four characters of each listed name (presumably the
    # ".avi" extension — confirm against the split files), adapt the name and
    # add the ".h5" suffix used by the GeneratedFileName column.
    generated_names = listed_files.apply(lambda x: name_adapter(str(x)[:-4]) + ".h5")
    return pd_pregen.loc[pd_pregen["GeneratedFileName"].isin(generated_names)]


for preset_index in range(1, 4):
    train_indices = []
    test_indices = []
    extra_indices = []

    for class_name in importables.constants.HMDB51_DATASET_MAP.values():
        idx_pd = pandas.read_csv(
            os.path.join(
                HMDB51_ARD_PREGEN_ROOT,
                f"testTrainMulti_7030_splits/{class_name}_test_split{preset_index}.txt",
            ),
            index_col=False,
            sep=" ",
            header=None,
            names=["file", "split"],
        )

        # Split codes as used by this notebook: 1 -> train, 2 -> test, 0 -> extra.
        train_pd = _hmdb51_rows_for_code(idx_pd, 1)
        test_pd = _hmdb51_rows_for_code(idx_pd, 2)
        extra_pd = _hmdb51_rows_for_code(idx_pd, 0)

        # Every class must resolve to exactly 70 train and 30 test rows; any
        # other count means listed files were not matched in pd_pregen.
        assert len(train_pd) == 70, f"{class_name}: expected 70 train rows, got {len(train_pd)}"
        assert len(test_pd) == 30, f"{class_name}: expected 30 test rows, got {len(test_pd)}"

        train_indices.extend(train_pd.sort_index().index.tolist())
        test_indices.extend(test_pd.sort_index().index.tolist())
        extra_indices.extend(extra_pd.sort_index().index.tolist())

    # The three groups must be pairwise disjoint. A single three-way
    # intersection would be empty as soon as any one pair is disjoint, so it
    # could not detect an index shared by only two of the groups.
    train_set, test_set, extra_set = set(train_indices), set(test_indices), set(extra_indices)
    assert train_set.isdisjoint(test_set), f"preset{preset_index}: train/test overlap"
    assert train_set.isdisjoint(extra_set), f"preset{preset_index}: train/extra overlap"
    assert test_set.isdisjoint(extra_set), f"preset{preset_index}: test/extra overlap"

    split_dict[f"preset{preset_index}"]["train"] = sorted(train_indices)
    split_dict[f"preset{preset_index}"]["test"] = sorted(test_indices)
    split_dict[f"preset{preset_index}"]["extra"] = sorted(extra_indices)

In [None]:
# Persist the HMDB51 presets next to the generated dictionary as indented JSON.
filepath = os.path.join(HMDB51_ARD_PREGEN_ROOT, "train_test_split.json")
with open(filepath, "w") as f:
    f.write(json.dumps(split_dict, indent=4))