In [None]:
import os
import shutil
from tqdm import tqdm

OLD_ROOT = "/data/TimeSeriesResearch/datasets/Satya"
NEW_ROOT = "/data/TimeSeriesResearch/datasets/Satya_New"

def restructure_dataset(old_root=OLD_ROOT, new_root=NEW_ROOT):
    """Copy CSV files into a ``<new_root>/<folder>/<label>/file_<index>.csv`` layout.

    Every immediate sub-directory of ``old_root`` is scanned (non-recursively)
    for files whose name ends in ``.csv`` (case-insensitive). The file name,
    with its extension removed, is split on ``_``:

    * ``<prefix>_<index>_<label...>`` -> label is everything after the second
      underscore (inner underscores kept), index is the second field.
    * ``<prefix>_<index>``            -> label ``"Unknown"``, index is the second field.
    * ``<prefix>``                    -> label ``"Unknown"``, index ``"0"``.

    An empty label (file name ending in ``_``) is also mapped to ``"Unknown"``
    so that the file still lands in a label sub-directory.

    Source files are copied, not moved; ``old_root`` is left untouched.

    Args:
        old_root: Directory whose sub-directories hold the original CSV files.
        new_root: Destination root; created if it does not exist.

    Returns:
        None. Progress and per-file results are printed.

    Raises:
        OSError: If ``old_root`` (or one of its sub-directories) cannot be
            listed or a destination directory cannot be created. Errors while
            copying an individual file are reported and do not abort the run.
    """
    csv_ext = ".csv"
    os.makedirs(new_root, exist_ok=True)

    # Destinations written during this call, used to surface name collisions
    # (e.g. two sources that differ only in their first field).
    written = set()

    for folder in tqdm(os.listdir(old_root), desc="Processing Folders"):
        folder_path = os.path.join(old_root, folder)

        if not os.path.isdir(folder_path):
            continue

        for file_name in os.listdir(folder_path):
            if not file_name.lower().endswith(csv_ext):
                continue

            # Strip the extension *before* parsing so that it can never leak
            # into the label or the index, whatever its letter case.
            stem = file_name[:-len(csv_ext)]
            parts = stem.split('_')
            if len(parts) >= 3:
                label = '_'.join(parts[2:]) or "Unknown"
                index = parts[1]
            else:
                label = "Unknown"
                index = parts[1] if len(parts) > 1 else "0"

            label_folder = os.path.join(new_root, folder, label)
            os.makedirs(label_folder, exist_ok=True)

            new_file_name = f"file_{index}.csv"
            src_path = os.path.join(folder_path, file_name)
            dst_path = os.path.join(label_folder, new_file_name)

            if dst_path in written:
                # Behaviour is unchanged (last copy wins) but no longer silent.
                print(f"Warning: {dst_path} was already written in this run; overwriting with {src_path}")

            try:
                shutil.copy(src_path, dst_path)
                written.add(dst_path)
                print(f"Copied: {src_path} -> {dst_path}")
            except OSError as e:
                print(f"Error copying {src_path} to {dst_path}: {e}")

    print("Restructuring complete.")

# Run the restructuring with the default OLD_ROOT -> NEW_ROOT paths.
# Files are copied (shutil.copy), so the original tree is left in place.
restructure_dataset()


In [None]:
import os
import h5py
import pandas as pd
from tqdm import tqdm

# Root directories to convert. Each one is scanned two levels deep:
# <root>/<folder>/<subfolder>/*.csv
ROOT_DIRS = [
    '/data/TimeSeriesResearch/datasets/Satya_New'
]

# Output HDF5 file. The builder below opens it in "w" mode, so an existing
# file at this path is overwritten.
H5_FILE = '/data/TimeSeriesResearch/datasets/satya_new_data1.h5'

def create_h5py_dataset_from_root_dirs_v2(root_dirs=ROOT_DIRS, h5_file=H5_FILE):
    """Pack ``<root>/<folder>/<subfolder>/*.csv`` trees into a single HDF5 file.

    For every CSV found, one HDF5 dataset is created at
    ``<basename(root)>/<folder>/<subfolder>/file_<k>``, where ``k`` is the number
    of items already in that group. Directory listings are processed in sorted
    order, so the numbering is reproducible between runs.

    The columns ``'Unnamed: 0'``, ``'time_sec'``, ``'Time'`` and ``'Label'`` are
    dropped when present; the remaining column names are stored on the dataset
    in the ``"descriptions"`` attribute.

    Args:
        root_dirs: Iterable of root directories to scan.
        h5_file: Path of the HDF5 file to create. It is opened in ``"w"`` mode,
            so an existing file is overwritten.

    Returns:
        None. Progress and per-file errors are printed.

    Notes:
        A failure while processing a single CSV (read error, data that cannot
        be stored, ...) is reported and that file is skipped; the run continues.
    """
    drop_columns = ['Unnamed: 0', 'time_sec', 'Time', 'Label']

    with h5py.File(h5_file, "w") as h5f:
        for root_dir in root_dirs:
            root_prefix = os.path.basename(os.path.normpath(root_dir))

            # os.listdir returns entries in arbitrary order; sort so that the
            # file_<k> numbering below is deterministic.
            for folder in tqdm(sorted(os.listdir(root_dir)), desc=f"Processing {root_dir}"):
                folder_path = os.path.join(root_dir, folder)
                if not os.path.isdir(folder_path):
                    continue

                # Each sub-directory becomes the last component of the group path.
                for subfolder in sorted(os.listdir(folder_path)):
                    subfolder_path = os.path.join(folder_path, subfolder)
                    if not os.path.isdir(subfolder_path):
                        continue

                    group_path = f"{root_prefix}/{folder}/{subfolder}"

                    for csv_file in sorted(os.listdir(subfolder_path)):
                        if not csv_file.lower().endswith(".csv"):
                            continue

                        # Resolve the path before the try block so the error
                        # message always refers to the file being processed.
                        csv_path = os.path.join(subfolder_path, csv_file)

                        try:
                            # Created lazily: sub-directories without CSV
                            # files do not produce empty groups.
                            group = h5f.require_group(group_path)

                            df = pd.read_csv(csv_path)
                            df = df.drop(columns=drop_columns, errors='ignore')

                            dataset_name = f"file_{len(group)}"
                            dset = group.create_dataset(dataset_name, data=df.to_numpy())
                            dset.attrs["descriptions"] = df.columns.tolist()

                            print(f"Created dataset at: {group_path}/{dataset_name}")

                        except Exception as e:
                            # Best effort: report and move on to the next file.
                            print(f"Error processing {csv_path}: {e}")

# Build the HDF5 file from the default ROOT_DIRS into H5_FILE.
create_h5py_dataset_from_root_dirs_v2()


In [None]:
from collections import defaultdict
from ftse.data.Dataset import UnwindowedDataset
from tqdm.auto import tqdm
import os
import h5py
import numpy as np
import pandas as pd
import torch

def load_h5py_dataset(h5_file=H5_FILE, window_size=None, stride=None, concat=False):
    """Load every dataset of an HDF5 file, grouped by its parent group path.

    The file is walked recursively. For each HDF5 dataset, the name of the
    group that directly contains it is used as the label, and the group path
    without its first component and without the label is used as the dataset
    name (the dictionary key).

    Args:
        h5_file: Path of the HDF5 file to read.
        window_size: Window length passed to ``UnwindowedDataset.window``.
        stride: Stride passed to ``UnwindowedDataset.window``. Windowing is
            applied only when both ``window_size`` and ``stride`` are truthy.
        concat: When true, each list of datasets is wrapped in a
            ``torch.utils.data.ConcatDataset``.

    Returns:
        A ``defaultdict`` mapping dataset name to a list of datasets, or to a
        ``ConcatDataset`` when ``concat`` is true.
    """
    datasets = defaultdict(list)

    def _collect(node, prefix=""):
        # Depth-first walk; ``prefix`` is the "/"-joined path of ``node``.
        for child_name in node.keys():
            child = node[child_name]

            if isinstance(child, h5py.Group):
                print("item: ", child)
                _collect(child, prefix + "/" + child_name)
                continue

            if not isinstance(child, h5py.Dataset):
                continue

            # ``prefix`` starts with "/", so segments[0] is the empty string.
            segments = prefix.split("/")
            leaf_label = segments[-1]
            owner = "/".join(segments[1:-1])

            bucket = datasets[owner]
            bucket.append(
                UnwindowedDataset(
                    data=child,
                    dataset_name=owner,
                    descriptions=child.attrs.get("descriptions", []),
                    label=leaf_label,
                )
            )

    # The handle is not closed here: the h5py dataset objects themselves are
    # handed to UnwindowedDataset (presumably read lazily; confirm before
    # adding a close()).
    h5_handle = h5py.File(h5_file, 'r')
    _collect(h5_handle)

    apply_windowing = bool(window_size and stride)
    if apply_windowing:
        for name, members in datasets.items():
            datasets[name] = [
                member.window(window_size=window_size, stride=stride)
                for member in members
            ]

    if concat:
        for name, members in datasets.items():
            datasets[name] = torch.utils.data.ConcatDataset(members)

    return datasets

In [None]:
# Load everything from H5_FILE, window each series (length 128, stride 1) and
# concatenate the windowed datasets per key.
datasets = load_h5py_dataset(H5_FILE, window_size=128, stride=1, concat=True)

In [None]:
# Show which dataset names (group paths) were found in the file.
datasets.keys()