# In [None]:
import h5py
import os
import pandas as pd
from tqdm import tqdm

# Source HDF5 archive and the directory that receives the restored CSV files.
H5_FILE = '/data/TimeSeriesResearch/datasets/satya_data1.h5'
OUTPUT_DIR = '/data/TimeSeriesResearch/datasets/Satya_Restored/'

# Dataset folders to restore; any HDF5 key whose folder component is not
# listed here is skipped.
FOLDERS_TO_RESTORE = [
    "Accelerometer",
    "Airquality_pattern",
    "BCSV",
    "California",
    "Delaware",
    "Florida",
    "Gyrometer",
    "HAR",
    "IBCSV",
    "IOCSV",
    "Illinois",
    "Maryland",
    "Michigan",
    "NewYork",
    "OCSV",
    "Texas",
    "Wyoming",
    "sleepedf",
    "turbofan",
]

def restore_selected_folders(h5_file, output_dir, folders_to_restore):
    """Export selected datasets from an HDF5 file as per-folder CSV files.

    Every top-level key in the HDF5 file is expected to have the form
    ``<folder>___<label>___<file_idx>``. For each key whose folder is listed
    in ``folders_to_restore``, the dataset is written to
    ``<output_dir>/<folder>/file_<file_idx>_<label>.csv``. Column names are
    taken from the dataset's ``descriptions`` attribute when present;
    otherwise pandas' default integer column labels are used.

    Args:
        h5_file: Path of the HDF5 file to read.
        output_dir: Directory under which the folder tree is recreated.
            Created if it does not exist.
        folders_to_restore: Iterable of folder names to export. Keys that
            belong to any other folder are skipped without being read.

    Raises:
        ValueError: If a key belonging to a requested folder does not split
            into exactly three ``___``-separated parts.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Build the lookup once; membership is tested for every key in the file.
    wanted_folders = frozenset(folders_to_restore)

    with h5py.File(h5_file, "r") as h5f:
        for key in tqdm(h5f.keys(), desc="Restoring datasets"):
            # Decide from the key alone whether this dataset is wanted, so
            # unwanted datasets are never loaded from disk.
            parts = key.split('___')
            if parts[0] not in wanted_folders:
                continue
            if len(parts) != 3:
                raise ValueError(
                    f"Cannot parse HDF5 key {key!r}: expected "
                    "'<folder>___<label>___<file_idx>'"
                )
            dataset_folder, label, file_idx = parts

            dset = h5f[key]
            data_array = dset[()]

            # Fixed-length HDF5 strings are returned as bytes; decode them so
            # the CSV header does not contain "b'...'" literals.
            raw_names = dset.attrs.get("descriptions")
            column_names = None
            if raw_names is not None:
                decoded = [
                    name.decode("utf-8") if isinstance(name, bytes) else name
                    for name in raw_names
                ]
                # An empty name list would make pandas reject non-empty data,
                # so fall back to default column labels instead.
                column_names = decoded or None

            folder_path = os.path.join(output_dir, dataset_folder)
            os.makedirs(folder_path, exist_ok=True)

            csv_path = os.path.join(folder_path, f"file_{file_idx}_{label}.csv")

            df = pd.DataFrame(data_array, columns=column_names)
            df.to_csv(csv_path, index=False)

    print(f"Selected folders restored with labels to {output_dir}")

# Run the restore using the paths and folder list configured above.
restore_selected_folders(
    h5_file=H5_FILE,
    output_dir=OUTPUT_DIR,
    folders_to_restore=FOLDERS_TO_RESTORE,
)
