In [9]:
import os
import random
import pickle

# Directory the pickle files are read from, and the two directories the
# train / validation halves of every file are written to.
input_folder = 'processed_data_final'
output_folder_train = 'processed_data_final_train'
output_folder_validate = 'processed_data_final_validate'

# Make sure both destination directories exist; an already-existing
# directory is left untouched.
for folder in (output_folder_train, output_folder_validate):
    os.makedirs(folder, exist_ok=True)

# Fraction of each file's samples that is assigned to the validation set;
# the remaining samples form the training set.
split_ratio = 0.3

# Split every pickle file in the input folder into a train part and a
# validation part. Both parts are written under the original file name, one
# into each output folder. Each pickle is expected to hold a dict keyed by
# sample ID.
#
# The listing is sorted because os.listdir() returns entries in arbitrary
# order; a fixed order keeps runs comparable when the random module has been
# seeded beforehand.
for file_name in sorted(os.listdir(input_folder)):
    input_path = os.path.join(input_folder, file_name)

    # os.listdir() also returns sub-directories (e.g. Jupyter's
    # ".ipynb_checkpoints"); opening one of those would raise and abort the
    # whole run, so anything that is not a regular file is skipped.
    if not os.path.isfile(input_path):
        continue

    # NOTE(security): pickle.load() can execute arbitrary code while
    # unpickling. Only run this on files from a trusted source.
    with open(input_path, 'rb') as f:
        device_data = pickle.load(f)

    # Number of samples that go to the validation set (rounded down, so very
    # small files may end up with an empty validation set).
    num_samples_validate = int(len(device_data) * split_ratio)

    # Shuffle the sample IDs so the split is random rather than positional
    sample_ids = list(device_data)
    random.shuffle(sample_ids)

    # The first num_samples_validate shuffled IDs form the validation set,
    # the rest form the training set.
    sample_ids_validate = sample_ids[:num_samples_validate]
    sample_ids_train = sample_ids[num_samples_validate:]

    # Build the two sub-dictionaries from the selected sample IDs
    device_data_train = {
        sample_id: device_data[sample_id] for sample_id in sample_ids_train
    }
    device_data_validate = {
        sample_id: device_data[sample_id] for sample_id in sample_ids_validate
    }

    # Save the train data dictionary to a pickle file
    with open(os.path.join(output_folder_train, file_name), 'wb') as f:
        pickle.dump(device_data_train, f)

    # Save the validation data dictionary to a pickle file
    with open(os.path.join(output_folder_validate, file_name), 'wb') as f:
        pickle.dump(device_data_validate, f)
