## 10 folds cross-validation: 80/% train - 10/% validation - 10%test

In [1]:
import os
import numpy as np
import shutil
from sklearn.model_selection import StratifiedKFold, train_test_split

def prepare_and_save_folds(train_dir, test_dir, folds_dir, id, n_splits_outer=10):
    
    if os.path.exists(folds_dir):
        shutil.rmtree(folds_dir)
        print(f"Carpeta '{folds_dir}' eliminada.")
    os.makedirs(folds_dir)
    print(f"Carpeta '{folds_dir}' creada.")

    train_files = [f for f in os.listdir(train_dir) if f.endswith('_X.npy')]
    labels = [0 if 'AVPEPUDEAC' in f else 1 for f in train_files]

    test_files = [f for f in os.listdir(test_dir) if f.endswith('_X.npy')]
    test_labels = [0 if 'AVPEPUDEAC' in f else 1 for f in test_files]
    print(train_files==test_files)

    skf_outer = StratifiedKFold(n_splits=n_splits_outer, shuffle=True)

    for fold_idx, (train_val_index, test_index) in enumerate(skf_outer.split(train_files, labels)):
        # Print the test files for the current fold
        print(f"Fold {fold_idx + 1} Test Files:")
        for i in test_index:
            print(f"File: {test_files[i]}, Label: {test_labels[i]}")

        # Load test data
        test_data = [np.load(os.path.join(test_dir, test_files[i])) for i in test_index]
        test_data = np.vstack(test_data)
        test_labels_fold = np.array([test_labels[i] for i in test_index])
        test_labels_fold = np.repeat(test_labels_fold, test_data.shape[0] // len(test_labels_fold))
        test_data, test_labels_fold = shuffle_data(test_data, test_labels_fold)  # Shuffle test data


        # Assign data to train and validation using the inner split
        train_index, val_index = train_test_split(train_val_index, test_size=10, stratify=[labels[i] for i in train_val_index], shuffle=True)

        # Print the train files for the current fold
        print(f"Fold {fold_idx + 1} Train Files:")
        for i in train_index:
            print(f"File: {train_files[i]}, Label: {labels[i]}")

        # Print the validation files for the current fold
        print(f"Fold {fold_idx + 1} Validation Files:")
        for i in val_index:
            print(f"File: {train_files[i]}, Label: {labels[i]}")

        train_data = [np.load(os.path.join(train_dir, train_files[i])) for i in train_index]
        train_data = np.vstack(train_data)
        train_labels = np.array([labels[i] for i in train_index])
        train_labels = np.repeat(train_labels, train_data.shape[0] // len(train_labels))
        train_data, train_labels = shuffle_data(train_data, train_labels)  # Shuffle train data

        val_data = [np.load(os.path.join(train_dir, train_files[i])) for i in val_index]
        val_data = np.vstack(val_data)
        val_labels = np.array([labels[i] for i in val_index])
        val_labels = np.repeat(val_labels, val_data.shape[0] // len(val_labels))
        val_data, val_labels = shuffle_data(val_data, val_labels)


        # Save the data to .npy files
        np.save(os.path.join(folds_dir, f'X_train_{id}_fold_{fold_idx+1}.npy'), train_data)
        np.save(os.path.join(folds_dir, f'y_train_{id}_fold_{fold_idx+1}.npy'), train_labels)
        np.save(os.path.join(folds_dir, f'X_val_{id}_fold_{fold_idx+1}.npy'), val_data)
        np.save(os.path.join(folds_dir, f'y_val_{id}_fold_{fold_idx+1}.npy'), val_labels)
        np.save(os.path.join(folds_dir, f'X_test_{id}_fold_{fold_idx+1}.npy'), test_data)
        np.save(os.path.join(folds_dir, f'y_test_{id}_fold_{fold_idx+1}.npy'), test_labels_fold)

    print("Archivos .npy guardados para cada fold en la carpeta de folds.")

def shuffle_data(data, labels):
    shuffle_indices = np.random.permutation(len(labels))
    data = data[shuffle_indices]
    labels = labels[shuffle_indices]
    return data, labels

In [2]:
prepare_and_save_folds(
    train_dir='TFM_MartaRey/datos/sets/generated_together_train_40_1e5',
    test_dir='TFM_MartaRey/datos/sets/test_together',
    folds_dir='TFM_MartaRey/datos/sets/folds_10/files_40_1e5',
    id='40_1e5'
)

Carpeta 'TFM_MartaRey/datos/sets/folds_10/files_40_1e5' creada.
True
Fold 1 Test Files:
File: AVPEPUDEA0049_X.npy, Label: 1
File: AVPEPUDEAC0022_X.npy, Label: 0
File: AVPEPUDEA0014_X.npy, Label: 1
File: AVPEPUDEAC0047_X.npy, Label: 0
File: AVPEPUDEAC0037_X.npy, Label: 0
File: AVPEPUDEA0032_X.npy, Label: 1
File: AVPEPUDEAC0049_X.npy, Label: 0
File: AVPEPUDEA0041_X.npy, Label: 1
File: AVPEPUDEA0015_X.npy, Label: 1
File: AVPEPUDEAC0048_X.npy, Label: 0
Fold 1 Train Files:
File: AVPEPUDEA0002_X.npy, Label: 1
File: AVPEPUDEAC0008_X.npy, Label: 0
File: AVPEPUDEAC0011_X.npy, Label: 0
File: AVPEPUDEAC0029_X.npy, Label: 0
File: AVPEPUDEAC0013_X.npy, Label: 0
File: AVPEPUDEA0038_X.npy, Label: 1
File: AVPEPUDEAC0015_X.npy, Label: 0
File: AVPEPUDEAC0042_X.npy, Label: 0
File: AVPEPUDEAC0051_X.npy, Label: 0
File: AVPEPUDEA0005_X.npy, Label: 1
File: AVPEPUDEA0022_X.npy, Label: 1
File: AVPEPUDEA0056_X.npy, Label: 1
File: AVPEPUDEA0013_X.npy, Label: 1
File: AVPEPUDEA0054_X.npy, Label: 1
File: AVPEPUDEA0

In [3]:
prepare_and_save_folds(
    train_dir='TFM_MartaRey/datos/sets/generated_together_train_40_1e5_N',
    test_dir='TFM_MartaRey/datos/sets/test_together_N',
    folds_dir='TFM_MartaRey/datos/sets/folds_10/files_40_1e5_N',
    id='40_1e5_N'
)

Carpeta 'TFM_MartaRey/datos/sets/folds_10/files_40_1e5_N' creada.
True
Fold 1 Test Files:
File: AVPEPUDEA0054_X.npy, Label: 1
File: AVPEPUDEA0020_X.npy, Label: 1
File: AVPEPUDEAC0050_X.npy, Label: 0
File: AVPEPUDEA0039_X.npy, Label: 1
File: AVPEPUDEAC0045_X.npy, Label: 0
File: AVPEPUDEA0025_X.npy, Label: 1
File: AVPEPUDEAC0057_X.npy, Label: 0
File: AVPEPUDEA0006_X.npy, Label: 1
File: AVPEPUDEAC0048_X.npy, Label: 0
File: AVPEPUDEAC0041_X.npy, Label: 0
Fold 1 Train Files:
File: AVPEPUDEAC0012_X.npy, Label: 0
File: AVPEPUDEAC0016_X.npy, Label: 0
File: AVPEPUDEAC0051_X.npy, Label: 0
File: AVPEPUDEAC0040_X.npy, Label: 0
File: AVPEPUDEA0046_X.npy, Label: 1
File: AVPEPUDEA0008_X.npy, Label: 1
File: AVPEPUDEAC0017_X.npy, Label: 0
File: AVPEPUDEAC0047_X.npy, Label: 0
File: AVPEPUDEAC0025_X.npy, Label: 0
File: AVPEPUDEAC0019_X.npy, Label: 0
File: AVPEPUDEAC0021_X.npy, Label: 0
File: AVPEPUDEAC0035_X.npy, Label: 0
File: AVPEPUDEA0021_X.npy, Label: 1
File: AVPEPUDEA0016_X.npy, Label: 1
File: AVPEP

In [4]:
prepare_and_save_folds(
    train_dir='TFM_MartaRey/datos/sets/generated_together_train_40_1e6',
    test_dir='TFM_MartaRey/datos/sets/test_together',
    folds_dir='TFM_MartaRey/datos/sets/folds_10/files_40_1e6',
    id='40_1e6'
)

Carpeta 'TFM_MartaRey/datos/sets/folds_10/files_40_1e6' creada.
True
Fold 1 Test Files:
File: AVPEPUDEA0049_X.npy, Label: 1
File: AVPEPUDEAC0033_X.npy, Label: 0
File: AVPEPUDEAC0014_X.npy, Label: 0
File: AVPEPUDEA0050_X.npy, Label: 1
File: AVPEPUDEA0048_X.npy, Label: 1
File: AVPEPUDEA0008_X.npy, Label: 1
File: AVPEPUDEA0056_X.npy, Label: 1
File: AVPEPUDEAC0008_X.npy, Label: 0
File: AVPEPUDEAC0003_X.npy, Label: 0
File: AVPEPUDEAC0043_X.npy, Label: 0
Fold 1 Train Files:
File: AVPEPUDEAC0030_X.npy, Label: 0
File: AVPEPUDEAC0050_X.npy, Label: 0
File: AVPEPUDEAC0010_X.npy, Label: 0
File: AVPEPUDEA0007_X.npy, Label: 1
File: AVPEPUDEAC0015_X.npy, Label: 0
File: AVPEPUDEA0054_X.npy, Label: 1
File: AVPEPUDEAC0028_X.npy, Label: 0
File: AVPEPUDEA0045_X.npy, Label: 1
File: AVPEPUDEA0023_X.npy, Label: 1
File: AVPEPUDEAC0035_X.npy, Label: 0
File: AVPEPUDEA0001_X.npy, Label: 1
File: AVPEPUDEAC0031_X.npy, Label: 0
File: AVPEPUDEA0059_X.npy, Label: 1
File: AVPEPUDEA0055_X.npy, Label: 1
File: AVPEPUDEA0

In [5]:
prepare_and_save_folds(
    train_dir='TFM_MartaRey/datos/sets/generated_together_train_40_1e6_N',
    test_dir='TFM_MartaRey/datos/sets/test_together_N',
    folds_dir='TFM_MartaRey/datos/sets/folds_10/files_40_1e6_N',
    id='40_1e6_N'
)

Carpeta 'TFM_MartaRey/datos/sets/folds_10/files_40_1e6_N' creada.
True
Fold 1 Test Files:
File: AVPEPUDEAC0033_X.npy, Label: 0
File: AVPEPUDEA0003_X.npy, Label: 1
File: AVPEPUDEAC0025_X.npy, Label: 0
File: AVPEPUDEA0055_X.npy, Label: 1
File: AVPEPUDEAC0057_X.npy, Label: 0
File: AVPEPUDEA0007_X.npy, Label: 1
File: AVPEPUDEA0015_X.npy, Label: 1
File: AVPEPUDEAC0015_X.npy, Label: 0
File: AVPEPUDEAC0041_X.npy, Label: 0
File: AVPEPUDEA0053_X.npy, Label: 1
Fold 1 Train Files:
File: AVPEPUDEAC0018_X.npy, Label: 0
File: AVPEPUDEA0011_X.npy, Label: 1
File: AVPEPUDEAC0053_X.npy, Label: 0
File: AVPEPUDEAC0004_X.npy, Label: 0
File: AVPEPUDEA0017_X.npy, Label: 1
File: AVPEPUDEA0013_X.npy, Label: 1
File: AVPEPUDEA0034_X.npy, Label: 1
File: AVPEPUDEA0029_X.npy, Label: 1
File: AVPEPUDEA0031_X.npy, Label: 1
File: AVPEPUDEAC0040_X.npy, Label: 0
File: AVPEPUDEAC0008_X.npy, Label: 0
File: AVPEPUDEA0024_X.npy, Label: 1
File: AVPEPUDEA0050_X.npy, Label: 1
File: AVPEPUDEAC0045_X.npy, Label: 0
File: AVPEPUDEA

In [7]:
a = np.load('TFM_MartaRey/datos/sets/folds_10/files_40_1e5_N/X_test_40_1e5_N_fold_5.npy')