In [2]:
import os
import numpy as np
import h5py
from sklearn.model_selection import train_test_split
from collections import Counter

# Load the dataset in chunks, straight into preallocated arrays
output_hdf5_path = 'final_datasetv3AllRecords.h5'
chunk_size = 10000  # Rows copied out of the HDF5 file per read

# Open the HDF5 file and copy every dataset the split needs.
# Each dataset is written chunk by chunk into one preallocated buffer, so a
# list of chunks and its concatenated copy never coexist in memory.
# 'segments' only supplies the row count: its contents are not used anywhere
# below, so they are no longer read.
loaded_arrays = {}
with h5py.File(output_hdf5_path, 'r') as hdf5_file:
    total_samples = hdf5_file['segments'].shape[0]

    for dataset_name in ('labels', 'rp_images', 'gaf_images', 'mtf_images'):
        dataset = hdf5_file[dataset_name]
        # All datasets must describe the same samples, row for row.
        if dataset.shape[0] != total_samples:
            raise ValueError(
                f"Dataset '{dataset_name}' has {dataset.shape[0]} rows, "
                f"expected {total_samples}")
        buffer = np.empty(dataset.shape, dtype=dataset.dtype)
        for i in range(0, total_samples, chunk_size):
            buffer[i:i+chunk_size] = dataset[i:i+chunk_size]
        loaded_arrays[dataset_name] = buffer

y_labels = loaded_arrays['labels']
X_rp_images = loaded_arrays['rp_images']
X_gaf_images = loaded_arrays['gaf_images']
X_mtf_images = loaded_arrays['mtf_images']

# Count the samples of each class (keys are kept in first-seen order)
class_counts = Counter(y_labels)

# Row order used for the split: every sample is kept, grouped class by class.
# NOTE: nothing is down-sampled, so the original class imbalance is preserved
# despite the "balanced" name.
balanced_indices = np.concatenate(
    [np.flatnonzero(y_labels == class_label) for class_label in class_counts])
y_subset = y_labels[balanced_indices]

# Split row indices instead of the image arrays. train_test_split applies one
# permutation to all of its inputs, so indexing the images with the split
# indices yields exactly the arrays a direct split of the regrouped images
# would, without the intermediate full-size copies.
idx_train, idx_temp, y_train, y_temp = train_test_split(
    balanced_indices, y_subset, test_size=0.3, stratify=y_subset, random_state=42)
idx_val, idx_test, y_val, y_test = train_test_split(
    idx_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42)

X_rp_train, X_rp_val, X_rp_test = (
    X_rp_images[rows] for rows in (idx_train, idx_val, idx_test))
X_gaf_train, X_gaf_val, X_gaf_test = (
    X_gaf_images[rows] for rows in (idx_train, idx_val, idx_test))
X_mtf_train, X_mtf_val, X_mtf_test = (
    X_mtf_images[rows] for rows in (idx_train, idx_val, idx_test))

# Save the split data
save_dir = './split_datav5_full'
os.makedirs(save_dir, exist_ok=True)
arrays_by_filename = {
    'X_rp_train.npy': X_rp_train,
    'X_gaf_train.npy': X_gaf_train,
    'X_mtf_train.npy': X_mtf_train,
    'y_train.npy': y_train,
    'X_rp_val.npy': X_rp_val,
    'X_gaf_val.npy': X_gaf_val,
    'X_mtf_val.npy': X_mtf_val,
    'y_val.npy': y_val,
    'X_rp_test.npy': X_rp_test,
    'X_gaf_test.npy': X_gaf_test,
    'X_mtf_test.npy': X_mtf_test,
    'y_test.npy': y_test,
}
for filename, array in arrays_by_filename.items():
    np.save(os.path.join(save_dir, filename), array)

print(f"Data successfully split and saved in directory: {save_dir}")

# Check the number of samples per class in the training, validation, and test sets
def count_samples_per_class(y):
    """Print how many samples of each class occur in ``y``.

    Args:
        y: Iterable of hashable class labels (for example a 1-D label array).

    Returns:
        None. One ``Class <label>: <count> samples`` line is printed per
        distinct label.
    """
    class_counts = Counter(y)
    # Sorted labels make the reports of different splits line up; labels that
    # cannot be ordered against each other keep their first-seen order.
    try:
        ordered_classes = sorted(class_counts)
    except TypeError:
        ordered_classes = list(class_counts)
    for cls in ordered_classes:
        print(f"Class {cls}: {class_counts[cls]} samples")

# Print the per-class sample counts of every split, training set first
split_reports = (
    ("Number of samples per class in the training set:", y_train),
    ("\nNumber of samples per class in the validation set:", y_val),
    ("\nNumber of samples per class in the test set:", y_test),
)
for heading, split_labels in split_reports:
    print(heading)
    count_samples_per_class(split_labels)


: 

In [1]:
#using smaller chunks
import os
import numpy as np
import h5py
from sklearn.model_selection import train_test_split
from collections import Counter

# Load the dataset in smaller chunks, straight into preallocated arrays
output_hdf5_path = 'final_datasetv3AllRecords.h5'
chunk_size = 5000  # Rows copied out of the HDF5 file per read

# Open the HDF5 file and copy every dataset the split needs.
# Each dataset is written chunk by chunk into one preallocated buffer, so a
# list of chunks and its concatenated copy never coexist in memory.
# 'segments' only supplies the row count: its contents are not used anywhere
# below, so they are no longer read.
loaded_arrays = {}
with h5py.File(output_hdf5_path, 'r') as hdf5_file:
    total_samples = hdf5_file['segments'].shape[0]

    for dataset_name in ('labels', 'rp_images', 'gaf_images', 'mtf_images'):
        dataset = hdf5_file[dataset_name]
        # All datasets must describe the same samples, row for row.
        if dataset.shape[0] != total_samples:
            raise ValueError(
                f"Dataset '{dataset_name}' has {dataset.shape[0]} rows, "
                f"expected {total_samples}")
        buffer = np.empty(dataset.shape, dtype=dataset.dtype)
        for i in range(0, total_samples, chunk_size):
            buffer[i:i+chunk_size] = dataset[i:i+chunk_size]
        loaded_arrays[dataset_name] = buffer

y_labels = loaded_arrays['labels']
X_rp_images = loaded_arrays['rp_images']
X_gaf_images = loaded_arrays['gaf_images']
X_mtf_images = loaded_arrays['mtf_images']

# Count the samples of each class (keys are kept in first-seen order)
class_counts = Counter(y_labels)

# Row order used for the split: every sample is kept, grouped class by class.
# NOTE: nothing is down-sampled, so the original class imbalance is preserved
# despite the "balanced" name.
balanced_indices = np.concatenate(
    [np.flatnonzero(y_labels == class_label) for class_label in class_counts])
y_subset = y_labels[balanced_indices]

# Split row indices instead of the image arrays. train_test_split applies one
# permutation to all of its inputs, so indexing the images with the split
# indices yields exactly the arrays a direct split of the regrouped images
# would, without the intermediate full-size copies.
idx_train, idx_temp, y_train, y_temp = train_test_split(
    balanced_indices, y_subset, test_size=0.3, stratify=y_subset, random_state=42)
idx_val, idx_test, y_val, y_test = train_test_split(
    idx_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42)

X_rp_train, X_rp_val, X_rp_test = (
    X_rp_images[rows] for rows in (idx_train, idx_val, idx_test))
X_gaf_train, X_gaf_val, X_gaf_test = (
    X_gaf_images[rows] for rows in (idx_train, idx_val, idx_test))
X_mtf_train, X_mtf_val, X_mtf_test = (
    X_mtf_images[rows] for rows in (idx_train, idx_val, idx_test))

# Save the split data
save_dir = './split_datav5_full'
os.makedirs(save_dir, exist_ok=True)
arrays_by_filename = {
    'X_rp_train.npy': X_rp_train,
    'X_gaf_train.npy': X_gaf_train,
    'X_mtf_train.npy': X_mtf_train,
    'y_train.npy': y_train,
    'X_rp_val.npy': X_rp_val,
    'X_gaf_val.npy': X_gaf_val,
    'X_mtf_val.npy': X_mtf_val,
    'y_val.npy': y_val,
    'X_rp_test.npy': X_rp_test,
    'X_gaf_test.npy': X_gaf_test,
    'X_mtf_test.npy': X_mtf_test,
    'y_test.npy': y_test,
}
for filename, array in arrays_by_filename.items():
    np.save(os.path.join(save_dir, filename), array)

print(f"Data successfully split and saved in directory: {save_dir}")

# Check the number of samples per class in the training, validation, and test sets
def count_samples_per_class(y):
    """Print how many samples of each class occur in ``y``.

    Args:
        y: Iterable of hashable class labels (for example a 1-D label array).

    Returns:
        None. One ``Class <label>: <count> samples`` line is printed per
        distinct label.
    """
    class_counts = Counter(y)
    # Sorted labels make the reports of different splits line up; labels that
    # cannot be ordered against each other keep their first-seen order.
    try:
        ordered_classes = sorted(class_counts)
    except TypeError:
        ordered_classes = list(class_counts)
    for cls in ordered_classes:
        print(f"Class {cls}: {class_counts[cls]} samples")

# Print the per-class sample counts of every split, training set first
split_reports = (
    ("Number of samples per class in the training set:", y_train),
    ("\nNumber of samples per class in the validation set:", y_val),
    ("\nNumber of samples per class in the test set:", y_test),
)
for heading, split_labels in split_reports:
    print(heading)
    count_samples_per_class(split_labels)


: 

In [1]:
#process each chunk sequentially
import os
import numpy as np
import h5py
from sklearn.model_selection import train_test_split
from collections import Counter

# Function to process each chunk
def process_chunk(X_rp_images_chunk, X_gaf_images_chunk, X_mtf_images_chunk, y_labels_chunk):
    """Regroup the rows of one chunk so that samples of the same class are adjacent.

    Every row is kept: classes appear in the order in which they are first
    seen in ``y_labels_chunk`` and, inside a class, rows keep their original
    relative order. No down-sampling takes place, so the class proportions of
    the chunk are unchanged.

    Args:
        X_rp_images_chunk: Array-like whose first axis indexes the samples.
        X_gaf_images_chunk: Array-like aligned row for row with the above.
        X_mtf_images_chunk: Array-like aligned row for row with the above.
        y_labels_chunk: 1-D array-like holding one class label per sample.

    Returns:
        Tuple ``(X_rp_subset, X_gaf_subset, X_mtf_subset, y_subset)`` of
        arrays holding the regrouped rows.
    """
    # Accept plain sequences as well as arrays; arrays pass through untouched.
    X_rp_images_chunk = np.asarray(X_rp_images_chunk)
    X_gaf_images_chunk = np.asarray(X_gaf_images_chunk)
    X_mtf_images_chunk = np.asarray(X_mtf_images_chunk)
    y_labels_chunk = np.asarray(y_labels_chunk)

    # Counter keeps its keys in first-seen order, which fixes the class order.
    rows_per_class = [np.flatnonzero(y_labels_chunk == class_label)
                      for class_label in Counter(y_labels_chunk)]
    if rows_per_class:
        balanced_indices = np.concatenate(rows_per_class)
    else:
        # Empty chunk: select nothing, but with a valid integer index array.
        balanced_indices = np.empty(0, dtype=np.intp)

    X_rp_subset = X_rp_images_chunk[balanced_indices]
    X_gaf_subset = X_gaf_images_chunk[balanced_indices]
    X_mtf_subset = X_mtf_images_chunk[balanced_indices]
    y_subset = y_labels_chunk[balanced_indices]

    return X_rp_subset, X_gaf_subset, X_mtf_subset, y_subset

# Lists to store the processed data
X_rp_images_processed = []
X_gaf_images_processed = []
X_mtf_images_processed = []
y_labels_processed = []

# Load the dataset in smaller chunks
output_hdf5_path = 'final_datasetv3AllRecords.h5'
chunk_size = 5000  # Rows read from the HDF5 file per iteration

# Open the HDF5 file and read the dataset in chunks
with h5py.File(output_hdf5_path, 'r') as hdf5_file:
    total_samples = hdf5_file['segments'].shape[0]

    # Read data in chunks
    for i in range(0, total_samples, chunk_size):
        X_rp_images_chunk = hdf5_file['rp_images'][i:i+chunk_size]
        X_gaf_images_chunk = hdf5_file['gaf_images'][i:i+chunk_size]
        X_mtf_images_chunk = hdf5_file['mtf_images'][i:i+chunk_size]
        y_labels_chunk = hdf5_file['labels'][i:i+chunk_size]

        # Process each chunk
        X_rp_subset, X_gaf_subset, X_mtf_subset, y_subset = process_chunk(
            X_rp_images_chunk, X_gaf_images_chunk, X_mtf_images_chunk, y_labels_chunk)

        X_rp_images_processed.append(X_rp_subset)
        X_gaf_images_processed.append(X_gaf_subset)
        X_mtf_images_processed.append(X_mtf_subset)
        y_labels_processed.append(y_subset)

# Concatenate the processed chunks one modality at a time and drop each chunk
# list as soon as it has been merged. Keeping the lists referenced would hold
# a second full copy of the data for the rest of the session.
X_rp_images = np.concatenate(X_rp_images_processed, axis=0)
del X_rp_images_processed
X_gaf_images = np.concatenate(X_gaf_images_processed, axis=0)
del X_gaf_images_processed
X_mtf_images = np.concatenate(X_mtf_images_processed, axis=0)
del X_mtf_images_processed
y_labels = np.concatenate(y_labels_processed, axis=0)
del y_labels_processed

# Split the data into training (70%), validation (15%) and test (15%) sets,
# stratified by label so each split keeps the overall class proportions
X_rp_train, X_rp_temp, X_gaf_train, X_gaf_temp, X_mtf_train, X_mtf_temp, y_train, y_temp = train_test_split(
    X_rp_images, X_gaf_images, X_mtf_images, y_labels, test_size=0.3, stratify=y_labels, random_state=42)
X_rp_val, X_rp_test, X_gaf_val, X_gaf_test, X_mtf_val, X_mtf_test, y_val, y_test = train_test_split(
    X_rp_temp, X_gaf_temp, X_mtf_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42)

# Save the split data
save_dir = './split_datav5_full'
os.makedirs(save_dir, exist_ok=True)
arrays_by_filename = {
    'X_rp_train.npy': X_rp_train,
    'X_gaf_train.npy': X_gaf_train,
    'X_mtf_train.npy': X_mtf_train,
    'y_train.npy': y_train,
    'X_rp_val.npy': X_rp_val,
    'X_gaf_val.npy': X_gaf_val,
    'X_mtf_val.npy': X_mtf_val,
    'y_val.npy': y_val,
    'X_rp_test.npy': X_rp_test,
    'X_gaf_test.npy': X_gaf_test,
    'X_mtf_test.npy': X_mtf_test,
    'y_test.npy': y_test,
}
for filename, array in arrays_by_filename.items():
    np.save(os.path.join(save_dir, filename), array)

print(f"Data successfully split and saved in directory: {save_dir}")

# Check the number of samples per class in the training, validation, and test sets
def count_samples_per_class(y):
    """Print how many samples of each class occur in ``y``.

    Args:
        y: Iterable of hashable class labels (for example a 1-D label array).

    Returns:
        None. One ``Class <label>: <count> samples`` line is printed per
        distinct label.
    """
    class_counts = Counter(y)
    # Sorted labels make the reports of different splits line up; labels that
    # cannot be ordered against each other keep their first-seen order.
    try:
        ordered_classes = sorted(class_counts)
    except TypeError:
        ordered_classes = list(class_counts)
    for cls in ordered_classes:
        print(f"Class {cls}: {class_counts[cls]} samples")

# Print the per-class sample counts of every split, training set first
split_reports = (
    ("Number of samples per class in the training set:", y_train),
    ("\nNumber of samples per class in the validation set:", y_val),
    ("\nNumber of samples per class in the test set:", y_test),
)
for heading, split_labels in split_reports:
    print(heading)
    count_samples_per_class(split_labels)


: 

In [1]:
#Process and Save Chunks Independently:

import os
import numpy as np
import h5py
from collections import Counter
from sklearn.model_selection import train_test_split

# Function to process each chunk and save it to disk
def process_and_save_chunk(X_rp_images_chunk, X_gaf_images_chunk, X_mtf_images_chunk, y_labels_chunk, chunk_id, save_dir):
    """Regroup one chunk by class and write it to ``save_dir`` as four ``.npy`` files.

    Every row is kept: classes appear in the order in which they are first
    seen in ``y_labels_chunk`` and rows keep their relative order inside a
    class. No down-sampling takes place. The files written are
    ``X_rp_chunk_<chunk_id>.npy``, ``X_gaf_chunk_<chunk_id>.npy``,
    ``X_mtf_chunk_<chunk_id>.npy`` and ``y_chunk_<chunk_id>.npy``.

    Args:
        X_rp_images_chunk: Array-like whose first axis indexes the samples.
        X_gaf_images_chunk: Array-like aligned row for row with the above.
        X_mtf_images_chunk: Array-like aligned row for row with the above.
        y_labels_chunk: 1-D array-like holding one class label per sample.
        chunk_id: Value inserted into the four file names.
        save_dir: Output directory; created if it does not exist yet.

    Returns:
        None.
    """
    # Accept plain sequences as well as arrays; arrays pass through untouched.
    X_rp_images_chunk = np.asarray(X_rp_images_chunk)
    X_gaf_images_chunk = np.asarray(X_gaf_images_chunk)
    X_mtf_images_chunk = np.asarray(X_mtf_images_chunk)
    y_labels_chunk = np.asarray(y_labels_chunk)

    # Counter keeps its keys in first-seen order, which fixes the class order.
    rows_per_class = [np.flatnonzero(y_labels_chunk == class_label)
                      for class_label in Counter(y_labels_chunk)]
    if rows_per_class:
        balanced_indices = np.concatenate(rows_per_class)
    else:
        # Empty chunk: select nothing, but with a valid integer index array.
        balanced_indices = np.empty(0, dtype=np.intp)

    # Save the regrouped chunk to disk
    os.makedirs(save_dir, exist_ok=True)
    np.save(os.path.join(save_dir, f'X_rp_chunk_{chunk_id}.npy'), X_rp_images_chunk[balanced_indices])
    np.save(os.path.join(save_dir, f'X_gaf_chunk_{chunk_id}.npy'), X_gaf_images_chunk[balanced_indices])
    np.save(os.path.join(save_dir, f'X_mtf_chunk_{chunk_id}.npy'), X_mtf_images_chunk[balanced_indices])
    np.save(os.path.join(save_dir, f'y_chunk_{chunk_id}.npy'), y_labels_chunk[balanced_indices])

# Output directory for the per-chunk files (created on first use)
save_dir = './split_datav5_full'
os.makedirs(save_dir, exist_ok=True)

# Source file and number of rows handled per iteration
output_hdf5_path = 'final_datasetv3AllRecords.h5'
chunk_size = 5000  # Define a smaller chunk size to fit into memory

# Walk through the HDF5 file chunk by chunk and write each processed chunk to disk
with h5py.File(output_hdf5_path, 'r') as hdf5_file:
    total_samples = hdf5_file['segments'].shape[0]

    for i in range(0, total_samples, chunk_size):
        chunk_rows = slice(i, i + chunk_size)
        X_rp_images_chunk = hdf5_file['rp_images'][chunk_rows]
        X_gaf_images_chunk = hdf5_file['gaf_images'][chunk_rows]
        X_mtf_images_chunk = hdf5_file['mtf_images'][chunk_rows]
        y_labels_chunk = hdf5_file['labels'][chunk_rows]

        # Chunks are numbered 0, 1, 2, ... in file order
        process_and_save_chunk(
            X_rp_images_chunk,
            X_gaf_images_chunk,
            X_mtf_images_chunk,
            y_labels_chunk,
            chunk_id=i // chunk_size,
            save_dir=save_dir,
        )

# Concatenate the processed chunks from disk
def load_and_concatenate_chunks(chunk_prefix, save_dir):
    """Load every ``<chunk_prefix><id>.npy`` file in ``save_dir`` and stack them.

    Only files whose name is exactly the prefix, a decimal chunk id and the
    ``.npy`` extension are read, and they are concatenated in numeric id
    order (``..._2`` before ``..._10``). A plain lexicographic sort would
    place chunk 10 before chunk 2 and would also pick up any unrelated file
    that happens to share the prefix.

    Args:
        chunk_prefix: File-name prefix in front of the chunk id,
            e.g. ``'X_rp_chunk_'``.
        save_dir: Directory containing the chunk files.

    Returns:
        The chunk arrays concatenated along axis 0.

    Raises:
        ValueError: If ``save_dir`` holds no matching chunk file.
    """
    suffix = '.npy'
    numbered_files = []
    for file_name in os.listdir(save_dir):
        if not (file_name.startswith(chunk_prefix) and file_name.endswith(suffix)):
            continue
        chunk_id = file_name[len(chunk_prefix):-len(suffix)]
        if chunk_id.isascii() and chunk_id.isdigit():
            numbered_files.append((int(chunk_id), file_name))

    if not numbered_files:
        raise ValueError(
            f"No '{chunk_prefix}<id>{suffix}' files found in {save_dir!r}")

    numbered_files.sort()
    chunks = [np.load(os.path.join(save_dir, file_name))
              for _, file_name in numbered_files]
    return np.concatenate(chunks, axis=0)

# Reassemble each modality and the labels from the chunk files on disk
X_rp_images, X_gaf_images, X_mtf_images, y_labels = (
    load_and_concatenate_chunks(prefix, save_dir)
    for prefix in ('X_rp_chunk_', 'X_gaf_chunk_', 'X_mtf_chunk_', 'y_chunk_')
)

# First split: 70% training, 30% held out (stratified by label)
(X_rp_train, X_rp_temp,
 X_gaf_train, X_gaf_temp,
 X_mtf_train, X_mtf_temp,
 y_train, y_temp) = train_test_split(
    X_rp_images, X_gaf_images, X_mtf_images, y_labels,
    test_size=0.3, stratify=y_labels, random_state=42)

# Second split: the held-out part is halved into validation and test
(X_rp_val, X_rp_test,
 X_gaf_val, X_gaf_test,
 X_mtf_val, X_mtf_test,
 y_val, y_test) = train_test_split(
    X_rp_temp, X_gaf_temp, X_mtf_temp, y_temp,
    test_size=0.5, stratify=y_temp, random_state=42)

# Write every split array to its own .npy file inside save_dir
arrays_by_filename = {
    'X_rp_train.npy': X_rp_train,
    'X_gaf_train.npy': X_gaf_train,
    'X_mtf_train.npy': X_mtf_train,
    'y_train.npy': y_train,
    'X_rp_val.npy': X_rp_val,
    'X_gaf_val.npy': X_gaf_val,
    'X_mtf_val.npy': X_mtf_val,
    'y_val.npy': y_val,
    'X_rp_test.npy': X_rp_test,
    'X_gaf_test.npy': X_gaf_test,
    'X_mtf_test.npy': X_mtf_test,
    'y_test.npy': y_test,
}
for filename, array in arrays_by_filename.items():
    np.save(os.path.join(save_dir, filename), array)

print(f"Data successfully split and saved in directory: {save_dir}")

# Check the number of samples per class in the training, validation, and test sets
def count_samples_per_class(y):
    """Print how many samples of each class occur in ``y``.

    Args:
        y: Iterable of hashable class labels (for example a 1-D label array).

    Returns:
        None. One ``Class <label>: <count> samples`` line is printed per
        distinct label.
    """
    class_counts = Counter(y)
    # Sorted labels make the reports of different splits line up; labels that
    # cannot be ordered against each other keep their first-seen order.
    try:
        ordered_classes = sorted(class_counts)
    except TypeError:
        ordered_classes = list(class_counts)
    for cls in ordered_classes:
        print(f"Class {cls}: {class_counts[cls]} samples")

# Print the per-class sample counts of every split, training set first
split_reports = (
    ("Number of samples per class in the training set:", y_train),
    ("\nNumber of samples per class in the validation set:", y_val),
    ("\nNumber of samples per class in the test set:", y_test),
)
for heading, split_labels in split_reports:
    print(heading)
    count_samples_per_class(split_labels)


OSError: Not enough free space to write 2440705212 bytes

In [1]:
#save data in a compressed format:
import os
import numpy as np
import h5py
from collections import Counter
from sklearn.model_selection import train_test_split

# Function to process each chunk and save it to disk
def process_and_save_chunk(X_rp_images_chunk, X_gaf_images_chunk, X_mtf_images_chunk, y_labels_chunk, chunk_id, save_dir):
    """Regroup one chunk by class and write it to ``save_dir`` as one compressed archive.

    Every row is kept: classes appear in the order in which they are first
    seen in ``y_labels_chunk`` and rows keep their relative order inside a
    class. No down-sampling takes place. The archive is named
    ``chunk_<chunk_id>.npz`` and stores the arrays under the keys ``X_rp``,
    ``X_gaf``, ``X_mtf`` and ``y``.

    Args:
        X_rp_images_chunk: Array-like whose first axis indexes the samples.
        X_gaf_images_chunk: Array-like aligned row for row with the above.
        X_mtf_images_chunk: Array-like aligned row for row with the above.
        y_labels_chunk: 1-D array-like holding one class label per sample.
        chunk_id: Value inserted into the archive name.
        save_dir: Output directory; created if it does not exist yet.

    Returns:
        None.
    """
    # Accept plain sequences as well as arrays; arrays pass through untouched.
    X_rp_images_chunk = np.asarray(X_rp_images_chunk)
    X_gaf_images_chunk = np.asarray(X_gaf_images_chunk)
    X_mtf_images_chunk = np.asarray(X_mtf_images_chunk)
    y_labels_chunk = np.asarray(y_labels_chunk)

    # Counter keeps its keys in first-seen order, which fixes the class order.
    rows_per_class = [np.flatnonzero(y_labels_chunk == class_label)
                      for class_label in Counter(y_labels_chunk)]
    if rows_per_class:
        balanced_indices = np.concatenate(rows_per_class)
    else:
        # Empty chunk: select nothing, but with a valid integer index array.
        balanced_indices = np.empty(0, dtype=np.intp)

    # Save the regrouped chunk to disk in compressed format
    os.makedirs(save_dir, exist_ok=True)
    np.savez_compressed(os.path.join(save_dir, f'chunk_{chunk_id}.npz'),
                        X_rp=X_rp_images_chunk[balanced_indices],
                        X_gaf=X_gaf_images_chunk[balanced_indices],
                        X_mtf=X_mtf_images_chunk[balanced_indices],
                        y=y_labels_chunk[balanced_indices])

# Output directory for the compressed per-chunk archives (created on first use)
save_dir = './split_datav5_full_compressed'
os.makedirs(save_dir, exist_ok=True)

# Source file and number of rows handled per iteration
output_hdf5_path = 'final_datasetv3AllRecords.h5'
chunk_size = 5000  # Define a smaller chunk size to fit into memory

# Walk through the HDF5 file chunk by chunk and write each processed chunk to disk
with h5py.File(output_hdf5_path, 'r') as hdf5_file:
    total_samples = hdf5_file['segments'].shape[0]

    for i in range(0, total_samples, chunk_size):
        chunk_rows = slice(i, i + chunk_size)
        X_rp_images_chunk = hdf5_file['rp_images'][chunk_rows]
        X_gaf_images_chunk = hdf5_file['gaf_images'][chunk_rows]
        X_mtf_images_chunk = hdf5_file['mtf_images'][chunk_rows]
        y_labels_chunk = hdf5_file['labels'][chunk_rows]

        # Chunks are numbered 0, 1, 2, ... in file order
        process_and_save_chunk(
            X_rp_images_chunk,
            X_gaf_images_chunk,
            X_mtf_images_chunk,
            y_labels_chunk,
            chunk_id=i // chunk_size,
            save_dir=save_dir,
        )

# Concatenate the processed chunks from disk
def load_and_concatenate_chunks(save_dir):
    """Load every ``chunk_<id>.npz`` archive in ``save_dir`` and stack their arrays.

    Only archives named exactly ``chunk_<decimal id>.npz`` are read. Other
    ``.npz`` files in the directory are ignored; this matters because the
    train/val/test archives are written to the same directory under the same
    keys, and reading them back on a re-run would duplicate samples across
    the splits. Chunks are concatenated in numeric id order (``chunk_2``
    before ``chunk_10``).

    Args:
        save_dir: Directory containing the chunk archives. Each archive must
            provide the keys ``X_rp``, ``X_gaf``, ``X_mtf`` and ``y``.

    Returns:
        Tuple ``(X_rp_images, X_gaf_images, X_mtf_images, y_labels)``, each
        the per-chunk arrays concatenated along axis 0.

    Raises:
        ValueError: If ``save_dir`` holds no chunk archive.
    """
    prefix, suffix = 'chunk_', '.npz'
    numbered_files = []
    for file_name in os.listdir(save_dir):
        if not (file_name.startswith(prefix) and file_name.endswith(suffix)):
            continue
        chunk_id = file_name[len(prefix):-len(suffix)]
        if chunk_id.isascii() and chunk_id.isdigit():
            numbered_files.append((int(chunk_id), file_name))

    if not numbered_files:
        raise ValueError(f"No '{prefix}<id>{suffix}' files found in {save_dir!r}")
    numbered_files.sort()

    X_rp_chunks = []
    X_gaf_chunks = []
    X_mtf_chunks = []
    y_chunks = []
    for _, file_name in numbered_files:
        # The context manager closes the underlying zip file once the arrays
        # have been read into memory.
        with np.load(os.path.join(save_dir, file_name)) as data:
            X_rp_chunks.append(data['X_rp'])
            X_gaf_chunks.append(data['X_gaf'])
            X_mtf_chunks.append(data['X_mtf'])
            y_chunks.append(data['y'])

    X_rp_images = np.concatenate(X_rp_chunks, axis=0)
    X_gaf_images = np.concatenate(X_gaf_chunks, axis=0)
    X_mtf_images = np.concatenate(X_mtf_chunks, axis=0)
    y_labels = np.concatenate(y_chunks, axis=0)

    return X_rp_images, X_gaf_images, X_mtf_images, y_labels

# Reassemble the three image modalities and the labels from the chunk archives
X_rp_images, X_gaf_images, X_mtf_images, y_labels = load_and_concatenate_chunks(save_dir)

# First split: 70% training, 30% held out (stratified by label)
(X_rp_train, X_rp_temp,
 X_gaf_train, X_gaf_temp,
 X_mtf_train, X_mtf_temp,
 y_train, y_temp) = train_test_split(
    X_rp_images, X_gaf_images, X_mtf_images, y_labels,
    test_size=0.3, stratify=y_labels, random_state=42)

# Second split: the held-out part is halved into validation and test
(X_rp_val, X_rp_test,
 X_gaf_val, X_gaf_test,
 X_mtf_val, X_mtf_test,
 y_val, y_test) = train_test_split(
    X_rp_temp, X_gaf_temp, X_mtf_temp, y_temp,
    test_size=0.5, stratify=y_temp, random_state=42)

# Write one compressed archive per split, each holding the three modalities and labels
splits_by_archive = {
    'train_data.npz': (X_rp_train, X_gaf_train, X_mtf_train, y_train),
    'val_data.npz': (X_rp_val, X_gaf_val, X_mtf_val, y_val),
    'test_data.npz': (X_rp_test, X_gaf_test, X_mtf_test, y_test),
}
for archive_name, (rp_part, gaf_part, mtf_part, label_part) in splits_by_archive.items():
    np.savez_compressed(os.path.join(save_dir, archive_name),
                        X_rp=rp_part, X_gaf=gaf_part, X_mtf=mtf_part, y=label_part)

print(f"Data successfully split and saved in directory: {save_dir}")

# Check the number of samples per class in the training, validation, and test sets
def count_samples_per_class(y):
    """Print how many samples of each class occur in ``y``.

    Args:
        y: Iterable of hashable class labels (for example a 1-D label array).

    Returns:
        None. One ``Class <label>: <count> samples`` line is printed per
        distinct label.
    """
    class_counts = Counter(y)
    # Sorted labels make the reports of different splits line up; labels that
    # cannot be ordered against each other keep their first-seen order.
    try:
        ordered_classes = sorted(class_counts)
    except TypeError:
        ordered_classes = list(class_counts)
    for cls in ordered_classes:
        print(f"Class {cls}: {class_counts[cls]} samples")

# Print the per-class sample counts of every split, training set first
split_reports = (
    ("Number of samples per class in the training set:", y_train),
    ("\nNumber of samples per class in the validation set:", y_val),
    ("\nNumber of samples per class in the test set:", y_test),
)
for heading, split_labels in split_reports:
    print(heading)
    count_samples_per_class(split_labels)


Data successfully split and saved in directory: ./split_datav5_full_compressed
Number of samples per class in the training set:
Class 1: 4906 samples
Class 3: 63056 samples
Class 0: 1947 samples
Class 2: 561 samples
Class 4: 10 samples

Number of samples per class in the validation set:
Class 3: 13512 samples
Class 0: 417 samples
Class 1: 1051 samples
Class 2: 120 samples
Class 4: 3 samples

Number of samples per class in the test set:
Class 3: 13512 samples
Class 0: 417 samples
Class 2: 121 samples
Class 1: 1051 samples
Class 4: 2 samples
