# Autoencoder training

In [6]:
import datetime
import h5py
import keras_preprocessing
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import random
import seaborn as sns
import tensorflow as tf

from PIL import Image
from sys import platform
from tensorflow import keras

sns.set()
sns.set_style("whitegrid")
sns.set_context("paper")
sns.color_palette("hls", 8)

%matplotlib inline

print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


In [9]:
def load_random_hdf5_files(directory, n = None):
    """
    Load random HDF5 files containing images from a specified directory and concatenate them into a single NumPy array.

    Args:
        directory (str): The path to the directory containing HDF5 files.
        n (int): The number of random files to load.

    Returns:
        train_data: A  NumPy array containing all the images from the train dataset of the loaded HDF5 files.
        validation_data: A single NumPy array containing all the images from the validation dataset of the loaded HDF5 files.
        test_data: A single NumPy array containing all the images from the the test dataset of the loaded HDF5 files.

    The function selects 'n' random HDF5 files from the specified 'directory', reads the datasets from each
    file, and concatenates these datasets into a single NumPy array. This array contains all the images from the loaded
    HDF5 files. The function returns three arrays, one each for training, validation and test.

    Note:
    - Ensure that the HDF5 files in the directory have datasets named 'train', 'validation', and  'test' (or adjust accordingly).
    - If 'n' is greater than the number of available HDF5 files in the directory, all available files will be loaded.
    - if 'n' is not specified, all available files will be loaded

    Example usage:
    >>> directory_path = '/path/to/your/directory'
    >>> n = 3
    >>> train_data, val_data, test_data = load_random_hdf5_files(directory_path, n_files_to_load)
    """

    file_list = []
    # List all HDF5 files in the directory
    for filename in os.listdir(directory):
        if filename.endswith(".hdf5"):
            file_list.append(os.path.join(directory, filename))
    if not file_list:
        print("No HDF5 files found in the specified directory.")
        return None
    if (n > len(file_list)) or (n is None):
        print(f"Requested to load {n} files, but there are only {len(file_list)} files available.")
        n = len(file_list)

    # Randomly select n files
    random_files = random.sample(file_list, n)
    train_data = []
    validation_data = []
    test_data = []
    for file_path in random_files:
        with h5py.File(file_path, 'r') as hf:
            d1 = np.array(hf["train"])
            d2 = np.array(hf["validation"])
            d3 = np.array(hf["test"])
            train_data.append(np.array(d1))
            validation_data.append(np.array(d2))
            test_data.append(np.array(d3))
    # Concatenate the individual arrays into a single NumPy array
    train_data = np.concatenate(train_data, axis=0)
    validation_data = np.concatenate(validation_data, axis=0)
    test_data = np.concatenate(test_data, axis=0)

    return train_data, validation_data, test_data

In [10]:
if platform == 'darwin':
    images_path = "/Users/lmiguelmartinez/Tesis/datasets/highD/images_1000ms"
else:
    images_path = "/home/lmmartinez/Tesis/datasets/highD/images_1000ms"

train_data, val_data, test_data = load_random_hdf5_files(images_path, n = 1)

  d1 = np.array(hf["train"])
  d2 = np.array(hf["validation"])
  d3 = np.array(hf["test"])


In [12]:
IMAGE_SIZE = train_data[0].shape
BATCH_SIZE = 256

(121, 201)