## Import libraries and define dataset path



In [2]:
import os
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from tensorflow.keras.applications import VGG16
from tensorflow.keras.losses import MeanSquaredError

# Folder that the loader functions in this notebook scan for `.npz` archives (relative path).
dataset_path = 'dataset/'  # Adjusted to point to the new folder


---

## Optimized Approach


### Batch Loading with Image Generators

In [3]:
import os
import numpy as np
import tensorflow as tf
import cv2

def npz_image_generator(folder, batch_size=32, img_size=(128, 128)):
    """Endlessly yield (blurry, sharp) image batches built from ``.npz`` archives.

    Each archive is expected to hold a ``colorImages`` array laid out as
    ``(height, width, channels, num_images)``. Every image is resized to
    ``img_size``, divided by 255 and paired with a Gaussian-blurred copy.

    Args:
        folder: Directory containing the ``.npz`` files.
        batch_size: Number of *archives* (not images) consumed per yielded
            batch; the leading dimension of a batch is therefore the total
            number of images stored in those archives.
        img_size: Target size handed to ``cv2.resize``.

    Yields:
        Tuple ``(blurry_batch, sharp_batch)`` of float32 arrays.

    Raises:
        ValueError: If ``folder`` contains no ``.npz`` files. Without this
            check the endless loop below would spin forever without ever
            yielding anything.
    """
    # Sorted so the batch order does not depend on the filesystem.
    files = sorted(f for f in os.listdir(folder) if f.endswith('.npz'))
    if not files:
        raise ValueError(f"No .npz files found in {folder!r}")

    while True:
        for start in range(0, len(files), batch_size):
            batch_images = []
            batch_blurry_images = []

            for file in files[start:start + batch_size]:
                # The context manager releases the archive's file handle once
                # the array has been read into memory.
                with np.load(os.path.join(folder, file)) as data:
                    color_images = data['colorImages']

                for j in range(color_images.shape[3]):
                    img_resized = cv2.resize(color_images[:, :, :, j], img_size)
                    # float32 matches the TensorSpec used by the tf.data pipeline.
                    img_resized = (img_resized / 255.0).astype(np.float32)
                    batch_images.append(img_resized)

                    # The blurred copy is the model input, the sharp one the target.
                    batch_blurry_images.append(cv2.GaussianBlur(img_resized, (5, 5), 0))

            yield (np.array(batch_blurry_images), np.array(batch_images))

# Path to dataset
dataset_path = 'dataset/'  # Adjust to point to your dataset folder

# Example usage of the generator
# NOTE(review): the training cell that follows builds its own generator through a lambda,
# so this instance looks unused in the visible notebook — confirm before removing.
train_generator = npz_image_generator(dataset_path, batch_size=32)


### Training Using a tf.data Input Pipeline

In [None]:
def create_dataset(generator, batch_size=32):
    """Wrap a batch-yielding generator factory in a ``tf.data.Dataset``.

    Args:
        generator: Zero-argument callable returning an iterator that yields
            ``(blurry, sharp)`` pairs, each already batched with shape
            ``(n, 128, 128, 3)``; ``n`` may differ from one yield to the next.
        batch_size: Number of image pairs per element of the returned dataset.

    Returns:
        A prefetching dataset whose elements are ``(blurry, sharp)`` float32
        tensors with at most ``batch_size`` images each.
    """
    dataset = tf.data.Dataset.from_generator(
        generator,
        output_signature=(
            tf.TensorSpec(shape=(None, 128, 128, 3), dtype=tf.float32),
            tf.TensorSpec(shape=(None, 128, 128, 3), dtype=tf.float32)
        )
    )
    # The generator output is already 4-D. Batching it directly would stack
    # whole batches into 5-D tensors, which a (128, 128, 3)-input model cannot
    # consume, so flatten to single image pairs first and then regroup them
    # into uniformly sized batches.
    return dataset.unbatch().batch(batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)

# Create dataset object
# The lambda lets tf.data build a fresh generator each time the dataset is iterated.
train_dataset = create_dataset(lambda: npz_image_generator(dataset_path, batch_size=32))

# Build your advanced autoencoder model (same as before)
# NOTE(review): in the visible notebook `build_advanced_autoencoder` is first defined in a
# later cell ("Define the advanced model architecture"); on a fresh kernel that cell has to
# run before this one, otherwise this line fails with NameError.
advanced_autoencoder = build_advanced_autoencoder((128, 128, 3))

# Compile the model
advanced_autoencoder.compile(optimizer='adam', loss='mse')

# Train the model with the generator
# The generator never terminates, so steps_per_epoch is what bounds each epoch.
history = advanced_autoencoder.fit(train_dataset, epochs=50, steps_per_epoch=200)


### Saving Preprocessed Data for Reuse

In [None]:
import h5py

def save_preprocessed_data_as_h5(folder, output_file, img_size=(128, 128)):
    """Resize and rescale every image found in ``folder`` and cache it in one HDF5 file.

    Each ``.npz`` archive is expected to hold a ``colorImages`` array laid out
    as ``(height, width, channels, num_images)``. Every image is stored as its
    own HDF5 dataset named ``"<archive file name>_<image index>"``.

    Args:
        folder: Directory containing the ``.npz`` files.
        output_file: Path of the HDF5 file to create (opened with mode ``'w'``).
        img_size: Target size handed to ``cv2.resize``.
    """
    with h5py.File(output_file, 'w') as h5file:
        for file in os.listdir(folder):
            if not file.endswith('.npz'):
                continue

            # The context manager releases the archive's file handle once the
            # array has been read into memory.
            with np.load(os.path.join(folder, file)) as data:
                color_images = data['colorImages']

            for i in range(color_images.shape[3]):
                img_resized = cv2.resize(color_images[:, :, :, i], img_size)
                # Store float32: it is the dtype the training pipeline declares
                # and takes half the disk space of the float64 that `/ 255.0` yields.
                img_resized = (img_resized / 255.0).astype(np.float32)

                # Save to HDF5
                h5file.create_dataset(f"{file}_{i}", data=img_resized)

# Save preprocessed data
# The output file is opened with mode 'w', so an existing 'preprocessed_data.h5' is replaced.
save_preprocessed_data_as_h5(dataset_path, 'preprocessed_data.h5')


### Loading Data from HDF5

In [None]:
def h5_image_generator(h5_file, batch_size=32):
    """Endlessly yield (blurry, sharp) image batches read from an HDF5 cache.

    Every top-level dataset in ``h5_file`` is treated as one preprocessed
    image. The file stays open for as long as the generator is alive.

    Args:
        h5_file: Path of the HDF5 file to read.
        batch_size: Maximum number of images per yielded batch.

    Yields:
        Tuple ``(blurry_batch, sharp_batch)`` of arrays stacked along axis 0.

    Raises:
        ValueError: If the file contains no datasets. Without this check the
            endless loop below would spin forever without yielding anything.
    """
    with h5py.File(h5_file, 'r') as h5file:
        dataset_names = list(h5file.keys())
        if not dataset_names:
            raise ValueError(f"No datasets found in {h5_file!r}")

        while True:
            for start in range(0, len(dataset_names), batch_size):
                batch_images = []
                batch_blurry_images = []

                for dataset_name in dataset_names[start:start + batch_size]:
                    # `[:]` reads the whole dataset into an in-memory array.
                    img_resized = h5file[dataset_name][:]
                    batch_images.append(img_resized)

                    # The blurred copy is the model input, the sharp one the target.
                    batch_blurry_images.append(cv2.GaussianBlur(img_resized, (5, 5), 0))

                yield (np.array(batch_blurry_images), np.array(batch_images))

# Example usage
h5_train_generator = h5_image_generator('preprocessed_data.h5', batch_size=32)

# from_generator invokes its callable every time the dataset is iterated, so give it a
# factory that builds a fresh generator instead of handing back one shared, partially
# consumed instance.
# The generator already emits 4-D batches; unbatch() flattens them to single image pairs
# so that batch(32) produces (<=32, 128, 128, 3) tensors rather than stacking batches
# into 5-D tensors.
train_dataset = tf.data.Dataset.from_generator(
    lambda: h5_image_generator('preprocessed_data.h5', batch_size=32),
    output_signature=(
        tf.TensorSpec(shape=(None, 128, 128, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(None, 128, 128, 3), dtype=tf.float32)
    )
).unbatch().batch(32).prefetch(buffer_size=tf.data.AUTOTUNE)


---

## Load dataset function and load the images



In [None]:
# Load data from all npz files in the dataset
def load_dataset(folder):
    """Load every image stored in the ``.npz`` archives of ``folder``.

    Each archive is expected to hold a ``colorImages`` array laid out as
    ``(height, width, channels, num_images)``.

    Args:
        folder: Directory containing the ``.npz`` files; other files are ignored.

    Returns:
        ``np.ndarray`` with one image per entry along axis 0, ordered by
        archive file name and then by index within the archive. An empty
        array of shape ``(0,)`` is returned when no images are found.
    """
    images = []
    # Sorted so the image order (and any seeded split derived from it) does not
    # depend on the filesystem's listing order.
    for file in sorted(os.listdir(folder)):
        if not file.endswith('.npz'):
            continue
        # The context manager releases the archive's file handle once the array
        # has been read into memory.
        with np.load(os.path.join(folder, file)) as data:
            color_images = data['colorImages']
        # Bring the image index from the last axis to the front so that
        # iterating yields one (height, width, channels) image at a time.
        images.extend(np.moveaxis(color_images, 3, 0))
    return np.array(images)

# Load all images
# load_dataset returns a single array, so the whole dataset is held in memory at once.
images = load_dataset(dataset_path)


## Preprocess images



In [None]:
# Resize every image to a common size and rescale its pixel values
def preprocess_images(images, img_size=(128, 128)):
    """Return a stacked array of resized images with pixel values divided by 255.

    Args:
        images: Iterable of image arrays accepted by ``cv2.resize``.
        img_size: Target size handed to ``cv2.resize``.

    Returns:
        ``np.ndarray`` holding one processed image per input image.
    """
    return np.array([cv2.resize(picture, img_size) / 255.0 for picture in images])

# Apply preprocessing
# NOTE(review): this rebinds `images`, so re-running this cell alone divides by 255 a
# second time; re-run the loading cell first.
images = preprocess_images(images)


## Create blurry images



In [None]:
# Simulate low-quality inputs by Gaussian-blurring every image
def blur_images(images):
    """Return a stacked array holding a blurred copy of each image.

    Args:
        images: Iterable of image arrays accepted by ``cv2.GaussianBlur``.

    Returns:
        ``np.ndarray`` with one 5x5 Gaussian-blurred image per input image.
    """
    return np.array([cv2.GaussianBlur(picture, (5, 5), 0) for picture in images])

# Create the blurry versions
# These serve as the model inputs; the unblurred `images` are the targets in the split that follows.
blurred_images = blur_images(images)


## Split the dataset



In [None]:
# Split into train and validation sets
# x_* are the blurred inputs and y_* the matching sharp targets; 10% is held out for
# validation and random_state=42 fixes the shuffle.
x_train, x_val, y_train, y_val = train_test_split(blurred_images, images, test_size=0.1, random_state=42)


## Define the advanced model architecture



In [None]:
### Advanced Model Architecture ###
# Residual Block for better learning of features
def residual_block(x, filters):
    """Apply two 3x3 same-padded convolutions and add the block input back.

    Args:
        x: Input tensor. Its channel count needs to match ``filters`` for the
            element-wise addition with the convolution output.
        filters: Number of filters in both convolutions.

    Returns:
        Tensor holding the sum of the second convolution's output and ``x``.
    """
    shortcut = x
    same_padding = {'padding': 'same'}
    # Only the first convolution has an activation; the second stays linear.
    branch = layers.Conv2D(filters, (3, 3), activation='relu', **same_padding)(shortcut)
    branch = layers.Conv2D(filters, (3, 3), **same_padding)(branch)
    return layers.add([branch, shortcut])

# Convolutional encoder-decoder assembled from residual blocks
def build_advanced_autoencoder(input_shape):
    """Build the (uncompiled) residual convolutional autoencoder.

    The encoder applies two conv -> 2x2 max-pool -> residual-block stages with
    64 and 128 filters, a 256-filter convolution forms the bottleneck, and the
    decoder applies two stride-2 transposed-conv -> residual-block stages with
    128 and 64 filters. The only skip connections are the ones inside each
    ``residual_block``; no encoder features are forwarded to the decoder.

    Args:
        input_shape: Shape of a single input image, e.g. ``(128, 128, 3)``.

    Returns:
        ``models.Model`` mapping the input to a 3-channel sigmoid output.
    """
    input_img = layers.Input(shape=input_shape)
    x = input_img

    # Encoder: each stage halves the spatial resolution.
    for stage_filters in (64, 128):
        x = layers.Conv2D(stage_filters, (3, 3), activation='relu', padding='same')(x)
        x = layers.MaxPooling2D((2, 2), padding='same')(x)
        x = residual_block(x, stage_filters)

    # Bottleneck
    x = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(x)

    # Decoder: each stage doubles the spatial resolution.
    for stage_filters in (128, 64):
        x = layers.Conv2DTranspose(stage_filters, (3, 3), strides=2, activation='relu', padding='same')(x)
        x = residual_block(x, stage_filters)

    # Output layer: sigmoid keeps predictions in [0, 1].
    output_img = layers.Conv2D(3, (3, 3), activation='sigmoid', padding='same')(x)

    return models.Model(input_img, output_img)


## Build and compile the model



In [None]:
# Build the advanced autoencoder model
# NOTE(review): this rebinds `advanced_autoencoder` to a freshly built model, so it does not
# reuse the instance created in the generator-based training cell earlier in the notebook.
advanced_autoencoder = build_advanced_autoencoder((128, 128, 3))

# Compile the model with MSE Loss and Adam optimizer
advanced_autoencoder.compile(optimizer='adam', loss='mse')


## Train the model



In [None]:
# Train the model
# Inputs are the blurred images and targets the sharp ones; the held-out split is passed
# as validation data.
history = advanced_autoencoder.fit(x_train, y_train, epochs=50, batch_size=32, validation_data=(x_val, y_val))

# Save the trained model
# The file is written to the current working directory.
advanced_autoencoder.save('advanced_face_reconstruction_autoencoder.h5')


## Plot training history



In [None]:
# Visualize the training process
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Val Loss')
# Label the axes and title the figure so it can be read on its own; the model is
# compiled with loss='mse', hence the y-axis label.
plt.xlabel('Epoch')
plt.ylabel('MSE loss')
plt.title('Training history')
plt.legend()
plt.show()


## Test the model on validation samples



In [None]:
# Test the model on some validation samples
def test_advanced_autoencoder(model, blurred_images, original_images, num_samples=5):
    reconstructed_images = model.predict(blurred_images[:num_samples])
    for i in range(num_samples):
        # Plot original, blurry, and reconstructed images side by side
        plt.figure(figsize=(10, 4))
        plt.subplot(1, 3, 1)
        plt.imshow(original_images[i])
        plt.title('Original')
        plt.axis('off')

        plt.subplot(1, 3, 2)
        plt.imshow(blurred_images[i])
        plt.title('Blurry')
        plt.axis('off')

        plt.subplot(1, 3, 3)
        plt.imshow(reconstructed_images[i])
        plt.title('Reconstructed')
        plt.axis('off')
        plt.show()

# Test the model on the validation set
# Uses the default num_samples=5, so only the first validation samples are plotted.
test_advanced_autoencoder(advanced_autoencoder, x_val, y_val)


## Consolidated pipeline with parallel loading


In [6]:
import os
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from multiprocessing import Pool

# Folder scanned for `.npz` archives by load_all_images_parallel (relative path).
dataset_path = 'dataset/'  # Adjusted to point to the new folder

# Load data from npz files in the dataset with resizing and normalization
def load_and_preprocess_single_file(file, img_size=(128, 128)):
    """Load one ``.npz`` archive and return its images resized and divided by 255.

    The ``colorImages`` array is laid out as
    ``(height, width, channels, num_images)`` — the layout documented by the
    other loaders in this notebook — so images are taken along the LAST axis.
    Iterating the array directly would walk over pixel rows instead of images.

    Args:
        file: Path of the ``.npz`` archive.
        img_size: Target size handed to ``cv2.resize`` (``(width, height)``).

    Returns:
        Array of shape ``(num_images, img_size[1], img_size[0], channels)``;
        the leading dimension is 0 when the archive holds no images, so the
        result can still be concatenated with those of other archives.
    """
    # The context manager releases the archive's file handle once the array
    # has been read into memory.
    with np.load(file) as data:
        color_images = data['colorImages']

    # Bring the image index to the front: one (height, width, channels) image per entry.
    per_image = np.moveaxis(color_images, -1, 0)
    if per_image.shape[0] == 0:
        return np.empty((0, img_size[1], img_size[0], color_images.shape[2]), dtype=np.float64)

    resized_images = [
        cv2.resize(np.ascontiguousarray(img), img_size) / 255.0 for img in per_image
    ]
    return np.array(resized_images)

def preprocess_file(file):
    """Load and preprocess one ``.npz`` file using the default image size.

    Thin wrapper around ``load_and_preprocess_single_file`` that takes only the
    file path, i.e. a single-argument callable suitable for ``map``-style APIs.
    """
    return load_and_preprocess_single_file(file)

# Load all images in parallel
def load_all_images_parallel(folder, img_size=(128, 128)):
    """Load and preprocess every ``.npz`` archive in ``folder`` with a process pool.

    Args:
        folder: Directory containing the ``.npz`` files.
        img_size: Target size forwarded to ``load_and_preprocess_single_file``.

    Returns:
        All preprocessed images concatenated along axis 0, ordered by archive
        file name.

    Raises:
        ValueError: If ``folder`` contains no ``.npz`` files.
    """
    from functools import partial

    # Sorted so the image order (and any seeded split derived from it) does not
    # depend on the filesystem's listing order.
    files = sorted(os.path.join(folder, f) for f in os.listdir(folder) if f.endswith('.npz'))
    if not files:
        raise ValueError(f"No .npz files found in {folder!r}")

    # Bind img_size so the workers honour the caller's choice instead of
    # silently falling back to the default size.
    worker = partial(load_and_preprocess_single_file, img_size=img_size)
    # NOTE(review): worker processes must be able to import/unpickle the worker
    # function; with the "spawn" start method this may not work for functions
    # defined inside a notebook — confirm on the target platform.
    with Pool() as pool:
        images = pool.map(worker, files)
    return np.concatenate(images, axis=0)  # Concatenate along the first axis

# Load all images
# Runs at cell execution time and starts a process pool via load_all_images_parallel.
images = load_all_images_parallel(dataset_path)

# Produce a Gaussian-blurred copy of every image
def blur_images(images):
    """Return a stacked array holding a 5x5 Gaussian-blurred copy of each image.

    Args:
        images: Iterable of image arrays accepted by ``cv2.GaussianBlur``.

    Returns:
        ``np.ndarray`` with one blurred image per input image.
    """
    blurred = []
    for picture in images:
        blurred.append(cv2.GaussianBlur(picture, (5, 5), 0))
    return np.array(blurred)

# Create the blurry versions
blurred_images = blur_images(images)

# Split into train and validation sets
# x_* are the blurred inputs and y_* the matching sharp targets; 10% is held out for
# validation and random_state=42 fixes the shuffle.
x_train, x_val, y_train, y_val = train_test_split(blurred_images, images, test_size=0.1, random_state=42)

### Advanced Model Architecture ###
# Residual Block for better learning of features
def residual_block(x, filters):
    """Apply two 3x3 same-padded convolutions and add the block input back.

    Args:
        x: Input tensor. Its channel count needs to match ``filters`` for the
            element-wise addition with the convolution output.
        filters: Number of filters in both convolutions.

    Returns:
        Tensor holding the sum of the second convolution's output and ``x``.
    """
    skip = x  # keep the block input for the shortcut connection
    x = layers.Conv2D(filters, (3, 3), activation='relu', padding='same')(x)
    # The second convolution has no activation.
    x = layers.Conv2D(filters, (3, 3), padding='same')(x)
    x = layers.add([x, skip])
    return x

# Convolutional encoder-decoder assembled from residual blocks
# NOTE(review): despite the original "U-Net" label, no encoder features are forwarded to
# the decoder here; the only skip connections are the ones inside residual_block.
def build_advanced_autoencoder(input_shape):
    """Build the (uncompiled) residual convolutional autoencoder.

    Args:
        input_shape: Shape of a single input image, e.g. ``(128, 128, 3)``.

    Returns:
        ``models.Model`` mapping the input to a 3-channel sigmoid output.
    """
    input_img = layers.Input(shape=input_shape)
    
    # Down-sampling with residual blocks (each max-pool halves the spatial resolution)
    x = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(input_img)
    x = layers.MaxPooling2D((2, 2), padding='same')(x)
    x = residual_block(x, 64)
    
    x = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(x)
    x = layers.MaxPooling2D((2, 2), padding='same')(x)
    x = residual_block(x, 128)
    
    # Bottleneck
    x = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(x)
    
    # Decoder: stride-2 transposed convolutions followed by residual blocks
    x = layers.Conv2DTranspose(128, (3, 3), strides=2, activation='relu', padding='same')(x)
    x = residual_block(x, 128)
    
    x = layers.Conv2DTranspose(64, (3, 3), strides=2, activation='relu', padding='same')(x)
    x = residual_block(x, 64)
    
    # Output layer (3 channels, sigmoid activation)
    x = layers.Conv2D(3, (3, 3), activation='sigmoid', padding='same')(x)
    
    return models.Model(input_img, x)

# Build the advanced autoencoder model
advanced_autoencoder = build_advanced_autoencoder((128, 128, 3))

# Compile the model with MSE Loss and Adam optimizer
advanced_autoencoder.compile(optimizer='adam', loss='mse')

# Train the model
# Inputs are the blurred images and targets the sharp ones; the held-out split is passed
# as validation data.
history = advanced_autoencoder.fit(x_train, y_train, epochs=50, batch_size=32, validation_data=(x_val, y_val))

# Save the trained model
# The file is written to the current working directory.
advanced_autoencoder.save('advanced_face_reconstruction_autoencoder.h5')

# Visualize the training process
# Plots the per-epoch training and validation loss recorded by fit().
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Val Loss')
plt.legend()
plt.show()

# Test the model on some validation samples
def test_advanced_autoencoder(model, blurred_images, original_images, num_samples=5):
    reconstructed_images = model.predict(blurred_images[:num_samples])
    for i in range(num_samples):
        # Plot original, blurry, and reconstructed images side by side
        plt.figure(figsize=(10, 4))
        plt.subplot(1, 3, 1)
        plt.imshow(original_images[i])
        plt.title('Original')
        plt.axis('off')

        plt.subplot(1, 3, 2)
        plt.imshow(blurred_images[i])
        plt.title('Blurry')
        plt.axis('off')

        plt.subplot(1, 3, 3)
        plt.imshow(reconstructed_images[i])
        plt.title('Reconstructed')
        plt.axis('off')
        plt.show()

# Test the model on the validation set
# Uses the default num_samples=5, so only the first validation samples are plotted.
test_advanced_autoencoder(advanced_autoencoder, x_val, y_val)
