## Step 1: Import Necessary Libraries


In [None]:
import os
import numpy as np
import tensorflow as tf
import cv2
import matplotlib.pyplot as plt
from tensorflow.keras import layers, models


## Step 2: Define a Data Generator for Efficient Loading


In [None]:
def npz_image_generator(folder, batch_size=32, img_size=(128, 128)):
    """Yield endless ``(blurry, sharp)`` training batches built from ``.npz`` files.

    Every ``.npz`` file in ``folder`` is expected to hold a ``'colorImages'``
    array whose fourth axis indexes the individual images. Each image is
    resized, scaled to ``[0, 1]`` and paired with a Gaussian-blurred copy that
    serves as the degraded model input.

    Batches contain exactly ``batch_size`` *images* (not files). Images left
    over at the end of a pass over the folder are carried into the next pass,
    so every yielded batch has the same size and memory use stays bounded.

    Args:
        folder: Directory scanned (non-recursively) for ``.npz`` files.
        batch_size: Number of images per yielded batch; must be >= 1.
        img_size: Target size passed to ``cv2.resize`` as ``dsize``, which
            OpenCV interprets as ``(width, height)``.

    Yields:
        Tuples ``(blurry, sharp)`` of ``float32`` arrays with a leading
        dimension of ``batch_size``.

    Raises:
        ValueError: On the first ``next()`` call if ``batch_size`` is smaller
            than 1 or ``folder`` contains no ``.npz`` files, and later if a
            full pass over the files produced no image at all. Without these
            checks the generator would spin forever without yielding.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    # Sort for a reproducible order; os.listdir() order is arbitrary.
    files = sorted(f for f in os.listdir(folder) if f.endswith('.npz'))
    if not files:
        raise ValueError(f"No .npz files found in {folder!r}")

    batch_images = []
    batch_blurry_images = []

    while True:
        images_seen = 0

        for file in files:
            # The context manager closes the archive once the array is loaded.
            with np.load(os.path.join(folder, file)) as data:
                color_images = data['colorImages']  # (231, 237, 3, num_images)

            for j in range(color_images.shape[3]):
                img = color_images[:, :, :, j]
                img_resized = cv2.resize(img, img_size)
                # float32 matches the dtype the tf.data signature expects.
                img_resized = img_resized.astype(np.float32) / 255.0  # Normalize to [0, 1]
                batch_images.append(img_resized)

                # Apply Gaussian blur to simulate low-quality CCTV footage
                blurry_img = cv2.GaussianBlur(img_resized, (5, 5), 0)
                batch_blurry_images.append(blurry_img)
                images_seen += 1

                if len(batch_images) == batch_size:
                    yield (np.array(batch_blurry_images), np.array(batch_images))  # (blurry, high-res)
                    batch_images = []
                    batch_blurry_images = []

        if images_seen == 0:
            raise ValueError(f"The .npz files in {folder!r} contain no images")


## Step 3: Define the Advanced Autoencoder Model for Image Reconstruction


In [None]:
def build_advanced_autoencoder(input_shape=(128, 128, 3)):
    """Build a convolutional encoder/decoder that maps an image to an image.

    The encoder applies three Conv2D + 2x2 MaxPooling stages with 64, 128 and
    256 filters. The decoder mirrors it with three Conv2DTranspose + 2x2
    UpSampling stages (256, 128, 64 filters) and ends in a 3-channel sigmoid
    convolution, so outputs lie in ``[0, 1]``.

    Args:
        input_shape: Shape of a single input image, without the batch axis.

    Returns:
        An uncompiled Keras ``Model`` from the input tensor to the
        reconstructed image tensor.
    """
    relu_same = dict(activation='relu', padding='same')
    inputs = layers.Input(shape=input_shape)

    # Encoder: convolve, then downsample.
    features = inputs
    for filters in (64, 128, 256):
        features = layers.Conv2D(filters, (3, 3), **relu_same)(features)
        features = layers.MaxPooling2D((2, 2), padding='same')(features)

    # Decoder: transposed convolution, then upsample.
    for filters in (256, 128, 64):
        features = layers.Conv2DTranspose(filters, (3, 3), **relu_same)(features)
        features = layers.UpSampling2D((2, 2))(features)

    # Sigmoid keeps the reconstruction in the same [0, 1] range as the inputs.
    outputs = layers.Conv2D(3, (3, 3), activation='sigmoid', padding='same')(features)

    return models.Model(inputs, outputs)


## Step 4: Compile the Model


In [None]:
# Build the autoencoder for 128x128 RGB inputs and compile it for training.
autoencoder = build_advanced_autoencoder(input_shape=(128, 128, 3))
# Adam optimizer with Mean Squared Error as the reconstruction loss.
autoencoder.compile(optimizer='adam', loss='mse')  # Mean Squared Error for image reconstruction
# Print the layer-by-layer summary of the model.
autoencoder.summary()


## Step 5: Create a Training Pipeline using Data Generators


In [None]:
# Folder that is scanned for the .npz training files.
dataset_folder = 'dataset/'

# Create the train generator.
# NOTE(review): train_generator is not referenced again in the cells visible
# here; the training dataset is built from a separate generator instance.
train_generator = npz_image_generator(dataset_folder, batch_size=32, img_size=(128, 128))

# Define the training dataset from the generator
def create_dataset(generator, batch_size=32):
    """Wrap a batch-yielding generator in a prefetching ``tf.data.Dataset``.

    The generator is expected to yield ``(blurry, sharp)`` pairs that are
    already stacked along a leading axis, i.e. arrays of shape
    ``(None, 128, 128, 3)``. Batching those elements directly would stack
    whole batches into 5-D tensors (or fail when their sizes differ), so the
    incoming batches are first split into single images and then regrouped
    into batches of ``batch_size``.

    Args:
        generator: Zero-argument callable returning an iterator of
            ``(blurry, sharp)`` array pairs.
        batch_size: Number of image pairs per dataset element.

    Returns:
        A ``tf.data.Dataset`` of ``(blurry, sharp)`` float32 tensor pairs,
        each of shape ``(batch_size, 128, 128, 3)`` (the last batch of a
        finite generator may be smaller).
    """
    dataset = tf.data.Dataset.from_generator(
        generator,
        output_signature=(
            tf.TensorSpec(shape=(None, 128, 128, 3), dtype=tf.float32),
            tf.TensorSpec(shape=(None, 128, 128, 3), dtype=tf.float32)
        )
    )
    # unbatch() undoes the generator's own grouping so batch() sees images.
    return dataset.unbatch().batch(batch_size).prefetch(tf.data.AUTOTUNE)

# The lambda gives from_generator a zero-argument callable, so a fresh
# generator is created each time the dataset is iterated.
train_dataset = create_dataset(lambda: npz_image_generator(dataset_folder, batch_size=32))


## Step 6: Train the Autoencoder


In [None]:
# Train the model for 50 epochs of 200 steps each. steps_per_epoch is given
# explicitly because the generator-backed dataset loops forever and has no
# natural epoch boundary.
history = autoencoder.fit(train_dataset, epochs=50, steps_per_epoch=200)


## Step 7: Visualize Reconstruction Results


In [None]:
# Function to visualize the original (blurry) and reconstructed images
def display_reconstructed_images(model, dataset, num_images=5):
    """Plot blurry inputs, model reconstructions and ground truth side by side.

    One batch is taken from ``dataset`` and passed through ``model``. The
    figure has three rows (blurry input, reconstruction, high-res target) and
    one column per displayed image.

    Args:
        model: Model whose ``predict`` maps a batch of blurry images to
            reconstructed images.
        dataset: Dataset yielding ``(blurry_images, high_res_images)`` batches.
        num_images: Maximum number of images to show. It is clamped to the
            size of the batch so a small batch does not raise ``IndexError``.
    """
    for blurry_images, high_res_images in dataset.take(1):  # Take a single batch
        reconstructed_images = model.predict(blurry_images)

        # Never index past the end of the batch.
        shown = min(num_images, len(reconstructed_images))
        if shown < 1:
            return

        # (title, images) for each row of the figure, top to bottom.
        rows = (
            ("Blurry Image", blurry_images),
            ("Reconstructed Image", reconstructed_images),
            ("Original High-Res Image", high_res_images),
        )

        plt.figure(figsize=(15, 5))
        for i in range(shown):
            for row, (title, images) in enumerate(rows):
                # Subplot indices are 1-based and run row by row.
                plt.subplot(len(rows), shown, i + 1 + row * shown)
                plt.imshow(images[i])
                plt.title(title)
                plt.axis("off")
        plt.show()

# Visualize reconstructions for five images drawn from the training dataset
# (note: these are images the model has been trained on, not held-out data).
display_reconstructed_images(autoencoder, train_dataset, num_images=5)


## Step 8: (Optional) Save the Model for Future Inference


In [None]:
# Save the trained model to a single file in the working directory.
autoencoder.save('face_reconstruction_autoencoder.h5')

# To reload the model later for inference, uncomment the two lines below.
# from tensorflow.keras.models import load_model
# autoencoder = load_model('face_reconstruction_autoencoder.h5')


## Step 9: (Optional) Convert .npz Files to TFRecords for Future Efficiency


In [None]:
import tensorflow as tf

def convert_to_tfrecord(npz_folder, output_file):
    """Write every image found in a folder of ``.npz`` files to one TFRecord.

    Each ``.npz`` file is expected to hold a ``'colorImages'`` array whose
    fourth axis indexes the individual images. Every image is cast to
    ``uint8``, JPEG-encoded and stored as a ``tf.train.Example`` with a single
    bytes feature named ``'image'``.

    Args:
        npz_folder: Directory scanned (non-recursively) for ``.npz`` files.
        output_file: Path of the TFRecord file to create.
    """
    # List the inputs before opening the writer, so a missing folder fails
    # without leaving an empty output file behind. Sorting makes the record
    # order reproducible; os.listdir() order is arbitrary.
    npz_files = sorted(f for f in os.listdir(npz_folder) if f.endswith('.npz'))

    with tf.io.TFRecordWriter(output_file) as writer:
        for file in npz_files:
            # The context manager closes the archive once the array is loaded.
            with np.load(os.path.join(npz_folder, file)) as data:
                color_images = data['colorImages']

            for j in range(color_images.shape[3]):
                img = color_images[:, :, :, j]
                img_bytes = tf.io.encode_jpeg(img.astype(np.uint8)).numpy()

                feature = {
                    'image': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_bytes]))
                }
                example = tf.train.Example(features=tf.train.Features(feature=feature))
                writer.write(example.SerializeToString())

# Convert the .npz files under 'dataset/' into a single TFRecord file in the
# working directory. This runs as soon as the cell is executed.
convert_to_tfrecord('dataset/', 'face_reconstruction.tfrecord')


## Summary of the Approach:
- **Efficient Loading**: We use a data generator to avoid loading the entire dataset into memory at once.

- **Model Architecture**: An advanced autoencoder architecture is used for reconstructing faces.

- **Training Pipeline**: tf.data API is used to efficiently manage large datasets and ensure fast data loading.

- **Visualization**: We visualize the results to compare the blurry input images with the reconstructed high-resolution ones.

- **Optimization**: Optionally, converting .npz files to TFRecords speeds up file I/O operations in future training sessions.

*This approach is optimized for both memory and speed when dealing with large datasets for CCTV footage enhancement and reconstruction.*