In [None]:
# %% Import necessary libraries
import os
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from PIL import Image
from skimage.metrics import peak_signal_noise_ratio as psnr, structural_similarity as ssim
from sklearn.metrics import mean_squared_error

In [None]:
# %% Define helper functions
def load_color_image(image_path):
    """
    Load an image from disk as an RGB array at its original resolution.

    Parameters
    ----------
    image_path : str or pathlib.Path
        Location of the image file; passed straight to ``PIL.Image.open``.

    Returns
    -------
    numpy.ndarray
        uint8 array of shape (height, width, 3). Inputs stored in another
        mode (grayscale, palette, RGBA, ...) are converted to RGB so callers
        always receive exactly three colour channels.
    """
    # Image.open is lazy and keeps the file handle open; the context manager
    # releases it as soon as the pixel data has been copied into the array.
    with Image.open(image_path) as img:
        return np.array(img.convert("RGB"))

In [None]:
# %% Autoencoder with training and testing on the same image each time
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D
from tensorflow.keras.models import Model

# Locate the Kodak PNGs. glob() yields entries in filesystem order, which is
# not guaranteed to be stable, so sort to process images in a reproducible
# order from run to run.
data_dir = Path("../data/Kodak")
image_paths = sorted(data_dir.glob("*.png"))
if not image_paths:
    raise FileNotFoundError("No images found in data/Kodak folder!")


def rgb2gray(rgb):
    """
    Collapse the colour axis of an RGB(A) array into one grayscale channel.

    Only the first three entries along the last axis are used, so a trailing
    alpha channel is ignored. The result is the weighted sum
    0.2989*R + 0.5870*G + 0.1140*B with the last axis removed.
    """
    channel_weights = np.array([0.2989, 0.5870, 0.1140])
    colour_planes = rgb[..., :3]
    return colour_planes.dot(channel_weights)


# Train a fresh autoencoder on each image and evaluate it on that same image.
for sample_image_path in image_paths:
    print("Using image:", sample_image_path)
    original_image = load_color_image(sample_image_path)

    # A new model is built for every image; clearing the Keras session first
    # drops the previous model's global state so memory does not keep growing
    # over the course of the loop.
    tf.keras.backend.clear_session()

    # Prepare image data: keep the native size and channels, scale to [0, 1],
    # and add the leading batch dimension expected by Keras.
    input_shape = original_image.shape
    x = original_image.astype("float32") / 255.0
    x_input = np.expand_dims(x, axis=0)

    # Define a simple convolutional autoencoder for this image's shape.
    # NOTE: two 2x "same" poolings followed by two 2x upsamplings reproduce
    # the input size only when height and width are multiples of 4.
    input_img = Input(shape=input_shape)
    # Encoder
    x_enc = Conv2D(32, (3, 3), activation="relu", padding="same")(input_img)
    x_enc = MaxPooling2D((2, 2), padding="same")(x_enc)
    encoded = Conv2D(16, (3, 3), activation="relu", padding="same")(x_enc)
    encoded = MaxPooling2D((2, 2), padding="same")(encoded)
    # Decoder
    x_dec = Conv2D(16, (3, 3), activation="relu", padding="same")(encoded)
    x_dec = UpSampling2D((2, 2))(x_dec)
    x_dec = Conv2D(32, (3, 3), activation="relu", padding="same")(x_dec)
    x_dec = UpSampling2D((2, 2))(x_dec)
    decoded = Conv2D(3, (3, 3), activation="sigmoid", padding="same")(x_dec)

    autoencoder = Model(input_img, decoded)
    autoencoder.compile(optimizer="adam", loss="binary_crossentropy")

    # Train the autoencoder on the current image (for demonstration).
    # In practice, train on a larger dataset.
    autoencoder.fit(x_input, x_input, epochs=10, verbose=0)

    # Reconstruct the image. Round to the nearest intensity level before the
    # uint8 cast: a bare astype() truncates, which would bias every pixel
    # downward by up to one level.
    reconstructed = autoencoder.predict(x_input)[0]
    reconstructed_image = np.clip(np.rint(reconstructed * 255.0), 0, 255).astype(np.uint8)

    # Compute evaluation metrics (PSNR, SSIM and MSE) on grayscale versions
    # of the images, both on the 0-255 scale.
    original_gray = rgb2gray(original_image)
    reconstructed_gray = rgb2gray(reconstructed_image)
    psnr_value = psnr(original_gray, reconstructed_gray, data_range=255)
    ssim_value = ssim(original_gray, reconstructed_gray, data_range=255)
    mse_value = mean_squared_error(original_gray, reconstructed_gray)
    print("Autoencoder Reconstruction:")
    print("PSNR:", psnr_value)
    print("SSIM:", ssim_value)
    print("MSE:", mse_value)

    # Visualize the original and reconstructed images side by side.
    fig, axes = plt.subplots(1, 2, figsize=(12, 6))
    axes[0].imshow(original_image)
    axes[0].set_title("Original Image")
    axes[0].axis("off")

    axes[1].imshow(reconstructed_image)
    axes[1].set_title("Reconstructed (Autoencoder)")
    axes[1].axis("off")

    plt.tight_layout()
    plt.show()
    # Release the figure explicitly so figures do not pile up across iterations.
    plt.close(fig)
    print("********************************************************")

In [None]:
# %% Train-Test Split and Autoencoder Training on Kodak images with 512x512 target

import os
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from PIL import Image
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D
from tensorflow.keras.models import Model
from skimage.metrics import peak_signal_noise_ratio as psnr, structural_similarity as ssim
from sklearn.metrics import mean_squared_error


# Helper function to load a color image
def load_color_image(image_path):
    """
    Load an image from disk as an RGB array at its original resolution.

    Returns a uint8 array of shape (height, width, 3); inputs stored in
    another mode (grayscale, palette, RGBA, ...) are converted to RGB so
    callers always receive exactly three colour channels.
    """
    # Image.open is lazy and keeps the file handle open; the context manager
    # releases it as soon as the pixel data has been copied into the array.
    with Image.open(image_path) as img:
        return np.array(img.convert("RGB"))


# Target size handed to tf.image.resize as its (height, width) argument; the
# training and test images in this cell are all resized to this square shape.
TARGET_SIZE = (512, 512)  # NOTE(review): per the original author, 512 is the smallest width/height in the Kodak set — confirm


def load_and_resize(image_path, target_size=TARGET_SIZE):
    """
    Read an image with ``load_color_image`` and resize it bilinearly.

    Parameters
    ----------
    image_path : path-like
        File to read; forwarded to ``load_color_image``.
    target_size : tuple of int
        Passed to ``tf.image.resize`` as the output size. Defaults to
        ``TARGET_SIZE``.

    Returns
    -------
    numpy.ndarray
        The resized image. uint8 data, or data whose maximum exceeds 1.0,
        is first converted to float32 and divided by 255; anything else is
        resized as-is.
    """
    pixels = load_color_image(image_path)

    # Bring 8-bit style data into the [0, 1] range before interpolating.
    needs_scaling = pixels.dtype == np.uint8 or pixels.max() > 1.0
    if needs_scaling:
        pixels = pixels.astype("float32") / 255.0

    resized = tf.image.resize(pixels, target_size, method="bilinear")
    return resized.numpy()


def rgb2gray(rgb):
    """
    Convert an RGB(A) array to grayscale.

    Takes the first three entries of the last axis (ignoring any further
    channels) and returns their weighted sum
    0.2989*R + 0.5870*G + 0.1140*B, with the last axis removed.
    """
    channel_weights = np.array([0.2989, 0.5870, 0.1140])
    colour_planes = rgb[..., :3]
    return colour_planes.dot(channel_weights)


# Collect the Kodak PNG files in sorted order.
data_dir = Path("../data/Kodak")
image_paths = sorted(data_dir.glob("*.png"))
if len(image_paths) < 4:
    raise FileNotFoundError("Need at least 4 images to perform train-test split!")

# Hold out the final two files for testing; everything before them is training data.
train_paths, test_paths = image_paths[:-2], image_paths[-2:]

# Resize every training image to TARGET_SIZE and stack them into one batch array.
train_images = list(map(load_and_resize, train_paths))
x_train = np.stack(train_images, axis=0)
input_shape = train_images[0].shape  # shape of a single resized image, without the batch axis

# Preview up to six of the resized training images in a single row.
preview_count = min(6, len(train_images))
fig, axes = plt.subplots(1, preview_count, figsize=(15, 5))
for i, (ax, img) in enumerate(zip(axes, train_images)):
    ax.imshow(img)
    ax.set_title(f"Train Image {i+1}")
    ax.axis("off")
plt.tight_layout()
plt.show()

print(f"Training on {x_train.shape[0]} images; resized shape: {input_shape}")

# Assemble the convolutional autoencoder with the Keras functional API.
input_img = Input(shape=input_shape)

# Settings shared by every hidden convolution.
same_relu = {"kernel_size": (3, 3), "activation": "relu", "padding": "same"}

# Encoder: each convolution is followed by a 2x2 max-pool.
x_enc = Conv2D(filters=32, **same_relu)(input_img)
x_enc = MaxPooling2D(pool_size=(2, 2), padding="same")(x_enc)
encoded = Conv2D(filters=16, **same_relu)(x_enc)
encoded = MaxPooling2D(pool_size=(2, 2), padding="same")(encoded)

# Decoder: mirrors the encoder, with 2x2 upsampling in place of pooling.
x_dec = Conv2D(filters=16, **same_relu)(encoded)
x_dec = UpSampling2D(size=(2, 2))(x_dec)
x_dec = Conv2D(filters=32, **same_relu)(x_dec)
x_dec = UpSampling2D(size=(2, 2))(x_dec)
# A sigmoid on the 3-channel output keeps reconstructed pixels within [0, 1].
decoded = Conv2D(filters=3, kernel_size=(3, 3), activation="sigmoid", padding="same")(x_dec)

autoencoder = Model(inputs=input_img, outputs=decoded)


def ssim_loss(y_true, y_pred):
    """
    Loss that decreases as structural similarity increases.

    Computes ``tf.image.ssim`` between targets and predictions with a
    dynamic range of 1.0, averages the result, and returns one minus
    that mean.
    """
    similarity = tf.image.ssim(y_true, y_pred, max_val=1.0)
    mean_similarity = tf.reduce_mean(similarity)
    return 1 - mean_similarity


# Optimise the SSIM-based loss with the Adam optimizer and show the layer summary.
autoencoder.compile(loss=ssim_loss, optimizer="adam")
autoencoder.summary()

# Fit the model to reproduce its own input, one image per gradient step.
fit_options = {"epochs": 30, "batch_size": 1, "verbose": 1}
autoencoder.fit(x=x_train, y=x_train, **fit_options)

# %% Test the autoencoder on the 512x512 test images
for test_path in test_paths:
    print("Testing on image:", test_path)

    # Load the test image resized to TARGET_SIZE and add the batch dimension.
    x_test = load_and_resize(test_path)
    x_test_input = np.expand_dims(x_test, axis=0)

    # Predict the reconstruction. Round to the nearest intensity level before
    # the uint8 cast: a bare astype() truncates (e.g. 254.999 -> 254), which
    # would bias every pixel downward.
    reconstructed = autoencoder.predict(x_test_input)[0]
    reconstructed_image = np.clip(np.rint(reconstructed * 255.0), 0, 255).astype(np.uint8)

    # The evaluation reference is the same resized image that was fed to the
    # model, so reuse x_test instead of loading and resizing the file again.
    original_resized = x_test
    original_resized_uint8 = np.clip(np.rint(original_resized * 255.0), 0, 255).astype(np.uint8)

    # Convert both images to grayscale on the 0-255 scale.
    original_gray = rgb2gray(original_resized_uint8)
    reconstructed_gray = rgb2gray(reconstructed_image)

    # Compute evaluation metrics.
    psnr_value = psnr(original_gray, reconstructed_gray, data_range=255)
    ssim_value = ssim(original_gray, reconstructed_gray, data_range=255)
    mse_value = mean_squared_error(original_gray, reconstructed_gray)

    print("Autoencoder Reconstruction Metrics:")
    print("PSNR:", psnr_value)
    print("SSIM:", ssim_value)
    print("MSE:", mse_value)

    # Show the resized original next to its reconstruction.
    fig, axes = plt.subplots(1, 2, figsize=(12, 6))
    axes[0].imshow(original_resized_uint8)
    axes[0].set_title("Original Resized")
    axes[0].axis("off")
    axes[1].imshow(reconstructed_image)
    axes[1].set_title("Reconstructed")
    axes[1].axis("off")
    plt.tight_layout()
    plt.show()
    # Release the figure explicitly so figures do not pile up across iterations.
    plt.close(fig)
    print("********************************************************")