# Deep and Reinforcement Learning 2024/2025 (M.IA003), FEUP/FCUP

## Deep Learning Project 
## **Develop deep learning discriminative and generative models, applied to the context of “deep fakes”**

Work done by:
- Michal Kowalski up
- Pedro Pereira up201708807
- Pedro Azevedo up201905966

## 1.1) Import Necessary Libraries

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
import matplotlib.pyplot as plt
import os
import tensorflow as tf
import glob
from _model import *
from tensorflow.keras.models import load_model
import json

# tf.keras.mixed_precision.set_global_policy('mixed_float16')

In [None]:
# Dataset root plus the sub-folders holding the real and the fake images
data_dir = "data"
real_dir, fake_dir = (os.path.join(data_dir, subset) for subset in ("real", "fake"))

In [None]:
# Re-point the dataset root at the CelebA folder. This replaces the values of
# `data_dir` and `real_dir`; `fake_dir` is not touched in this cell.
data_dir = "data/celeba"
real_dir = os.path.join(data_dir, "img_align_celeba")

# Disabled alternative: obtain CelebA through tensorflow_datasets.
# import tensorflow_datasets as tfds 

# celeba_builder = tfds.builder('celeb_a')
# celeba_builder.download_and_prepare(download_dir=data_dir)



## 1.2) Basic EDA

In [None]:
from _eda import base_stats

# Run the project's `base_stats` EDA helper on the real and fake image directories.
base_stats(real_dir, fake_dir)

## 1.3) File Standardization 

In [None]:
from _preprocess import standardize_files

# Standardize the inpainting and text2img folders, passing a (64, 64) size and
# the '64' tag.
# NOTE(review): presumably the output lands in sibling "<name>_normalized_64"
# folders (those are the paths globbed later on) — confirm in _preprocess.
standardize_files('data/inpainting', (64, 64), '64')
standardize_files('data/text2img', (64, 64), '64')
# standardize_files('data/fake', (64, 64), '64')

In [None]:
# Switch both directories to their "*_normalized_64" counterparts and rerun
# the statistics on them.
# NOTE(review): `data_dir` holds whatever was assigned last ("data/celeba" when
# the cells run top to bottom), while later cells glob
# "./data/real_normalized_64" and "./data/fake_normalized_64" — confirm that
# these joined paths are the intended ones.
real_dir = os.path.join(data_dir, "real_normalized_64")
fake_dir = os.path.join(data_dir, "fake_normalized_64")
base_stats(real_dir, fake_dir)

## 1.4) Baseline

In [None]:
# Build the GAN with the `build_gan` factory (expected to come from `_model`),
# enabling both the label-smoothing and the WGAN options.
gan = build_gan(label_smoothing=True, wgan=True)

In [None]:
# Print the summary of the generator sub-model.
gan.generator.summary()

In [None]:

# Nominal batch size; the tf.data pipelines in this notebook batch at batch_size // 2.
batch_size = 256
# Get all image file paths

def save_paths(path_list, fname):
    """Write a list of file paths to ``fname`` as JSON.

    Args:
        path_list: JSON-serializable list of path strings.
        fname: Destination file; it is created or overwritten.
    """
    # Explicit UTF-8 keeps the file independent of the platform's default encoding.
    with open(fname, "w", encoding="utf-8") as out_file:
        json.dump(path_list, out_file)

# Define the preprocessing function
def load_and_preprocess(image_path):
    """Read a JPEG file and return it as a float32, 3-channel tensor in [-1, 1]."""
    raw_bytes = tf.io.read_file(image_path)
    pixels = tf.image.decode_jpeg(raw_bytes, channels=3)
    # 0..255 becomes 0..2 after the division, then the shift centres it on zero
    return tf.cast(pixels, tf.float32) / 127.5 - 1.0

def load_paths(train_file, test_file):
    """Load the train and test path lists stored as JSON files.

    Args:
        train_file: JSON file holding the training image paths.
        test_file: JSON file holding the test image paths.

    Returns:
        Tuple ``(train_paths, test_paths)`` with the decoded content of each file.
    """
    # Explicit UTF-8 keeps reading independent of the platform's default encoding.
    with open(train_file, "r", encoding="utf-8") as train:
        train_paths = json.load(train)
    with open(test_file, "r", encoding="utf-8") as test:
        test_paths = json.load(test)
    return train_paths, test_paths

In [None]:
# One-off creation of the train/test split (kept for reference): glob the
# standardized CelebA images, slice them and persist both lists.
# image_paths = glob.glob("./data/celeba/img_align_celeba_normalized_64/*.jpg")
# train_paths = image_paths[:150000]
# test_paths = image_paths[150000:]
# save_paths(train_paths, 'train.json')
# save_paths(test_paths, 'test.json')

image_paths = glob.glob("./data/real_normalized_64/*.jpg")

# Load the persisted split. Without this line `train_paths` is undefined on a
# fresh kernel and the pipeline below fails with a NameError. The same files
# are read again in the generator-test section for the held-out real images.
train_paths, test_paths = load_paths('./train.json', './test.json')

# Create the dataset: decode/normalize in parallel, shuffle, then batch at
# half the nominal batch size (incomplete final batches are dropped).
dataset = tf.data.Dataset.from_tensor_slices(train_paths)
dataset = dataset.map(load_and_preprocess, num_parallel_calls=tf.data.AUTOTUNE)
dataset = dataset.shuffle(buffer_size=1000).batch(batch_size//2, drop_remainder=True).prefetch(tf.data.AUTOTUNE)

In [None]:
# Print the summary of the discriminator sub-model.
gan.discriminator.summary()

In [None]:
# Train for 40 epochs; `train_gan` hands back two loss histories.
# NOTE(review): the second history is called `e_losses` here but is written to
# "g_losses.json" as the generator losses further down — confirm what
# `train_gan` actually returns.
d_losses, e_losses = train_gan(gan, dataset, epochs=40, batch_size=batch_size)

In [None]:
# Persist the trained GAN; the same file name is loaded back in the "Load Model" section.
gan.save('celeba_wgan.keras')

In [None]:
# Convert every loss value to a plain Python float so json can serialize it
final_d_losses = list(map(float, d_losses))
final_g_losses = list(map(float, e_losses))

# One JSON file per loss history
for out_name, history in (('d_losses.json', final_d_losses), ('g_losses.json', final_g_losses)):
    with open(out_name, "w") as f:
        json.dump(history, f)

## 1.5) Load Model

In [None]:
# Reload the saved GAN. `load_model` is the function imported from
# tensorflow.keras.models at the top of the notebook; the bare `keras` name is
# never imported here. `custom_objects` lets Keras resolve the custom GAN class.
gan = load_model("celeba_wgan.keras", custom_objects={"GAN": GAN})

In [None]:
# Print the summary of the reloaded GAN model.
gan.summary()

In [None]:
# Print the summary of the reloaded model's generator.
gan.generator.summary()

In [None]:
def generate_image(generator):
    """Sample one latent vector, run it through ``generator`` and display the image."""
    latent = np.random.normal(0, 1, (1, LATENT_DIM))
    sample = generator.predict(latent)[0]
    # (x + 1) / 2 maps the [-1, 1] range onto [0, 1] for display
    plt.imshow((sample + 1) / 2)
    plt.show()

# Display one sample drawn from the GAN's generator.
generate_image(gan.generator)


## 1.6) Test Generator

In [None]:
def _batched_image_dataset(paths):
    """Build the decode -> shuffle -> batch -> prefetch pipeline for a list of image paths."""
    ds = tf.data.Dataset.from_tensor_slices(paths)
    ds = ds.map(load_and_preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    ds = ds.shuffle(buffer_size=1000)
    ds = ds.batch(batch_size//2, drop_remainder=True)
    return ds.prefetch(tf.data.AUTOTUNE)


# Image lists for the three fake sets, one glob per folder
insight_paths = glob.glob("./data/fake_normalized_64/*.jpg")
inpainting_paths = glob.glob("./data/inpainting_normalized_64/*.jpg")
text2img_paths = glob.glob("./data/text2img_normalized_64/*.jpg")

# Real images: only the test half of the persisted split is used here
_, real_paths = load_paths('./train.json', './test.json')

# Same pipeline for every source
dataset_real = _batched_image_dataset(real_paths)
dataset_insight = _batched_image_dataset(insight_paths)
dataset_inpainting = _batched_image_dataset(inpainting_paths)
dataset_text2img = _batched_image_dataset(text2img_paths)

In [None]:
from _fid import *

def _first_image_batch(dataset):
    """Return the first batch of ``dataset`` as a float32 NumPy array."""
    batch = next(iter(dataset.take(1)))
    # Datasets that map a single-output function over paths yield bare image
    # batches. Indexing such a batch with [0] keeps one image instead of the
    # whole batch, so only tuple/list elements (e.g. (images, labels)) are
    # unpacked.
    if isinstance(batch, (tuple, list)):
        batch = batch[0]
    return batch.numpy().astype(np.float32)


def gen_experiments(dataset_real, dataset_insight, dataset_inpainting, dataset_text2img, batch_size, num_batches=10, generator=None):
    """Score generated and pre-made fake images against real images.

    Each round draws ``batch_size`` latent vectors, generates images from them,
    takes one batch from every dataset, rescales all image sets with
    ``img_scaler`` and scores each fake set against the real batch with
    ``calculate_fid``.

    Args:
        dataset_real: Batched dataset of real images.
        dataset_insight: Batched dataset of the first fake set.
        dataset_inpainting: Batched dataset of the inpainting fake set.
        dataset_text2img: Batched dataset of the text2img fake set.
        batch_size: Number of images to generate per round.
        num_batches: Number of rounds.
        generator: Model used to generate images; defaults to the notebook's
            global ``gan.generator``.

    Returns:
        An 8-tuple ``(mean_gen, gen_fids, mean_insight, insight_fids,
        mean_inpainting, inpainting_fids, mean_text2img, text2img_fids)``:
        for each source, the mean score followed by the per-round list.
    """
    if generator is None:
        generator = gan.generator

    # Shape handed to img_scaler for every image set
    # NOTE(review): presumably the input size expected by inception_model — confirm in _fid.
    target_shape = (75, 75, 3)

    gen_fids = []
    insight_fids = []
    inpainting_fids = []
    text2img_fids = []
    for _ in range(num_batches):
        noise = np.random.normal(0, 1, (batch_size, LATENT_DIM))
        gen_images = generator.predict(noise)

        real_images = _first_image_batch(dataset_real)
        insight_images = _first_image_batch(dataset_insight)
        inpainting_images = _first_image_batch(dataset_inpainting)
        text2img_images = _first_image_batch(dataset_text2img)

        gen_images = img_scaler(gen_images, target_shape)
        real_images = img_scaler(real_images, target_shape)
        insight_images = img_scaler(insight_images, target_shape)
        inpainting_images = img_scaler(inpainting_images, target_shape)
        text2img_images = img_scaler(text2img_images, target_shape)

        # Every fake source is compared against the same real batch of this round
        gen_fids.append(calculate_fid(inception_model, gen_images, real_images))
        insight_fids.append(calculate_fid(inception_model, insight_images, real_images))
        inpainting_fids.append(calculate_fid(inception_model, inpainting_images, real_images))
        text2img_fids.append(calculate_fid(inception_model, text2img_images, real_images))

    return np.mean(gen_fids), gen_fids, np.mean(insight_fids), insight_fids, np.mean(inpainting_fids), inpainting_fids, np.mean(text2img_fids), text2img_fids

# Run a single evaluation round. batch_size//2 is passed so the number of
# generated images equals the batch size the evaluation datasets were built with.
mean_gen, gen_fids, mean_ins, ins_fids, mean_inp, inp_fids, mean_t2i, t2i_fids = gen_experiments(dataset_real, dataset_insight, dataset_inpainting, dataset_text2img, batch_size//2, num_batches=1)

In [None]:
# Cell output: mean score of the text2img set over the evaluated rounds.
mean_t2i