# 0. Ready for Dataset

In [None]:
# Open Colab's upload dialog; the commands below expect a file named kaggle.json
# (the Kaggle API token) to have been uploaded into the working directory.
from google.colab import files
uploaded = files.upload()

# Move the token into ~/.kaggle and make it readable/writable by the owner only.
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Fetch the "gan-getting-started" competition archive with the Kaggle CLI
# and extract it under /content/gan/.
!kaggle competitions download -c gan-getting-started
!unzip gan-getting-started.zip -d /content/gan/

In [None]:
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import layers
import numpy as np
import matplotlib.pyplot as plt
import os
import zipfile
from PIL import Image

In [None]:
# Seed NumPy's and TensorFlow's global random generators with the same value.
_GLOBAL_SEED = 20211001
np.random.seed(_GLOBAL_SEED)
tf.random.set_seed(_GLOBAL_SEED)

# 1. Problem & Data

## 1.1 Problem Definition

* The problem is to build a GAN model that transforms a general photograph into the artistic style of Claude Monet while preserving the structure of the original photo.

* A GAN (Generative Adversarial Network) is an architecture where two neural networks, a Generator and a Discriminator, learn by competing against each other.

 * The Generator creates realistic-looking fake data.

 * The Discriminator is trained to distinguish between the fake data created by the Generator and real data, with the goal of differentiating real Monet paintings from the fake data.

 * Why is it an 'Adversarial' Network? Because the Generator continuously creates more realistic data to fool the Discriminator, and the Discriminator, in turn, gets better at telling real from fake to avoid being fooled. As this 'competition' repeats, the Generator becomes capable of producing highly realistic results. **("Korean-To-English Translation")**

## 1.2 Data Description

* Data Format: Provided in two formats: TFRecord (.tfrec) and JPEG (.jpg).

* Dimensions: 256x256 pixels with 3 color channels (RGB).

* Data Structure and Size:

 * monet_jpg / monet_tfrec: 300 images (for training)

 * photo_jpg / photo_tfrec: 7,038 images (photos to be transformed into the Monet style) **("Korean-To-English Translation")**

# 2. EDA

In [None]:
# Collect the TFRecord shard paths of each image domain from the extracted archive.
_list_matching = tf.io.gfile.glob
MONET_FILENAMES = _list_matching('/content/gan/monet_tfrec/*.tfrec')
PHOTO_FILENAMES = _list_matching('/content/gan/photo_tfrec/*.tfrec')

In [None]:
# Spatial size every decoded image is reshaped to (see decode_image).
IMAGE_SIZE = [256, 256]
# Number of images per dataset batch; also the size of the noise batch during training.
BATCH_SIZE = 64
# Passed to map()/prefetch() so tf.data chooses the parallelism and buffer size itself.
AUTOTUNE = tf.data.AUTOTUNE

def decode_image(image):
    """Turn encoded JPEG bytes into a float32 RGB tensor.

    Args:
        image: Scalar string tensor holding one JPEG-encoded image.

    Returns:
        A float32 tensor of shape ``[*IMAGE_SIZE, 3]``. The decoded 8-bit
        values are divided by 127.5 and shifted by -1, i.e. mapped from
        [0, 255] to [-1, 1] (the range of the generator's tanh output).
    """
    rgb = tf.cast(tf.image.decode_jpeg(image, channels=3), tf.float32)
    rescaled = (rgb / 127.5) - 1.0
    # The reshape pins a static shape so the dataset can be batched downstream.
    return tf.reshape(rescaled, [*IMAGE_SIZE, 3])

def read_tfrecord(example):
    """Parse one serialized TFRecord example and return its decoded image.

    Args:
        example: Serialized ``tf.train.Example`` carrying a string feature
            named ``'image'``.

    Returns:
        The tensor produced by ``decode_image`` for that feature.
    """
    feature_spec = {'image': tf.io.FixedLenFeature([], tf.string)}
    parsed = tf.io.parse_single_example(example, feature_spec)
    return decode_image(parsed['image'])

def load_dataset(filenames):
    """Build the input pipeline for a list of TFRecord files.

    Args:
        filenames: Paths of the ``.tfrec`` shards to read.

    Returns:
        A ``tf.data.Dataset`` that yields batches of ``BATCH_SIZE`` decoded
        images. Records are decoded in parallel, cached after decoding, then
        batched and prefetched. No shuffling is applied.
    """
    return (
        tf.data.TFRecordDataset(filenames)
        .map(read_tfrecord, num_parallel_calls=AUTOTUNE)
        .cache()
        .batch(BATCH_SIZE)
        .prefetch(AUTOTUNE)
    )


# One pipeline per image domain.
monet_dataset = load_dataset(MONET_FILENAMES)
photo_dataset = load_dataset(PHOTO_FILENAMES)

In [None]:
# Count the Monet images by streaming the unbatched dataset once.
# This avoids the hard-coded upper bound of 300 and does not keep every
# decoded image alive in a Python list just to take its length.
n_monet_samples = sum(1 for _ in monet_dataset.unbatch())

In [None]:
def display_images(dataset, n=5, title="Images"):
    """Show the first ``n`` images of a batched dataset side by side.

    Args:
        dataset: Batched ``tf.data.Dataset`` of image tensors scaled to
            [-1, 1] (as produced by ``load_dataset``).
        n: Number of images to display.
        title: Figure title.
    """
    plt.figure(figsize=(15, 3))
    # Unbatch first so that ``n`` counts individual images. Taking n whole
    # batches and showing only element 0 of each would limit n to the number
    # of batches and decode far more images than are displayed.
    for i, image in enumerate(dataset.unbatch().take(n)):
        plt.subplot(1, n, i + 1)
        # Map [-1, 1] back to [0, 1], the float range imshow expects.
        plt.imshow((image + 1) / 2)
        plt.axis('off')
    plt.suptitle(title, fontsize=16)
    plt.show()


display_images(monet_dataset, n=5, title="Sample Monet Paintings")
display_images(photo_dataset, n=5, title="Sample Photos")

# 3. Architecture

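* Generator: It begins with a 100-dimensional random noise vector, which a Dense layer projects and reshapes into an 8x8 feature map with 512 channels. Five stride-2 Conv2DTranspose layers then upsample it step by step (8 → 16 → 32 → 64 → 128 → 256), and a final convolution with a tanh activation produces a 256x256 color image.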

 * BatchNorm : This normalizes the input data for each layer to have a mean of 0 and a variance of 1. It helps solve the "Internal Covariate Shift" problem, which occurs because the distribution of each layer's input data changes unstably as the weights of previous layers are updated during training.

 * LeakyReLU : The standard ReLU function, max(0, x), causes the "Dying ReLU" problem where learning stops because the gradient becomes zero for negative inputs. LeakyReLU fixes this by multiplying negative inputs by a very small constant instead of outputting zero.

* Discriminator : It takes a 256x256 image (real or fake) as input and uses a series of Conv2D layers to continuously reduce the image size while extracting key features. Finally, it outputs the probability of the image being real. **("Korean-To-English Translation")**

In [None]:
# Length of the random noise vector fed to the generator.
LATENT_DIM = 100


def build_generator():
    """Assemble the DCGAN-style generator.

    A Dense layer projects the latent vector to an 8x8x512 feature map. Five
    stride-2 transposed-convolution stages (each followed by batch
    normalization and LeakyReLU) double the resolution from 8 up to 256, and a
    final tanh convolution emits three channels in [-1, 1].

    Returns:
        A ``keras.Sequential`` model named ``'generator'``.
    """
    stack = [
        layers.Dense(8 * 8 * 512, input_shape=(LATENT_DIM,)),
        layers.Reshape((8, 8, 512)),
    ]

    # One upsampling stage per entry; the channel count halves as resolution doubles.
    for n_filters in (256, 128, 64, 32, 16):
        stack.extend([
            layers.Conv2DTranspose(n_filters, kernel_size=5, strides=2, padding='same'),
            layers.BatchNormalization(),
            layers.LeakyReLU(alpha=0.2),
        ])

    stack.append(layers.Conv2D(3, kernel_size=5, padding='same', activation='tanh'))
    return keras.Sequential(stack, name='generator')

In [None]:
def build_discriminator():
    """Assemble the convolutional discriminator.

    Five stride-2 convolution stages halve the spatial resolution of a
    256x256x3 input each time. Every stage applies LeakyReLU and dropout, and
    all but the first also use batch normalization. The flattened features
    feed a single sigmoid unit.

    Returns:
        A ``keras.Sequential`` model named ``'discriminator'`` whose output is
        a value in (0, 1) per image.
    """
    stack = [
        # The input stage carries the input shape and has no batch normalization.
        layers.Conv2D(32, kernel_size=5, strides=2, padding='same', input_shape=[256, 256, 3]),
        layers.LeakyReLU(alpha=0.2),
        layers.Dropout(0.3),
    ]

    for n_filters in (64, 128, 256, 512):
        stack.extend([
            layers.Conv2D(n_filters, kernel_size=5, strides=2, padding='same'),
            layers.BatchNormalization(),
            layers.LeakyReLU(alpha=0.2),
            layers.Dropout(0.3),
        ])

    stack.extend([
        layers.Flatten(),
        layers.Dense(1, activation='sigmoid'),
    ])
    return keras.Sequential(stack, name='discriminator')

In [None]:
# Create both networks, then print their layer tables.
generator, discriminator = build_generator(), build_discriminator()

for network in (generator, discriminator):
    network.summary()

In [None]:
# Binary cross-entropy on probabilities (the discriminator ends in a sigmoid).
cross_entropy = keras.losses.BinaryCrossentropy()


def discriminator_loss(real_output, fake_output):
    """Return the discriminator's loss for one batch.

    Args:
        real_output: Discriminator predictions for real images.
        fake_output: Discriminator predictions for generated images.

    Returns:
        The cross-entropy of ``real_output`` against all-ones targets plus the
        cross-entropy of ``fake_output`` against all-zeros targets.
    """
    real_targets = tf.ones_like(real_output)
    fake_targets = tf.zeros_like(fake_output)
    return cross_entropy(real_targets, real_output) + cross_entropy(fake_targets, fake_output)

def generator_loss(fake_output):
    """Return the generator's loss for one batch.

    Args:
        fake_output: Discriminator predictions for generated images.

    Returns:
        The cross-entropy of ``fake_output`` against all-ones targets, so the
        loss is low when the discriminator rates the fakes as real.
    """
    wanted_verdict = tf.ones_like(fake_output)
    return cross_entropy(wanted_verdict, fake_output)

# Each network gets its own Adam optimizer, both with a learning rate of 1e-4.
generator_optimizer, discriminator_optimizer = (
    keras.optimizers.Adam(learning_rate=1e-4),
    keras.optimizers.Adam(learning_rate=1e-4),
)

In [None]:
def generate_and_save_images(model, epoch, test_input):
    """Display the images a generator produces for a fixed batch of latent vectors.

    Note: despite its name, this function only shows the figure; nothing is
    written to disk.

    Args:
        model: Generator model; called with ``training=False``.
        epoch: Epoch number shown in the figure title.
        test_input: Batch of latent vectors, one per image to draw.
    """
    predictions = model(test_input, training=False)
    n_images = int(predictions.shape[0])

    # Size the grid from the batch instead of assuming 4x4, so more than 16
    # inputs no longer raise. 16 inputs still give the original 4x4 layout.
    n_cols = max(1, int(np.ceil(np.sqrt(n_images))))
    n_rows = max(1, int(np.ceil(n_images / n_cols)))

    plt.figure(figsize=(10, 10))
    for i in range(n_images):
        plt.subplot(n_rows, n_cols, i + 1)
        # Map the tanh output range [-1, 1] to [0, 1] for imshow.
        plt.imshow((predictions[i] + 1) / 2)
        plt.axis('off')

    plt.suptitle(f'Generated Images at Epoch {epoch}', fontsize=16)
    plt.tight_layout()
    plt.show()

In [None]:
EPOCHS = 100
num_examples_to_generate = 16
# The discriminator is only eligible for an update on every D_TRAIN_FREQ-th step ...
D_TRAIN_FREQ = 5
# ... and is skipped while its recent mean loss is below this value.
DISC_LOSS_THRESHOLD = 0.5

# Ceiling division: the final, smaller batch counts as a step too. Floor
# division combined with take() silently dropped the last partial batch
# (e.g. 44 of 300 paintings at batch size 64) in every epoch.
steps_per_epoch = -(-n_monet_samples // BATCH_SIZE)
# Fixed latent vectors, so the periodic previews are comparable across epochs.
seed = tf.random.normal([num_examples_to_generate, LATENT_DIM])

gen_losses = []
disc_losses = []

for epoch in range(EPOCHS):
    gen_loss_epoch = []
    disc_loss_epoch = []

    progbar = keras.utils.Progbar(steps_per_epoch)

    # Iterate over the whole dataset so that every image is seen each epoch.
    for i, image_batch in enumerate(monet_dataset):
        train_discriminator = (i % D_TRAIN_FREQ == 0)

        # Hold the discriminator back while it is clearly ahead (mean of the
        # last five recorded losses in this epoch below the threshold).
        if disc_loss_epoch and np.mean(disc_loss_epoch[-5:]) < DISC_LOSS_THRESHOLD:
            train_discriminator = False

        # Match the number of fakes to the real batch, which may be smaller
        # than BATCH_SIZE on the last step of an epoch.
        noise = tf.random.normal([tf.shape(image_batch)[0], LATENT_DIM])

        with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
            generated_images = generator(noise, training=True)

            real_output = discriminator(image_batch, training=True)
            fake_output = discriminator(generated_images, training=True)

            gen_loss = generator_loss(fake_output)
            disc_loss = discriminator_loss(real_output, fake_output)

        # The generator is updated on every step.
        gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
        generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))

        if train_discriminator:
            gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)
            discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))

        # Store plain floats so the histories do not hold on to tensors.
        gen_loss_epoch.append(float(gen_loss))
        disc_loss_epoch.append(float(disc_loss))
        progbar.update(i + 1)

    gen_losses.append(np.mean(gen_loss_epoch))
    disc_losses.append(np.mean(disc_loss_epoch))

    # Every 10 epochs: preview the fixed seed and checkpoint both networks.
    if (epoch + 1) % 10 == 0:
        generate_and_save_images(generator, epoch + 1, seed)
        generator.save_weights(f'/tmp/generator_epoch_{epoch+1}.weights.h5')
        discriminator.save_weights(f'/tmp/discriminator_epoch_{epoch+1}.weights.h5')

# 4. Results

In [None]:
# Per-epoch mean losses, one panel per network.
# Each entry: (history, label, extra plot keyword arguments).
loss_panels = (
    (gen_losses, 'Generator Loss', {}),
    (disc_losses, 'Discriminator Loss', {'color': 'orange'}),
)

plt.figure(figsize=(14, 6))

for panel_number, (history, label, plot_kwargs) in enumerate(loss_panels, start=1):
    plt.subplot(1, 2, panel_number)
    plt.plot(history, label=label, linewidth=2, **plot_kwargs)
    plt.xlabel('Epoch', fontsize=12)
    plt.ylabel('Loss', fontsize=12)
    plt.title(f'{label} During Training', fontsize=14)
    plt.grid(True, alpha=0.3)
    plt.legend()

plt.tight_layout()
plt.show()

In [None]:
# Draw 25 fresh latent vectors and show the generator's output as a 5x5 grid.
n_samples = 25
sample_noise = tf.random.normal([n_samples, LATENT_DIM])
generated_samples = generator(sample_noise, training=False)

plt.figure(figsize=(15, 15))
for cell, sample in enumerate(generated_samples, start=1):
    plt.subplot(5, 5, cell)
    # Map [-1, 1] to [0, 1] for display.
    plt.imshow((sample + 1) / 2)
    plt.axis('off')

plt.suptitle('Final Generated Monet-Style Images', fontsize=20)
plt.tight_layout()
plt.show()

In [None]:
def compare_real_vs_generated(real_dataset, generator, n=5):
    """Show ``n`` real images above ``n`` generated images.

    Args:
        real_dataset: Batched ``tf.data.Dataset`` of real images scaled to
            [-1, 1].
        generator: Generator model; called with ``training=False`` on ``n``
            random latent vectors.
        n: Number of columns (images per row); must be at least 1.
    """
    # squeeze=False keeps ``axes`` two-dimensional even when n == 1.
    fig, axes = plt.subplots(2, n, figsize=(15, 6), squeeze=False)

    # Hide ticks and frames but keep the axes switched on: axis('off') would
    # also hide the row labels set below.
    for ax in axes.flat:
        ax.set_xticks([])
        ax.set_yticks([])
        for spine in ax.spines.values():
            spine.set_visible(False)

    # Unbatch so the top row shows n individual images instead of the first
    # element of n separate batches.
    for i, real_img in enumerate(real_dataset.unbatch().take(n)):
        axes[0, i].imshow((real_img + 1) / 2)
    axes[0, 0].set_ylabel('Real Monet', fontsize=14)

    noise = tf.random.normal([n, LATENT_DIM])
    generated = generator(noise, training=False)
    for i in range(n):
        axes[1, i].imshow((generated[i] + 1) / 2)
    axes[1, 0].set_ylabel('Generated', fontsize=14)

    plt.suptitle('Real Monet Paintings vs Generated Images', fontsize=16)
    plt.tight_layout()
    plt.show()


compare_real_vs_generated(monet_dataset, generator)

In [None]:
# Diversity check: perturb one latent vector nine times with small Gaussian
# noise (scaled by 0.1) and look at how much the generated images differ.
base_noise = tf.random.normal([1, LATENT_DIM])
variations = tf.concat(
    [base_noise + tf.random.normal([1, LATENT_DIM]) * 0.1 for _ in range(9)],
    axis=0,
)
generated_variations = generator(variations, training=False)

plt.figure(figsize=(12, 12))
for cell, picture in enumerate(generated_variations, start=1):
    plt.subplot(3, 3, cell)
    plt.imshow((picture + 1) / 2)
    plt.axis('off')

plt.suptitle('Diversity Test: Variations from Similar Noise Vectors', fontsize=16)
plt.tight_layout()
plt.show()

# 5. Conclusion

* Results and Interpretation

 * The Generator Loss oscillates between 0.4 and 1.8, while the Discriminator Loss rises sharply until epoch 20 and then fluctuates widely between 4 and 7.

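 * A high Discriminator Loss alongside a low Generator Loss is characteristic of adversarial training: the two losses move in opposite directions, because whenever the Generator succeeds in fooling the Discriminator, the Discriminator's loss rises.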

 * The generated images did not come close to the artistic style of Claude Monet.

* Shortcomings

 * The Discriminator Loss shows high volatility, fluctuating between 4 and 7, which indicates unoptimized training. Furthermore, even after 100 epochs, the loss fails to converge or stabilize, continuing to oscillate. This suggests a need to increase the number of epochs or, if it still fails to stabilize, to introduce a new GAN architecture, improve the Generator and Discriminator, or implement a new loss function.

 * The evaluation relies solely on visual assessment, which is subjective: an image might be quantitatively closer to Monet's style even if it does not appear so. It is therefore necessary to introduce an objective metric such as MiFID (Memorization-informed Fréchet Inception Distance), the metric used by the Kaggle competition.

* Future Improvements

 * Currently, the Discriminator is trained only once every five steps, which causes large fluctuations in the loss. The training frequency could be dynamically adjusted. For instance, if the Discriminator Loss exceeds 6, it could be trained every step, and if it drops below 2, it could be trained only once every 10 steps.

 * Binary Cross-Entropy can cause the gradient vanishing problem in the early stages of GAN training (Goodfellow et al., 2014). Introducing the Wasserstein Loss, as proposed by Arjovsky et al. (2017) to solve such training instabilities, could mitigate this problem.

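 * It seems necessary to adopt the CycleGAN architecture, as suggested by the Kaggle competition, because it translates an input photo into the Monet style rather than generating an image from random noise. **("Korean-To-English Translation")**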

# 6. Submission

In [None]:
import shutil

# Generate the submission images into a scratch directory, pack them into
# images.zip, and always remove the scratch directory afterwards.
output_dir = '/tmp/generated_images'
num_images = 7500
batch_size = 50
# Ceiling division, so a trailing partial batch is generated as well when
# num_images is not a multiple of batch_size.
num_batches = -(-num_images // batch_size)

os.makedirs(output_dir, exist_ok=True)
try:
    written_files = []
    for batch in range(num_batches):
        first_index = batch * batch_size
        count = min(batch_size, num_images - first_index)

        noise = tf.random.normal([count, LATENT_DIM])
        generated_images = generator(noise, training=False)

        # Map the tanh range [-1, 1] to [0, 255]. Round to the nearest level
        # (a bare uint8 cast truncates) and clip as a guard against overflow.
        pixels = np.clip(np.rint((generated_images.numpy() + 1.0) * 127.5), 0, 255).astype(np.uint8)

        for i, img_array in enumerate(pixels):
            filename = f'monet_{first_index + i:05d}.jpg'
            Image.fromarray(img_array).save(os.path.join(output_dir, filename), quality=95)
            written_files.append(filename)

    # Archive exactly the files that were written, stored flat by file name.
    with zipfile.ZipFile('images.zip', 'w', zipfile.ZIP_DEFLATED) as zipf:
        for filename in written_files:
            zipf.write(os.path.join(output_dir, filename), filename)
finally:
    # Clean up even if generation or zipping failed part-way through.
    shutil.rmtree(output_dir)

In [None]:
# Sanity check: open the first archive member and display it.
with zipfile.ZipFile('images.zip', 'r') as zipf:
    file_list = zipf.namelist()
    first_member = file_list[0]
    with zipf.open(first_member) as img_file:
        img = Image.open(img_file)
        # Draw while the archive member is still open, since the image is read from it.
        _, preview_ax = plt.subplots(figsize=(6, 6))
        preview_ax.imshow(img)
        preview_ax.axis('off')
        preview_ax.set_title('Sample Submission Image')
        plt.show()

In [None]:
# Re-imported here so this cell also runs on its own; sends images.zip to the browser as a download.
from google.colab import files
files.download('images.zip')

# 7. References

* Goodfellow, I., Pouget-Abadie, J., Mirza, M., Xu, B., Warde-Farley, D., Ozair, S., Courville, A., & Bengio, Y. (2014). Generative adversarial nets. Advances in Neural Information Processing Systems, 27, 2672-2680.

* Arjovsky, M., Chintala, S., & Bottou, L. (2017). Wasserstein generative adversarial networks. Proceedings of the 34th International Conference on Machine Learning, 70, 214-223.

* "Korean-To-English Translation" prompt. Gemini, Google, 29 July 2025, https://g.co/gemini/share/8d0a80f6bc41.

# 8. Github

- https://github.com/mulkib/csca5642.git