Cycle Generative Adversarial Network

This implementation creates a CycleGAN (Cycle-Consistent Generative Adversarial Network) that transforms regular photographs into Monet-style paintings. The model learns to translate between two image domains without requiring paired training data.

Architecture
The CycleGAN consists of four neural networks:

Generator A→B: Converts photos to Monet-style images
Generator B→A: Converts Monet-style images back to photos
Discriminator A: Distinguishes real photos from generated photos
Discriminator B: Distinguishes real Monet paintings from generated ones

Training Process

Data Loading: Load Monet paintings and photos from GCS buckets
Model Creation: Initialize generators and discriminators
Training Loop:

For each batch, compute forward pass
Calculate adversarial and cycle consistency losses
Update generator and discriminator weights in the same step, using gradients from that single forward pass


Inference: Generate Monet-style versions of all photos
Export: Save results and create downloadable archive

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input tree and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    full_paths = (os.path.join(dirname, filename) for filename in filenames)
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
import os
import glob
import numpy as np
from PIL import Image
import shutil
from kaggle_datasets import KaggleDatasets

# ============================================================================
# CONFIGURATION PARAMETERS
# ============================================================================

# Side length in pixels: every image is resized to IMAGE_SIZE x IMAGE_SIZE.
IMAGE_SIZE = 256
# Images per batch (one at a time keeps memory use low for large images).
BATCH_SIZE = 1
# Number of complete passes through the training dataset.
EPOCHS = 15
# Weight of the cycle-consistency term in the total generator loss
# (higher = more faithful reconstruction).
LAMBDA = 10

class Config:
    """Empty configuration placeholder; no longer needed for the output directory."""

# ============================================================================
# SETUP AND UTILITY FUNCTIONS
# ============================================================================

def setup_gpu():
    """
    Report the available compute device and enable GPU memory growth.

    Memory growth stops TensorFlow from reserving all GPU memory up front.
    If the GPUs were already initialized (for example when this function is
    called a second time in the same session), TensorFlow refuses the change
    with a RuntimeError; that error is reported and otherwise ignored so the
    rest of the pipeline can still run.
    """
    gpus = tf.config.list_physical_devices('GPU')
    if not gpus:
        print('Using CPU')
        return

    print(f'Using GPU: {len(gpus)} device(s)')
    try:
        # Enable memory growth for each GPU to avoid memory allocation errors
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Memory growth can only be configured before the GPUs are initialized.
        print(f'Could not enable GPU memory growth: {e}')

def load_and_preprocess_image(path):
    """
    Read one JPEG file and turn it into a network-ready tensor.

    Args:
        path: String path to the image file

    Returns:
        float32 image tensor of size IMAGE_SIZE x IMAGE_SIZE with 3 channels,
        scaled from the [0, 255] pixel range to [-1, 1]
    """
    raw_bytes = tf.io.read_file(path)

    # Decode as RGB and switch to float32 before resizing.
    pixels = tf.cast(tf.image.decode_jpeg(raw_bytes, channels=3), tf.float32)
    resized = tf.image.resize(pixels, [IMAGE_SIZE, IMAGE_SIZE])

    # [0, 255] -> [-1, 1]; this matches the tanh output range of the generators.
    return (resized / 127.5) - 1.0

def create_dataset_from_gcs(file_pattern, batch_size=BATCH_SIZE):
    """
    Build a batched image dataset from every file matching a glob pattern.

    Args:
        file_pattern: Glob pattern to match files (e.g., 'gs://bucket/folder/*.jpg')
        batch_size: Number of images per batch

    Returns:
        tf.data.Dataset: Preprocessed, batched and prefetched dataset

    Raises:
        ValueError: If the pattern matches no files
    """
    matched_files = tf.io.gfile.glob(file_pattern)
    if not matched_files:
        raise ValueError(f"No files found matching pattern: {file_pattern}")

    print(f"Found {len(matched_files)} files matching pattern: {file_pattern}")

    # paths -> decoded images (in parallel) -> batches, prefetched while the
    # current batch is being consumed
    pipeline = (
        tf.data.Dataset.from_tensor_slices(matched_files)
        .map(load_and_preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
        .batch(batch_size)
        .prefetch(tf.data.AUTOTUNE)
    )
    return pipeline

# ============================================================================
# NEURAL NETWORK BUILDING BLOCKS
# ============================================================================

def conv_block(filters, size, strides=2, norm=True, activation='leaky', transpose=False):
    """
    Assemble a convolution -> (batch norm) -> (activation) block.

    Args:
        filters: Number of output channels/filters
        size: Kernel size (typically 4x4)
        strides: Stride for convolution (2 = downsample, 1 = same size)
        norm: Whether to apply batch normalization
        activation: 'leaky' adds LeakyReLU(0.2), 'relu' adds ReLU; any other
            value adds no activation layer
        transpose: If True, use transposed convolution for upsampling

    Returns:
        tf.keras.Sequential: Complete convolutional block
    """
    block = tf.keras.Sequential()

    # Transposed convolution upsamples; regular convolution downsamples.
    conv_layer = tf.keras.layers.Conv2DTranspose if transpose else tf.keras.layers.Conv2D
    stages = [conv_layer(filters, size, strides, padding='same', use_bias=False)]

    if norm:
        stages.append(tf.keras.layers.BatchNormalization())

    if activation == 'leaky':
        stages.append(tf.keras.layers.LeakyReLU(0.2))
    elif activation == 'relu':
        stages.append(tf.keras.layers.ReLU())

    for stage in stages:
        block.add(stage)
    return block

def build_generator():
    """
    Build a U-Net style generator for image-to-image translation.

    The encoder halves the spatial size five times (256 -> 8), the decoder
    doubles it back, and each decoder stage is concatenated with the encoder
    output of the same resolution (skip connection).

    Returns:
        tf.keras.Model: Generator mapping an IMAGE_SIZE x IMAGE_SIZE x 3 image
        to an image of the same size with values in [-1, 1]
    """
    inputs = tf.keras.layers.Input([IMAGE_SIZE, IMAGE_SIZE, 3])

    # ========== ENCODER (DOWNSAMPLING PATH) ==========
    # 256 -> 128x128x64 -> 64x64x128 -> 32x32x256 -> 16x16x512 -> 8x8x512
    encoder_outputs = []
    x = inputs
    for depth, filters in enumerate((64, 128, 256, 512, 512)):
        # No normalization in the very first layer.
        x = conv_block(filters, 4, norm=(depth != 0))(x)
        encoder_outputs.append(x)

    # The last encoder output is the bottleneck; the rest feed skip connections.
    x = encoder_outputs.pop()

    # ========== DECODER (UPSAMPLING PATH) ==========
    # 8 -> 16x16x512 -> 32x32x256 -> 64x64x128 -> 128x128x64, each followed by
    # concatenation with the matching encoder output (deepest first).
    for filters in (512, 256, 128, 64):
        x = conv_block(filters, 4, strides=2, activation='relu', transpose=True)(x)
        x = tf.keras.layers.Concatenate()([x, encoder_outputs.pop()])

    # ========== OUTPUT LAYER ==========
    # 128x128 -> 256x256x3; tanh keeps the output in [-1, 1] like the inputs.
    output = tf.keras.layers.Conv2DTranspose(3, 4, strides=2, padding='same', activation='tanh')(x)

    return tf.keras.Model(inputs, output)

def build_discriminator():
    """
    Build a PatchGAN discriminator.

    Instead of one real/fake score per image, the network outputs a grid of
    raw logits, one per image patch, so it judges local texture rather than
    global structure.

    Returns:
        tf.keras.Model: Discriminator mapping an IMAGE_SIZE x IMAGE_SIZE x 3
        image to a 32x32x1 map of patch logits
    """
    inputs = tf.keras.layers.Input([IMAGE_SIZE, IMAGE_SIZE, 3])

    # (filters, strides, norm) per stage:
    # 128x128x64 (no norm) -> 64x64x128 -> 32x32x256 -> 32x32x512 (stride 1)
    stage_specs = (
        (64, 2, False),
        (128, 2, True),
        (256, 2, True),
        (512, 1, True),
    )
    features = inputs
    for filters, strides, norm in stage_specs:
        features = conv_block(filters, 4, strides=strides, norm=norm)(features)

    # Raw logits (no activation), one value per patch: 32x32x1
    patch_logits = tf.keras.layers.Conv2D(1, 4, padding='same')(features)

    return tf.keras.Model(inputs, patch_logits)

# ============================================================================
# CYCLEGAN MODEL IMPLEMENTATION
# ============================================================================

class SimpleCycleGAN(tf.keras.Model):
    """
    Complete CycleGAN implementation with training logic.

    CycleGAN learns to translate between two image domains (A = photos,
    B = Monet paintings) without paired data. It uses two generators
    (A->B and B->A) and two discriminators (one per domain).

    Key idea: the cycle consistency loss pushes A->B->A back towards A
    (and B->A->B back towards B).
    """

    def __init__(self):
        """Build the two generators and the two discriminators."""
        super().__init__()

        # ========== GENERATORS ==========
        self.gen_AB = build_generator()  # Transforms domain A to B (photo -> Monet)
        self.gen_BA = build_generator()  # Transforms domain B to A (Monet -> photo)

        # ========== DISCRIMINATORS ==========
        self.disc_A = build_discriminator()  # Distinguishes real vs fake domain A images
        self.disc_B = build_discriminator()  # Distinguishes real vs fake domain B images

    def compile(self, gen_opt, disc_opt=None):
        """
        Configure optimizers for training.

        Args:
            gen_opt: Optimizer shared by both generators
            disc_opt: Unused (kept for compatibility); each discriminator
                always gets its own Adam(2e-4, beta_1=0.5) optimizer
        """
        super().compile()
        self.gen_opt = gen_opt

        # Separate optimizer state for each discriminator.
        self.disc_A_opt = tf.keras.optimizers.Adam(2e-4, beta_1=0.5)
        self.disc_B_opt = tf.keras.optimizers.Adam(2e-4, beta_1=0.5)

    @staticmethod
    def _bce_from_logits(target, logits):
        """Return the sigmoid cross-entropy averaged over every patch logit (a scalar)."""
        # binary_crossentropy only averages over the last axis, so the result
        # is still one value per patch; reduce it to a single scalar loss.
        per_patch = tf.keras.losses.binary_crossentropy(target, logits, from_logits=True)
        return tf.reduce_mean(per_patch)

    @tf.function  # Compile for faster execution
    def train_step(self, batch):
        """
        Execute one training step of the CycleGAN.

        All four networks are updated in the same step from one forward pass.

        Args:
            batch: Tuple of (real_A, real_B) image batches

        Returns:
            Dict of scalar loss values ('gen_loss', 'disc_A_loss',
            'disc_B_loss', 'cycle_loss') for monitoring training progress
        """
        real_A, real_B = batch

        # Persistent tape: three separate gradient computations reuse it.
        with tf.GradientTape(persistent=True) as tape:

            # ========== FORWARD PASS ==========
            fake_B = self.gen_AB(real_A, training=True)    # Photo -> Monet style
            fake_A = self.gen_BA(real_B, training=True)    # Monet -> Photo style

            # ========== CYCLE CONSISTENCY ==========
            cycled_A = self.gen_BA(fake_B, training=True)  # Photo -> Monet -> Photo
            cycled_B = self.gen_AB(fake_A, training=True)  # Monet -> Photo -> Monet

            # ========== DISCRIMINATOR PREDICTIONS ==========
            disc_real_A = self.disc_A(real_A, training=True)
            disc_real_B = self.disc_B(real_B, training=True)
            disc_fake_A = self.disc_A(fake_A, training=True)
            disc_fake_B = self.disc_B(fake_B, training=True)

            # ========== GENERATOR LOSSES ==========
            # Generators want the discriminators to label their fakes as real.
            gen_AB_loss = self._bce_from_logits(tf.ones_like(disc_fake_B), disc_fake_B)
            gen_BA_loss = self._bce_from_logits(tf.ones_like(disc_fake_A), disc_fake_A)

            # ========== CYCLE CONSISTENCY LOSS ==========
            # L1 distance between each original image and its reconstruction.
            cycle_loss = (tf.reduce_mean(tf.abs(real_A - cycled_A)) +
                          tf.reduce_mean(tf.abs(real_B - cycled_B)))

            # ========== TOTAL GENERATOR LOSS ==========
            total_gen_loss = gen_AB_loss + gen_BA_loss + LAMBDA * cycle_loss

            # ========== DISCRIMINATOR LOSSES ==========
            # Loss = 0.5 * (loss_real + loss_fake)
            disc_A_loss = 0.5 * (
                self._bce_from_logits(tf.ones_like(disc_real_A), disc_real_A) +
                self._bce_from_logits(tf.zeros_like(disc_fake_A), disc_fake_A)
            )
            disc_B_loss = 0.5 * (
                self._bce_from_logits(tf.ones_like(disc_real_B), disc_real_B) +
                self._bce_from_logits(tf.zeros_like(disc_fake_B), disc_fake_B)
            )

        # ========== GRADIENT COMPUTATION AND APPLICATION ==========

        # Both generators are trained together by one optimizer.
        gen_vars = self.gen_AB.trainable_variables + self.gen_BA.trainable_variables
        disc_A_vars = self.disc_A.trainable_variables
        disc_B_vars = self.disc_B.trainable_variables

        gen_grads = tape.gradient(total_gen_loss, gen_vars)
        disc_A_grads = tape.gradient(disc_A_loss, disc_A_vars)
        disc_B_grads = tape.gradient(disc_B_loss, disc_B_vars)

        # A persistent tape holds its resources until it is dropped explicitly.
        del tape

        self.gen_opt.apply_gradients(zip(gen_grads, gen_vars))
        self.disc_A_opt.apply_gradients(zip(disc_A_grads, disc_A_vars))
        self.disc_B_opt.apply_gradients(zip(disc_B_grads, disc_B_vars))

        # Return loss values for monitoring
        return {
            'gen_loss': total_gen_loss,
            'disc_A_loss': disc_A_loss,
            'disc_B_loss': disc_B_loss,
            'cycle_loss': cycle_loss
        }

# ============================================================================
# VISUALIZATION AND OUTPUT FUNCTIONS
# ============================================================================

def display_sample_predictions(model, photo_ds, num_samples=5):
    """
    Plot input photos next to their generated Monet-style versions.

    Args:
        model: Trained CycleGAN model (its gen_AB generator is used)
        photo_ds: Dataset of photo image batches
        num_samples: Number of photo/prediction rows to show
    """
    print("Displaying sample predictions...")

    # One row per sample, two columns; squeeze=False keeps the axes array 2D
    # even when there is only a single row.
    _, axes = plt.subplots(num_samples, 2, figsize=(12, 12), squeeze=False)

    for (photo_axis, monet_axis), photo_batch in zip(axes, photo_ds.take(num_samples)):
        # Generated image: first item of the batch, [-1, 1] -> [0, 255]
        generated = model.gen_AB(photo_batch, training=False)[0].numpy()
        generated = (generated * 127.5 + 127.5).astype(np.uint8)

        # Input photo, converted the same way for display
        source = (photo_batch[0] * 127.5 + 127.5).numpy().astype(np.uint8)

        photo_axis.imshow(source)
        monet_axis.imshow(generated)
        photo_axis.set_title("Input Photo")
        monet_axis.set_title("Monet-esque")
        # Hide axis ticks and labels
        photo_axis.axis("off")
        monet_axis.axis("off")

    plt.tight_layout()
    plt.show()

def save_all_generated_images(model, photo_ds):
    """
    Run every photo batch through the photo -> Monet generator and save the
    first image of each result as ../images/<n>.jpg, numbered from 1.

    Args:
        model: Trained CycleGAN model (its gen_AB generator is used)
        photo_ds: Dataset containing all photo image batches

    Returns:
        int: Total number of images generated and saved
    """
    os.makedirs("../images", exist_ok=True)

    print("Generating and saving Monet-style images...")

    total_images = 0
    for total_images, photo_batch in enumerate(photo_ds, start=1):
        # Generate, then map [-1, 1] back to [0, 255] pixel values
        monet_like = model.gen_AB(photo_batch, training=False)[0].numpy()
        pixels = (monet_like * 127.5 + 127.5).astype(np.uint8)

        Image.fromarray(pixels).save("../images/" + str(total_images) + ".jpg")

        # Progress indicator every 200 images
        if total_images % 200 == 0:
            print(f"Saved {total_images} images...")

    print(f"Finished! Saved {total_images} generated images to ../images/")
    return total_images

def create_zip_archive(source_dir="../images", archive_base="/kaggle/working/images"):
    """
    Create a zip archive of the generated images for easy download.

    The default source directory is the same relative path the images are
    saved to, so the archive contains what was actually written regardless of
    the current working directory.

    Args:
        source_dir: Directory whose contents are archived
        archive_base: Archive path without the '.zip' extension

    Returns:
        str or None: Path to created zip file, or None if creation failed
    """
    # Fail early with a clear message instead of relying on how the archive
    # backend reacts to a missing directory.
    if not os.path.isdir(source_dir):
        print(f"Error creating zip file: {source_dir} is not a directory")
        print("You can manually zip the images folder if needed.")
        return None

    try:
        zip_path = shutil.make_archive(archive_base, 'zip', source_dir)
    except Exception as e:
        # Archiving is a best-effort convenience step: report and carry on.
        print(f"Error creating zip file: {e}")
        print("You can manually zip the images folder if needed.")
        return None

    print(f"Created zip file: {zip_path}")
    return zip_path

# ============================================================================
# MAIN TRAINING PIPELINE
# ============================================================================

def main():
    """
    Run the whole pipeline: GPU setup, data loading, training, sample
    visualization, generation of all Monet-style images and zip export.

    Returns early (after printing the error) if either image folder is empty.
    """

    # ========== SETUP ==========
    print("Setting up GPU configuration...")
    setup_gpu()

    # ========== DATA LOADING ==========
    print("Loading datasets from Google Cloud Storage...")

    # Get the GCS path provided by Kaggle
    gcs_path = KaggleDatasets().get_gcs_path()
    print(f"GCS Path: {gcs_path}")

    # File patterns for the JPG files in the GCS bucket
    monet_pattern = f"{gcs_path}/monet_jpg/*.jpg"    # Monet paintings
    photo_pattern = f"{gcs_path}/photo_jpg/*.jpg"    # Regular photos

    try:
        monet_ds = create_dataset_from_gcs(monet_pattern)
        photo_ds = create_dataset_from_gcs(photo_pattern)
    except ValueError as e:
        print(f"Error loading datasets: {e}")
        return
    print("Datasets loaded successfully from GCS")

    # ========== MODEL CREATION ==========
    print("Creating CycleGAN model...")

    # Initialize the CycleGAN with all four networks
    model = SimpleCycleGAN()

    # Adam with lr=2e-4 and beta_1=0.5 for the generators; the model creates
    # its own discriminator optimizers, so disc_opt is not used.
    model.compile(
        gen_opt=tf.keras.optimizers.Adam(2e-4, beta_1=0.5),
        disc_opt=None
    )

    # ========== TRAINING ==========
    print("Starting training...")

    # CycleGAN training is unpaired: shuffle both domains (reshuffled every
    # epoch) so a photo is not matched with the same painting in every epoch.
    # The unshuffled photo_ds is kept as-is for inference below.
    # NOTE: zip ends an epoch when the shorter dataset is exhausted, and the
    # shuffle buffer is bounded to limit memory use.
    shuffle_buffer = 256
    combined_ds = tf.data.Dataset.zip((
        photo_ds.shuffle(shuffle_buffer),
        monet_ds.shuffle(shuffle_buffer),
    ))

    # Train the model for specified number of epochs
    model.fit(combined_ds, epochs=EPOCHS)

    # ========== INFERENCE AND VISUALIZATION ==========
    print("Training completed! Generating sample predictions...")

    # Display sample predictions to visualize results
    display_sample_predictions(model, photo_ds, num_samples=5)

    # ========== GENERATE ALL IMAGES ==========
    print("Generating Monet-style versions of all photos...")

    # Process entire photo dataset and save generated images
    total_images = save_all_generated_images(model, photo_ds)

    # ========== CREATE DOWNLOADABLE ARCHIVE ==========
    print("Creating downloadable archive...")

    # Create zip file for easy download (None if archiving failed)
    zip_file = create_zip_archive()

    # ========== SUMMARY ==========
    separator = "=" * 50
    print("\n" + separator)
    print("TRAINING AND GENERATION COMPLETED!")
    print(separator)
    print("Generated images saved in: ../images/")
    print(f"Total images generated: {total_images}")
    if zip_file:
        print(f"Zip archive created: {zip_file}")
    print(separator)

    print("\nYour CycleGAN has successfully learned to transform photos into Monet-style paintings!")

# ============================================================================
# SCRIPT EXECUTION
# ============================================================================

if __name__ == "__main__":
    # Entry-point guard: run the complete pipeline only when this file is
    # executed directly, not when it is imported as a module.
    main()