<a href="https://colab.research.google.com/github/sanadv/CubixelGAN/blob/main/CubixelGAN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
from tensorflow.keras import layers
import tensorflow_datasets as tfds
import numpy as np
import matplotlib.pyplot as plt

# Simplified Cubixel transformation: Convert pixels to Cubixels based on RGB values
def pixel_to_cubixel(image):
    """Re-express an image as a single-slice Cubixel volume.

    The input is cast to float32 and indexed as ``image[:, :, c]``. Channels
    0, 1 and 2 (red, green, blue) are read as Cubixel width, length and
    height, stacked on a new trailing axis, and a length-1 axis is inserted
    at position 2. For an (H, W, 3) image the result is (H, W, 1, 3).
    """
    pixels = tf.cast(image, tf.float32)

    # Red -> width, green -> length, blue -> height.
    width, length, height = pixels[:, :, 0], pixels[:, :, 1], pixels[:, :, 2]
    stacked = tf.stack((width, length, height), axis=-1)

    # The inserted depth axis has size 1 (one slice), not 3.
    return tf.expand_dims(stacked, axis=2)

# Define the Generator model (producing simplified Cubixel volumes)
def build_generator(latent_dim, img_size):
    """Assemble the generator as a Keras ``Sequential`` model.

    A dense layer projects the ``latent_dim`` noise vector to a
    ``(img_size // 4, img_size // 4, 256)`` feature map. Two stride-2
    transposed convolutions (128 then 64 filters, each followed by batch
    normalization and ReLU) come next, and a 3x3 convolution with ``tanh``
    produces the 3 output channels.
    """
    base = img_size // 4  # spatial size before the two stride-2 stages

    net = tf.keras.Sequential()
    net.add(layers.Dense(256 * base**2, activation="relu", input_dim=latent_dim))
    net.add(layers.Reshape((base, base, 256)))

    for filters in (128, 64):
        net.add(layers.Conv2DTranspose(filters, (4, 4), strides=(2, 2), padding='same'))
        net.add(layers.BatchNormalization())
        net.add(layers.ReLU())

    # Three tanh-activated output channels (RGB).
    net.add(layers.Conv2D(3, (3, 3), padding='same', activation='tanh'))
    return net

# Define the Discriminator model for simplified Cubixel volumes
def build_discriminator(img_size):
    """Assemble the discriminator as a Keras ``Sequential`` model.

    Input is ``(img_size, img_size, 3)``. Two stride-2 4x4 convolutions
    (64 then 128 filters), each followed by LeakyReLU(0.2) and Dropout(0.3),
    feed a flattened single-unit sigmoid output.
    """
    net = tf.keras.Sequential()
    net.add(layers.InputLayer(input_shape=(img_size, img_size, 3)))

    for filters in (64, 128):
        net.add(layers.Conv2D(filters, (4, 4), strides=(2, 2), padding='same'))
        net.add(layers.LeakyReLU(alpha=0.2))
        net.add(layers.Dropout(0.3))

    net.add(layers.Flatten())
    net.add(layers.Dense(1, activation='sigmoid'))
    return net

# Volume of Void (VoV) Regularization
def compute_vov(cubixels):
    """Return the simplified Volume-of-Void (VoV) penalty for a batch.

    The penalty is the mean absolute difference between neighbours of
    channel 0 along axis 1, plus the same quantity for channel 1 along
    axis 2. Channel 2 does not contribute.

    Args:
        cubixels: Rank-4 tensor indexed as ``[batch, axis1, axis2, channel]``
            with at least two channels.

    Returns:
        Scalar tensor: the sum of the two mean absolute differences.
    """
    # Neighbour-to-neighbour change of the R channel along axis 1.
    delta_w = cubixels[:, 1:, :, 0] - cubixels[:, :-1, :, 0]

    # Neighbour-to-neighbour change of the G channel along axis 2.
    delta_l = cubixels[:, :, 1:, 1] - cubixels[:, :, :-1, 1]

    # Absolute differences act as a proxy for the "void" between neighbours.
    return tf.reduce_mean(tf.abs(delta_w)) + tf.reduce_mean(tf.abs(delta_l))


# Define the GAN model with the generator, discriminator, and VoV regularization
class CubixelGAN(tf.keras.Model):
    """GAN trained through ``fit`` with a VoV-regularized generator loss.

    Generator loss: binary cross-entropy against all-ones labels plus
    ``lambda_vov * compute_vov(generated_images)``.
    Discriminator loss: binary cross-entropy with real labels smoothed to
    0.9 and fake labels at 0.
    """

    def __init__(self, generator, discriminator, latent_dim, lambda_vov=10):
        """Store the sub-models and loss configuration.

        Args:
            generator: Model called on a ``(batch, latent_dim)`` noise tensor.
            discriminator: Model scoring image batches; its output is treated
                as a probability (``from_logits=False``).
            latent_dim: Length of each sampled noise vector.
            lambda_vov: Weight of the VoV term in the generator loss.
        """
        super().__init__()
        self.generator = generator
        self.discriminator = discriminator
        self.latent_dim = latent_dim
        self.lambda_vov = lambda_vov
        self.cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=False)

    def compile(self, g_optimizer, d_optimizer):
        """Attach one optimizer for the generator and one for the discriminator."""
        super().compile()
        self.g_optimizer = g_optimizer
        self.d_optimizer = d_optimizer

    def g_loss_fn(self, fake_output):
        """Adversarial generator loss: fakes should be scored as real (1)."""
        return self.cross_entropy(tf.ones_like(fake_output), fake_output)

    def d_loss_fn(self, real_output, fake_output):
        """Discriminator loss with one-sided label smoothing (real target 0.9)."""
        real_loss = self.cross_entropy(tf.ones_like(real_output) * 0.9, real_output)
        fake_loss = self.cross_entropy(tf.zeros_like(fake_output), fake_output)
        return real_loss + fake_loss

    def train_step(self, real_images):
        """Run one generator update followed by one discriminator update.

        Args:
            real_images: Batch of real images accepted by the discriminator.

        Returns:
            Dict with ``g_loss`` (adversarial part only), ``d_loss`` and
            ``vov_loss`` for this step.
        """
        batch_size = tf.shape(real_images)[0]
        random_latent_vectors = tf.random.normal(shape=(batch_size, self.latent_dim))

        # Train the generator.
        # training=True is passed explicitly: inside a custom train_step the
        # sub-models are not called from an outer layer call, so without it
        # BatchNormalization and Dropout would run in inference mode.
        with tf.GradientTape() as g_tape:
            generated_images = self.generator(random_latent_vectors, training=True)
            fake_output = self.discriminator(generated_images, training=True)
            g_loss = self.g_loss_fn(fake_output)
            vov_loss = compute_vov(generated_images)
            total_g_loss = g_loss + self.lambda_vov * vov_loss

        g_gradients = g_tape.gradient(total_g_loss, self.generator.trainable_variables)
        self.g_optimizer.apply_gradients(zip(g_gradients, self.generator.trainable_variables))

        # Train the discriminator on the real batch and on the fakes produced
        # above (i.e. before this step's generator update).
        with tf.GradientTape() as d_tape:
            real_output = self.discriminator(real_images, training=True)
            fake_output = self.discriminator(generated_images, training=True)
            d_loss = self.d_loss_fn(real_output, fake_output)

        d_gradients = d_tape.gradient(d_loss, self.discriminator.trainable_variables)
        self.d_optimizer.apply_gradients(zip(d_gradients, self.discriminator.trainable_variables))

        return {"g_loss": g_loss, "d_loss": d_loss, "vov_loss": vov_loss}

# Hyperparameters
latent_dim = 100  # Length of the noise vector fed to the generator
img_size = 32  # Side length passed to both model builders
epochs = 100  # Reduced epoch count to shorten training time
batch_size = 32
lambda_vov = 5  # Weight of the VoV term in the generator loss

# Initialize models
generator = build_generator(latent_dim, img_size)
discriminator = build_discriminator(img_size)
cubixel_gan = CubixelGAN(generator, discriminator, latent_dim, lambda_vov)

# Compile the GAN with separate Adam optimizers using a two time-scale
# update rule (TTUR): the discriminator's learning rate is 4x the generator's.
cubixel_gan.compile(
    g_optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),  # Slower learning rate for generator
    d_optimizer=tf.keras.optimizers.Adam(learning_rate=0.0004)   # Faster learning rate for discriminator
)

# Load CIFAR-10 data and convert it to Cubixel volumes
def convert_to_cubixels(images):
    """Thin alias: forward ``images`` to ``pixel_to_cubixel`` and return its result."""
    cubixel_volume = pixel_to_cubixel(images)
    return cubixel_volume

# Load the CIFAR-10 train split and rescale pixels with x / 127.5 - 1
# (maps 8-bit values to [-1, 1], matching the generator's tanh output).
# NOTE: pixel_to_cubixel / convert_to_cubixels are NOT applied in this cell;
# the batches keep the dataset's own image layout.
(ds_train, _), ds_info = tfds.load('cifar10', split=['train', 'test'], with_info=True)
ds_train = ds_train.map(lambda x: (tf.cast(x['image'], tf.float32) / 127.5) - 1.0)  # Normalize images
ds_train = ds_train.batch(batch_size)

# Train the GAN (runs CubixelGAN.train_step once per batch)
cubixel_gan.fit(ds_train, epochs=epochs)

# Generate and visualize Cubixel volumes
def generate_and_plot_cubixels(generator, latent_dim):
    """Sample 16 images from ``generator`` and show them in a 4x4 grid.

    Generator output is shifted from [-1, 1] to [0, 1] before display.
    """
    noise = tf.random.normal(shape=(16, latent_dim))
    samples = (generator(noise) + 1) / 2.0  # Rescale to [0, 1]

    fig, axs = plt.subplots(4, 4, figsize=(8, 8))
    # axs.flat walks the grid row by row, matching sample order 0..15.
    for index, ax in enumerate(axs.flat):
        ax.imshow(samples[index][:, :, :])
        ax.axis('off')
    plt.show()

# Display a 4x4 grid of samples from the generator trained above
generate_and_plot_cubixels(generator, latent_dim)


In [None]:
import tensorflow as tf
from matplotlib import pyplot as plt
import numpy as np
import os
import zipfile

# Function to generate and save images in a zip file
def generate_save_and_zip(generator, latent_dim, zip_filename='generated_images.zip'):
    """Sample 16 images, write them as PNGs, and bundle the folder into a zip.

    Args:
        generator: Callable mapping a ``(16, latent_dim)`` noise tensor to
            images; output is rescaled here with ``(x + 1) / 2``, i.e. it is
            presumably in [-1, 1] (tanh).
        latent_dim: Length of each noise vector.
        zip_filename: Path of the archive to create (overwritten if present).

    Returns:
        ``zip_filename``, unchanged.
    """
    save_dir = 'generated_images'

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(save_dir, exist_ok=True)

    random_latent_vectors = tf.random.normal(shape=(16, latent_dim))
    generated_images = generator(random_latent_vectors)
    generated_images = (generated_images + 1) / 2.0  # Rescale to [0, 1]

    # Save each image as a PNG file
    for i, img in enumerate(generated_images):
        # Tensor -> numpy, then scale to 8-bit; clip guards against overshoot.
        img_np = np.clip(img.numpy() * 255, 0, 255).astype(np.uint8)
        plt.imsave(f"{save_dir}/generated_image_{i+1}.png", img_np)

    # Archive every file currently in save_dir (including any left from
    # earlier runs); names are sorted so the archive order is deterministic.
    with zipfile.ZipFile(zip_filename, 'w') as zipf:
        for root, _dirs, names in os.walk(save_dir):
            for name in sorted(names):
                zipf.write(os.path.join(root, name))

    return zip_filename

# Generate, save, and zip the images (uses the generator from the cell above)
zip_filename = generate_save_and_zip(generator, latent_dim)

# Download the zip file
# NOTE(review): google.colab is presumably only importable inside a Colab
# runtime; this import will fail elsewhere.
from google.colab import files
files.download(zip_filename)


In [None]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.applications.inception_v3 import preprocess_input
import tensorflow_datasets as tfds
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
from scipy.linalg import sqrtm

# Create directories to save images for FID calculation
real_image_dir = 'real_images'
generated_image_dir = 'generated_images'

# exist_ok=True makes creation idempotent without a separate existence check.
os.makedirs(real_image_dir, exist_ok=True)
os.makedirs(generated_image_dir, exist_ok=True)

# Function to save images to directories
def save_images_to_dir(images, directory, prefix):
    """Write up to the first 100 images of a batch as PNG files.

    Args:
        images: Sliceable batch whose items support ``(img + 1) * 127.5`` and
            ``.numpy()``. Presumably RGB images in [-1, 1], as produced by the
            generator and the normalized dataset; verify against callers.
        directory: Existing directory that receives the files.
        prefix: Filename prefix; files are named ``{prefix}_{i}.png``.
    """
    for i, img in enumerate(images[:100]):  # Limiting to 100 images for FID
        pixels = ((img + 1) * 127.5).numpy()  # Rescale to [0, 255]
        # Clip before the cast so values just outside [0, 255] do not wrap.
        pixels = np.clip(pixels, 0, 255).astype(np.uint8)

        # cv2.imwrite treats 3-channel arrays as BGR. The pipeline's images
        # are RGB, so reverse the channel axis to keep colors correct on disk.
        if pixels.ndim == 3 and pixels.shape[-1] == 3:
            pixels = np.ascontiguousarray(pixels[..., ::-1])

        img_path = os.path.join(directory, f'{prefix}_{i}.png')
        cv2.imwrite(img_path, pixels)

# Cubixel transformation: Convert pixels to Cubixels based on RGB values
def pixel_to_cubixel(image):
    """Convert an image to a one-slice Cubixel volume.

    After a float32 cast, channels 0/1/2 (R/G/B) are taken as Cubixel
    width/length/height, re-stacked on the last axis, and a size-1 depth
    axis is inserted at position 2. An (H, W, 3) input yields (H, W, 1, 3).
    """
    as_float = tf.cast(image, tf.float32)

    dims = [
        as_float[:, :, 0],  # Red -> Width
        as_float[:, :, 1],  # Green -> Length
        as_float[:, :, 2],  # Blue -> Height
    ]

    volume = tf.stack(dims, axis=-1)
    return tf.expand_dims(volume, axis=2)  # Add depth axis (H, W, 1, 3)

# Define the Generator model
def build_generator(latent_dim, img_size):
    """Build the generator network.

    Layer order: Dense projection to ``(img_size // 4)**2 * 256`` units,
    reshape to a feature map, two stride-2 transposed-convolution stages
    (128 and 64 filters, each with batch normalization and ReLU), then a
    3x3 ``tanh`` convolution that outputs 3 channels.
    """
    quarter = img_size // 4

    def upsample_stage(filters):
        # One stride-2 upsampling stage: transposed conv -> batch norm -> ReLU.
        return [
            layers.Conv2DTranspose(filters, (4, 4), strides=(2, 2), padding='same'),
            layers.BatchNormalization(),
            layers.ReLU(),
        ]

    stack = [
        layers.Dense(256 * quarter**2, activation="relu", input_dim=latent_dim),
        layers.Reshape((quarter, quarter, 256)),
        *upsample_stage(128),
        *upsample_stage(64),
        layers.Conv2D(3, (3, 3), padding='same', activation='tanh'),  # Output RGB channels
    ]
    return tf.keras.Sequential(stack)

# Define the Discriminator model
def build_discriminator(img_size):
    """Build the discriminator network.

    Takes ``(img_size, img_size, 3)`` input, applies two stride-2 4x4
    convolution stages (64 and 128 filters, each with LeakyReLU(0.2) and
    Dropout(0.3)), flattens, and ends in one sigmoid unit.
    """
    def downsample_stage(filters):
        # One stride-2 stage: conv -> LeakyReLU -> dropout.
        return [
            layers.Conv2D(filters, (4, 4), strides=(2, 2), padding='same'),
            layers.LeakyReLU(alpha=0.2),
            layers.Dropout(0.3),
        ]

    stack = [
        layers.InputLayer(input_shape=(img_size, img_size, 3)),
        *downsample_stage(64),
        *downsample_stage(128),
        layers.Flatten(),
        layers.Dense(1, activation='sigmoid'),
    ]
    return tf.keras.Sequential(stack)

# Volume of Void (VoV) Regularization: Penalize large differences in cubixels
def compute_vov(cubixels):
    """Volume-of-Void penalty: mean absolute neighbour difference.

    Sums the mean absolute difference of channel 0 between neighbours along
    axis 1 and of channel 1 between neighbours along axis 2, for a tensor
    indexed as ``[batch, axis1, axis2, channel]``.
    """
    step_w = tf.abs(cubixels[:, 1:, :, 0] - cubixels[:, :-1, :, 0])
    step_l = tf.abs(cubixels[:, :, 1:, 1] - cubixels[:, :, :-1, 1])
    return tf.reduce_mean(step_w) + tf.reduce_mean(step_l)

# Define the GAN model with the generator, discriminator, and VoV regularization
class CubixelGAN(tf.keras.Model):
    """GAN trained through ``fit`` with a VoV-regularized generator loss.

    Generator loss: binary cross-entropy against all-ones labels plus
    ``lambda_vov * compute_vov(generated_images)``.
    Discriminator loss: binary cross-entropy with real labels smoothed to
    0.9 and fake labels at 0.
    """

    def __init__(self, generator, discriminator, latent_dim, lambda_vov=10):
        """Store the sub-models and loss configuration.

        Args:
            generator: Model called on a ``(batch, latent_dim)`` noise tensor.
            discriminator: Model scoring image batches; its output is treated
                as a probability (``from_logits=False``).
            latent_dim: Length of each sampled noise vector.
            lambda_vov: Weight of the VoV term in the generator loss.
        """
        super().__init__()
        self.generator = generator
        self.discriminator = discriminator
        self.latent_dim = latent_dim
        self.lambda_vov = lambda_vov
        self.cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=False)

    def compile(self, g_optimizer, d_optimizer):
        """Attach one optimizer for the generator and one for the discriminator."""
        super().compile()
        self.g_optimizer = g_optimizer
        self.d_optimizer = d_optimizer

    def g_loss_fn(self, fake_output):
        """Adversarial generator loss: fakes should be scored as real (1)."""
        return self.cross_entropy(tf.ones_like(fake_output), fake_output)

    def d_loss_fn(self, real_output, fake_output):
        """Discriminator loss with one-sided label smoothing (real target 0.9)."""
        real_loss = self.cross_entropy(tf.ones_like(real_output) * 0.9, real_output)
        fake_loss = self.cross_entropy(tf.zeros_like(fake_output), fake_output)
        return real_loss + fake_loss

    def train_step(self, real_images):
        """Run one generator update followed by one discriminator update.

        Args:
            real_images: Batch of real samples. It is reshaped to
                ``(-1, img_size, img_size, 3)`` using the module-level
                ``img_size`` before being scored, which drops the size-1
                depth axis added by ``pixel_to_cubixel``.

        Returns:
            Dict with ``g_loss`` (adversarial part only), ``d_loss`` and
            ``vov_loss`` for this step.
        """
        batch_size = tf.shape(real_images)[0]
        random_latent_vectors = tf.random.normal(shape=(batch_size, self.latent_dim))

        # Train the generator.
        # training=True is passed explicitly: inside a custom train_step the
        # sub-models are not called from an outer layer call, so without it
        # BatchNormalization and Dropout would run in inference mode.
        with tf.GradientTape() as g_tape:
            generated_images = self.generator(random_latent_vectors, training=True)
            fake_output = self.discriminator(generated_images, training=True)
            g_loss = self.g_loss_fn(fake_output)
            vov_loss = compute_vov(generated_images)
            total_g_loss = g_loss + self.lambda_vov * vov_loss

        g_gradients = g_tape.gradient(total_g_loss, self.generator.trainable_variables)
        self.g_optimizer.apply_gradients(zip(g_gradients, self.generator.trainable_variables))

        # Train the discriminator on the real batch and on the fakes produced
        # above (i.e. before this step's generator update).
        with tf.GradientTape() as d_tape:
            # Reshape to match the discriminator's expected input shape.
            real_images_reshaped = tf.reshape(real_images, [-1, img_size, img_size, 3])
            real_output = self.discriminator(real_images_reshaped, training=True)
            fake_output = self.discriminator(generated_images, training=True)
            d_loss = self.d_loss_fn(real_output, fake_output)

        d_gradients = d_tape.gradient(d_loss, self.discriminator.trainable_variables)
        self.d_optimizer.apply_gradients(zip(d_gradients, self.discriminator.trainable_variables))

        return {"g_loss": g_loss, "d_loss": d_loss, "vov_loss": vov_loss}

# FID calculation functions
def _inception_activations(model, images, value_range):
    """Return ``model.predict`` features for ``images`` after Inception preprocessing."""
    low, high = value_range
    # Ensure a 4D (N, img_size, img_size, 3) batch; img_size is module-level.
    images = tf.reshape(images, (-1, img_size, img_size, 3))
    images = tf.image.resize(images, (299, 299))
    # preprocess_input expects pixel values in [0, 255] and maps them to
    # [-1, 1]; bring the data to [0, 255] first so it is not squashed into a
    # near-constant range.
    images = (images - low) * (255.0 / (high - low))
    return model.predict(preprocess_input(images))


def calculate_fid(real_images, generated_images, value_range=(-1.0, 1.0)):
    """Compute the Frechet Inception Distance between two image batches.

    Args:
        real_images: Real samples, reshapeable to
            ``(-1, img_size, img_size, 3)``.
        generated_images: Generated samples with the same constraint.
        value_range: ``(low, high)`` range of the pixel values in both
            batches. Defaults to ``(-1.0, 1.0)``, the range of the normalized
            dataset and the generator's tanh output. Pass ``(0, 255)`` for raw
            8-bit images.

    Returns:
        The FID: squared distance between the activation means plus
        ``trace(S_r + S_g - 2 * sqrt(S_r @ S_g))``.

    Note:
        Each batch needs at least two samples for the covariance estimate.
        With far fewer samples than feature dimensions the covariances are
        rank-deficient and the score is presumably only a rough indicator.
    """
    # Load InceptionV3 as a pooled feature extractor (no classification head).
    model = InceptionV3(include_top=False, pooling='avg', input_shape=(299, 299, 3))

    real_activations = _inception_activations(model, real_images, value_range)
    generated_activations = _inception_activations(model, generated_images, value_range)

    # Mean and covariance of the activations of each batch.
    mu_real = np.mean(real_activations, axis=0)
    sigma_real = np.cov(real_activations, rowvar=False)
    mu_generated = np.mean(generated_activations, axis=0)
    sigma_generated = np.cov(generated_activations, rowvar=False)

    ssdiff = np.sum((mu_real - mu_generated)**2.0)
    # Called without the deprecated `disp` argument; the result is the matrix
    # square root itself.
    covmean = sqrtm(sigma_real.dot(sigma_generated))

    # Numerical error can leave a small imaginary component; drop it.
    if np.iscomplexobj(covmean):
        covmean = covmean.real

    fid = ssdiff + np.trace(sigma_real + sigma_generated - 2.0 * covmean)
    return fid

# Hyperparameters
latent_dim = 100  # Length of the noise vector fed to the generator
img_size = 32  # Side length; also read as a global by train_step and calculate_fid
epochs = 1  # Single epoch in this evaluation cell
batch_size = 32
lambda_vov = 5  # Weight of the VoV term in the generator loss

# Initialize models (fresh, untrained instances for this cell)
generator = build_generator(latent_dim, img_size)
discriminator = build_discriminator(img_size)
cubixel_gan = CubixelGAN(generator, discriminator, latent_dim, lambda_vov)

# Compile the GAN: the discriminator's learning rate is 4x the generator's
cubixel_gan.compile(
    g_optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    d_optimizer=tf.keras.optimizers.Adam(learning_rate=0.0004)
)

# Load CIFAR-10, rescale pixels with x / 127.5 - 1, and convert each image to
# a Cubixel volume. pixel_to_cubixel inserts a size-1 axis at position 2,
# which train_step reshapes away before calling the discriminator.
(ds_train, ds_test), ds_info = tfds.load('cifar10', split=['train', 'test'], with_info=True)
ds_train = ds_train.map(lambda x: pixel_to_cubixel((tf.cast(x['image'], tf.float32) / 127.5) - 1.0)).batch(batch_size)

# Train the GAN
cubixel_gan.fit(ds_train, epochs=epochs)

# Generate images for FID calculation
# One batch of ds_train holds only batch_size images, so slicing it with
# [:100] cannot yield 100 samples. Regroup the dataset to draw exactly as
# many real images as generated ones.
num_fid_samples = 100
real_images = next(iter(ds_train.unbatch().batch(num_fid_samples)))
random_latent_vectors = tf.random.normal(shape=(num_fid_samples, latent_dim))
generated_images = generator(random_latent_vectors)

# Calculate FID
fid_score = calculate_fid(real_images, generated_images)
print(f"FID Score: {fid_score}")
