<a href="https://colab.research.google.com/github/laresamdeola/GANS-Models/blob/main/ConditionalGANS_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install tensorflow numpy matplotlib gensim



In [15]:
import os
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import gensim
from gensim.scripts.glove2word2vec import glove2word2vec
from gensim.models import KeyedVectors
from tensorflow.keras.preprocessing.image import load_img, img_to_array

Load Custom Images

In [16]:
def load_custom_images(image_dir, img_size=(64, 64)):
    images = []
    for filename in os.listdir(image_dir):
        img = load_img(os.path.join(image_dir, filename), color_mode="rgb", target_size=img_size)
        img_array = img_to_array(img) / 255.0  # Normalize pixel values to [0, 1]
        images.append(img_array)
    return np.array(images)

Text Embedding with GloVe

In [6]:
# Load the GloVe model
def load_glove_model(glove_file):
    tmp_file = "glove_word2vec.txt"
    glove2word2vec(glove_file, tmp_file)
    return KeyedVectors.load_word2vec_format(tmp_file)

# Convert a text description to an embedding
def text_to_embedding(text, glove_model, embedding_dim=100):
    words = text.lower().split()
    embedding = np.zeros((embedding_dim,))
    valid_words = 0
    for word in words:
        if word in glove_model:
            # print(word)
            embedding += glove_model[word]
            # print(embedding)
            valid_words += 1
    if valid_words > 0:
        embedding /= valid_words
    return embedding

Generator and Discriminator

In [11]:
from tensorflow.keras.layers import Input, Dense, Reshape, Flatten, Dropout, LeakyReLU, Embedding, multiply
from tensorflow.keras.models import Model
from tensorflow.keras import layers

# Modified Generator to accept text embedding
def build_generator(latent_dim, embedding_dim=100):
    noise_input = Input(shape=(latent_dim,))
    text_input = Input(shape=(embedding_dim,))

    # Concatenate noise and text embedding
    combined_input = layers.Concatenate()([noise_input, text_input])

    # Fully connected layers to upscale
    x = Dense(8 * 8 * 128)(combined_input)
    x = Reshape((8, 8, 128))(x)
    x = layers.Conv2DTranspose(128, (4, 4), strides=(2, 2), padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)

    x = layers.Conv2DTranspose(128, (4, 4), strides=(2, 2), padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)

    x = layers.Conv2DTranspose(64, (4, 4), strides=(2, 2), padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)

    # Final layer to output an image
    output_image = layers.Conv2DTranspose(3, (7, 7), activation='tanh', padding='same')(x)

    model = Model([noise_input, text_input], output_image)
    return model

# Modified Discriminator to accept text embedding
def build_discriminator(img_shape, embedding_dim=100):
    img_input = Input(shape=img_shape)
    text_input = Input(shape=(embedding_dim,))

    # Process the image
    x = layers.Conv2D(64, (4, 4), strides=(2, 2), padding='same')(img_input)
    x = layers.LeakyReLU(alpha=0.2)(x)

    x = layers.Conv2D(128, (4, 4), strides=(2, 2), padding='same')(x)
    x = layers.LeakyReLU(alpha=0.2)(x)

    x = Flatten()(x)

    # Concatenate image features with text embedding
    combined_input = layers.Concatenate()([x, text_input])

    # Fully connected layers for classification
    x = Dense(256)(combined_input)
    x = layers.LeakyReLU(alpha=0.2)(x)
    x = Dropout(0.4)(x)

    output = Dense(1, activation='sigmoid')(x)

    model = Model([img_input, text_input], output)
    return model

Modify GAN

In [12]:
def build_gan(generator, discriminator):
    discriminator.trainable = False
    noise_input = Input(shape=(latent_dim,))
    text_input = Input(shape=(embedding_dim,))

    generated_image = generator([noise_input, text_input])
    output = discriminator([generated_image, text_input])

    gan = Model([noise_input, text_input], output)
    return gan

Generate and Save sample images during training

In [17]:
def sample_images(generator, epoch, img_size=(64, 64)):
    r, c = 5, 5  # Grid size
    noise = np.random.normal(0, 1, (r * c, latent_dim))
    generated_images = generator.predict(noise)
    generated_images = 0.5 * generated_images + 0.5  # Rescale to [0, 1]

    fig, axs = plt.subplots(r, c)
    cnt = 0
    for i in range(r):
        for j in range(c):
            axs[i, j].imshow(generated_images[cnt])
            axs[i, j].axis('off')
            cnt += 1
    plt.savefig(f"generated_image_epoch_{epoch}.png")
    plt.close()

Train the GAN

In [18]:
def train_gan(generator, discriminator, gan, dataset, text_descriptions, glove_model, latent_dim, embedding_dim=100, epochs=10000, batch_size=32, sample_interval=1000):
    # Labels for real and fake images
    real_label = np.ones((batch_size, 1))
    fake_label = np.zeros((batch_size, 1))

    for epoch in range(epochs):
        # Select a random batch of images and corresponding text
        idx = np.random.randint(0, dataset.shape[0], batch_size)
        real_images = dataset[idx]
        texts = [text_descriptions[i] for i in idx]
        text_embeddings = np.array([text_to_embedding(text, glove_model) for text in texts])

        # Generate fake images from noise and text embeddings
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        generated_images = generator.predict([noise, text_embeddings])

        # Train the discriminator
        d_loss_real = discriminator.train_on_batch([real_images, text_embeddings], real_label)
        d_loss_fake = discriminator.train_on_batch([generated_images, text_embeddings], fake_label)
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        # Train the generator
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        g_loss = gan.train_on_batch([noise, text_embeddings], real_label)

        # Print losses
        if epoch % sample_interval == 0:
            print(f"{epoch} [D loss: {d_loss}] [G loss: {g_loss}]")
            sample_images(generator, epoch, text_embeddings)

# Main script to load dataset, train the cGAN
if __name__ == "__main__":
    img_size = (64, 64, 3)
    latent_dim = 100
    embedding_dim = 100  # Same as the size of word embeddings
    epochs = 10000
    batch_size = 32
    sample_interval = 1000

    # Load the custom dataset
    dataset = load_custom_images("images/", img_size=img_size[:2])

    # Load GloVe embeddings
    glove_model = load_glove_model("glove.6B.100d.txt")

    # Example text descriptions corresponding to the dataset
    text_descriptions = ["red car", "green apple", "blue sky", ... ]  # Should match the size of the dataset

    # Build the generator, discriminator, and GAN
    generator = build_generator(latent_dim, embedding_dim)
    discriminator = build_discriminator(img_size, embedding_dim)
    discriminator.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    gan = build_gan(generator, discriminator)
    gan.compile(loss='binary_crossentropy', optimizer='adam')

    # Train the GAN with text-based input
    train_gan(generator, discriminator, gan, dataset, text_descriptions, glove_model, latent_dim, embedding_dim, epochs=epochs, batch_size=batch_size, sample_interval=sample_interval)


  glove2word2vec(glove_file, tmp_file)


FileNotFoundError: [Errno 2] No such file or directory: 'glove.6B.100d.txt'