In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import pandas as pd
from PIL import Image
import io

# Load the dataset from Parquet file
df = pd.read_parquet('train-00000-of-00001-b64601da56687a05.parquet')

# Shorten the dataset to 800 images and corresponding text entries
shortened_df = df.head(800)

# Preprocess the text
tokenizer = Tokenizer()
tokenizer.fit_on_texts(shortened_df['text'])
vocab_size = len(tokenizer.word_index) + 1
max_length = 100
# Assuming noise_dim is defined elsewhere in your code
noise_dim = 100
epochs = 500
batch_size = 32

# Convert text to sequences
text_sequences = tokenizer.texts_to_sequences(shortened_df['text'])
padded_text_sequences = pad_sequences(text_sequences, maxlen=max_length, padding='post')

# Preprocess the images
def preprocess_image(image_bytes):
    image = Image.open(io.BytesIO(image_bytes['bytes']))  # Open image from bytes
    image = image.convert('RGB')  # Convert to RGB format if necessary
    image = image.resize((64, 64))  # Resize image
    image = np.array(image)  # Convert PIL image to numpy array
    image = image.astype(np.float32) / 255.0  # Convert pixel values to [0, 1]
    return image

# Convert image bytes to numpy arrays
images = np.array([preprocess_image(image) for image in shortened_df['image']])

# Define the Generator network
from tensorflow.keras import layers, models

def build_generator(max_length, vocab_size, noise_dim):
    embedding_dim = 128

    input_text = layers.Input(shape=(max_length,))
    input_noise = layers.Input(shape=(noise_dim,))

    # Text embedding
    text_embedding = layers.Embedding(vocab_size, embedding_dim, input_length=max_length)(input_text)
    lstm = layers.LSTM(256)(text_embedding)  # LSTM layer to capture sequential information
    noise_dense = layers.Dense(256)(input_noise)

    # Concatenate text and noise inputs
    concatenated = layers.Concatenate()([lstm, noise_dense])

    # Generator layers
    x = layers.Dense(256 * 8 * 8, activation='relu')(concatenated)
    x = layers.Reshape((8, 8, 256))(x)
    x = layers.Conv2DTranspose(128, kernel_size=5, strides=2, padding='same', activation='relu')(x)
    x = layers.Conv2DTranspose(64, kernel_size=5, strides=2, padding='same', activation='relu')(x)
    generator_output = layers.Conv2DTranspose(3, kernel_size=5, strides=2, padding='same', activation='sigmoid')(x)

    generator_model = models.Model(inputs=[input_text, input_noise], outputs=generator_output)

    return generator_model

def build_discriminator(max_length, vocab_size):
    embedding_dim = 128

    input_image = layers.Input(shape=(64, 64, 3))
    input_text = layers.Input(shape=(max_length,))

    # Image embedding layers
    x = layers.Conv2D(64, kernel_size=5, strides=2, padding='same', activation='relu')(input_image)
    x = layers.Conv2D(128, kernel_size=5, strides=2, padding='same', activation='relu')(x)
    image_embedding = layers.Flatten()(x)

    # Text embedding
    text_embedding = layers.Embedding(vocab_size, embedding_dim, input_length=max_length)(input_text)
    lstm = layers.LSTM(256)(text_embedding)  # LSTM layer to capture sequential information

    # Concatenate image and text embeddings
    concatenated = layers.Concatenate()([image_embedding, lstm])

    # Discriminator layers
    x = layers.Dense(256, activation='relu')(concatenated)
    discriminator_output = layers.Dense(1, activation='sigmoid')(x)

    discriminator_model = models.Model(inputs=[input_image, input_text], outputs=discriminator_output)

    return discriminator_model

# Define the Discriminator network


# Define the conditional gan model
def build_cgan(generator, discriminator):
    input_text = layers.Input(shape=(max_length,))
    input_noise = layers.Input(shape=(noise_dim,))

    fake_image = generator([input_text, input_noise])
    validity = discriminator([fake_image, input_text])

    cgan = models.Model(inputs=[input_text, input_noise], outputs=validity)

    return cgan

# Instantiate the models
generator = build_generator(max_length, vocab_size, noise_dim)
discriminator = build_discriminator(max_length, vocab_size)
cgan = build_cgan(generator, discriminator)

# Compile the models
discriminator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
cgan.compile(optimizer='adam', loss='binary_crossentropy')

# Train the GAN models
def train_gan(generator, discriminator, cgan, images, text_embeddings, noise_dim, epochs=5, batch_size=16):
    for epoch in range(epochs):
        for batch_start in range(0, len(images), batch_size):
            # Generate noise
            noise = np.random.normal(0, 1, (batch_size, noise_dim))

            # Select a random batch of images and text embeddings
            real_images_batch = images[batch_start:batch_start+batch_size]
            real_text_embeddings_batch = text_embeddings[batch_start:batch_start+batch_size]

            # Generate fake images from noise and text embeddings
            generated_images = generator.predict([real_text_embeddings_batch, noise])

            # Train the Discriminator
            discriminator_loss_real = discriminator.train_on_batch([real_images_batch, real_text_embeddings_batch], np.ones((batch_size, 1)))
            discriminator_loss_fake = discriminator.train_on_batch([generated_images, real_text_embeddings_batch], np.zeros((batch_size, 1)))
            discriminator_loss = 0.5 * np.add(discriminator_loss_real, discriminator_loss_fake)

            # Train the Generator (via cGAN)
            noise = np.random.normal(0, 1, (batch_size, noise_dim))  # Generate new noise for training Generator
            cgan_loss = cgan.train_on_batch([real_text_embeddings_batch, noise], np.ones((batch_size, 1)))

            # Print training progress
            print(f"Epoch {epoch+1}/{epochs}, Batch {batch_start//batch_size+1}/{len(images)//batch_size}, Discriminator Loss: {discriminator_loss[0]}, Generator Loss: {cgan_loss}")

# Train the GAN models
train_gan(generator, discriminator, cgan, images, padded_text_sequences, noise_dim, epochs, batch_size)



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 169ms/step
Epoch 1/500, Batch 1/25, Discriminator Loss: 3.3123703002929688, Generator Loss: 0.09682589769363403
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 96ms/step
Epoch 1/500, Batch 2/25, Discriminator Loss: 4.610913276672363, Generator Loss: 0.1362152099609375
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step
Epoch 1/500, Batch 3/25, Discriminator Loss: 4.238081932067871, Generator Loss: 0.19954614341259003
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step
Epoch 1/500, Batch 4/25, Discriminator Loss: 3.665175199508667, Generator Loss: 0.2679663300514221
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 101ms/step
Epoch 1/500, Batch 5/25, Discriminator Loss: 3.186093807220459, Generator Loss: 0.31917816400527954
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 98ms/step
Epoch 1/500, Batch 6/25, Discriminator Loss: 2.813051223754883, Gen

In [None]:
def generate_image(generator_model, text_description, noise_dim, max_length, tokenizer):
    # Tokenize and pad the text description
    text_sequence = tokenizer.texts_to_sequences([text_description])
    padded_text_sequence = pad_sequences(text_sequence, maxlen=max_length, padding='post')

    # Generate noise
    noise = np.random.normal(0, 1, (1, noise_dim))

    # Generate image
    generated_image = generator_model.predict([padded_text_sequence, noise])

    return generated_image

# Example usage:
generated_image = generate_image(generator, "a logo of coffee shop, take-away coffee cardboard glass with white and brown stripes and dark brown lid, coffee circle with three cream drops, white background, brown foreground, minimalism, modern", noise_dim, max_length, tokenizer)


In [None]:
import matplotlib.pyplot as plt

def display_generated_image(image):
    plt.imshow(image)
    plt.axis('off')  # Hide axis
    plt.show()

# Example usage:
display_generated_image(generated_image[0])

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import pandas as pd
from PIL import Image
import io

# Load the dataset from Parquet file
df = pd.read_parquet('train-00000-of-00001-b64601da56687a05.parquet')

# Shorten the dataset to 800 images and corresponding text entries
shortened_df = df.head(800)
noise_dim=100
# Preprocess the text
tokenizer = Tokenizer()
tokenizer.fit_on_texts(shortened_df['text'])
vocab_size = len(tokenizer.word_index) + 1
max_length = 100
epochs = 1000
batch_size = 32
# Convert text to sequences
text_sequences = tokenizer.texts_to_sequences(shortened_df['text'])
padded_text_sequences = pad_sequences(text_sequences, maxlen=max_length, padding='post')

# Preprocess the images
def preprocess_image(image_bytes):
    image = Image.open(io.BytesIO(image_bytes['bytes']))  # Open image from bytes
    image = image.convert('RGB')  # Convert to RGB format if necessary
    image = image.resize((64, 64))  # Resize image
    image = np.array(image)  # Convert PIL image to numpy array
    image = image.astype(np.float32) / 255.0  # Convert pixel values to [0, 1]
    return image

# Convert image bytes to numpy arrays
images = np.array([preprocess_image(image) for image in shortened_df['image']])

# Define the Generator network
def build_generator(max_length, vocab_size, noise_dim):
    embedding_dim = 128

    input_text = layers.Input(shape=(max_length,))
    input_noise = layers.Input(shape=(noise_dim,))

    # Text embedding
    text_embedding = layers.Embedding(vocab_size, embedding_dim, input_length=max_length)(input_text)
    text_embedding = layers.Flatten()(text_embedding)

    # Noise dense layer
    noise_dense = layers.Dense(256)(input_noise)

    # Concatenate text and noise inputs
    concatenated = layers.Concatenate()([text_embedding, noise_dense])

    # Generator layers
    x = layers.Dense(256 * 8 * 8, activation='relu')(concatenated)
    x = layers.Reshape((8, 8, 256))(x)
    x = layers.Conv2DTranspose(128, kernel_size=5, strides=2, padding='same', activation='relu')(x)
    x = layers.Conv2DTranspose(64, kernel_size=5, strides=2, padding='same', activation='relu')(x)
    generator_output = layers.Conv2DTranspose(3, kernel_size=5, strides=2, padding='same', activation='sigmoid')(x)

    generator_model = models.Model(inputs=[input_text, input_noise], outputs=generator_output)

    return generator_model

# Define the Discriminator network
def build_discriminator(max_length, vocab_size):
    embedding_dim = 128

    input_image = layers.Input(shape=(64, 64, 3))
    input_text = layers.Input(shape=(max_length,))

    # Image embedding layers
    x = layers.Conv2D(64, kernel_size=5, strides=2, padding='same', activation='relu')(input_image)
    x = layers.Conv2D(128, kernel_size=5, strides=2, padding='same', activation='relu')(x)
    image_embedding = layers.Flatten()(x)

    # Text embedding
    text_embedding = layers.Embedding(vocab_size, embedding_dim, input_length=max_length)(input_text)
    text_embedding = layers.Flatten()(text_embedding)

    # Concatenate image and text embeddings
    concatenated = layers.Concatenate()([image_embedding, text_embedding])

    # Discriminator layers
    x = layers.Dense(256, activation='relu')(concatenated)
    discriminator_output = layers.Dense(1, activation='sigmoid')(x)

    discriminator_model = models.Model(inputs=[input_image, input_text], outputs=discriminator_output)

    return discriminator_model

# Define the conditional gan model
def build_cgan(generator, discriminator):
    input_text = layers.Input(shape=(max_length,))
    input_noise = layers.Input(shape=(noise_dim,))

    fake_image = generator([input_text, input_noise])
    validity = discriminator([fake_image, input_text])

    cgan = models.Model(inputs=[input_text, input_noise], outputs=validity)

    return cgan

# Instantiate the models
generator = build_generator(max_length, vocab_size, noise_dim)
discriminator = build_discriminator(max_length, vocab_size)
cgan = build_cgan(generator, discriminator)

# Compile the models
discriminator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
cgan.compile(optimizer='adam', loss='binary_crossentropy')

# Train the GAN models
def train_gan(generator, discriminator, cgan, images, text_embeddings, noise_dim, epochs=10, batch_size=32):
    for epoch in range(epochs):
        for batch_start in range(0, len(images), batch_size):
            # Generate noise
            noise = np.random.normal(0, 1, (batch_size, noise_dim))

            # Select a random batch of images and text embeddings
            real_images_batch = images[batch_start:batch_start+batch_size]
            real_text_embeddings_batch = text_embeddings[batch_start:batch_start+batch_size]

            # Generate fake images from noise and text embeddings
            generated_images = generator.predict([real_text_embeddings_batch, noise])

            # Train the Discriminator
            discriminator_loss_real = discriminator.train_on_batch([real_images_batch, real_text_embeddings_batch], np.ones((batch_size, 1)))
            discriminator_loss_fake = discriminator.train_on_batch([generated_images, real_text_embeddings_batch], np.zeros((batch_size, 1)))
            discriminator_loss = 0.5 * np.add(discriminator_loss_real, discriminator_loss_fake)

            # Train the Generator (via cGAN)
            noise = np.random.normal(0, 1, (batch_size, noise_dim))  # Generate new noise for training Generator
            cgan_loss = cgan.train_on_batch([real_text_embeddings_batch, noise], np.ones((batch_size, 1)))

            # Print training progress
            print(f"Epoch {epoch+1}/{epochs}, Batch {batch_start//batch_size+1}/{len(images)//batch_size}, Discriminator Loss: {discriminator_loss[0]}, Generator Loss: {cgan_loss}")

# Train the GAN models
train_gan(generator, discriminator, cgan, images, padded_text_sequences, noise_dim, epochs, batch_size)



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 147ms/step
Epoch 1/1000, Batch 1/25, Discriminator Loss: 3.9449617862701416, Generator Loss: 0.1616356074810028
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 103ms/step
Epoch 1/1000, Batch 2/25, Discriminator Loss: 5.254464149475098, Generator Loss: 0.2643628716468811
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 96ms/step
Epoch 1/1000, Batch 3/25, Discriminator Loss: 4.606675148010254, Generator Loss: 0.3499049246311188
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step
Epoch 1/1000, Batch 4/25, Discriminator Loss: 3.9432268142700195, Generator Loss: 0.3774418830871582
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 89ms/step
Epoch 1/1000, Batch 5/25, Discriminator Loss: 3.46954345703125, Generator Loss: 0.3782372772693634
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step
Epoch 1/1000, Batch 6/25, Discriminator Loss: 3.1537885665893555,

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Function to generate images based on text descriptions
def generate_images_from_text(generator, text_sequences, noise_dim, num_images=10):
    # Generate random noise vectors
    noise = np.random.normal(0, 1, (num_images, noise_dim))

    # Generate images from the Generator using text sequences and noise
    generated_images = generator.predict([text_sequences[:num_images], noise])

    return generated_images

# Assuming noise_dim is defined elsewhere in your code
noise_dim = 100

# Get text input from the user
text_input = "a logo of coffee shop, take-away coffee cardboard glass with white and brown stripes and dark brown lid, coffee circle with three cream drops, white background, brown foreground, minimalism, modern"

# Tokenize and pad the user's text input
user_text_sequence = tokenizer.texts_to_sequences([text_input])
user_padded_text_sequence = pad_sequences(user_text_sequence, maxlen=max_length, padding='post')

# Number of images to generate
num_images_to_generate = 1  # Generate one image based on the user's text input

# Generate images based on the user's text description
generated_images = generate_images_from_text(generator, user_padded_text_sequence, noise_dim, num_images=num_images_to_generate)

# Visualize the generated image
plt.imshow(generated_images[0])
plt.axis('off')
plt.show()


In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import pandas as pd
from PIL import Image
import io

# Load the dataset from Parquet file
df = pd.read_parquet('train-00000-of-00001-b64601da56687a05.parquet')

# Shorten the dataset to 800 images and corresponding text entries
shortened_df = df.head(768)
noise_dim=100
# Preprocess the text
tokenizer = Tokenizer()
tokenizer.fit_on_texts(shortened_df['text'])
vocab_size = len(tokenizer.word_index) + 1
max_length = 100
epochs = 200
batch_size = 128
# Convert text to sequences
text_sequences = tokenizer.texts_to_sequences(shortened_df['text'])
padded_text_sequences = pad_sequences(text_sequences, maxlen=max_length, padding='post')

# Preprocess the images
def preprocess_image(image_bytes):
    image = Image.open(io.BytesIO(image_bytes['bytes']))  # Open image from bytes
    image = image.convert('RGB')  # Convert to RGB format if necessary
    image = image.resize((64, 64))  # Resize image
    image = np.array(image)  # Convert PIL image to numpy array
    image = image.astype(np.float32) / 255.0  # Convert pixel values to [0, 1]
    return image

# Convert image bytes to numpy arrays
images = np.array([preprocess_image(image) for image in shortened_df['image']])

# Define the Generator network
def build_generator(max_length, vocab_size, noise_dim):
    embedding_dim = 128

    input_text = layers.Input(shape=(max_length,))
    input_noise = layers.Input(shape=(noise_dim,))

    # Text embedding
    text_embedding = layers.Embedding(vocab_size, embedding_dim, input_length=max_length)(input_text)
    text_embedding = layers.Flatten()(text_embedding)

    # Noise dense layer
    noise_dense = layers.Dense(256)(input_noise)

    # Concatenate text and noise inputs
    concatenated = layers.Concatenate()([text_embedding, noise_dense])

    # Generator layers
    x = layers.Dense(256 * 8 * 8, activation='relu')(concatenated)
    x = layers.Reshape((8, 8, 256))(x)
    x = layers.Conv2DTranspose(128, kernel_size=5, strides=2, padding='same', activation='relu')(x)
    x = layers.Conv2DTranspose(64, kernel_size=5, strides=2, padding='same', activation='relu')(x)
    generator_output = layers.Conv2DTranspose(3, kernel_size=5, strides=2, padding='same', activation='sigmoid')(x)

    generator_model = models.Model(inputs=[input_text, input_noise], outputs=generator_output)

    return generator_model

# Define the Discriminator network
def build_discriminator(max_length, vocab_size):
    embedding_dim = 128

    input_image = layers.Input(shape=(64, 64, 3))
    input_text = layers.Input(shape=(max_length,))

    # Image embedding layers
    x = layers.Conv2D(64, kernel_size=5, strides=2, padding='same', activation='relu')(input_image)
    x = layers.Conv2D(128, kernel_size=5, strides=2, padding='same', activation='relu')(x)
    image_embedding = layers.Flatten()(x)

    # Text embedding
    text_embedding = layers.Embedding(vocab_size, embedding_dim, input_length=max_length)(input_text)
    text_embedding = layers.Flatten()(text_embedding)

    # Concatenate image and text embeddings
    concatenated = layers.Concatenate()([image_embedding, text_embedding])

    # Discriminator layers
    x = layers.Dense(256, activation='relu')(concatenated)
    discriminator_output = layers.Dense(1, activation='sigmoid')(x)

    discriminator_model = models.Model(inputs=[input_image, input_text], outputs=discriminator_output)

    return discriminator_model

# Define the conditional gan model
def build_cgan(generator, discriminator):
    input_text = layers.Input(shape=(max_length,))
    input_noise = layers.Input(shape=(noise_dim,))

    fake_image = generator([input_text, input_noise])
    validity = discriminator([fake_image, input_text])

    cgan = models.Model(inputs=[input_text, input_noise], outputs=validity)

    return cgan

# Instantiate the models
generator = build_generator(max_length, vocab_size, noise_dim)
discriminator = build_discriminator(max_length, vocab_size)
cgan = build_cgan(generator, discriminator)

# Compile the models
discriminator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
cgan.compile(optimizer='adam', loss='binary_crossentropy')

# Train the GAN models
def train_gan(generator, discriminator, cgan, images, text_embeddings, noise_dim, epochs=10, batch_size=32):
    for epoch in range(epochs):
        for batch_start in range(0, len(images), batch_size):
            # Generate noise
            noise = np.random.normal(0, 1, (batch_size, noise_dim))

            # Select a random batch of images and text embeddings
            real_images_batch = images[batch_start:batch_start+batch_size]
            real_text_embeddings_batch = text_embeddings[batch_start:batch_start+batch_size]

            # Generate fake images from noise and text embeddings
            generated_images = generator.predict([real_text_embeddings_batch, noise])

            # Train the Discriminator
            discriminator_loss_real = discriminator.train_on_batch([real_images_batch, real_text_embeddings_batch], np.ones((batch_size, 1)))
            discriminator_loss_fake = discriminator.train_on_batch([generated_images, real_text_embeddings_batch], np.zeros((batch_size, 1)))
            discriminator_loss = 0.5 * np.add(discriminator_loss_real, discriminator_loss_fake)

            # Train the Generator (via cGAN)
            noise = np.random.normal(0, 1, (batch_size, noise_dim))  # Generate new noise for training Generator
            cgan_loss = cgan.train_on_batch([real_text_embeddings_batch, noise], np.ones((batch_size, 1)))

            # Print training progress
            print(f"Epoch {epoch+1}/{epochs}, Batch {batch_start//batch_size+1}/{len(images)//batch_size}, Discriminator Loss: {discriminator_loss[0]}, Generator Loss: {cgan_loss}")

# Train the GAN models
train_gan(generator, discriminator, cgan, images, padded_text_sequences, noise_dim, epochs, batch_size)



[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 139ms/step
Epoch 1/200, Batch 1/6, Discriminator Loss: 4.988353252410889, Generator Loss: 0.32006657123565674
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 105ms/step
Epoch 1/200, Batch 2/6, Discriminator Loss: 6.87853479385376, Generator Loss: 0.3138890266418457
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 89ms/step
Epoch 1/200, Batch 3/6, Discriminator Loss: 6.1000871658325195, Generator Loss: 0.4541427791118622
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step
Epoch 1/200, Batch 4/6, Discriminator Loss: 5.101682662963867, Generator Loss: 0.5641254782676697
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step
Epoch 1/200, Batch 5/6, Discriminator Loss: 4.2016143798828125, Generator Loss: 0.648676872253418
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step
Epoch 1/200, Batch 6/6, Discriminator Loss: 3.579209804534912, Generator Lo

In [7]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import pandas as pd
from PIL import Image
import io

# Load the dataset from Parquet file
df = pd.read_parquet('train-00000-of-00001-b64601da56687a05.parquet')

# Shorten the dataset to 800 images and corresponding text entries
shortened_df = df.head(768)
noise_dim=100
# Preprocess the text
tokenizer = Tokenizer()
tokenizer.fit_on_texts(shortened_df['text'])
vocab_size = len(tokenizer.word_index) + 1
max_length = 100
epochs = 2
batch_size = 256
# Convert text to sequences
text_sequences = tokenizer.texts_to_sequences(shortened_df['text'])
padded_text_sequences = pad_sequences(text_sequences, maxlen=max_length, padding='post')

# Preprocess the images
def preprocess_image(image_bytes):
    image = Image.open(io.BytesIO(image_bytes['bytes']))  # Open image from bytes
    image = image.convert('RGB')  # Convert to RGB format if necessary
    image = image.resize((64, 64))  # Resize image
    image = np.array(image)  # Convert PIL image to numpy array
    image = image.astype(np.float32) / 255.0  # Convert pixel values to [0, 1]
    return image

# Convert image bytes to numpy arrays
images = np.array([preprocess_image(image) for image in shortened_df['image']])

# Define the Generator network
def build_generator(max_length, vocab_size, noise_dim):
    embedding_dim = 128

    input_text = layers.Input(shape=(max_length,))
    input_noise = layers.Input(shape=(noise_dim,))

    # Text embedding
    text_embedding = layers.Embedding(vocab_size, embedding_dim, input_length=max_length)(input_text)
    text_embedding = layers.Flatten()(text_embedding)

    # Noise dense layer
    noise_dense = layers.Dense(256)(input_noise)

    # Concatenate text and noise inputs
    concatenated = layers.Concatenate()([text_embedding, noise_dense])

    # Generator layers
    x = layers.Dense(256 * 8 * 8, activation='relu')(concatenated)
    x = layers.Reshape((8, 8, 256))(x)
    x = layers.Conv2DTranspose(128, kernel_size=5, strides=2, padding='same', activation='relu')(x)
    x = layers.Conv2DTranspose(64, kernel_size=5, strides=2, padding='same', activation='relu')(x)
    generator_output = layers.Conv2DTranspose(3, kernel_size=5, strides=2, padding='same', activation='sigmoid')(x)

    generator_model = models.Model(inputs=[input_text, input_noise], outputs=generator_output)

    return generator_model

# Define the Discriminator network
def build_discriminator(max_length, vocab_size):
    embedding_dim = 128

    input_image = layers.Input(shape=(64, 64, 3))
    input_text = layers.Input(shape=(max_length,))

    # Image embedding layers
    x = layers.Conv2D(64, kernel_size=5, strides=2, padding='same', activation='relu')(input_image)
    x = layers.Conv2D(128, kernel_size=5, strides=2, padding='same', activation='relu')(x)
    image_embedding = layers.Flatten()(x)

    # Text embedding
    text_embedding = layers.Embedding(vocab_size, embedding_dim, input_length=max_length)(input_text)
    text_embedding = layers.Flatten()(text_embedding)

    # Concatenate image and text embeddings
    concatenated = layers.Concatenate()([image_embedding, text_embedding])

    # Discriminator layers
    x = layers.Dense(256, activation='relu')(concatenated)
    discriminator_output = layers.Dense(1, activation='sigmoid')(x)

    discriminator_model = models.Model(inputs=[input_image, input_text], outputs=discriminator_output)

    return discriminator_model

# Define the conditional gan model
def build_cgan(generator, discriminator):
    input_text = layers.Input(shape=(max_length,))
    input_noise = layers.Input(shape=(noise_dim,))

    fake_image = generator([input_text, input_noise])
    validity = discriminator([fake_image, input_text])

    cgan = models.Model(inputs=[input_text, input_noise], outputs=validity)

    return cgan

# Instantiate the models
generator = build_generator(max_length, vocab_size, noise_dim)
discriminator = build_discriminator(max_length, vocab_size)
cgan = build_cgan(generator, discriminator)

# Compile the models
discriminator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
cgan.compile(optimizer='adam', loss='binary_crossentropy')

# Train the GAN models
def train_gan(generator, discriminator, cgan, images, text_embeddings, noise_dim, epochs=10, batch_size=32):
    for epoch in range(epochs):
        for batch_start in range(0, len(images), batch_size):
            # Generate noise
            noise = np.random.normal(0, 1, (batch_size, noise_dim))

            # Select a random batch of images and text embeddings
            real_images_batch = images[batch_start:batch_start+batch_size]
            real_text_embeddings_batch = text_embeddings[batch_start:batch_start+batch_size]

            # Generate fake images from noise and text embeddings
            generated_images = generator.predict([real_text_embeddings_batch, noise])

            # Train the Discriminator
            discriminator_loss_real = discriminator.train_on_batch([real_images_batch, real_text_embeddings_batch], np.ones((batch_size, 1)))
            discriminator_loss_fake = discriminator.train_on_batch([generated_images, real_text_embeddings_batch], np.zeros((batch_size, 1)))
            discriminator_loss = 0.5 * np.add(discriminator_loss_real, discriminator_loss_fake)

            # Train the Generator (via cGAN)
            noise = np.random.normal(0, 1, (batch_size, noise_dim))  # Generate new noise for training Generator
            cgan_loss = cgan.train_on_batch([real_text_embeddings_batch, noise], np.ones((batch_size, 1)))

            # Print training progress
            print(f"Epoch {epoch+1}/{epochs}, Batch {batch_start//batch_size+1}/{len(images)//batch_size}, Discriminator Loss: {discriminator_loss[0]}, Generator Loss: {cgan_loss}")

# Train the GAN models
train_gan(generator, discriminator, cgan, images, padded_text_sequences, noise_dim, epochs, batch_size)
# Save Generator, Discriminator, and cGAN models
generator.save('generator_model.keras')
discriminator.save('discriminator_model.keras')
cgan.save('cgan_model.keras')

from keras.models import load_model

# Load Generator, Discriminator, and cGAN models
generator = load_model('generator_model.keras')
discriminator = load_model('discriminator_model.keras')
cgan = load_model('cgan_model.keras')



[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 93ms/step
Epoch 1/2, Batch 1/3, Discriminator Loss: 3.5179665088653564, Generator Loss: 0.706402599811554
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 97ms/step
Epoch 1/2, Batch 2/3, Discriminator Loss: 4.428816795349121, Generator Loss: 10.653087615966797
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 96ms/step
Epoch 1/2, Batch 3/3, Discriminator Loss: 3.436037063598633, Generator Loss: 14.88998794555664
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 97ms/step
Epoch 2/2, Batch 1/3, Discriminator Loss: 2.6686649322509766, Generator Loss: 16.570810317993164
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 96ms/step
Epoch 2/2, Batch 2/3, Discriminator Loss: 2.2801451683044434, Generator Loss: 17.393352508544922
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 97ms/step
Epoch 2/2, Batch 3/3, Discriminator Loss: 1.9378788471221924, Generator Loss: 17.614711

IndexError: list index out of range