In [None]:
import json
import nltk
import gensim
from gensim.models import KeyedVectors

# Load the COCO2017 caption annotations.
captions_path = './annotations/captions_train2017.json'
# Read through a context manager so the file handle is closed as soon as the
# JSON is parsed, and pin the encoding instead of relying on the platform
# default (JSON interchange files are UTF-8).
with open(captions_path, encoding='utf-8') as captions_file:
    captions = json.load(captions_file)['annotations']

# Keep only the caption string of every annotation record.
texts = [caption['caption'] for caption in captions]

# Tokenize the captions into words (one token list per caption).
tokenized_texts = [nltk.word_tokenize(text) for text in texts]

# Load a pre-trained Word2Vec model stored in the binary word2vec format.
w2v_path = './w2vmodel.bin'
w2v_model = KeyedVectors.load_word2vec_format(w2v_path, binary=True)

# Look up the Word2Vec vector of every token, caption by caption.
# Tokens the model does not know are left out, so a caption's vector list can
# be shorter than its token list (or empty).
embedded_texts = []
for tokens in tokenized_texts:
    caption_vectors = []
    for token in tokens:
        try:
            vector = w2v_model[token]
        except KeyError:
            # Out-of-vocabulary token: nothing to add for it.
            pass
        else:
            caption_vectors.append(vector)
    embedded_texts.append(caption_vectors)


In [None]:
from PIL import Image
import numpy as np


# Target image size, in the (width, height) order that PIL's resize() expects.
target_size = (256, 256)

# One processed 2-D array per caption, in the same order as embedded_texts.
processed_embedded_texts = []

# Process each embedded text
for embedded_text in embedded_texts:
    if len(embedded_text) == 0:
        # Every token of this caption was out of vocabulary, so there is no
        # matrix to turn into an image. Emit an all-zero placeholder instead
        # of crashing; dropping the caption would shift the index of every
        # caption after it.
        processed_embedded_texts.append(
            np.zeros((target_size[1], target_size[0]), dtype=np.float32))
        continue

    # Stack the word vectors into one (num_words, embedding_dim) matrix.
    # float32 is requested explicitly so PIL builds a single-channel
    # floating-point image from it.
    embedding_matrix = np.asarray(embedded_text, dtype=np.float32)

    # Convert the embedding matrix to a PIL Image object
    embedded_image = Image.fromarray(embedding_matrix)

    # Resize the image to the target size
    resized_image = embedded_image.resize(target_size)

    # Convert the image back to a NumPy array of pixel values
    pixel_values = np.asarray(resized_image, dtype=np.float32)

    # Normalize the pixel values to be between 0 and 1.
    # Embedding components are arbitrary real numbers (negative values
    # included), so dividing by 255 does not yield that range; min-max scaling
    # per image does.
    lowest_value = pixel_values.min()
    value_range = pixel_values.max() - lowest_value
    if value_range > 0:
        normalized_pixel_values = (pixel_values - lowest_value) / value_range
    else:
        # Constant image: avoid dividing by zero.
        normalized_pixel_values = np.zeros_like(pixel_values)

    # Add the processed embedded text to the list
    processed_embedded_texts.append(normalized_pixel_values)


In [None]:
import tensorflow as tf
import numpy as np

# Load the preprocessed image feature vectors (the regression targets)
feature_vectors = np.load('image_features.npy')

# Stack the per-caption arrays into a single tensor. Handing Keras a plain
# Python list of arrays makes it treat every element as a separate model
# input rather than as one sample each.
train_inputs = np.asarray(processed_embedded_texts, dtype=np.float32)
if train_inputs.ndim == 3:
    # (samples, height, width) -> (samples, height, width, 1): Conv2D needs an
    # explicit channel axis.
    train_inputs = train_inputs[..., np.newaxis]
if train_inputs.ndim != 4:
    raise ValueError(
        "expected image-like training inputs of rank 4, got shape "
        f"{train_inputs.shape}")

# Fail early with a readable message when inputs and targets are not paired.
if len(train_inputs) != len(feature_vectors):
    raise ValueError(
        f"{len(train_inputs)} processed texts but {len(feature_vectors)} "
        "feature vectors; inputs and targets must be paired one-to-one")

# Define the input shape for the CNN from the data actually being fed in, so
# the model cannot disagree with the preprocessing output.
input_shape = train_inputs.shape[1:]

# Define the CNN model: three conv/pool stages followed by a dense head.
# NOTE(review): the 64-unit ReLU output assumes feature_vectors has 64
# non-negative columns -- confirm against how image_features.npy was produced.
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(64, activation='relu'),
])

# Compile the model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss=tf.keras.losses.MeanSquaredError())

# Train the model on the preprocessed text data
history = model.fit(train_inputs, feature_vectors, epochs=10, batch_size=32)


In [None]:
import tensorflow as tf
from keras.layers import Dense, Reshape, Conv2D, Conv2DTranspose, Flatten, LeakyReLU, BatchNormalization
from keras.models import Sequential
from keras.optimizers import Adam
from PIL import Image
import numpy as np
import glob

# Shape of the images the GAN produces and judges
image_shape = (256, 256, 3)

# Length of the random noise vector the generator starts from
latent_dim = 100

# Folder holding the training images, and every .jpg file found directly in it
image_dir = "./img"
image_paths = glob.glob(f"{image_dir}/*.jpg")


# Define the generator model.
# The noise vector is projected to a 16x16x128 feature map and then upsampled
# by four stride-2 transposed convolutions: 16 -> 32 -> 64 -> 128 -> 256.
# Starting from 64x64 with the same four stages would end at 1024x1024, which
# the 256x256 discriminator cannot accept.
generator = Sequential()
generator.add(Dense(128 * 16 * 16, input_dim=latent_dim))
generator.add(Reshape((16, 16, 128)))
for upsample_stage in range(4):
    # Each stage doubles the spatial resolution.
    generator.add(Conv2DTranspose(128, (4, 4), strides=(2, 2), padding='same'))
    generator.add(BatchNormalization())
    generator.add(LeakyReLU(alpha=0.2))
# Collapse to 3 colour channels; tanh keeps every output value in [-1, 1].
generator.add(Conv2D(3, (3, 3), activation='tanh', padding='same'))

# Define the discriminator model: one full-resolution convolution, three
# stride-2 convolutions that halve the resolution each time, and a single
# sigmoid unit that scores the image.
discriminator = Sequential([
    Conv2D(64, (3,3), padding='same', input_shape=image_shape),
    LeakyReLU(alpha=0.2),

    Conv2D(64, (3,3), strides=(2,2), padding='same'),
    BatchNormalization(),
    LeakyReLU(alpha=0.2),

    Conv2D(128, (3,3), strides=(2,2), padding='same'),
    BatchNormalization(),
    LeakyReLU(alpha=0.2),

    Conv2D(256, (3,3), strides=(2,2), padding='same'),
    BatchNormalization(),
    LeakyReLU(alpha=0.2),

    Flatten(),
    Dense(1, activation='sigmoid'),
])

# Define the GAN by chaining the generator into the discriminator
gan = Sequential()
gan.add(generator)
gan.add(discriminator)

# Compile the discriminator model.
# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras releases no longer accept.
discriminator.compile(loss='binary_crossentropy',
                      optimizer=Adam(learning_rate=0.0002, beta_1=0.5),
                      metrics=['accuracy'])

# Freeze the weights of the discriminator during GAN training.
# This is set after the discriminator was compiled and before the combined
# model is compiled; the statement order is deliberate and must be kept.
discriminator.trainable = False

# Compile the GAN model
gan.compile(loss='binary_crossentropy',
            optimizer=Adam(learning_rate=0.0002, beta_1=0.5))

# Define a function to generate a batch of noise vectors
def generate_noise(batch_size, latent_dim):
    """Draw a batch of noise vectors from a standard normal distribution.

    Args:
        batch_size: Number of noise vectors to draw.
        latent_dim: Length of each noise vector.

    Returns:
        Array of shape (batch_size, latent_dim) sampled with mean 0 and
        standard deviation 1 from NumPy's global random state.
    """
    noise_shape = (batch_size, latent_dim)
    return np.random.normal(loc=0, scale=1, size=noise_shape)

# Define a function to generate a batch of real images
def generate_real_images(batch_size, processed_embedded_texts, image_paths,
                         target_size=(256, 256)):
    """Load a random batch of real images, scaled to the range [-1, 1].

    Args:
        batch_size: Number of images to sample (with replacement).
        processed_embedded_texts: Unused; kept so existing calls keep working.
        image_paths: Sequence of image file paths to sample from.
        target_size: (width, height) every image is resized to.

    Returns:
        Array of shape (batch_size, height, width, 3) with values in [-1, 1].

    Raises:
        ValueError: If image_paths is empty.
    """
    if len(image_paths) == 0:
        raise ValueError("image_paths is empty; there are no real images to sample from")

    # Sample positions within image_paths itself. Bounding the draw by the
    # number of texts can yield indices past the end of the path list.
    indices = np.random.randint(0, len(image_paths), batch_size)
    real_images = []
    for i in indices:
        # The context manager closes the underlying file once pixels are read.
        with Image.open(image_paths[i]) as image:
            # Force three channels so greyscale/palette files stack with the rest.
            resized_image = image.convert("RGB").resize(target_size)
            pixel_values = np.asarray(resized_image, dtype=np.float32)
        # Map 0..255 to -1..1, the range of the generator's tanh output, so
        # real and generated images share one value scale.
        real_images.append(pixel_values / 127.5 - 1.0)
    return np.array(real_images)

# Training schedule: number of epochs to run, and samples drawn per batch
num_epochs, batch_size = 100, 32



In [None]:
# Train the GAN.
# Every step below belongs inside the loop body: each iteration performs one
# discriminator update followed by one generator update on a single batch.

for epoch in range(num_epochs):
    # Generate a batch of real images
    real_images = generate_real_images(batch_size, processed_embedded_texts, image_paths)

    # Generate a batch of noise vectors
    noise = generate_noise(batch_size, latent_dim)

    # Use the generator to produce fake images from the noise
    # (verbose=0 keeps predict() from printing a progress bar every iteration)
    fake_images = generator.predict(noise, verbose=0)

    # Concatenate the real and fake images into a single array
    X = np.concatenate([real_images, fake_images])

    # Create an array of labels: 1 for the real half, 0 for the fake half
    y = np.concatenate([np.ones((batch_size, 1)), np.zeros((batch_size, 1))])

    # Train the discriminator on the combined real and fake images
    discriminator_loss, discriminator_accuracy = discriminator.train_on_batch(X, y)

    # Generate a new batch of noise vectors
    noise = generate_noise(batch_size, latent_dim)

    # Label the generated images as real (they are fake), so the generator is
    # pushed towards images the discriminator accepts
    misleading_targets = np.ones((batch_size, 1))

    # Train the GAN on the noise with the "real" labels
    gan_loss = gan.train_on_batch(noise, misleading_targets)

    # Print the loss and accuracy for the discriminator and the GAN at the end
    # of each epoch (epoch is zero-based, so report it one-based)
    print(f"Epoch {epoch + 1}/{num_epochs} -- Discriminator loss: {discriminator_loss:.4f}, Discriminator accuracy: {discriminator_accuracy:.4f}, GAN loss: {gan_loss:.4f}")
