In [1]:
# Import necessary libraries
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np
import matplotlib.pyplot as plt


In [2]:
import numpy as np
import os
import pandas as pd

processed_image_dir = 'flickr30k_images/processed_images'
images = []

# Load each .npy file in the directory and add it to the images list
for file_name in os.listdir(processed_image_dir):
    if file_name.endswith('.npy'):
        file_path = os.path.join(processed_image_dir, file_name)
        image = np.load(file_path)
        images.append(image)

# Now, images is a list of numpy arrays (each array is an image)

print("First few images:")
for img in images[:5]:  
    print(img)


captions_df = pd.read_csv('flickr30k_images/processed_results.csv')
print("\nFirst few captions:")
print(captions_df.head())


First few images:
[[[219 219 219]
  [218 218 218]
  [218 218 218]
  ...
  [240 238 241]
  [239 238 243]
  [239 238 243]]

 [[220 220 220]
  [219 219 219]
  [219 219 219]
  ...
  [240 238 241]
  [239 238 243]
  [239 238 243]]

 [[221 219 222]
  [221 219 220]
  [222 220 221]
  ...
  [240 240 242]
  [239 239 241]
  [239 239 241]]

 ...

 [[101 111  87]
  [ 81  76  73]
  [102  76  87]
  ...
  [101  99  87]
  [ 97 103  89]
  [ 96  96  84]]

 [[ 94  91  82]
  [100 106  92]
  [ 81  74  68]
  ...
  [107  97  87]
  [102 103  89]
  [102  95  85]]

 [[ 69  72  65]
  [103  89  86]
  [106 102  90]
  ...
  [103  95  84]
  [ 98 100  86]
  [100  96  84]]]
[[[ 49  65  26]
  [ 47  71  39]
  [ 56  69  43]
  ...
  [ 43  36  43]
  [141 163 142]
  [190 179 196]]

 [[ 44  66  17]
  [ 43  48  42]
  [ 73  72  80]
  ...
  [157 144 172]
  [202 216 219]
  [238 255 255]]

 [[ 39  67   9]
  [ 80  77 106]
  [ 93 126 161]
  ...
  [204 191 211]
  [126 118 131]
  [141 174 153]]

 ...

 [[ 41  90 131]
  [ 21  86 144]
  

In [17]:
def make_generator_model():
    model = tf.keras.Sequential()
    model.add(layers.Dense(8*8*256, use_bias=False, input_shape=(100,)))
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU())

    model.add(layers.Reshape((8, 8, 256)))  # Reshape to a 3D tensor
    assert model.output_shape == (None, 8, 8, 256)

    # Upsample to 16x16
    model.add(layers.Conv2DTranspose(128, (5, 5), strides=(2, 2), padding='same', use_bias=False))
    assert model.output_shape == (None, 16, 16, 128)
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU())

    # Upsample to 32x32
    model.add(layers.Conv2DTranspose(64, (5, 5), strides=(2, 2), padding='same', use_bias=False))
    assert model.output_shape == (None, 32, 32, 64)
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU())

    # Upsample to 64x64
    model.add(layers.Conv2DTranspose(3, (5, 5), strides=(2, 2), padding='same', use_bias=False, activation='tanh'))
    assert model.output_shape == (None, 64, 64, 3)

    return model

def make_discriminator_model():
    model = tf.keras.Sequential()
    model.add(layers.Conv2D(64, (5, 5), strides=(2, 2), padding='same', input_shape=[64, 64, 3]))
    model.add(layers.LeakyReLU())
    model.add(layers.Dropout(0.3))

    model.add(layers.Conv2D(128, (5, 5), strides=(2, 2), padding='same'))
    model.add(layers.LeakyReLU())
    model.add(layers.Dropout(0.3))

    model.add(layers.Flatten())
    model.add(layers.Dense(1))

    return model

generator = make_generator_model()
discriminator = make_discriminator_model()

# Print model summaries
print(generator.summary())
print(discriminator.summary())


Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_4 (Dense)             (None, 16384)             1638400   
                                                                 
 batch_normalization_6 (Bat  (None, 16384)             65536     
 chNormalization)                                                
                                                                 
 leaky_re_lu_10 (LeakyReLU)  (None, 16384)             0         
                                                                 
 reshape_2 (Reshape)         (None, 8, 8, 256)         0         
                                                                 
 conv2d_transpose_6 (Conv2D  (None, 16, 16, 128)       819200    
 Transpose)                                                      
                                                                 
 batch_normalization_7 (Bat  (None, 16, 16, 128)      

problem Faced : 

In [13]:
cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)

def discriminator_loss(real_output, fake_output):
    real_loss = cross_entropy(tf.ones_like(real_output), real_output)
    fake_loss = cross_entropy(tf.zeros_like(fake_output), fake_output)
    total_loss = real_loss + fake_loss
    return total_loss

def generator_loss(fake_output):
    return cross_entropy(tf.ones_like(fake_output), fake_output)

generator_optimizer = tf.keras.optimizers.legacy.Adam(1e-4)
discriminator_optimizer = tf.keras.optimizers.legacy.Adam(1e-4)


# Print the loss functions and optimizers summary
print("Discriminator loss function and optimizer:")
print(discriminator_loss)
print(discriminator_optimizer)

print("\nGenerator loss function and optimizer:")
print(generator_loss)
print(generator_optimizer)


Discriminator loss function and optimizer:
<function discriminator_loss at 0x2c8e73560>
<keras.src.optimizers.legacy.adam.Adam object at 0x2c8e30410>

Generator loss function and optimizer:
<function generator_loss at 0x2c8e73420>
<keras.src.optimizers.legacy.adam.Adam object at 0x2c8e30a50>


In [19]:
dataset=img

In [20]:
def train(dataset, epochs):
    for epoch in range(epochs):
        start = time.time()

        for image_batch in dataset:
            print("Batch shape:", image_batch.shape)  
            train_step(image_batch, None) 

        print(f'Time for epoch {epoch + 1} is {time.time() - start} sec')


In [22]:

reshaped_images = [np.resize(img, (64, 64, 3)) for img in images]


reshaped_images = np.array(reshaped_images)


dataset = tf.data.Dataset.from_tensor_slices(reshaped_images).batch(BATCH_SIZE)


In [None]:
import time

@tf.function
def train_step(images, captions):
    noise = tf.random.normal([32, 100])  

    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        generated_images = generator(noise, training=True)

        real_output = discriminator(images, training=True)
        fake_output = discriminator(generated_images, training=True)

        gen_loss = generator_loss(fake_output)
        disc_loss = discriminator_loss(real_output, fake_output)

    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
    gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)

    generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))

def train(dataset, epochs):
    for epoch in range(epochs):
        start = time.time()

        for image_batch in dataset:
            # captions are not directly used in the model in this version
            train_step(image_batch, None)  # Replace with actual caption handling if needed

        print(f'Time for epoch {epoch + 1} is {time.time() - start} sec')
        # Generate and save images here if needed

# 'dataset' is a tf.data.Dataset of images, and 'epochs' is the number of epochs
BATCH_SIZE = 32  
train(dataset, 10)  


In [28]:
# Train your model 
train(dataset, 10)


Time for epoch 1 is 130.62996983528137 sec
Time for epoch 2 is 126.28748679161072 sec
Time for epoch 3 is 126.46461701393127 sec
Time for epoch 4 is 118.8111400604248 sec
Time for epoch 5 is 125.28175115585327 sec
Time for epoch 6 is 136.0181758403778 sec
Time for epoch 7 is 133.0854148864746 sec
Time for epoch 8 is 139.8650348186493 sec
Time for epoch 9 is 127.9656629562378 sec
Time for epoch 10 is 125.56670308113098 sec
