# Steps to retrieve training data set.
For this tutorial we will create and train a GAN using images of apples.

1. Navigate to [kaggle.com](https://www.kaggle.com/#)
2. Select Register and follow the flow to create a new account
3. Once registered, sign in to your account
4. Click your profile avatar at the top right to reveal the drop-down menu
5. Select settings
6. Scroll down to API
7. Select create new token
8. Save the kaggle.json file which gets generated
9. Upload the kaggle.json file to the root of the Google Colab file directory

This kaggle.json will be used to auth our dataset download.



In [None]:
 # ! pip install -q kaggle

In [None]:

! mkdir ~/.kaggle

In [None]:
! cp kaggle.json ~/.kaggle

In [None]:
! chmod 600 ~/.kaggle/kaggle.json

In [None]:
!kaggle datasets download -d moltean/fruits

In [None]:
! unzip fruits.zip

In [None]:
import os

def get_apple_images(root_directory, folder_prefix='Apple '):
    """
    Collect the paths of all image files stored in matching class folders.

    Walks root_directory and its subdirectories and keeps every image file
    whose containing folder name starts with folder_prefix. The match is
    case-sensitive; the default prefix is 'Apple ' (capital A, trailing space).

    :param root_directory: The directory to start the search from.
    :param folder_prefix: Prefix a folder name must start with for the images
        inside it to be collected.
    :return: A sorted list of paths to the matching image files. The list is
        empty when nothing matches or root_directory does not exist.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')
    image_files = []
    for root, _dirs, files in os.walk(root_directory):
        # Only folders whose own name (not the full path) carries the prefix qualify.
        if not os.path.basename(root).startswith(folder_prefix):
            continue
        image_files.extend(
            os.path.join(root, file_name)
            for file_name in files
            if file_name.lower().endswith(image_extensions)
        )
    # os.walk yields entries in a filesystem-dependent order; sorting makes
    # the result reproducible across runs and machines.
    return sorted(image_files)



In [None]:
# Specify the root directory to search from
root_dir = './fruits-360_dataset/fruits-360/Training'
image_paths = get_apple_images(root_dir)

# The list can be long, so report how many images were found and preview
# only the first few paths instead of printing every path twice.
print('Found {} apple images under {}'.format(len(image_paths), root_dir))
for path in image_paths[:10]:
    print(path)


In [None]:
# !pip install Pillow numpy


In [None]:
import os
from PIL import Image
import numpy as np


def load_images_into_array(image_paths, target_size=(256, 256)):
    """
    Read every image in image_paths, resize it and convert it to a numpy array.

    :param image_paths: A list of image file paths.
    :param target_size: A tuple (width, height) to which all images will be resized.
    :return: A list of numpy arrays, one per input path, in the same order.
    """
    def _read_resized(image_path):
        # The context manager closes the underlying file once the pixels are copied out.
        with Image.open(image_path) as source:
            return np.array(source.resize(target_size))

    return [_read_resized(image_path) for image_path in image_paths]


In [None]:
train_images = load_images_into_array(image_paths)

# train_images now holds every apple image as a numpy array.
# Print the shape of the first image as a sanity check (if any were found).
if train_images:
    print(train_images[0].shape)
else:
    print("No images found.")

In [None]:
# ! pip install matplotlib

In [None]:
import matplotlib.pyplot as plt

In [None]:
plt.imshow(train_images[8])

In [None]:
! nvidia-smi

In [None]:
# ! pip install "tensorflow[and-cuda]<2.16"

In [None]:
import tensorflow as tf
tf.config.list_physical_devices('GPU')

In [None]:
# ! pip install imageio

In [None]:
import glob
import imageio


from tensorflow.keras import layers
import time



from IPython import display

In [None]:
# Batch and shuffle the data
#train_dataset = tf.data.Dataset.from_tensor_slices(train_images).shuffle(BUFFER_SIZE).batch(BATCH_SIZE)

In [None]:
#The BUFFER_SIZE variable, set to 60000, is used as a parameter in the dataset shuffling process.
BUFFER_SIZE = 60000

# This defines how many data points (in our case images) the model processes at once during training
BATCH_SIZE = 256

When you call tf.data.Dataset.shuffle(BUFFER_SIZE), TensorFlow takes the first BUFFER_SIZE elements of the dataset and puts them into a buffer. During each training iteration, it randomly selects an element from this buffer and replaces it with the next element from the dataset, if there are any left. This approach ensures that a randomly shuffled version of the dataset is fed into the model, without the need to shuffle the entire dataset in memory all at once.

Performance and Memory Use: The size of BUFFER_SIZE can impact both the randomness of your data during training and the memory usage. A larger BUFFER_SIZE can provide better shuffling but requires more memory to maintain the buffer. Typically, you would set BUFFER_SIZE to the size of the dataset or slightly smaller, depending on memory constraints. In this case, setting it to 60000 may imply that your dataset is about that size or that you are attempting to shuffle as much of it as possible at once for thorough randomness.

In [None]:
def preprocess_image(image_path):
    """
    Load one image file and prepare it as a training sample for the GAN.

    :param image_path: Path to an image file.
    :return: A float tensor of shape (28, 28, 3) with values scaled to [-1, 1],
        matching the generator's tanh output range.
    """
    image = tf.io.read_file(image_path)
    # get_apple_images() also collects .png/.gif/.bmp files, so let TensorFlow
    # detect the format instead of assuming JPEG. channels=3 forces RGB
    # (use channels=1 for grayscale); expand_animations=False guarantees a
    # 3-D (height, width, channels) tensor even for GIF input.
    image = tf.io.decode_image(image, channels=3, expand_animations=False)
    image = tf.image.resize(image, [28, 28])
    image = (image - 127.5) / 127.5  # Normalize [0, 255] to [-1, 1]
    return image

# Eagerly preprocess every path in image_paths into a normalized 28x28 RGB tensor.
# NOTE: this rebinds train_images, replacing the numpy arrays loaded earlier by load_images_into_array().
train_images = [preprocess_image(path) for path in image_paths]
# Shuffle with a buffer of BUFFER_SIZE elements, then group into batches of BATCH_SIZE
# (the final batch may be smaller because no remainder is dropped).
train_dataset = tf.data.Dataset.from_tensor_slices(train_images).shuffle(BUFFER_SIZE).batch(BATCH_SIZE)


**Dense Layer:**

This is the first layer of the generator model.
It takes a 100-dimensional noise vector as input.
It has 7*7*256 units or neurons. The layer's output will be reshaped in subsequent layers to form a 3D structure suitable for convolution operations.
use_bias=False indicates that no bias vector is added to the layer outputs.
This layer essentially projects and reshapes the input noise vector into a format suitable for convolutional operations that follow.


**BatchNormalization Layer:**

Normalizes the activations of the previous layer at each batch, i.e., it applies a transformation that maintains the mean output close to 0 and the output standard deviation close to 1.
Helps to stabilize the learning process and reduces the number of training epochs required to train deep networks.

**LeakyReLU Layer:**

Applies the Leaky Rectified Linear Unit activation function.
Allows a small, non-zero gradient when the unit is not active, which helps prevent dead neurons in the network.
LeakyReLU is often preferred in GANs because it helps with gradient flow.

**Reshape Layer:**

Reshapes the output from the dense layer into a 3D tensor of shape (7, 7, 256).
This is necessary to match the dimensions expected by the subsequent convolutional layers.

**Conv2DTranspose Layers:**

These layers perform the opposite of a convolution operation, upsampling the input to a higher resolution or dimension.
The first Conv2DTranspose layer upsamples to (7, 7, 128).
The second upsamples further to (14, 14, 64).
The third and final Conv2DTranspose layer upsamples to the target resolution of (28, 28, 3), producing a three-channel (RGB) image.
strides=(1, 1) or (2, 2) control the upsampling factor.
padding='same' ensures the output size is adjusted to keep the spatial dimensions consistent through convolution operations.
use_bias=False in these layers means no bias vector is added to the outputs.
The last Conv2DTranspose layer uses a tanh activation function to normalize the output pixels to the range [-1, 1].

**More BatchNormalization and LeakyReLU Layers:**

Added after each Conv2DTranspose layer (except the final one) to stabilize training and introduce non-linearity, enabling the model to learn more complex patterns.

**Final Output:**

The model outputs a 28x28 pixel RGB image (as indicated by the shape (None, 28, 28, 3)), where None stands for batch size, allowing the model to process multiple images in parallel during training.


MORE:

Use of Conv2DTranspose layers: These layers are commonly used in GANs for upscaling the input and generating images. In this context, they're used to progressively upscale a low-dimensional input (in this case, from a 100-dimensional noise vector) to a higher resolution image.

Starting from a dense layer that reshapes into an image format: The code begins with a dense layer that outputs to a shape which is then reshaped into an image format (7x7x256). This is a typical approach in GAN generators to start generating an image from a flattened vector.

Batch normalization and LeakyReLU activations: These are common in GANs to help stabilize training and avoid vanishing or exploding gradients.

Output activation is tanh: The use of the tanh function in the last convolutional layer is a common practice in GANs for generating images. This is because tanh outputs values in the range [-1, 1], which is often used for normalized image data.

Output shape is (None, 28, 28, 3): This indicates that the network outputs color images (with 3 channels for RGB) of size 28x28 pixels. The use of GANs is prevalent for image generation tasks.

The initial input shape is (100,): This signifies that the network starts with a 100-dimensional noise vector, which is typical for GAN generators. The noise vector serves as a seed for image generation, allowing the network to produce varied images.

The commented-out line before the final Conv2DTranspose layer suggests there was an option to output a single-channel image (possibly for grayscale images), but it has been modified to produce 3-channel RGB images instead.

Overall, this code defines a GAN generator designed to create 28x28 color images from a 100-dimensional noise input, showcasing a typical architecture for generating images in a GAN setup.

In [None]:
def generator():
    """
        Build the generator network as a tf.keras.Sequential model.

        The model maps a 100-dimensional noise vector to a 28x28 RGB image:
        a dense projection is reshaped to (7, 7, 256) and then upsampled by three
        Conv2DTranspose layers. The last layer uses tanh, so pixel values lie in [-1, 1].
        Intermediate output shapes are asserted after each stage.
    """
    net = tf.keras.Sequential()

    # Project the noise vector and reshape it into a small feature map.
    for layer in (
        layers.Dense(7*7*256, use_bias=False, input_shape=(100,)),
        layers.BatchNormalization(),
        layers.LeakyReLU(),
        layers.Reshape((7, 7, 256)),
    ):
        net.add(layer)
    assert net.output_shape == (None, 7, 7, 256)  # Note: None is the batch size

    # Hidden upsampling stages: (filters, strides, expected output shape).
    hidden_stages = (
        (128, (1, 1), (None, 7, 7, 128)),
        (64, (2, 2), (None, 14, 14, 64)),
    )
    for filters, strides, expected_shape in hidden_stages:
        net.add(layers.Conv2DTranspose(filters, (5, 5), strides=strides, padding='same', use_bias=False))
        assert net.output_shape == expected_shape
        net.add(layers.BatchNormalization())
        net.add(layers.LeakyReLU())

    # Single-channel (grayscale) variant of the output layer:
    #net.add(layers.Conv2DTranspose(1, (5, 5), strides=(2, 2), padding='same', use_bias=False, activation='tanh'))
    net.add(layers.Conv2DTranspose(3, (5, 5), strides=(2, 2), padding='same', use_bias=False, activation='tanh'))
    assert net.output_shape == (None, 28, 28, 3)

    return net

In [None]:
generatorModel = generator()

# Run the (still untrained) generator once on a single random noise vector.
noise = tf.random.normal([1, 100])
generated_image = generatorModel(noise, training=False)

# The output is RGB, so no 'cmap' is needed; map the tanh range [-1, 1]
# back to [0, 1] so matplotlib can display it.
first_image = generated_image[0]
plt.imshow((first_image + 1) / 2)
plt.axis('off')  # Hide the axes for a cleaner visualization
plt.show()


Define a convolutional neural network (CNN) using TensorFlow's Keras API. This CNN includes the following layers:

Conv2D Layers: These layers perform the convolution operation that involves a filter or kernel that passes over the input image (or feature map from the previous layer), capturing spatial hierarchies and features. The input image is specified to have a shape of 28x28 pixels with 3 channels (likely RGB), indicating that the model expects color images of this size. Each Conv2D layer uses a 5x5 kernel with a stride of 2 in both directions and padding set to 'same', which ensures the output volume has the same dimensions as the input when divided by the stride.

LeakyReLU Layers: These are activation functions defined as $f(x) = \alpha x$ for $x < 0$ and $f(x) = x$ for $x \ge 0$. The leaky version of ReLU allows a small, non-zero gradient when the unit is inactive and has been shown to help maintain the flow of gradients through the network during training.

Dropout Layers: These layers randomly set a fraction (0.3 in this case) of the input units to zero at each update during training time, which helps prevent overfitting.

Flatten Layer: This layer flattens the output of the previous convolutional layers to form a single long feature vector necessary for the dense layer that follows.

Dense Layer: This is a fully connected layer that outputs one unit. Since it's not followed by a softmax or sigmoid activation function in the provided code, this configuration suggests the model might be used for regression or binary classification tasks where the output is a single scalar value.

Given the structure and the output layer, this model appears to be set up for a binary classification or regression problem, depending on the final activation function and loss functions used elsewhere in the implementation (not shown in this snippet).

In [None]:
def discriminator():
    """
      Build the discriminator network as a tf.keras.Sequential model.

      The model takes a 28x28 RGB image and returns a single unactivated score:
      two strided 5x5 convolutions (each followed by LeakyReLU and 30% dropout),
      then a flatten and a one-unit dense layer.
    """
    return tf.keras.Sequential([
        # For single-channel (grayscale) input use input_shape=[28, 28, 1] instead.
        layers.Conv2D(64, (5, 5), strides=(2, 2), padding='same', input_shape=[28, 28, 3]),
        layers.LeakyReLU(),
        layers.Dropout(0.3),

        layers.Conv2D(128, (5, 5), strides=(2, 2), padding='same'),
        layers.LeakyReLU(),
        layers.Dropout(0.3),

        layers.Flatten(),
        layers.Dense(1),
    ])

In [None]:
# `discriminator` starts out as the factory function defined above and is
# rebound here to the model it builds (the rest of the notebook uses that name
# for the model). The guard keeps this cell re-runnable: once the name already
# refers to a model, calling it with no arguments would raise.
if not isinstance(discriminator, tf.keras.Model):
    discriminator = discriminator()
# Score the untrained generator's sample image with the untrained discriminator.
decision = discriminator(generated_image)
print(decision)

In [None]:
# Binary cross-entropy loss object shared by the generator and discriminator losses.
# from_logits=True because the discriminator's final Dense(1) layer has no activation,
# so its outputs are raw scores rather than probabilities.
cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)

In [None]:
def discriminator_loss(real_output, fake_output):
    """
    Compute the discriminator loss for one batch.

    Real images are scored against a target of ones and generated images
    against a target of zeros; the two cross-entropy terms are summed.

    :param real_output: Discriminator outputs for real images.
    :param fake_output: Discriminator outputs for generated images.
    :return: The combined loss.
    """
    loss_on_real = cross_entropy(tf.ones_like(real_output), real_output)
    loss_on_fake = cross_entropy(tf.zeros_like(fake_output), fake_output)
    return loss_on_real + loss_on_fake

In [None]:
def generator_loss(fake_output):
    """
    Compute the generator loss for one batch.

    The generator is rewarded when the discriminator scores its images as
    real, so the discriminator outputs are compared against a target of ones.

    :param fake_output: Discriminator outputs for generated images.
    :return: The cross-entropy loss.
    """
    wanted_labels = tf.ones_like(fake_output)
    return cross_entropy(wanted_labels, fake_output)

In [None]:
# The Adam optimizer (Adaptive Moment Estimation)
# Adam is a variant of Stochastic Gradient Descent (SGD). Plain SGD updates every weight with one fixed learning rate;
# Adam instead tracks running estimates of each weight's gradient mean and variance and adapts the step size per weight.

# Having an optimizer is crucial for training the neural network effectively.
# An optimizer is responsible for updating the weights of the network based on the gradients calculated during backpropagation,
# in order to minimize the loss function.
# The generator and discriminator each get their own optimizer, both with a learning rate of 1e-4.
generator_optimizer = tf.keras.optimizers.Adam(1e-4)
discriminator_optimizer = tf.keras.optimizers.Adam(1e-4)

In [None]:
# Directory for training checkpoints and the file prefix passed to checkpoint.save() in train().
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
# Track both models and both optimizers so they are saved together.
checkpoint = tf.train.Checkpoint(generator_optimizer=generator_optimizer,
                                 discriminator_optimizer=discriminator_optimizer,
                                 generator=generatorModel,
                                 discriminator=discriminator)

In [None]:
# Number of passes over the training dataset.
EPOCHS = 50
# Length of the random noise vector fed to the generator.
noise_dim = 100

# Number of sample images rendered after every epoch.
num_examples_to_generate = 16

# This seed is reused for every epoch's sample images, so the same noise
# vectors are rendered each time and progress is easier to compare
# (for example in an animated GIF).
seed = tf.random.normal([num_examples_to_generate, noise_dim])

In [None]:
# Notice the use of `tf.function`
# This annotation causes the function to be "compiled".
@tf.function
def train_step(images):
    """
    Run one adversarial training step on a batch of real images.

    Generates as many fake images as there are real ones, scores both with the
    discriminator, and applies one optimizer update to each network.

    :param images: A batch of real images; the first dimension is the batch size.
    """
    # Size the noise batch from the real batch rather than the global
    # BATCH_SIZE: the dataset's final batch may be smaller, and this keeps
    # the number of real and fake samples equal in every step.
    batch_size = tf.shape(images)[0]
    noise = tf.random.normal([batch_size, noise_dim])

    # Separate tapes so each network's gradients are taken from its own loss.
    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        generated_images = generatorModel(noise, training=True)

        real_output = discriminator(images, training=True)
        fake_output = discriminator(generated_images, training=True)

        gen_loss = generator_loss(fake_output)
        disc_loss = discriminator_loss(real_output, fake_output)

    gradients_of_generator = gen_tape.gradient(gen_loss, generatorModel.trainable_variables)
    gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)

    generator_optimizer.apply_gradients(zip(gradients_of_generator, generatorModel.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))

In [None]:
def generate_and_save_images(model, epoch, test_input):
    """
    Render the model's output for test_input as an image grid, save it and show it.

    The grid is saved as 'image_at_epoch_NNNN.png' in the current directory.

    :param model: The generator model to sample from.
    :param epoch: Epoch number used in the output file name.
    :param test_input: Batch of noise vectors; one image is drawn per vector.
    """
    import math  # Local import keeps this cell self-contained.

    # Notice `training` is set to False.
    # This is so all layers run in inference mode (batchnorm).
    predictions = model(test_input, training=False)
    num_images = predictions.shape[0]

    # Keep the 4x4 layout for up to 16 images and grow the grid for larger
    # batches, where a fixed 4x4 subplot index would be out of range.
    grid_size = max(4, math.ceil(math.sqrt(num_images)))

    fig = plt.figure(figsize=(4, 4))

    for i in range(num_images):
        plt.subplot(grid_size, grid_size, i + 1)
        # Map the tanh range [-1, 1] back to [0, 1] for display.
        plt.imshow((predictions[i] + 1) / 2)
        plt.axis('off')

    plt.savefig('image_at_epoch_{:04d}.png'.format(epoch))
    plt.show()
    # Release the figure so repeated calls (one per epoch) do not accumulate open figures.
    plt.close(fig)

In [None]:
def train(dataset, epochs):
    """
    Train the GAN for the given number of epochs.

    After every epoch a grid of sample images is generated from the fixed
    seed and saved. A checkpoint is written every 15 epochs and once more at
    the end, so the final weights are always saved.

    :param dataset: Iterable of image batches passed to train_step().
    :param epochs: Number of passes over dataset.
    """
    last_saved_epoch = 0

    for epoch in range(epochs):
        start = time.time()

        for image_batch in dataset:
            train_step(image_batch)

        # Produce images for the GIF as you go
        display.clear_output(wait=True)
        generate_and_save_images(generatorModel, epoch + 1, seed)

        # Save the model every 15 epochs
        if (epoch + 1) % 15 == 0:
            checkpoint.save(file_prefix=checkpoint_prefix)
            last_saved_epoch = epoch + 1

        print('Time for epoch {} is {} sec'.format(epoch + 1, time.time() - start))

    # The periodic schedule misses the last epoch unless `epochs` is a
    # multiple of 15 (e.g. 50), so save the final weights explicitly.
    if last_saved_epoch != epochs:
        checkpoint.save(file_prefix=checkpoint_prefix)

    # Generate after the final epoch
    display.clear_output(wait=True)
    generate_and_save_images(generatorModel, epochs, seed)

In [None]:
train(train_dataset, EPOCHS)