# Setting up Gpu

In [1]:
%env CUDA_DEVICE_ORDER=PCI_BUS_ID
%env CUDA_VISIBLE_DEVICES=1

env: CUDA_DEVICE_ORDER=PCI_BUS_ID
env: CUDA_VISIBLE_DEVICES=1


# Imports

In [2]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['XLA_FLAGS'] = '--xla_gpu_cuda_data_dir=/dev/null'
import warnings
warnings.filterwarnings("ignore", message="No artists with labels found to put in legend.") 
warnings.filterwarnings("ignore", category=FutureWarning)
import cv2
import os
import pathlib
import random
import shutil
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import tensorflow_gan as tfgan
from tensorflow.keras.applications import VGG16
from keras.layers import Conv2D, Conv2DTranspose, Dropout, Dense, Reshape, LayerNormalization, LeakyReLU
from keras import layers, models
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import f1_score, recall_score, precision_score
from sklearn.metrics.pairwise import polynomial_kernel
from scipy.linalg import sqrtm
SEED = 36
tf.random.set_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

In [3]:
tf.__version__

'2.13.0'

# Loading Data

In [4]:
class ReadDataset:
    def __init__(self, datasetpath, labels, image_shape):
        self.datasetpath = datasetpath
        self.labels = labels
        self.image_shape = image_shape
    def returListImages(self,):
        self.images = []
        for label in self.labels:
            self.images.append(list(pathlib.Path(os.path.join(self.datasetpath,
                                                              label)).glob('*.*')))
    def readImages(self,):
        self.returListImages()
        self.finalImages = []
        labels = []
        for label in range(len(self.labels)):
            for img in self.images[label]:
                img = cv2.imread(str(img), cv2.IMREAD_GRAYSCALE)
                img = cv2.resize(img , self.image_shape[:2])
                img  = img/255
                img = np.expand_dims(img, axis=-1)  # Add channel dimension to get shape (64, 64, 1)
                self.finalImages.append(img)
                labels.append(label)
        images = np.array(self.finalImages)
        labels = np.array(labels)
        return images, labels

In [5]:
readDatasetObject = ReadDataset('/tf/chest_xray/train',
                               ['NORMAL', 'PNEUMONIA'],
                               (64, 64))
images, labels = readDatasetObject.readImages()

In [6]:
images.shape, labels.shape

((5216, 64, 64, 1), (5216,))

# Defining the GAN module

In [7]:
class Acgan:
    def __init__(self, eta, batch_size, epochs, weight_decay, latent_space,
                 image_shape, kernel_size, label_smoothing=0.9):
        self.eta = eta
        self.batch_size = batch_size
        self.epochs = epochs
        self.weight_decay = weight_decay
        self.latent_space = latent_space
        self.image_shape = image_shape
        self.kernel_size = kernel_size
        self.label_smoothing = label_smoothing

    def data(self, images, labels):
        ytrain = tf.keras.utils.to_categorical(labels)
        self.images = images
        self.labels = ytrain

    def samples(self, G, noize, labels):
        images = G.predict([noize, labels])
        ys = np.argmax(labels, axis=1)
        plt.figure(figsize=(12, 4))
        for i in range(16):
            plt.subplot(2, 8, (i + 1))
            plt.imshow(images[i], cmap='gray')
            plt.title(ys[i])
        plt.show()


    def generator(self, inputs, labels):
        filters = [256, 128, 64, 32]
        padding = 'same'
        x = inputs
        y = labels
        x = layers.concatenate([x, y])
        x = layers.Dense(2048)(x)
        x = layers.Dense(8*8*filters[0], kernel_regularizer=tf.keras.regularizers.L2(0.001))(x)
        x = layers.Reshape((8, 8, filters[0]))(x)
        for filter in filters:
            if filter >= 64:
                strides = 2
            else:
                strides = 1
            x = layers.LayerNormalization()(x)
            x = LeakyReLU(alpha=0.2)(x)
            x = Conv2DTranspose(filter, kernel_size=self.kernel_size, padding=padding, strides=strides)(x)
        x = Conv2DTranspose(1, kernel_size=self.kernel_size, padding=padding)(x)
        x = layers.Activation('sigmoid')(x)
        self.generatorModel = models.Model(inputs=[inputs, labels], outputs=x, name='generator')

    def discriminator(self, inputs):
        x = inputs
        filters = [32, 64, 128, 256]
        padding = 'same'
        for filter in filters:
            if filter < 256:
                strides = 2
            else:
                strides = 1
            x = Conv2D(filter, kernel_size=self.kernel_size, padding=padding, strides=strides,
                      kernel_regularizer=tf.keras.regularizers.L2(0.001))(x)
            x = LeakyReLU(alpha=0.2)(x)
        x = layers.Flatten()(x)
        outputs = Dense(1)(x)
        labelsOutput = Dense(256, kernel_regularizer=tf.keras.regularizers.L2(0.001))(x)
        labelsOutput = Dropout(0.3)(labelsOutput)
        labelsOutput = Dense(2)(labelsOutput)
        labelsOutput = layers.Activation('softmax')(labelsOutput)
        self.discriminatorModel = models.Model(inputs=inputs, outputs=[outputs, labelsOutput], name='discriminator')

    def build(self):
        generatorInput = layers.Input(shape=(self.latent_space))
        discriminatorInput = layers.Input(shape=(self.image_shape))
        labelsInput = layers.Input(shape=(2,))
        self.generator(generatorInput, labelsInput)
        self.discriminator(discriminatorInput)
        G = self.generatorModel
        D = self.discriminatorModel
        D.compile(loss=['mse', 'binary_crossentropy'],
                 optimizer=tf.keras.optimizers.RMSprop(learning_rate=self.eta, weight_decay=self.weight_decay))
        D.summary()
        G.summary()
        D.trainable = False
        GAN = models.Model(inputs=[generatorInput, labelsInput], outputs=D(G([generatorInput, labelsInput])))
        GAN.compile(loss=['mse', 'binary_crossentropy'],
                   optimizer=tf.keras.optimizers.RMSprop(learning_rate=self.eta*0.5, weight_decay=self.weight_decay*0.5))
        GAN.summary()
        return G, D, GAN

    def save_model_weights(self, G, D, GAN, epoch):
        G.save_weights(f'generator_weights_epoch_{epoch}.h5')
        D.save_weights(f'discriminator_weights_epoch_{epoch}.h5')
        GAN.save_weights(f'gan_weights_epoch_{epoch}.h5')

    def load_model_weights(self, G, D, GAN, epoch):
        G.load_weights(f'generator_weights_epoch_{epoch}.h5')
        D.load_weights(f'discriminator_weights_epoch_{epoch}.h5')
        GAN.load_weights(f'gan_weights_epoch_{epoch}.h5')

    def trainAlgorithm(self, G, D, GAN):
        for epoch in range(self.epochs):
            indexs = np.random.randint(0, len(self.images), size=(self.batch_size,))
            realImages = self.images[indexs]
            realLabels = self.labels[indexs]
            realTag = tf.ones(shape=(self.batch_size,))*self.label_smoothing
            noize = tf.random.uniform(shape=(self.batch_size, self.latent_space), minval=-1, maxval=1)
            fakeLabels = tf.keras.utils.to_categorical(np.random.choice(range(2), size=(self.batch_size)), num_classes=2)
            fakeImages = G.predict([noize, fakeLabels], verbose=0)
            fakeTag = tf.zeros(shape=(self.batch_size,))
            allImages = np.vstack([realImages, fakeImages])
            allLabels = np.vstack([realLabels, fakeLabels])
            allTags = np.hstack([realTag, fakeTag])
            _, dlossTag, dlossLabels = D.train_on_batch(allImages, [allTags, allLabels])
            noize = tf.random.uniform(shape=(self.batch_size, self.latent_space), minval=-1, maxval=1)
            _, glossTag, glossLabels = GAN.train_on_batch([noize, fakeLabels], [realTag, fakeLabels])
            if epoch % 10000 == 0:
                print('Epoch: {}'.format(epoch))
                print('discriminator loss: [tag: {}, labels: {}], generator loss: [tag: {}, labels: {}]'.format(dlossTag, dlossLabels, glossTag, glossLabels))
                self.samples(G, noize, fakeLabels)
                # Save model weights
                self.save_model_weights(G, D, GAN, epoch)

# Instantiating the model

In [8]:
acgan = Acgan(eta = 0.0002, batch_size = 32, epochs = 0, weight_decay = 6e-9,
              latent_space = 100, image_shape = (64, 64, 1), kernel_size = 5)

In [9]:
acgan.data(images, labels)

In [10]:
G, D, GAN = acgan.build()

Model: "discriminator"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_2 (InputLayer)        [(None, 64, 64, 1)]          0         []                            
                                                                                                  
 conv2d (Conv2D)             (None, 32, 32, 32)           832       ['input_2[0][0]']             
                                                                                                  
 leaky_re_lu_4 (LeakyReLU)   (None, 32, 32, 32)           0         ['conv2d[0][0]']              
                                                                                                  
 conv2d_1 (Conv2D)           (None, 16, 16, 64)           51264     ['leaky_re_lu_4[0][0]']       
                                                                                      

# Training

In [11]:
last_epoch = 60000
acgan.load_model_weights(G, D, GAN, last_epoch)

In [12]:
#acgan.trainAlgorithm(G, D, GAN)

# Evaluation

In [13]:
datasetGenerationSize = 5216
noize = tf.random.uniform(shape = (datasetGenerationSize, 100), minval = -1, maxval = 1)
newlabels = tf.keras.utils.to_categorical(np.random.choice([0, 1], size = (datasetGenerationSize, )), num_classes = 2)

In [14]:
noize.shape, newlabels.shape

(TensorShape([5216, 100]), (5216, 2))

In [15]:
np.unique(np.argmax(newlabels, axis = 1), return_counts = True)

(array([0, 1]), array([2532, 2684]))

In [16]:
imagesGeneration = G.predict([noize, newlabels])
imagesGeneration.shape



(5216, 64, 64, 1)

In [17]:
basemodel = tf.keras.applications.VGG16(weights=None, input_shape=(64, 64, 1),
                                        pooling='max', include_top=False)
# Model architecture
x = layers.Dropout(0.4)(basemodel.output)
x = layers.Dense(128)(x)
x = layers.BatchNormalization()(x)
x = layers.LeakyReLU(alpha=0.2)(x)
x = layers.Dropout(0.4)(x)
x = layers.Dense(32)(x)
x = layers.BatchNormalization()(x)
x = layers.LeakyReLU(alpha=0.2)(x)
x = layers.Dropout(0.4)(x)
x = layers.Dense(1, activation='sigmoid')(x)  # Assuming binary classification
m = tf.keras.models.Model(inputs=basemodel.input, outputs=x)
m.compile(loss = 'binary_crossentropy', optimizer = tf.keras.optimizers.Adam(learning_rate = 0.00002))
m.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 64, 64, 1)]       0         
                                                                 
 block1_conv1 (Conv2D)       (None, 64, 64, 64)        640       
                                                                 
 block1_conv2 (Conv2D)       (None, 64, 64, 64)        36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 32, 32, 64)        0         
                                                                 
 block2_conv1 (Conv2D)       (None, 32, 32, 128)       73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 32, 32, 128)       147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 16, 16, 128)       0   

In [18]:
images.shape, labels.shape

((5216, 64, 64, 1), (5216,))

In [19]:
imagesGeneration.shape, newlabels.shape

((5216, 64, 64, 1), (5216, 2))

In [20]:
from sklearn.utils import shuffle

def combine_data(real_images, real_labels, synthetic_images, synthetic_labels, ratio_real, ratio_synthetic):
    
    total_real_samples = len(real_images)
    total_synthetic_samples = len(synthetic_images)
    
    real_data_size = int(total_real_samples * (ratio_real / (ratio_real + ratio_synthetic)))
    synthetic_data_size = int(total_synthetic_samples * (ratio_synthetic / (ratio_real + ratio_synthetic)))
    
  
    real_data_size = min(real_data_size, total_real_samples)
    synthetic_data_size = min(synthetic_data_size, total_synthetic_samples)
    
    
    print(f"Real data size {real_data_size}")
    print(f"Synthetic data size {synthetic_data_size}")
    
    
    real_indices = np.random.choice(total_real_samples, real_data_size, replace=False)
    synthetic_indices = np.random.choice(total_synthetic_samples, synthetic_data_size, replace=False)
    
    combined_images = np.concatenate([real_images[real_indices], synthetic_images[synthetic_indices]], axis=0)
    combined_labels = np.concatenate([real_labels[real_indices], synthetic_labels[synthetic_indices]], axis=0)
    

    combined_images, combined_labels = shuffle(combined_images, combined_labels, random_state=SEED)
    
    return combined_images, combined_labels

combined_images_1_1, combined_labels_1_1 = combine_data(images, labels, imagesGeneration, np.argmax(newlabels, axis=1), 1, 1)
combined_images_1_3, combined_labels_1_3 = combine_data(images, labels, imagesGeneration, np.argmax(newlabels, axis=1), 1, 3)
combined_images_3_1, combined_labels_3_1 = combine_data(images, labels, imagesGeneration, np.argmax(newlabels, axis=1), 3, 1)

Real data size 2608
Synthetic data size 2608
Real data size 1304
Synthetic data size 3912
Real data size 3912
Synthetic data size 1304


In [21]:
# Train model with 1:1 combined data
history_1_1 = m.fit(combined_images_1_1, combined_labels_1_1,
                    epochs=60, batch_size=64,
                    validation_split=0.2,
                    callbacks=[tf.keras.callbacks.EarlyStopping(patience=3, monitor='val_loss', mode='min',
                                                                restore_best_weights=True)])

# Train model with 1:3 combined data
history_1_3 = m.fit(combined_images_1_3, combined_labels_1_3,
                    epochs=60, batch_size=64,
                    validation_split=0.2,
                    callbacks=[tf.keras.callbacks.EarlyStopping(patience=3, monitor='val_loss', mode='min',
                                                                restore_best_weights=True)])

# Train model with 3:1 combined data
history_3_1 = m.fit(combined_images_3_1, combined_labels_3_1,
                    epochs=60, batch_size=64,
                    validation_split=0.2,
                    callbacks=[tf.keras.callbacks.EarlyStopping(patience=3, monitor='val_loss', mode='min', 
                                                                restore_best_weights=True)])


Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60


In [22]:
m.evaluate(combined_images_1_1, combined_labels_1_1)
m.evaluate(combined_images_1_3, combined_labels_1_3)
m.evaluate(combined_images_3_1, combined_labels_3_1)



0.03791326656937599

In [23]:
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score

def calculate_metrics(model, images, labels):
    loss = model.evaluate(images, labels, verbose=0)  
    y_pred = tf.squeeze(model.predict(images))
    y_pred = y_pred >= 0.5
    y_pred = np.array(y_pred, dtype='int32')
    labels = np.array(labels, dtype='int32')
    acc = accuracy_score(y_pred, labels) * 100
    f1 = f1_score(y_pred, labels) * 100
    recall = recall_score(y_pred, labels) * 100
    precision = precision_score(y_pred, labels) * 100
    return {
        'accuracy': acc,
        'f1_score': f1,
        'recall': recall,
        'precision': precision
    }


In [24]:
# For 1:1 Ratio
metrics_1_1 = calculate_metrics(m, combined_images_1_1, combined_labels_1_1)
print(f"1:1 Ratio -> Accuracy: {metrics_1_1['accuracy']:.2f}%, F1 Score: {metrics_1_1['f1_score']:.2f}%, "
      f"Recall: {metrics_1_1['recall']:.2f}%, Precision: {metrics_1_1['precision']:.2f}%")

# For 1:3 Ratio
metrics_1_3 = calculate_metrics(m, combined_images_1_3, combined_labels_1_3)
print(f"1:3 Ratio -> Accuracy: {metrics_1_3['accuracy']:.2f}%, F1 Score: {metrics_1_3['f1_score']:.2f}%, "
      f"Recall: {metrics_1_3['recall']:.2f}%, Precision: {metrics_1_3['precision']:.2f}%")

# For 3:1 Ratio
metrics_3_1 = calculate_metrics(m, combined_images_3_1, combined_labels_3_1)
print(f"3:1 Ratio -> Accuracy: {metrics_3_1['accuracy']:.2f}%, F1 Score: {metrics_3_1['f1_score']:.2f}%, "
      f"Recall: {metrics_3_1['recall']:.2f}%, Precision: {metrics_3_1['precision']:.2f}%")


1:1 Ratio -> Accuracy: 99.71%, F1 Score: 99.77%, Recall: 99.72%, Precision: 99.82%
1:3 Ratio -> Accuracy: 99.85%, F1 Score: 99.87%, Recall: 99.83%, Precision: 99.90%
3:1 Ratio -> Accuracy: 99.52%, F1 Score: 99.65%, Recall: 99.64%, Precision: 99.66%
