## Import Libraries

First, all required libraries are imported.

In [None]:
import csv
import os

import cv2
import imgaug.augmenters as iaa
import keras
import matplotlib.pyplot as plt
import nibabel as nib
import numpy as np
import pandas as pd
import scipy
import seaborn as sns
import sklearn
import tensorflow as tf
from keras.layers import Dense, Flatten, Conv2D, Reshape, Input, Conv2DTranspose
from keras.layers import Activation, LeakyReLU, BatchNormalization, Dropout, Resizing
from keras.layers import MaxPooling2D
from keras.losses import BinaryCrossentropy
from keras.models import Sequential, Model
from scipy.stats import mannwhitneyu
from scipy.stats import wilcoxon
from sklearn.metrics import roc_curve, auc
from tensorflow.keras.applications import VGG16
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Reshape, Conv2DTranspose, LeakyReLU
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam


## Import Dataset

Load the required dataset and resize it to the required dimensions i.e. 64x64 (in this case).

In [None]:
MAIN_DIR = "trainDataPos"


def load_images(folder, size=(64, 64), target=1):
    """Load every readable image in *folder* as a resized grayscale array.

    Parameters:
    - folder: Directory whose entries are read with ``cv2.imread``.
    - size: ``(width, height)`` passed to ``cv2.resize``; defaults to 64x64.
    - target: Label value stored once per loaded image; defaults to 1.

    Returns:
    - ``(imgs, labels)`` as NumPy arrays, one entry per image that could be
      read. Entries that OpenCV cannot decode are skipped.
    """
    imgs = []
    labels = []

    # os.listdir returns names in arbitrary order; sorting keeps the array
    # order (and therefore any seeded sampling from it) reproducible.
    for name in sorted(os.listdir(folder)):
        img = cv2.imread(os.path.join(folder, name))
        if img is None:
            # cv2.imread signals "not an image / unreadable" with None.
            # Skip it explicitly rather than hiding every error behind a
            # bare ``except``.
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        imgs.append(cv2.resize(img, size))
        labels.append(target)

    return np.array(imgs), np.array(labels)

# Load the positive training images and display the array shapes.
data, labels = load_images(MAIN_DIR)
data.shape, labels.shape

if data.shape[0] == 0:
    raise ValueError(f"No readable images found in {MAIN_DIR!r}")

# NOTE(review): SEED, WIDTH, HEIGHT and CHANNELS are assigned further down in
# this file; make sure that cell has been executed before this one.
np.random.seed(SEED)
# Draw 1000 indices with replacement. The upper bound is capped at the number
# of loaded images so a folder with fewer than 1000 files no longer raises an
# IndexError; with 1000 or more images the draw is unchanged.
idxs = np.random.randint(0, min(1000, data.shape[0]), 1000)

X_train = data[idxs]
X_train.shape

# Scale pixel values from [0, 255] to [-1, 1].
X_train = (X_train.astype(np.float32) - 127.5) / 127.5

# Add the explicit channel axis.
X_train = X_train.reshape(-1, WIDTH,HEIGHT,CHANNELS)

X_train.shape

##Set the Parameters for the Generated Images





Set the dimension of the noise vector, the batch size used in each iteration, the number of epochs, the number of steps per epoch, the random seed, and the dimensions of the images to be generated.

In [None]:
# Length of the random noise vector passed to the generator.
NOISE_DIM = 100
# Number of real (and of generated) images per training step.
BATCH_SIZE = 4
# Number of training steps in one epoch.
STEPS_PER_EPOCH = 3750
EPOCHS = 100
# Seed passed to np.random.seed.
SEED = 40

# Image dimensions: 64 x 64 pixels, single channel.
WIDTH = 64
HEIGHT = 64
CHANNELS = 1

# Adam with learning rate 0.0002 and beta_1 = 0.5.
OPTIMIZER = Adam(learning_rate=0.0002, beta_1=0.5)

Let's plot the images

In [None]:
# Preview the first ten training images in a 2 x 5 grid.
plt.figure(figsize=(20, 8))
for position in range(1, 11):
    axis = plt.subplot(2, 5, position)
    plt.imshow(X_train[position - 1], cmap="gray")
    plt.axis('off')
    axis.set_xticklabels([])
    axis.set_yticklabels([])
# With both arguments left at None the spacing settings are not changed, so
# one call after the loop is equivalent to one call per subplot.
plt.subplots_adjust(wspace=None, hspace=None)
plt.tight_layout()

# Utility Function



## Generator Model

The `build_generator()` function constructs the generator part of the GAN. It initializes a sequential model, which stacks layers in sequence. The generator starts with a dense layer of 8x8x512 neurons followed by a LeakyReLU activation. Its output is reshaped and passed through 4 transposed convolutional layers whose filter counts decrease progressively as 256, 128, 64, CHANNELS; the first three are strided and upsample the feature map. Each transposed convolution is followed by LeakyReLU except the last one, which uses tanh. The model is compiled with binary cross-entropy loss and the Adam optimizer.

In [None]:
def build_generator():
    """Build, summarise and compile the generator network.

    The model maps a NOISE_DIM-long vector to an image with CHANNELS
    channels: a dense projection reshaped to 8x8x512, three stride-2
    transposed convolutions (256, 128, 64 filters), and a final transposed
    convolution with tanh activation.

    Returns:
    - The compiled Keras model named "generator".
    """
    model = Sequential(name="generator")

    # Project the noise vector and reshape it into an 8x8 feature map.
    model.add(Dense(8*8*512, input_dim=NOISE_DIM, use_bias=False))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Reshape((8, 8, 512)))

    # Each stride-2 transposed convolution is followed by a LeakyReLU.
    for n_filters in (256, 128, 64):
        model.add(Conv2DTranspose(n_filters, (4, 4), strides=(2, 2), padding='same', use_bias=False))
        model.add(LeakyReLU(alpha=0.2))

    # Output layer: CHANNELS feature maps, tanh activation.
    model.add(Conv2DTranspose(CHANNELS, (4, 4), padding='same', activation='tanh'))

    model.summary()
    model.compile(loss="binary_crossentropy", optimizer=OPTIMIZER)

    return model


## Discriminator Model




The 'build_discriminator()' function constructs the discriminator part of GAN using a sequential model. The discriminator uses 4 convolutional layers with filters increasing as 64,128,128,256 followed by LeakyReLU activation. There is a dense layer with a single neuron and sigmoid activation that is added to indicate whether an image is real or fake.

In [None]:
def build_discriminator():
    """Build, summarise and compile the discriminator network.

    The model takes a (WIDTH, HEIGHT, CHANNELS) image, applies four 3x3
    convolutions (64, 128, 128, 256 filters; the last three with stride 2),
    each followed by LeakyReLU, then flattens, applies dropout and ends in a
    single sigmoid unit.

    Returns:
    - The compiled Keras model named "discriminator".
    """
    model = Sequential(name="discriminator")

    # First convolution keeps the input resolution.
    model.add(Conv2D(64, (3, 3), padding='same', input_shape=(WIDTH, HEIGHT, CHANNELS)))
    model.add(LeakyReLU(alpha=0.2))

    # Three stride-2 convolutions.
    for n_filters in (128, 128, 256):
        model.add(Conv2D(n_filters, (3, 3), strides=2, padding='same'))
        model.add(LeakyReLU(alpha=0.2))

    # Classification head.
    model.add(Flatten())
    model.add(Dropout(0.4))
    model.add(Dense(1, activation="sigmoid"))

    model.summary()
    model.compile(loss="binary_crossentropy", optimizer=OPTIMIZER)

    return model


## Generate Artificially Synthesised Images

The `sample_images` function generates images from a noise input using the GAN's generator model and displays them as a grid of subplots. When `save=True`, the generated images are also saved to the output directory.

In [None]:
def sample_images(noise, subplots, figsize=(22,8), save=False):
    """Generate images from *noise* and show them in a grid of subplots.

    Parameters:
    - noise: Batch of noise vectors passed to ``generator.predict``.
    - subplots: ``(rows, cols)`` of the display grid.
    - figsize: Size of the matplotlib figure.
    - save: When true, each generated image is also written to
      ``output64x64/gen<i>.png``.
    """
    generated_images = generator.predict(noise)

    if save:
        # The output directory is not created anywhere else in this file.
        os.makedirs("output64x64", exist_ok=True)

    plt.figure(figsize=figsize)

    for i, image in enumerate(generated_images):
        plt.subplot(subplots[0], subplots[1], i+1)
        if CHANNELS == 1:
            pixels = image.reshape((WIDTH, HEIGHT))
            plt.imshow(pixels, cmap='gray')
        else:
            pixels = image.reshape((WIDTH, HEIGHT, CHANNELS))
            plt.imshow(pixels)

        if save:
            # Write the image itself. plt.savefig here would store the whole,
            # still partially filled, grid figure under every file name.
            img_name = "output64x64/gen" + str(i) + ".png"
            if CHANNELS == 1:
                plt.imsave(img_name, pixels, cmap='gray')
            else:
                # The generator ends in tanh, so map [-1, 1] to [0, 1] for
                # a float colour image.
                plt.imsave(img_name, np.clip((pixels + 1.0) / 2.0, 0.0, 1.0))

        plt.subplots_adjust(wspace=None, hspace=None)
        plt.axis('off')

    plt.tight_layout()
    plt.show()

 ## Start the GAN

To make sure that only the generator is trained through the combined GAN model, the discriminator is frozen before the combined model is compiled. A GAN input is defined and fed to the generator to create a fake image. The fake image is then fed to the discriminator.

In [None]:

# Build both sub-models. Each build function prints its own summary and
# compiles the model it returns.
print('\n')
discriminator = build_discriminator()
print('\n')
generator = build_generator()

# Freeze the discriminator before the stacked model below is compiled. The
# discriminator itself was already compiled inside build_discriminator(),
# i.e. before this flag was changed.
# NOTE(review): whether that earlier compile keeps the discriminator trainable
# for its own train_on_batch calls depends on the installed Keras version -
# confirm.
discriminator.trainable = False

# Stack the two models: noise -> generator -> discriminator.
gan_input = Input(shape=(NOISE_DIM,))
fake_image = generator(gan_input)

gan_output = discriminator(fake_image)

# NOTE(review): OPTIMIZER is the same object already passed to compile() in
# build_discriminator() and build_generator(); confirm the installed Keras
# version allows one optimizer instance to be shared by several models.
gan = Model(gan_input, gan_output, name="gan_model")
gan.compile(loss="binary_crossentropy", optimizer=OPTIMIZER)

print("The Combined Network:\n")
gan.summary()

 # Run the model

Run the training loop and generate sample images after every epoch.

In [None]:
# tqdm is used for the per-epoch progress bar but is not imported anywhere
# else in this file. Fall back to a plain iterator when it is not installed.
try:
    from tqdm import tqdm
except ImportError:
    def tqdm(iterable):
        """Return *iterable* unchanged (no progress bar available)."""
        return iterable

np.random.seed(SEED)
# NOTE(review): the number of epochs is hard-coded to 10 here although
# EPOCHS = 100 is defined with the other parameters - confirm which is meant.
for epoch in range(10):
    for batch in tqdm(range(STEPS_PER_EPOCH)):

        # Generate a batch of fake images. verbose=0 stops predict() from
        # printing its own progress bar on every step.
        noise = np.random.normal(0,1, size=(BATCH_SIZE, NOISE_DIM))
        fake_X = generator.predict(noise, verbose=0)

        # Draw a random batch of real images (with replacement).
        idx = np.random.randint(0, X_train.shape[0], size=BATCH_SIZE)
        real_X = X_train[idx]

        # Real images first (label 1), generated images second (label 0).
        X = np.concatenate((real_X, fake_X))

        disc_y = np.zeros(2*BATCH_SIZE)
        disc_y[:BATCH_SIZE] = 1

        d_loss = discriminator.train_on_batch(X, disc_y)

        # Train the stacked model with "real" labels for the generated images.
        y_gen = np.ones(BATCH_SIZE)
        g_loss = gan.train_on_batch(noise, y_gen)

    print(f"EPOCH: {epoch + 1} Generator Loss: {g_loss:.4f} Discriminator Loss: {d_loss:.4f}")
    # Preview ten freshly generated images after every epoch.
    noise = np.random.normal(0, 1, size=(10,NOISE_DIM))
    sample_images(noise, (2,5))

Predict the image

In [None]:
# Run the generator on the current `noise` array. When the file is executed
# top to bottom, that is the 10-sample batch left over from the last epoch of
# the training loop. The second line only displays the resulting array shape.
generated_images = generator.predict(noise)
generated_images.shape

Generate Sample Images

In [None]:
# Draw 100 new noise vectors, show the generated images in a 10 x 10 grid
# and request saving via save=True.
noise = np.random.normal(0, 1, size=(100, NOISE_DIM))
sample_images(noise, (10,10), (24,20), save=True)

 ## Plot comparing the distribution of real and generated images.

In [None]:
# Overlay the pixel-value distributions of the real and the generated images
# on one axes and save the figure.
fig, axs = plt.subplots(ncols=1, nrows=1, figsize=(18,10))

for pixel_values, legend_label, colour in (
    (X_train, 'Real Images', '#fc0328'),
    (generated_images, 'Generated Images', '#0c06c7'),
):
    sns.distplot(pixel_values, label=legend_label, hist=True, color=colour, ax=axs)

axs.legend(loc='upper right', prop={'size': 12})

output_path = 'GANplot100.png'
plt.savefig(output_path, bbox_inches='tight')

plt.show()


## Save the Generated Images

In [None]:
def save_images(images, path, start_index=0):
    """
    Saves a batch of images to the specified directory.

    Each image is written as its own PNG containing only the image pixels
    (no axes, ticks or figure margins), rendered with the gray colormap.

    Parameters:
    - images: A batch of images as a NumPy array.
    - path: The directory where images will be saved (created if missing).
    - start_index: The starting index for naming saved image files.
    """
    os.makedirs(path, exist_ok=True)

    for i, img_array in enumerate(images):
        filename = f"{path}/GANpos{start_index + i}.png"

        # plt.imsave writes the array directly. The previous imshow + savefig
        # pair stored the whole matplotlib figure and kept stacking every
        # image onto the same axes.
        # NOTE(review): intensities are scaled per image (matplotlib default);
        # pass vmin=-1, vmax=1 if a fixed mapping is preferred.
        plt.imsave(filename, img_array.reshape((WIDTH, HEIGHT)), cmap='gray')

# Generate 1000 images and write them to the 'GANpos' directory.
noise = np.random.normal(0, 1, size=(1000,NOISE_DIM))
generated_images = generator.predict(noise)
print(generated_images.shape)
save_images(generated_images, 'GANpos')


# Evaluating the results

## Load the Training Data

In [None]:
# Directories holding the real training and test images.
pathToTrainNeg = 'trainDataNeg'
pathToTrainPos = 'trainDataPos'
pathToTestNeg = 'testSetNeg'
pathToTestPos = 'testSetPos'

# Total number of real training / test images to load (half per class).
trainDataSize = 10
testSetSize = 1000

# Side length that the images are resized to.
img_height = 64

# NOTE(review): these slice the path *strings*, so first50 ends up as the
# first five characters of the path ('train'), not as a list of files.
first50 = pathToTrainNeg[:5]
first50 = pathToTrainPos[:5]



## Load the Generated Data

In [None]:
# Switch for adding GAN-generated images to the training data: 'yes' or 'no'.
doUWantAugmentation = 'yes'  #yes or no
# Total number of generated images to add (half per class).
numberOfAugmentedImages = 1000

# Directories holding the GAN-generated images.
pathToGANNeg = 'GANneg'
pathToGANPos = 'GANpos'

# NOTE(review): these slice the path *strings*; both paths are shorter than
# 500 characters, so first50 is simply the full path text.
first50 = pathToGANNeg[:500]
first50 = pathToGANPos[:500]

Load the x_train and y_train to store training and test data.

In [None]:
# Four independent, initially empty containers for images and labels.
x_train, y_train, x_test, y_test = [], [], [], []

Ensuring equal number of positive and negative samples in training data.

In [None]:
# List each folder once. Calling os.listdir inside the loop rescans both
# directories on every iteration.
train_neg_files = os.listdir(pathToTrainNeg)
train_pos_files = os.listdir(pathToTrainPos)

# Append one negative (label 0) and one positive (label 1) image per
# iteration, so the labels in y_train alternate 0, 1, 0, 1, ...
for i in range(int(trainDataSize/2)):
    for folder, files, label in ((pathToTrainNeg, train_neg_files, 0),
                                 (pathToTrainPos, train_pos_files, 1)):
        img_path = '{}/{}'.format(folder, files[i])
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None for unreadable files; fail with the
            # offending path instead of an opaque TypeError on img[:,:,0].
            raise ValueError(f"Could not read image file: {img_path}")
        # Keep the first channel only and resize to img_height x img_height.
        x_train.append(cv2.resize(img[:,:,0], (img_height, img_height)))
        y_train.append(label)

 Check whether the training data should be augmented with GAN-generated images; if so, add them to the training set.

In [None]:
if doUWantAugmentation == 'yes':
    # List each folder once. Calling os.listdir inside the loop rescans both
    # directories on every iteration.
    gan_neg_files = os.listdir(pathToGANNeg)
    gan_pos_files = os.listdir(pathToGANPos)

    # Append one generated negative (label 0) and one generated positive
    # (label 1) image per iteration, keeping the labels alternating.
    for i in range(int(numberOfAugmentedImages/2)):
        for folder, files, label in ((pathToGANNeg, gan_neg_files, 0),
                                     (pathToGANPos, gan_pos_files, 1)):
            img_path = '{}/{}'.format(folder, files[i])
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread returns None for unreadable files; fail with the
                # offending path instead of an opaque TypeError.
                raise ValueError(f"Could not read image file: {img_path}")
            # Keep the first channel only and resize.
            x_train.append(cv2.resize(img[:,:,0], (img_height, img_height)))
            y_train.append(label)

 ## Load Positive and negative images and convert them into arrays.

Load test images from the test directories to populate `x_test` and `y_test`. Images are taken from both the positive and the negative samples. The image and label lists are then converted to arrays and their lengths are printed.

In [None]:
# List each folder once. Calling os.listdir inside the loop rescans both
# directories on every iteration.
test_neg_files = os.listdir(pathToTestNeg)
test_pos_files = os.listdir(pathToTestPos)

# Append one negative (label 0) and one positive (label 1) image per
# iteration, so the labels in y_test alternate 0, 1, 0, 1, ...
for i in range(int(testSetSize/2)):
    for folder, files, label in ((pathToTestNeg, test_neg_files, 0),
                                 (pathToTestPos, test_pos_files, 1)):
        img_path = '{}/{}'.format(folder, files[i])
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None for unreadable files; fail with the
            # offending path instead of an opaque TypeError on img[:,:,0].
            raise ValueError(f"Could not read image file: {img_path}")
        # Keep the first channel only and resize to img_height x img_height.
        x_test.append(cv2.resize(img[:,:,0], (img_height, img_height)))
        y_test.append(label)

# Convert the accumulated lists to NumPy arrays.
x_train=np.array(x_train)
y_train=np.array(y_train)
x_test=np.array(x_test)
y_test=np.array(y_test)


print(len(x_train))
print(len(y_train))
print(len(x_test))
print(len(y_test))

Plot one image from the data

In [None]:
i=8
print(y_train[i])
plt.imshow(x_train[i],cmap='gray')

## PredictwithUnet

In [None]:
def predictWithUnet(x_train, y_train, x_test, numberOfEpochs):
    """Train a CNN classifier and return its predictions.

    Despite the name, the network built here is a plain sequential CNN:
    two convolutional stages with max-pooling followed by dense layers and a
    single sigmoid output.

    Parameters:
    - x_train: Training images, either (N, H, H) or (N, H, H, 1).
    - y_train: Binary training labels.
    - x_test: Test images in the same layout as x_train.
    - numberOfEpochs: Maximum number of epochs (early stopping may end sooner).

    Returns:
    - ``[predictions, trainPreds]``: model outputs for x_test and x_train.
    """
    # Add the channel axis explicitly (as predictWithInception does for its
    # inputs) rather than relying on Keras to reshape 3-D input implicitly.
    x_train = np.asarray(x_train)
    x_test = np.asarray(x_test)
    if x_train.ndim == 3:
        x_train = x_train[..., np.newaxis]
    if x_test.ndim == 3:
        x_test = x_test[..., np.newaxis]

    img_height = x_train[0].shape[0]
    model = Sequential([
        Conv2D(64, 3, padding='same', input_shape=(img_height, img_height, 1)),
        LeakyReLU(alpha=0.2),
        BatchNormalization(),
        Conv2D(64, 3, padding='same'),
        LeakyReLU(alpha=0.2),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),

        Conv2D(128, 3, padding='same'),
        LeakyReLU(alpha=0.2),
        BatchNormalization(),
        Conv2D(128, 3, padding='same'),
        LeakyReLU(alpha=0.2),
        BatchNormalization(),
        MaxPooling2D(strides=(2, 2)),

        Flatten(),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dense(64, activation='relu'),
        BatchNormalization(),
        Dense(32, activation='relu'),
        Dense(1, activation='sigmoid')
    ])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(1e-3),
        loss=tf.keras.losses.BinaryCrossentropy(),
        metrics=[tf.keras.metrics.BinaryAccuracy()]
    )

    # Stop once the validation loss has not improved for 15 epochs and roll
    # back to the best weights seen.
    earlystopping = EarlyStopping(monitor='val_loss', mode='min', patience=15, restore_best_weights=True)

    # NOTE(review): Keras documents that validation_split takes the *last*
    # fraction of the samples before shuffling; with augmented images appended
    # after the real ones, the validation set would consist of augmented
    # images only - confirm this is intended.
    history = model.fit(x=x_train, y=y_train, epochs=numberOfEpochs, validation_split=0.3, callbacks=[earlystopping], shuffle=True)
    predictions = model.predict(x_test)
    trainPreds = model.predict(x_train)

    # Plot the training-loss curve.
    plt.plot(history.history['loss'], color='blue')
    plt.title('Model Loss Progression')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.show()

    return [predictions, trainPreds]


## InceptionV3

In [None]:
def predictWithInception(x_train,y_train,x_test,numEpochs):
    """Train an InceptionV3-based classifier from scratch and return predictions.

    Images without a channel axis (3-D input) are replicated into three
    identical channels. The head is global average pooling followed by a
    single linear unit, so the returned values are logits.

    NOTE(review): the Keras documentation states a minimum input size of
    75x75 for InceptionV3 - confirm callers do not pass smaller images.

    Returns:
    - ``[ntfPredictions, ntfTrainPreds]``: model outputs for x_test and x_train.
    """
    side = x_train[0].shape[0]

    if x_train.ndim == 3:
        # (N, H, W) -> (N, H, W, 3) by repeating the single channel.
        x_train = np.stack((x_train, x_train, x_train), axis=-1)
        x_test = np.stack((x_test, x_test, x_test), axis=-1)

    # Randomly initialised backbone (weights=None), fully trainable.
    backbone = keras.applications.InceptionV3(
        weights=None,
        input_shape=(side, side, 3),
        include_top=False,
    )
    backbone.trainable = True

    image_input = keras.Input(shape=(side, side, 3))
    features = backbone(image_input, training=True)
    pooled = keras.layers.GlobalAveragePooling2D()(features)
    logits = keras.layers.Dense(1)(pooled)
    model = keras.Model(image_input, logits)

    model.compile(
        optimizer=keras.optimizers.SGD(1e-3),
        loss=keras.losses.BinaryCrossentropy(from_logits=True),
        metrics=[keras.metrics.BinaryAccuracy()],
    )
    model.fit(x=x_train, y=y_train, epochs=numEpochs, validation_split=0.3, shuffle=True)

    ntfPredictions = model.predict(x_test)
    ntfTrainPreds = model.predict(x_train)
    return [ntfPredictions, ntfTrainPreds]

Function for deciding the Threshold

In [None]:
def findThreshold(trainPredictions):
    """Return the ROC threshold that maximises Youden's J (TPR - FPR).

    The true labels are not passed in: they are assumed to alternate
    0, 1, 0, 1, ... in the same order as trainPredictions.

    Returns:
    - A one-element list holding the selected threshold.
    """
    n_pairs = int(trainPredictions.shape[0]/2)
    alternating_labels = np.tile([0, 1], n_pairs)

    false_pos_rate, true_pos_rate, cutoffs = roc_curve(
        alternating_labels, trainPredictions, drop_intermediate=False
    )

    best = np.argmax(true_pos_rate - false_pos_rate)
    return [cutoffs[best]]

Function for evaluating the test set prediction

In [None]:
def evaluatePredictions(testPredictions,threshold):
    """Score test predictions against alternating 0/1 labels.

    The true labels are not passed in: they are assumed to alternate
    0, 1, 0, 1, ... in the same order as testPredictions. A prediction below
    *threshold* counts as class 0, anything else as class 1.

    Returns:
    - ``[accuracy, sensitivity, specificity, f1, auc]``.
    """
    y_test = np.array([0, 1] * int(testPredictions.shape[0]/2))

    # confusion[actual][predicted]
    confusion = [[0, 0], [0, 0]]
    for actual, score in zip(y_test, testPredictions):
        predicted = 0 if score < threshold else 1
        confusion[actual][predicted] += 1

    (TN, FP), (FN, TP) = confusion

    acc = (TN+TP)/(TN+FN+TP+FP)
    sen = TP/(FN+TP)
    spe = TN/(TN+FP)
    f1 = TP/(TP+1/2*(FP+FN))

    # The AUC is computed from the raw scores, independent of the threshold.
    fpr, tpr, _ = roc_curve(y_test, testPredictions, drop_intermediate=False)
    auc_value = auc(fpr, tpr)

    return [acc, sen, spe, f1, auc_value]

## Run the Unet

In [None]:
# Train once, pick the threshold on the training predictions and report the
# test-set metrics.
numberOfEpochs = 100
k = 0

testPredictions, trainPredictions = predictWithUnet(x_train, y_train, x_test, numberOfEpochs)
threshold = findThreshold(trainPredictions)
acc, sen, spe, f1, auc_value = evaluatePredictions(testPredictions, threshold)

for metric_label, metric_value in (
    ('Accuracy:', acc),
    ('Sensitivity:', sen),
    ('Specificity:', spe),
    ('F1 score:', f1),
    ('AUC:', auc_value),
):
    print(metric_label, metric_value)

Save results and write result to csv file

In [None]:
# Repeat training/evaluation 20 times and store the accuracy of every run.
results_dir = 'resultsGAN'
os.makedirs(results_dir, exist_ok=True)
results_file = os.path.join(results_dir, 'results10.1000.csv')

results = []
for j in range(20):
    testPredictions, trainPredictions = predictWithUnet(x_train, y_train, x_test, numberOfEpochs)
    threshold = findThreshold(trainPredictions)
    acc, sen, spe, f1, auc_value = evaluatePredictions(testPredictions, threshold)
    # Only the accuracy is recorded, one single-column row per run.
    results.append([acc])

# Header row followed by one row per run.
with open(results_file, 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerows([['Accuracy'], *results])


Calculate mean accuracy with standard deviation

In [None]:
def calculate_mean_and_std_from_csv(file_path):
    """Return the mean and population standard deviation of a CSV's first column.

    Parameters:
    - file_path: Path of a CSV file with a header row; the first column is
      read.

    Returns:
    - ``(mean, std)`` where ``std`` divides by the number of values (not
      n - 1), or ``(None, None)`` when the column is empty or the file
      cannot be processed. In the latter case the error is printed.
    """
    # Best-effort by design: any failure is reported and turned into
    # (None, None), which the caller checks for.
    try:
        df = pd.read_csv(file_path)
        values = df.iloc[:, 0].tolist()

        if not values:
            return None, None

        mean_value = sum(values) / len(values)
        std_deviation = (sum((x - mean_value) ** 2 for x in values) / len(values)) ** 0.5
        return mean_value, std_deviation
    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
        return None, None

# Summarise the accuracies stored in the results file.
file_path = 'resultsGAN/results10.1000.csv'
mean_value, std_deviation = calculate_mean_and_std_from_csv(file_path)

if mean_value is None or std_deviation is None:
    print("No valid numerical values found in the file.")
else:
    print(f"The mean value of the numbers in the file is: {mean_value}")
    print(f"The standard deviation of the numbers in the file is: {std_deviation}")


## Run the Wilcoxon test

In [None]:
folder_x = '912'
folder_y = '911'

def read_data_from_folder(folder):
    """Collect every numeric line from the regular files in *folder*.

    Files are read in sorted file-name order so the returned sequence is
    reproducible. This matters because the values are later fed to a paired
    test, where position determines which values are compared. Lines that do
    not parse as a float are skipped; sub-directories are ignored.

    Returns:
    - A list of floats in file order, then line order.
    """
    data = []
    # os.listdir returns names in arbitrary order; sort for a stable result.
    for filename in sorted(os.listdir(folder)):
        filepath = os.path.join(folder, filename)
        if not os.path.isfile(filepath):
            continue
        with open(filepath, 'r') as file:
            for line in file:
                try:
                    data.append(float(line.strip()))
                except ValueError:
                    # Header or other non-numeric line.
                    continue
    return data

# Paired Wilcoxon signed-rank test on the values read from the two folders.
x_values = read_data_from_folder(folder_x)
y_values = read_data_from_folder(folder_y)

stat, p_value = wilcoxon(x_values, y_values)

print(f'Wilcoxon test statistic: {stat}')
print(f'p-value: {p_value}')

# Significance level for the decision below.
alpha = 0.05
if not p_value < alpha:
    print("There is no statistically significant difference between x and y (fail to reject the null hypothesis).")
else:
    print("There is a statistically significant difference between x and y (reject the null hypothesis).")


## Run the Mann-Whitney U test

In [None]:
# Mann-Whitney U test between the values stored in two result files.
group1 = pd.read_csv('resultsGAN/results10.0.csv').squeeze().values
group2 = pd.read_csv('resultsGAN/results10.10.csv').squeeze().values

stat, p_value = mannwhitneyu(group1, group2)
print(f'Statistics={stat:.2f}, p={p_value:.2f}')

# Significance level for the decision below.
alpha = 0.05

if not p_value < alpha:
    print('Do not Reject Null Hypothesis (No significant difference between two samples)')
else:
    print('Reject Null Hypothesis (Significant difference between two samples)')