In [None]:
!pip install tensorflow

In [None]:
# Mount Google Drive at /content/drive (the google.colab module only exists in a Colab runtime).
# NOTE(review): the dataset paths used further down are local Windows paths rather
# than /content/drive paths - confirm which environment this notebook targets.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Import the necessary libraries
import gc
import os
import cv2
import random
import numpy as np
from tqdm import tqdm
from PIL import Image
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
from tensorflow.keras import layers
from tensorflow.keras import backend as K
from sklearn.metrics import accuracy_score
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam, RMSprop, SGD
from tensorflow.keras.applications.resnet50 import ResNet50
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
pip install split-folders

In [None]:
import splitfolders

# Folder that holds the full, unsplit dataset.
input_folder = 'C:/Users/nusra/OneDrive/Desktop/Data2/Dataset'

# Split the dataset with a 70/20/10 ratio; the fixed seed keeps the split
# reproducible. A two-element ratio such as `(.8, .2)` would instead produce only
# a training and a validation set.
splitfolders.ratio(
    input_folder,
    output='C:/Users/nusra/OneDrive/Desktop/Data2/Evaluation',
    seed=42,
    ratio=(.7, .2, .1),
    group_prefix=None,
)


In [None]:
# Directories of the three dataset splits used in the rest of the notebook.
train_directory = "C:/Users/nusra/OneDrive/Desktop/Data2/Evaluation/train"            # training split
validation_directory = "C:/Users/nusra/OneDrive/Desktop/Data2/Evaluation/val"         # validation split
test_directory = "C:/Users/nusra/OneDrive/Desktop/Data2/Evaluation/test"              # held-out test split

In [None]:
# Load the images from the train, validation, and test directories

In [None]:
DATADIR = train_directory
CATEGORIES = ["Monkeypox","Other"]
training_data = []

IMG_SIZE = 100

def create_training_data():
  """Read every training image, resize it and append ``[image, label]`` to ``training_data``.

  Images are read from ``DATADIR/<category>`` for each entry of ``CATEGORIES``;
  the label is the index of the category in ``CATEGORIES``. Files that cannot be
  decoded or resized are reported and skipped so one bad file does not abort
  the whole load.
  """
  for category in CATEGORIES:
    path = os.path.join(DATADIR,category)
    class_num = CATEGORIES.index(category)
    for img in tqdm(os.listdir(path)):
      img_path = os.path.join(path, img)
      img_array = cv2.imread(img_path)  # returns None when the file is not a readable image
      if img_array is None:
        print("Skipping unreadable image:", img_path)
        continue
      try:
        new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))  # resize to normalize data size
      except cv2.error as e:
        print(e)
        continue
      # Store the RESIZED image so every sample has the same shape.
      training_data.append([new_array, class_num])

create_training_data()
print(len(training_data))

In [None]:
DATADIR = validation_directory
CATEGORIES = ["Monkeypox","Other"]
validation_data = []

IMG_SIZE = 100

def create_validation_data():
  """Read every validation image, resize it and append ``[image, label]`` to ``validation_data``.

  Images are read from ``DATADIR/<category>`` for each entry of ``CATEGORIES``;
  the label is the index of the category in ``CATEGORIES``. Files that cannot be
  decoded or resized are reported and skipped so one bad file does not abort
  the whole load.
  """
  for category in CATEGORIES:
    path = os.path.join(DATADIR,category)
    class_num = CATEGORIES.index(category)
    for img in tqdm(os.listdir(path)):
      img_path = os.path.join(path, img)
      img_array = cv2.imread(img_path)  # returns None when the file is not a readable image
      if img_array is None:
        print("Skipping unreadable image:", img_path)
        continue
      try:
        new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))  # resize to normalize data size
      except cv2.error as e:
        print(e)
        continue
      # Store the RESIZED image so every sample has the same shape.
      validation_data.append([new_array, class_num])

create_validation_data()
print(len(validation_data))

In [None]:
DATADIR = test_directory
CATEGORIES = ["Monkeypox","Other"]
testing_data = []

IMG_SIZE = 100

def create_testing_data():
  """Read every test image, resize it and append ``[image, label]`` to ``testing_data``.

  Images are read from ``DATADIR/<category>`` for each entry of ``CATEGORIES``;
  the label is the index of the category in ``CATEGORIES``. Files that cannot be
  decoded or resized are reported and skipped so one bad file does not abort
  the whole load.
  """
  for category in CATEGORIES:
    path = os.path.join(DATADIR,category)
    class_num = CATEGORIES.index(category)
    for img in tqdm(os.listdir(path)):
      img_path = os.path.join(path, img)
      img_array = cv2.imread(img_path)  # returns None when the file is not a readable image
      if img_array is None:
        print("Skipping unreadable image:", img_path)
        continue
      try:
        new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))  # resize to normalize data size
      except cv2.error as e:
        print(e)
        continue
      # Store the RESIZED image so every sample has the same shape.
      testing_data.append([new_array, class_num])

create_testing_data()
print(len(testing_data))

In [None]:
# Apply random shuffling and normalization

random.shuffle(training_data)
random.shuffle(validation_data)
random.shuffle(testing_data)

# Take the one-hot width from the class list so every split gets the same number
# of label columns, even when a class happens to be absent from one split.
num_classes = len(CATEGORIES)

# Training split: separate images from labels, then scale pixels by 1/255.
X = [features for features, _ in training_data]
y = [label for _, label in training_data]

X_train = np.array(X)
X_train = X_train/255.0
y_train = np.array(y)


# Validation split.
X_v = [features for features, _ in validation_data]
y_v = [label for _, label in validation_data]

X_val = np.array(X_v)
X_val = X_val/255.0
y_val = np.array(y_v)


# Test split.
X_t = [features for features, _ in testing_data]
y_t = [label for _, label in testing_data]

X_test = np.array(X_t)
X_test = X_test/255.0
y_test = np.array(y_t)

# One-hot encode the integer class labels.
y_train = to_categorical(y_train, num_classes=num_classes)
y_val = to_categorical(y_val, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)

print(len(X_train), ": ",len(X_test), ": ", len(X_val))

In [None]:
# Re-wrap the three label sets as NumPy arrays (np.array returns a new array each time).
y_train, y_val, y_test = (
    np.array(label_matrix) for label_matrix in (y_train, y_val, y_test)
)

In [None]:
# Define the number of clients; using more clients can help improve performance.
# Only 5 clients are used here due to a lack of computational resources.

def create_clients(image_list, label_list, num_clients=5, initial='clients'):
    """Split paired images and labels into equally sized shards, one per client.

    Args:
        image_list: sequence of samples.
        label_list: sequence of labels aligned with ``image_list``.
        num_clients: number of shards / clients to create.
        initial: prefix of the generated client names (``<initial>_1``, ...).

    Returns:
        dict mapping each client name to its list of ``(image, label)`` pairs.
        Each shard holds ``len(data) // num_clients`` pairs; leftover pairs that
        do not fill a whole shard are dropped.

    Raises:
        ValueError: if there are fewer samples than clients, so that every
            shard would be empty.
    """
    #create a list of client names
    client_names = ['{}_{}'.format(initial, i+1) for i in range(num_clients)]
    data = list(zip(image_list, label_list))
    #shard data and place at each client
    size = len(data)//num_clients
    if size == 0:
        # Without this guard range() below fails with an unrelated
        # "arg 3 must not be zero" message.
        raise ValueError(
            'cannot split {} samples across {} clients: every client shard would be empty'.format(
                len(data), num_clients))
    shards = [data[i:i + size] for i in range(0, size*num_clients, size)]
    #number of clients must equal number of shards
    assert(len(shards) == len(client_names))
    return dict(zip(client_names, shards))
# Distribute the training set over five clients named client_1 ... client_5.
clients = create_clients(
    X_train,
    y_train,
    num_clients=5,
    initial='client',
)

def batch_data(data_shard, bs=16):
    """Turn one client's shard into a shuffled, batched ``tf.data.Dataset``.

    Args:
        data_shard: iterable of ``(sample, label)`` pairs.
        bs: batch size.

    Returns:
        A dataset of ``(samples, labels)`` batches, shuffled with a buffer as
        large as the shard.
    """
    # Unzip the pairs into one sequence of samples and one of labels.
    samples, targets = zip(*data_shard)
    tensors = (list(samples), list(targets))
    shuffled = tf.data.Dataset.from_tensor_slices(tensors).shuffle(len(targets))
    return shuffled.batch(bs)

#process and batch the training data for each client
clients_batched = {
    client_name: batch_data(shard) for client_name, shard in clients.items()
}

#process and batch the test set (a single batch that holds the whole test split)
test_batched = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(len(y_test))
clients_batched

In [None]:
# Drop the notebook-level names of the raw training arrays, then run a GC pass.
# NOTE(review): `clients` was built from these arrays and is still alive, so this
# may release less memory than expected - confirm.
del X_train, y_train, training_data
gc.collect()

In [None]:
# Import library for CNN model
from keras.layers import Input, Conv2D, Dense, Flatten, MaxPool2D
from keras.layers import Activation, Add, BatchNormalization, Dropout

In [None]:
class SimpleModel:
    """Builder for the small CNN used as both the global and the local model."""

    def build(self, input_shape=None):
        """Create a new, uncompiled Sequential CNN.

        Architecture: four Conv2D(3x3, relu, SAME) + 2x2 max-pool stages with
        256, 128, 64 and 32 filters, dropout 0.25, two dense layers (512 and
        256 units, relu) and a 2-unit sigmoid output layer.

        Args:
            input_shape: ``(height, width, channels)`` of one input image.
                Defaults to ``(IMG_SIZE, IMG_SIZE, 3)`` so the network input
                follows the image size configured in this notebook instead of
                a separately hard-coded value.

        Returns:
            The uncompiled ``Sequential`` model.
        """
        if input_shape is None:
            input_shape = (IMG_SIZE, IMG_SIZE, 3)

        model = Sequential()

        # First stage also declares the input shape.
        model.add(Conv2D(activation ='relu', input_shape = input_shape, filters=256, kernel_size=(3, 3), padding="SAME", strides=(1, 1)))
        model.add(MaxPool2D(pool_size=(2, 2)))

        # Remaining stages halve the filter count each time.
        for filters in (128, 64, 32):
            model.add(Conv2D(activation ='relu',filters=filters, kernel_size=(3, 3), padding="SAME", strides=(1,1)))
            model.add(MaxPool2D(pool_size=(2, 2)))

        model.add(Dropout(0.25))

        model.add(Flatten())
        model.add(Dense(512, activation='relu'))

        model.add(Dense(256, activation='relu'))

        model.add(Dense(2, activation='sigmoid'))

        return model


In [None]:
# Training hyper-parameters used when the local model is first compiled.
lr = 0.001
loss='binary_crossentropy'
# loss = 'categorical_crossentropy'
metrics = [keras.metrics.binary_accuracy]
# Only one optimizer is created: Adam with the learning rate above.
optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=lr)

In [None]:
# from classification_models.tfkeras import Classifiers

def weight_scalling_factor(clients_trn_data, client_name):
    """Return the share of all training data points held by ``client_name``.

    Data point counts are estimated as (number of batches) * (size of the
    client's first batch), so a smaller final batch is counted as a full one.

    Args:
        clients_trn_data: dict mapping client name to a batched dataset whose
            elements start with the feature batch.
        client_name: key of the client whose share is requested.

    Returns:
        ``local_count / global_count``.

    Raises:
        ValueError: if the client's dataset yields no batch.
    """
    def batch_count(name):
        # Number of batches in one client's dataset.
        return tf.data.experimental.cardinality(clients_trn_data[name]).numpy()

    #get the bs from the first batch only, without materialising the whole dataset
    first_batch = next(iter(clients_trn_data[client_name]), None)
    if first_batch is None:
        raise ValueError('client {!r} has no training batches'.format(client_name))
    bs = first_batch[0].shape[0]
    #first calculate the total training data points across clients
    global_count = sum(batch_count(name) for name in clients_trn_data)*bs
    print(global_count)
    # get the total number of data points held by a client
    local_count = batch_count(client_name)*bs
    return local_count/global_count

def scale_model_weights(weight, scalar):
    '''Multiply every entry of a model's weight list by ``scalar``.

    Returns a new list; the input list is left untouched.
    '''
    return [scalar * layer_weights for layer_weights in weight]

def sum_scaled_weights(scaled_weight_list):
    '''Sum the clients' scaled weights position by position.

    ``scaled_weight_list`` holds one weight list per client. Because each list
    was already multiplied by its client's scaling factor, the element-wise sum
    is the weighted average of the client weights.
    '''
    # zip(*...) groups the tensors that sit at the same position in every client's list.
    return [
        tf.math.reduce_sum(same_position_weights, axis=0)
        for same_position_weights in zip(*scaled_weight_list)
    ]

def test_local_model(X_test, Y_test,  model, comm_round):
    """Evaluate a client (local) model and print its accuracy and loss.

    Args:
        X_test: test samples passed to ``model.predict``.
        Y_test: one-hot ground-truth labels.
        model: model to evaluate.
        comm_round: communication round index, used only in the printed line.

    Returns:
        ``(acc, loss)``: accuracy of the argmax predictions and the categorical
        cross-entropy of the predicted scores.
    """
    # The model built in this notebook ends in an activated (sigmoid) layer, so
    # predict() returns scores rather than raw logits: from_logits must be False.
    cce = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
    scores = model.predict(X_test)
    loss = cce(Y_test, scores)
    # accuracy_score expects (y_true, y_pred).
    acc = accuracy_score(tf.argmax(Y_test, axis=1), tf.argmax(scores, axis=1))
    print('comm_round: {} | local_acc: {:.3%} | local_loss: {}'.format(comm_round, acc, loss))
    return acc, loss

def test_global_model(X_test, Y_test,  model, comm_round):
    """Evaluate the aggregated (global) model and print its accuracy and loss.

    Args:
        X_test: test samples passed to ``model.predict``.
        Y_test: one-hot ground-truth labels.
        model: model to evaluate.
        comm_round: communication round index, used only in the printed line.

    Returns:
        ``(acc, loss)``: accuracy of the argmax predictions and the categorical
        cross-entropy of the predicted scores.
    """
    # The model built in this notebook ends in an activated (sigmoid) layer, so
    # predict() returns scores rather than raw logits: from_logits must be False.
    cce = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
    scores = model.predict(X_test)
    loss = cce(Y_test, scores)
    # accuracy_score expects (y_true, y_pred).
    acc = accuracy_score(tf.argmax(Y_test, axis=1), tf.argmax(scores, axis=1))
    print('comm_round: {} | global_acc: {:.3%} | global_loss: {}'.format(comm_round, acc, loss))
    return acc, loss

# Optional warm start from a saved global model. It is off by default, so the
# global model is built from scratch and a missing checkpoint file cannot stop
# the notebook.
GLOBAL_MODEL_PATH = 'C:/Users/nusra/OneDrive/Desktop/Dataset/model/global_model_0.h5'
RESUME_FROM_CHECKPOINT = False

smlp_global = SimpleModel()
if RESUME_FROM_CHECKPOINT and os.path.exists(GLOBAL_MODEL_PATH):
    global_model = tf.keras.models.load_model(GLOBAL_MODEL_PATH)
    print("path found")
else:
    global_model = smlp_global.build()

global_model.summary()

# One local model instance is built here from the same architecture.
local_model = smlp_global.build()
local_model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

In [None]:
import multiprocessing
comms_round = 4  # number of communication rounds
count = 0        # number of communication rounds started so far
for comm_round in range(0, comms_round):
    count += 1
    # get the global model's weights - will serve as the initial weights for all local models
    global_weights = global_model.get_weights()

    #initial list to collect local model weights after scalling
    scaled_local_weight_list = list()

    #randomize client data - using keys
    client_names= list(clients_batched.keys())
    random.shuffle(client_names)

    #loop through each client and train the local model on that client's data
    for client in tqdm(client_names, desc = 'Progress Bar'):
        # compiling with a new Adam instance gives every client a new optimizer
        local_model.compile(optimizer=Adam(learning_rate=0.001),loss='binary_crossentropy',metrics=[keras.metrics.binary_accuracy])
        #set local model weight to the weight of the global model
        local_model.set_weights(global_weights)
        #fit local model with client's data; the dataset is already shuffled and
        #batched by batch_data, so no batch_size/shuffle arguments are given to fit()
        local_model.fit(clients_batched[client], epochs=25, verbose=1, validation_data=(X_val, y_val))
        test_local_model(X_test, y_test, local_model, comm_round)
        #scale the model weights and add to list
        scaling_factor = weight_scalling_factor(clients_batched, client)
        scaled_weights = scale_model_weights(local_model.get_weights(), scaling_factor)
        scaled_local_weight_list.append(scaled_weights)

        #clear the Keras session after each client
        K.clear_session()

    #to get the average over all the local model, we simply take the sum of the scaled weights
    average_weights = sum_scaled_weights(scaled_local_weight_list)

    print("done")

    #update global model
    global_model.set_weights(average_weights)

    # optional checkpoint of the updated global model:
    # global_model.save('C:/Users/nusra/OneDrive/Desktop/Dataset/model/global_model_' + str(count) + '.h5')

    #test global model and print out metrics after each communications round
    #(loop names differ from X_test so the global test array is not overwritten)
    for (X_batch, Y_batch) in test_batched:
        global_acc, global_loss = test_global_model(X_batch, Y_batch, global_model, comm_round)

In [None]:
# Plot the confusion matrix. Set normalize=True/False
def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
  """Draw a confusion matrix as an annotated heat map on a new 5x5 figure.

  Args:
    cm: 2-D NumPy array of counts (rows are true labels, columns are predictions).
    classes: tick labels, one per class.
    normalize: if True, every row is divided by its sum and rounded to two
      decimals before plotting; rows without samples are shown as 0.
    title: figure title.
    cmap: matplotlib colour map for the heat map.
  """
  # Normalise BEFORE drawing so that the colours and the cell text show the same values.
  if normalize:
    row_totals = cm.sum(axis=1)[:, np.newaxis]
    with np.errstate(divide='ignore', invalid='ignore'):  # empty rows give 0/0
      cm = cm.astype('float') / row_totals
    cm = np.around(cm, decimals=2)
    cm[np.isnan(cm)] = 0.0
    print("Normalized confusion matrix")
  else:
    print('Confusion matrix, without normalization')

  plt.figure(figsize=(5,5))
  plt.imshow(cm, interpolation='nearest', cmap=cmap)
  plt.title(title)
  plt.colorbar()

  tick_marks = np.arange(len(classes))
  plt.xticks(tick_marks, classes, rotation=45)
  plt.yticks(tick_marks, classes)

  # Cells above half of the maximum get white text, the others black text.
  thresh = cm.max() / 2.

  for i, j in np.ndindex(*cm.shape):
    plt.text(j, i, cm[i, j], horizontalalignment="center", color="white" if cm[i, j] > thresh else "black")

  plt.tight_layout()
  plt.ylabel('True label')
  plt.xlabel('Predicted label')

# Score the test set with the global model, then reduce both the predicted scores
# and the one-hot ground truth to class indices for the report.
predict_x = global_model.predict(X_test)
y_pred = predict_x.argmax(axis=1)
y_testreport = y_test.argmax(axis=1)

print('Classification Report')
report_text = classification_report(y_testreport, y_pred, target_names = ["Monkeypox","Other"])
print(report_text)

In [None]:
import itertools
print('Confusion Matrix')
# Rows of cm are the true classes, columns the predicted classes.
cm = confusion_matrix(y_true=y_testreport, y_pred=y_pred)
plot_confusion_matrix(cm, classes=["Monkeypox","Other"], title='Confusion Matrix')

In [None]:
# Plot, for each true class, the fraction of its samples that were misclassified
class_names = ["Monkeypox","Other"]
# Diagonal = correct predictions per class; row sum = samples per true class.
incorr_fraction = 1 - np.diag(cm) / np.sum(cm, axis=1)
fig, ax = plt.subplots(figsize=(5, 5))
ax.bar(np.arange(2), incorr_fraction)
ax.set_xlabel('True Label')
ax.set_ylabel('Fraction of incorrect predictions')
plt.xticks(np.arange(2), class_names)

In [None]:
from sklearn.metrics import roc_curve, auc

# ROC analysis for class index 1 ("Other"), the positive label roc_curve uses for
# 0/1 targets. The curve is built from the model's predicted score for that class:
# hard argmax labels would collapse it to a single operating point.
y_score = predict_x[:, 1]
fpr, tpr, thresholds = roc_curve(y_testreport, y_score)
roc_auc = auc(fpr, tpr)

# Plot the ROC curve
plt.figure(figsize=(5, 5))
plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')  # chance diagonal
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic')
plt.legend(loc="lower right")
plt.show()
