In [None]:
import numpy as np
import random
import copy
import sys
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold
from sklearn.utils import shuffle
import cv2
import os
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
import time

### Create clients from two classes of data

In [None]:
def get_clients(class1, class2, n_clients = 3, test_size=0.2, split_seed=None):
  """Partition two classes of samples into per-client training sets plus one pooled test set.

  Each class is clustered independently into ``n_clients`` groups with KMeans
  (fixed ``random_state=0``); client ``i`` receives cluster ``i`` of both
  classes. Every cluster is split into a train and a test part, and the test
  parts of all clients are concatenated into a single shared test set.

  Args:
    class1: array with one sample per row; its rows are labelled 0.
    class2: array with one sample per row; its rows are labelled 1.
    n_clients: number of clients, i.e. number of KMeans clusters per class.
    test_size: forwarded to ``train_test_split`` for every cluster.
    split_seed: optional seed for the train/test splits. ``None`` (default)
      leaves the splits unseeded, exactly as before; any other value makes
      the splits reproducible.

  Returns:
    ``(clients_X, clients_y, X_test, y_test)`` where ``clients_X[i]`` is
    ``[class-0 train rows, class-1 train rows]`` of client ``i``,
    ``clients_y[i]`` holds the matching label arrays, and ``X_test`` /
    ``y_test`` are the pooled test samples and labels.
  """
  clients_X = []
  clients_y = []

  clientsXtest = []
  clientsYtest = []

  clusters_1 = KMeans(n_clusters=n_clients, random_state=0).fit_predict(class1)
  clusters_2 = KMeans(n_clusters=n_clients, random_state=0).fit_predict(class2)

  # One shared generator, so successive splits draw different permutations
  # while the whole sequence stays reproducible for a given seed.
  split_rng = None if split_seed is None else np.random.RandomState(split_seed)

  for i in range(n_clients):
    # Select each cluster's members once instead of re-evaluating the mask.
    members_0 = class1[clusters_1 == i]
    members_1 = class2[clusters_2 == i]

    X_train0, X_test0, y_train0, y_test0 = train_test_split(
        members_0, np.zeros((members_0.shape[0],)), test_size=test_size, random_state=split_rng)
    X_train1, X_test1, y_train1, y_test1 = train_test_split(
        members_1, np.ones((members_1.shape[0],)), test_size=test_size, random_state=split_rng)

    clients_X.append([X_train0, X_train1])
    clients_y.append([y_train0, y_train1])

    clientsXtest.extend([X_test0, X_test1])
    clientsYtest.extend([y_test0, y_test1])

  X_test = np.concatenate(clientsXtest, axis=0)
  y_test = np.concatenate(clientsYtest, axis=0)

  return clients_X, clients_y, X_test, y_test

In [None]:
def get_total_from_clients(clients_X,clients_y):
  """Pool the per-client training splits back into one dataset per class.

  Args:
    clients_X: sequence of ``[class-0 samples, class-1 samples]`` pairs, one per client.
    clients_y: sequence of ``[class-0 labels, class-1 labels]`` pairs, one per client.

  Returns:
    ``([X_class0, X_class1], [y_class0, y_class1])`` where every array is the
    row-wise concatenation over all clients, in client order.
  """
  def _pool(per_client, class_idx):
    # Gather one class from every client and stack the pieces along axis 0.
    return np.concatenate([pair[class_idx] for pair in per_client], axis=0)

  pooled_X = [_pool(clients_X, 0), _pool(clients_X, 1)]
  pooled_y = [_pool(clients_y, 0), _pool(clients_y, 1)]

  return (pooled_X, pooled_y)

# SVM

In [None]:
class SVM:
  """Linear soft-margin SVM trained by per-sample sub-gradient descent on the hinge loss.

  Training data is passed per class: ``X_train`` and ``y_train`` are two-element
  sequences ``[class_0_part, class_1_part]``. Labels ``<= 0`` form the negative
  class and all other labels the positive class. The decision value of a
  sample ``x`` is ``np.dot(x, w) - b``; predictions are reported as 0 / 1.
  """

  def __init__(self, X_train, y_train, X_test, y_test, val=True, val_type='k_fold', val_distribution='balanced', k=5, learning_rate=0.001, lambda_param=0.01, n_iters=1000):
    """Store the data and hyper-parameters; the model itself starts untrained.

    Args:
      X_train: ``[class-0 samples, class-1 samples]``.
      y_train: ``[class-0 labels, class-1 labels]``.
      X_test: samples used by ``predict`` / ``accuracy`` / ``precision``.
      y_test: labels matching ``X_test``.
      val: whether ``fit`` uses a validation split for early stopping.
      val_type: ``'k_fold'`` or ``'cross_val'`` (only read when ``val`` is true).
      val_distribution: ``'balanced'`` splits each class separately,
        ``'unbalanced'`` splits the pooled data.
      k: number of folds for ``'k_fold'``.
      learning_rate: step size of each update.
      lambda_param: L2 regularisation strength.
      n_iters: maximum number of passes over the training data.
    """
    self.lr = learning_rate
    self.lambda_param = lambda_param
    self.n_iters = n_iters

    self.X_train = X_train
    self.y_train = y_train

    self.X_test = X_test
    self.y_test = y_test

    self.val = val
    self.val_type = val_type
    self.val_distribution = val_distribution
    self.k = k

    # An empty weight vector and a None bias mark the model as untrained;
    # Gradient_update replaces them with zeros on first use.
    self.w = np.array([])
    self.b = None

  def _predict_labels(self, X, w, b):
    """Return 0/1 labels for the rows of ``X`` under hyperplane ``(w, b)``.

    A decision value of exactly 0 maps to label 0.
    """
    approx = np.sign(np.dot(X, w) - b)
    return np.where(approx < 0, 0, approx)

  def Gradient_update(self, X_train, y_train, X_val=None, y_val=None):
    """Run up to ``n_iters`` passes of per-sample updates, continuing from the current ``w`` / ``b``.

    When ``self.val`` is true and validation data is supplied, validation
    accuracy is checked after every 10th pass (starting with the first). An
    improvement becomes the new best model; otherwise the best model seen so
    far is restored and training stops.

    Args:
      X_train: 2-D array of training samples, one per row.
      y_train: labels for ``X_train``; values ``<= 0`` are the negative class.
      X_val: optional validation samples.
      y_val: optional validation labels (0 / 1).
    """
    n_features = X_train.shape[1]
    y_ = np.where(y_train <= 0, -1, 1)

    if self.w.size == 0:
      self.w = np.zeros(n_features)
    if self.b is None:
      self.b = 0

    # Skip validation instead of crashing when no validation data was passed.
    validate = self.val and X_val is not None and y_val is not None
    if validate:
      # The baseline is the model we start from. Starting from all-zero
      # weights (as before) threw away warm-start weights whenever the
      # first check did not beat the zero model.
      w_best = self.w.copy()
      b_best = self.b
      best_acc = accuracy_score(y_val, self._predict_labels(X_val, w_best, b_best))

    for i in range(self.n_iters):
      for idx, x_i in enumerate(X_train):
        if y_[idx] * (np.dot(x_i, self.w) - self.b) >= 1:
          # Margin satisfied: only the regularisation term contributes.
          self.w -= self.lr * (2 * self.lambda_param * self.w)
        else:
          # Margin violated: add the hinge-loss sub-gradient of this sample.
          self.w -= self.lr * (2 * self.lambda_param * self.w - x_i * y_[idx])
          self.b -= self.lr * y_[idx]

      if validate and i % 10 == 0:
        current_acc = accuracy_score(y_val, self._predict_labels(X_val, self.w, self.b))
        if current_acc > best_acc:
          w_best = self.w.copy()
          b_best = self.b
          best_acc = current_acc
        else:
          self.w = w_best.copy()
          self.b = b_best
          break

  def Cross_validation(self, val_split):
    """Train once with a single hold-out validation split.

    Args:
      val_split: ``test_size`` passed to ``train_test_split`` for the hold-out.

    Raises:
      ValueError: if ``val_distribution`` is neither ``'balanced'`` nor ``'unbalanced'``.
    """
    if self.val_distribution == 'balanced':
      # Split each class separately so both parts keep the class proportions.
      X_train0, X_val0, y_train0, y_val0 = train_test_split(self.X_train[0], self.y_train[0], test_size=val_split)
      X_train1, X_val1, y_train1, y_val1 = train_test_split(self.X_train[1], self.y_train[1], test_size=val_split)

      X_train = np.concatenate((X_train0, X_train1), axis=0)
      y_train = np.concatenate((y_train0, y_train1), axis=0)

      X_val = np.concatenate((X_val0, X_val1), axis=0)
      y_val = np.concatenate((y_val0, y_val1), axis=0)

    elif self.val_distribution == 'unbalanced':
      X_train = np.concatenate((self.X_train[0], self.X_train[1]), axis=0)
      y_train = np.concatenate((self.y_train[0], self.y_train[1]), axis=0)

      X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=val_split)

    else:
      raise ValueError("val_distribution must be 'balanced' or 'unbalanced', got %r" % (self.val_distribution,))

    X_train, y_train = self.random_shuffle(X_train, y_train)
    self.Gradient_update(X_train, y_train, X_val, y_val)

  def k_fold_cross_validation(self):
    """Train ``k`` models, each validated on a different fold, and keep the best one.

    Every fold starts from the weights the model had when this method was
    called. The model with the highest accuracy on its own validation fold
    becomes the final ``w`` / ``b``; ties go to the earliest fold.

    Raises:
      ValueError: if ``k < 2`` or ``val_distribution`` is not recognised.
    """
    if self.k < 2:
      raise ValueError("k-fold cross validation needs k >= 2, got %r" % (self.k,))

    if self.val_distribution == 'unbalanced':
      X_all = np.concatenate((self.X_train[0], self.X_train[1]), axis=0)
      y_all = np.concatenate((self.y_train[0], self.y_train[1]), axis=0)

      # The first fold is a shuffled hold-out of roughly 1/k of the data;
      # the remainder is cut into k-1 further folds.
      X_rest, X_first, y_rest, y_first = train_test_split(X_all, y_all, test_size=round(1/self.k, 2), shuffle=True)
      X_folds = [X_first] + np.array_split(X_rest, self.k - 1)
      y_folds = [y_first] + np.array_split(y_rest, self.k - 1)

    elif self.val_distribution == 'balanced':
      # Split each class into k parts and pair them up so every fold
      # contains samples of both classes.
      X_parts0 = np.array_split(self.X_train[0], self.k)
      X_parts1 = np.array_split(self.X_train[1], self.k)
      y_parts0 = np.array_split(self.y_train[0], self.k)
      y_parts1 = np.array_split(self.y_train[1], self.k)
      X_folds = [np.concatenate((a, b), axis=0) for a, b in zip(X_parts0, X_parts1)]
      y_folds = [np.concatenate((a, b), axis=0) for a, b in zip(y_parts0, y_parts1)]

    else:
      raise ValueError("val_distribution must be 'balanced' or 'unbalanced', got %r" % (self.val_distribution,))

    # Starting point restored before every fold (may be the untrained
    # empty / None state, which Gradient_update turns into zeros).
    w_start = self.w.copy()
    b_start = self.b

    w_list = []
    b_list = []
    acc_list = []
    for i in range(self.k):
      X_val = X_folds[i]
      y_val = y_folds[i]
      X_fit = np.concatenate([X_folds[j] for j in range(self.k) if j != i], axis=0)
      y_fit = np.concatenate([y_folds[j] for j in range(self.k) if j != i], axis=0)

      X_fit, y_fit = self.random_shuffle(X_fit, y_fit)
      self.Gradient_update(X_fit, y_fit, X_val, y_val)
      print(self.accuracy())
      w_list.append(self.w.copy())
      b_list.append(self.b)

      acc_list.append(accuracy_score(y_val, self._predict_labels(X_val, self.w, self.b)))

      self.w = w_start.copy()
      self.b = b_start

    best = acc_list.index(max(acc_list))
    self.w = w_list[best]
    self.b = b_list[best]

  def fit(self):
    """Train the model according to ``val`` / ``val_type``.

    Raises:
      ValueError: if validation is enabled and ``val_type`` is not
        ``'k_fold'`` or ``'cross_val'`` (previously this trained nothing).
    """
    if not self.val:
      X_train = np.concatenate((self.X_train[0], self.X_train[1]), axis=0)
      y_train = np.concatenate((self.y_train[0], self.y_train[1]), axis=0)
      X_train, y_train = self.random_shuffle(X_train, y_train)
      self.Gradient_update(X_train, y_train)

    elif self.val_type == 'k_fold':
      self.k_fold_cross_validation()

    elif self.val_type == 'cross_val':
      self.Cross_validation(0.2)

    else:
      raise ValueError("val_type must be 'k_fold' or 'cross_val', got %r" % (self.val_type,))

  def random_shuffle(self, X_train, y_train):
    """Return ``X_train`` / ``y_train`` with their rows shuffled jointly.

    The shuffle is done by splitting the data in two random halves with
    ``train_test_split`` and gluing the halves back together. The halves are
    also kept on the instance as ``x_tr``, ``x_te``, ``y_tr`` and ``y_te``.
    """
    self.x_tr, self.x_te, self.y_tr, self.y_te = train_test_split(X_train, y_train, test_size=0.5)
    return np.concatenate((self.x_tr, self.x_te), axis=0), np.concatenate((self.y_tr, self.y_te), axis=0)

  def predict(self):
    """Return 0/1 predictions for ``X_test`` using the current ``w`` / ``b``."""
    return self._predict_labels(self.X_test, self.w, self.b)

  def accuracy(self):
    """Return the accuracy on the test set, in percent."""
    return accuracy_score(self.y_test, self.predict())*100

  def precision(self):
    """Return the precision on the test set, in percent."""
    return precision_score(self.y_test, self.predict())*100

# Federated learning (FL)

In [None]:
class Federated_SVM:
  """Federated training of a linear SVM across several ``SVM`` clients.

  In every global round each client trains locally, the client parameters are
  combined by an aggregator, and the aggregated parameters become the clients'
  starting point for the next round.
  """

  def __init__(self, n_clients=3, val=True, val_type='k_fold', val_distribution='balanced', k=5, learning_rate=0.001, lambda_param=0.01, n_iters=100):
    """Store the settings that are forwarded to every client ``SVM``.

    Args:
      n_clients: number of clients.
      val, val_type, val_distribution, k: validation settings of each client.
      learning_rate, lambda_param, n_iters: training settings of each client.
    """
    self.n_clients = n_clients
    self.learning_rate = learning_rate
    self.lambda_param = lambda_param
    self.n_iters = n_iters
    self.val = val
    self.val_type = val_type
    self.val_distribution = val_distribution
    self.clients = []
    # Number of training samples held by each client (set by create_clients).
    self.client_distribution = []
    self.k = k
    self.X_test = None
    self.y_test = None
    self.noise = None
    # Aggregated parameters with the best global test accuracy (set by fit).
    self.w_best = None
    self.b_best = None

  def create_clients(self, X_train, y_train, X_test, y_test):
    """Build one ``SVM`` per client and remember the shared test set.

    Args:
      X_train: per-client ``[class-0 samples, class-1 samples]`` pairs.
      y_train: per-client ``[class-0 labels, class-1 labels]`` pairs.
      X_test: test samples shared by all clients and the global model.
      y_test: labels matching ``X_test``.
    """
    self.clients = []
    # Reset as well: appending to sizes left over from an earlier call would
    # skew the aggregation weights.
    self.client_distribution = []
    for i in range(self.n_clients):
      self.client_distribution.append(X_train[i][0].shape[0] + X_train[i][1].shape[0])
      self.clients.append(SVM(X_train[i], y_train[i], X_test, y_test, self.val, self.val_type, self.val_distribution, self.k, self.learning_rate, self.lambda_param, self.n_iters))
    self.X_test = copy.deepcopy(X_test)
    self.y_test = copy.deepcopy(y_test)

  def average_aggregator(self, parameter_list):
    """Combine client parameters into one model, weighted by client sample count.

    Args:
      parameter_list: flat list ``[w_0, b_0, w_1, b_1, ...]`` with one
        ``(w, b)`` pair per client, in client order.

    Returns:
      ``(w, b)`` of the aggregated model. An empty ``parameter_list`` yields
      zero weights and a zero bias.
    """
    if not parameter_list:
      return np.zeros(self.X_test.shape[1]), 0

    total = sum(self.client_distribution)
    w = np.zeros(parameter_list[0].shape[0])
    b = 0
    for c in range(self.n_clients):
      # Weights and bias use the same client share so the result is a
      # consistent average of the client hyperplanes (the bias used to be
      # averaged uniformly). Fall back to equal shares when no sample counts
      # are known.
      share = self.client_distribution[c] / total if total else 1 / self.n_clients
      w = np.add(w, parameter_list[2 * c] * share)
      b = b + parameter_list[2 * c + 1] * share

    return w, b

  def fit(self, g_iters, aggregator):
    """Run ``g_iters`` global rounds of local training followed by aggregation.

    Side effects: fills ``accuracy_history`` / ``precision_history`` (mean of
    the client test metrics per round) and ``client_accuracy_history`` (one
    list per client), stores the aggregated parameters with the best global
    test accuracy in ``w_best`` / ``b_best``, prints progress and draws the
    three history plots.

    Args:
      g_iters: number of global rounds.
      aggregator: callable taking the flat ``[w_0, b_0, w_1, b_1, ...]`` list
        and returning the aggregated ``(w, b)``.
    """
    start_time_fl_svm = time.time()
    n_features = self.X_test.shape[1]
    w_best = np.zeros(n_features)
    b_best = 0
    best_global_accuracy = None

    # Mean client metrics after each global round.
    self.accuracy_history = []
    self.precision_history = []

    # Test accuracy of each client after each global round.
    self.client_accuracy_history = [[] for _ in range(self.n_clients)]

    w_agg = np.zeros(n_features)
    b_agg = 0

    for i in range(g_iters):
      print('global round', i + 1)
      epoch_accuracies = []
      epoch_precisions = []
      for j in range(self.n_clients):
        client = self.clients[j]
        if i > 0:
          # Later rounds continue from the aggregated model.
          client.w = copy.deepcopy(w_agg)
          client.b = copy.deepcopy(b_agg)
        client.fit()

        # Evaluate each client once per round and reuse the numbers.
        client_accuracy = client.accuracy()
        client_precision = client.precision()
        print('client', j + 1, "acc pr", client_accuracy, client_precision)

        self.client_accuracy_history[j].append(client_accuracy)
        epoch_accuracies.append(client_accuracy)
        epoch_precisions.append(client_precision)

      epoch_accuracy = np.mean(epoch_accuracies)
      epoch_precision = np.mean(epoch_precisions)

      self.accuracy_history.append(epoch_accuracy)
      self.precision_history.append(epoch_precision)

      parameter_list = []
      for k in range(self.n_clients):
        parameter_list.append(self.clients[k].w)
        parameter_list.append(self.clients[k].b)

      w_agg, b_agg = aggregator(parameter_list)

      # Track the best aggregated model by its own test accuracy, so that
      # like is compared with like.
      global_accuracy = self.accuracy(w_agg, b_agg)
      if best_global_accuracy is None or global_accuracy > best_global_accuracy:
        w_best = copy.deepcopy(w_agg)
        b_best = copy.deepcopy(b_agg)
        best_global_accuracy = global_accuracy

      print('global test acc pr', epoch_accuracy, epoch_precision)
      print('Global parameter size:', sys.getsizeof(w_agg), sys.getsizeof(b_agg))
      end_time_fl_svm = time.time()
      time_taken_fl_svm = end_time_fl_svm - start_time_fl_svm
      print("Time taken for federated SVM:", time_taken_fl_svm, "seconds")

    self.w_best = w_best
    self.b_best = b_best

    self.plot_metrics_accuracy_graph(self.accuracy_history)
    self.plot_metrics_precision_graph(self.precision_history)
    self.plot_client_accuracy_line_graph()

  def plot_client_accuracy_line_graph(self):
    """Plot every client's test accuracy per round as one line per client."""
    x_values = range(1, len(self.client_accuracy_history[0]) + 1)

    plt.figure(figsize=(10, 6))
    for i in range(self.n_clients):
      plt.plot(x_values, self.client_accuracy_history[i], label=f'Client {i + 1}', marker='o')

    plt.xlabel('Local Rounds')
    plt.ylabel('Accuracy')
    plt.title(' Local Client Accuracy vs Rounds ')
    plt.legend()
    plt.show()

  def _plot_global_metric(self, values, label, ylabel, title):
    """Plot one per-round metric series against the global round number."""
    x_values = range(1, len(values) + 1)

    plt.figure(figsize=(10, 6))
    plt.plot(x_values, values, label=label, marker='o')

    plt.xlabel('Global Rounds')
    plt.ylabel(ylabel)
    plt.title(title)
    plt.legend()
    plt.show()

  def plot_metrics_accuracy_graph(self, accuracy_values):
    """Plot the per-round accuracy values (percent)."""
    self._plot_global_metric(accuracy_values, 'Accuracy', 'Accuracy(%)', 'Federated SVM Accuracy Over Global Rounds')

  def plot_metrics_precision_graph(self, precision_values):
    """Plot the per-round precision values (percent)."""
    self._plot_global_metric(precision_values, 'Precision', 'Precision(%)', 'Federated SVM precision Over Global Rounds')

  def predict(self, w, b):
    """Return 0/1 predictions for ``X_test`` under hyperplane ``(w, b)``.

    A decision value of exactly 0 maps to label 1.
    """
    approx = np.dot(self.X_test, w) - b
    return np.where(approx < 0, 0, 1)

  def accuracy(self, w, b):
    """Return the test accuracy of ``(w, b)``, in percent."""
    return accuracy_score(self.y_test, self.predict(w, b))*100

  def precision(self, w, b):
    """Return the test precision of ``(w, b)``, in percent."""
    return precision_score(self.y_test, self.predict(w, b))*100


# load covid data

In [None]:
# Preview one Viral Pneumonia radiograph in grayscale and print its array shape.
ima = plt.imread(
    "/kaggle/input/covid19-radiography-database/COVID-19_Radiography_Dataset/Viral Pneumonia/images/Viral Pneumonia-352.png"
)
plt.imshow(ima, cmap='gray')
print(ima.shape)


In [None]:
# Preview one COVID radiograph in grayscale and print its array shape.
ima = plt.imread(
    "/kaggle/input/covid19-radiography-database/COVID-19_Radiography_Dataset/COVID/images/COVID-1.png"
)
plt.imshow(ima, cmap='gray')
print(ima.shape)

# Load, resize and flatten the image data

In [None]:
# Load every COVID image, resize it to 100x100 and flatten it into one
# feature row of the `covid` matrix.
covid_dir = '/kaggle/input/covid19-radiography-database/COVID-19_Radiography_Dataset/COVID/images/'

# os.listdir returns entries in arbitrary order; sorting fixes the row order
# so clustering and splitting see the same data layout on every run.
images_path = sorted(os.listdir(covid_dir))
covid = []

for img_path in images_path:
    image = plt.imread(os.path.join(covid_dir, img_path))
    covid.append(cv2.resize(image, (100, 100)).flatten())

covid = np.stack(covid, axis=0)

In [None]:
# Load the Viral Pneumonia images, resize each to 100x100 and flatten it into
# one feature row of the `pnemo` matrix.
pnemo_dir = '/kaggle/input/covid19-radiography-database/COVID-19_Radiography_Dataset/Viral Pneumonia/images/'

# Only images with exactly 299 * 299 = 89401 values are kept.
# NOTE(review): presumably this filters out multi-channel files so that every
# flattened row has the same length for np.stack - confirm against the dataset.
expected_pixels = 299 * 299

# os.listdir returns entries in arbitrary order; sorting fixes the row order
# so clustering and splitting see the same data layout on every run.
images_path = sorted(os.listdir(pnemo_dir))
pnemo = []

for img_path in images_path:
    image = plt.imread(os.path.join(pnemo_dir, img_path))

    # image.size is the total element count, i.e. the flattened length,
    # without building a flattened copy just for the check.
    if image.size == expected_pixels:
        pnemo.append(cv2.resize(image, (100, 100)).flatten())

pnemo = np.stack(pnemo, axis=0)

In [None]:
# Show the (n_images, n_features) shape of both flattened image matrices, one per line.
print(covid.shape, pnemo.shape, sep="\n")

# Split the data into clients

In [None]:
# Split both classes across 3 clients and build the shared test set;
# get_clients labels the covid rows 0 and the pnemo rows 1.
clients_X,clients_y,X_test,y_test = get_clients(covid, pnemo, n_clients = 3)

In [None]:
# Pool the per-client training splits into one per-class dataset for the
# centralized (non-federated) SVM baseline.
xtrain_gl, ytrain_gl = get_total_from_clients(clients_X,clients_y)

In [None]:
# Train the centralized SVM baseline on the pooled data and report its test
# accuracy and precision. The interval is measured with perf_counter, a
# monotonic clock intended for timing, instead of the wall clock time.time().
start_time_svm = time.perf_counter()
clf = SVM(xtrain_gl, ytrain_gl, X_test, y_test, val=False, n_iters=1000)
clf.fit()

print(clf.accuracy())
print(clf.precision())
end_time_svm = time.perf_counter()


In [None]:
# Report how long the centralized SVM cell took (training plus the evaluation
# printed in that cell).
time_taken_svm = end_time_svm - start_time_svm
print("Time taken for SVM:", time_taken_svm, "seconds")

In [None]:
# Train the federated SVM: 3 clients, no validation split, 150 local passes
# per client per round and 5 global aggregation rounds. Elapsed time is
# printed by Federated_SVM.fit itself after every round.
f_svm = Federated_SVM(n_clients=3, val=False, n_iters=150)
f_svm.create_clients(clients_X, clients_y, X_test, y_test)

f_svm.fit(g_iters=5, aggregator=f_svm.average_aggregator)

In [None]:
# # Calculate and print the time taken for both methods
# time_taken_fl_svm = end_time_fl_svm - start_time_fl_svm
# print("Time taken for federated SVM:", time_taken_fl_svm, "seconds")

In [None]:
def plot_accuracy(SVM, Federated_SVM, title):
    """Draw a two-bar chart comparing centralized and federated accuracy.

    Args:
        SVM: trained centralized model; the value of its ``accuracy()`` is plotted.
            (The parameter name shadows the class of the same name inside
            this function only.)
        Federated_SVM: fitted federated model; the last entry of its
            ``accuracy_history`` is plotted.
        title: title of the figure.
    """
    normal_svm_metrics = [SVM.accuracy()]
    # Accuracy recorded after the last global round.
    federated_svm_metrics = [Federated_SVM.accuracy_history[-1]]

    bar_width = 0.5
    plt.figure(figsize=(6, 6))
    # Both bars are centred on their x position; previously the second bar
    # used align='edge' and was drawn shifted to the right.
    plt.bar([0], normal_svm_metrics, width=bar_width, label='Normal SVM', align='center')
    plt.bar([1], federated_svm_metrics, width=bar_width, label='Federated SVM', align='center')

    plt.ylabel('Accuracy(%)')
    plt.title(title)
    plt.xticks([])
    plt.legend()
    plt.show()
    
# Compare the centralized SVM's test accuracy with the federated model's
# accuracy after its last global round.
plot_accuracy(clf, f_svm, 'SVM vs Federated SVM Accuracy')

In [None]:
def plot_precision(SVM, Federated_SVM, title):
    """Draw a two-bar chart comparing centralized and federated precision.

    Args:
        SVM: trained centralized model; the value of its ``precision()`` is plotted.
            (The parameter name shadows the class of the same name inside
            this function only.)
        Federated_SVM: fitted federated model; the last entry of its
            ``precision_history`` is plotted.
        title: title of the figure.
    """
    normal_svm_metrics = [SVM.precision()]
    # Precision recorded after the last global round.
    federated_svm_metrics = [Federated_SVM.precision_history[-1]]

    bar_width = 0.5
    plt.figure(figsize=(6, 6))
    # Both bars are centred on their x position; previously the second bar
    # used align='edge' and was drawn shifted to the right.
    plt.bar([0], normal_svm_metrics, width=bar_width, label='Normal SVM', align='center')
    plt.bar([1], federated_svm_metrics, width=bar_width, label='Federated SVM', align='center')

    plt.ylabel('Precision(%)')
    plt.title(title)
    plt.xticks([])
    plt.legend(loc = 'center')
    plt.show()
    
# Compare the centralized SVM's test precision with the federated model's
# precision after its last global round.
plot_precision(clf, f_svm, 'SVM vs Federated SVM precision')