# Import necessary libraries

In [1]:
# General
import os
import cv2
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import pickle
import time
import copy
import pandas as pd


# Pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader


# PySyft
import syft as sy
from syft.frameworks.torch.fl import utils
from syft.workers.websocket_client import WebsocketClientWorker

# Pre-processing the Data

In [3]:
# Flag to only rebuild data once.
# When True, a later cell instantiates CatsVSDogs_Train and regenerates the cached .npy dataset
# from the raw image folders; leave it False to reuse the file already saved on disk.
REBUILD_DATA = False

In [4]:
# Side length of the square images: tensors are reshaped to (-1, 1, IMG_SIZE, IMG_SIZE) further down.
# NOTE(review): CatsVSDogs_Train carries its own IMG_SIZE class attribute; keep the two values in sync.
IMG_SIZE = 64
# Number of samples each worker slices from its shard per training step
BATCH_SIZE = 100
# Learning rate handed to every SGD optimizer (the three clients and the server)
LR = 0.001

In [5]:
# Create a pre-processing class
class CatsVSDogs_Train():
    """Build the grayscale cats-vs-dogs dataset and cache it as one .npy file.

    Every file found in the CAT and DOG folders is read as a grayscale image,
    resized to IMG_SIZE x IMG_SIZE and paired with a one-hot label
    ([1, 0] for cat, [0, 1] for dog). Files that cannot be read or resized are
    skipped, and the number of skipped files is reported at the end.
    """

    # Define where your data is stored
    CAT = r"/media/wilfredo/Willie931GB/EURECOM_SLU_Linux/II_SEMESTER/SLU/PAPER_KDD2022/EXPERIMENTS/PySyft/Datasets/cats_and_dogs/PetImages/Cat/"
    DOG = r"/media/wilfredo/Willie931GB/EURECOM_SLU_Linux/II_SEMESTER/SLU/PAPER_KDD2022/EXPERIMENTS/PySyft/Datasets/cats_and_dogs/PetImages/Dog/"

    # Define what each type of image is (their labels for the NN)
    LABELS = {CAT: 0, DOG: 1}

    # Define your image size (side length of the square output images)
    IMG_SIZE = 64

    # Defaults; make_training_data() replaces these with per-instance values
    training_data = []

    # Counters used to check for class imbalance and for unreadable files
    cat_count = 0
    dog_count = 0
    skipped_count = 0

    def make_training_data(self):
        """Read, resize and label every image, then shuffle and save the dataset.

        Side effects: fills self.training_data, self.cat_count, self.dog_count and
        self.skipped_count, writes the .npy cache file and prints the counts.
        """
        # Start from a clean slate so that rebuilding twice in the same kernel does not
        # append to (and duplicate) the samples collected by a previous run.
        self.training_data = []
        self.cat_count = 0
        self.dog_count = 0
        self.skipped_count = 0

        for label in self.LABELS:
            # Iterate over the images in the directory; tqdm only draws a progress bar
            for f in tqdm(os.listdir(label)):
                path = os.path.join(label, f)

                # Some files are corrupt or not images at all. Loading stays best-effort,
                # but every skipped file is counted instead of being silently ignored.
                try:
                    # Grayscale: color is not treated as a defining feature here
                    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
                    if img is None:
                        # cv2.imread signals an unreadable file by returning None
                        self.skipped_count += 1
                        continue
                    img = cv2.resize(img, (self.IMG_SIZE, self.IMG_SIZE))
                except Exception:
                    self.skipped_count += 1
                    continue

                # One-hot label: row `k` of the 2x2 identity matrix, i.e. cat -> [1, 0], dog -> [0, 1]
                self.training_data.append([np.array(img), np.eye(2)[self.LABELS[label]]])

                # Count which are cats and which are dogs
                if label == self.CAT:
                    self.cat_count += 1
                elif label == self.DOG:
                    self.dog_count += 1

        # Shuffle the samples, then save them to disk
        np.random.shuffle(self.training_data)

        # Each sample pairs a (IMG_SIZE, IMG_SIZE) image with a (2,) label, so the rows are ragged.
        # Build the (N, 2) object array explicitly: recent NumPy versions refuse to create it
        # implicitly from a nested list.
        dataset = np.empty((len(self.training_data), 2), dtype=object)
        for row, (img, one_hot) in enumerate(self.training_data):
            dataset[row, 0] = img
            dataset[row, 1] = one_hot
        np.save(r"/media/wilfredo/Willie931GB/EURECOM_SLU_Linux/II_SEMESTER/SLU/PAPER_KDD2022/EXPERIMENTS/PySyft/Datasets/cats_and_dogs/Numpy_Datasets/training_data_size_64.npy",
                dataset)

        print("Cat = ", self.cat_count)
        print("Dog = ", self.dog_count)
        print("Skipped = ", self.skipped_count)

In [7]:
# Run the thing IF we want to rebuild the data, and check our distribution of data
# (make_training_data prints the cat and dog counts once it has finished).
if REBUILD_DATA:
    data_train = CatsVSDogs_Train()
    data_train.make_training_data()

In [8]:
# Load the data from the file it was saved in.
# NOTE(review): allow_pickle=True makes np.load unpickle the stored objects, which can execute
# arbitrary code; only load files you generated yourself.
# NOTE(review): this is an absolute, machine-specific path; consider a configurable data directory.
training_data = np.load(r"/media/wilfredo/Willie931GB/EURECOM_SLU_Linux/II_SEMESTER/SLU/PAPER_KDD2022/EXPERIMENTS/PySyft/Datasets/cats_and_dogs/Numpy_Datasets/training_data_size_64.npy",
                        allow_pickle = True)
# test_data = np.load(r"/media/wilfredo/Willie931GB/SLU/EchoNet/EchoNet-Dynamic/Numpy_Datasets/test_data_size_50.npy", allow_pickle = True)

# Create the CNN (based on VGG11)
Source: Page 3/14, Table 1, Configuration A, https://arxiv.org/pdf/1409.1556.pdf

## Model on clients (small portion)

In [8]:
class Net_client(nn.Module):
    """Client-side front of the split network.

    Two stages of 3x3 convolution -> ReLU -> 2x2 max-pooling. The output feature
    maps are the intermediate activations handed over to the server-side model.
    """

    def __init__(self):
        super().__init__()
        # 1 input channel -> 32 feature maps, 3x3 kernel
        self.conv1 = nn.Conv2d(1, 32, 3)
        # 32 -> 64 feature maps, 3x3 kernel
        self.conv2 = nn.Conv2d(32, 64, 3)

    def convs(self, x):
        """Apply both conv -> ReLU -> max-pool stages, in order, and return the feature maps."""
        for conv_layer in (self.conv1, self.conv2):
            x = F.max_pool2d(F.relu(conv_layer(x)), (2, 2))
        return x

    def forward(self, x):
        """The client half consists of the convolutional stages only."""
        return self.convs(x)

net_client = Net_client()

## Model on Server (big portion)

In [9]:
class Net_server(nn.Module):
    """Server-side remainder of the split network.

    Takes the 64-channel feature maps produced by the client model, applies two more
    3x3 convolution -> ReLU -> 2x2 max-pooling stages, flattens the result and runs it
    through three fully connected layers ending in a 2-class softmax.
    """

    def __init__(self):
        super().__init__()

        # Start from the third convolutional layer
        self.conv3 = nn.Conv2d(64, 128, 3)
        self.conv4 = nn.Conv2d(128, 256, 3)

        # Flattened size of the conv output that feeds fc1. It is fixed at 1024, which is
        # 256 channels x 2 x 2 for the 64x64 inputs used in this notebook
        # (client output 64x14x14 -> conv3/pool -> 128x6x6 -> conv4/pool -> 256x2x2).
        # A different image size requires changing this value.
        self._to_linear = 1024
        self.fc1 = nn.Linear(self._to_linear, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 2)

    def convs(self, x):
        """Apply the two server-side conv -> ReLU -> max-pool stages and return the feature maps."""
        # Convolutional layer 3 + activation + max_pooling
        x = self.conv3(x)
        x = F.relu(x)
        x = F.max_pool2d(x, (2, 2))
        # Convolutional layer 4 + activation + max_pooling
        x = self.conv4(x)
        x = F.relu(x)
        x = F.max_pool2d(x, (2, 2))
        # The former lazy `_to_linear is None` initialisation was unreachable (the attribute
        # is always set in __init__) and has been removed.
        return x

    def forward(self, x):
        """Return per-class probabilities of shape (batch, 2) for client feature maps `x`."""
        # Run the convs layers first
        x = self.convs(x)
        # Flatten each sample's feature maps into one vector of length _to_linear
        x = x.view(-1, self._to_linear)
        # Pass the data through the fully connected layers now
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # Final layer: one logit per class
        x = self.fc3(x)
        # Softmax turns the two logits into probabilities that sum to 1
        x = F.softmax(x, dim = 1)
        return x

net_server = Net_server()

In [10]:
# Expose the two halves of the split network under the names used by the following cells.
# These are plain aliases of net_client / net_server (no copy is made here); the per-worker
# copies are created later with .copy().
model_client = net_client
model_server = net_server

In [11]:
# Take a look at your model
model_client

Net_client(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
)

In [12]:
# Take a look at your model
model_server

Net_server(
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (conv4): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=1024, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=256, bias=True)
  (fc3): Linear(in_features=256, out_features=2, bias=True)
)

# Establish your loss function

In [13]:
# Set your loss function: mean squared error, applied in train() to the server model's
# softmax output and the one-hot labels.
# NOTE(review): cross-entropy is the more common choice for classification; confirm MSE is intentional.
loss_function = nn.MSELoss()

# Separate your data into data, labels, training, testing, and scale it

In [12]:
########################## TRAIN DATA ##########################
# Each entry of training_data is [image, one-hot label].
# Stack the images into one NumPy array first and convert it in a single step: building a
# tensor from a Python list of arrays converts element by element and is very slow.
# .float() yields the same float32 dtype torch.Tensor(...) would produce.
X = torch.from_numpy(np.stack([sample[0] for sample in training_data])).float().view(-1, 1, IMG_SIZE, IMG_SIZE)

# Scale the images. The pixel values are between 0-255, but we want them to be between 0-1
X = X/255.0

# Get your y's (one-hot labels, one row per sample)
y = torch.from_numpy(np.stack([sample[1] for sample in training_data])).float()

# No separate test set is loaded in this notebook; the validation samples are split off X and y below.

In [25]:
# Set your validation data percentage (fraction of all samples held out for evaluation)
VAL_PCT = 0.1
# Number of held-out samples, truncated towards zero
val_size = int(len(X)*VAL_PCT)

In [26]:
# Index of the first validation sample. Slicing with an explicit index instead of a negative
# offset keeps the split correct when val_size is 0 (X[:-0] would be empty and X[-0:] would
# be the whole dataset).
split_idx = len(X) - val_size

# Define your training data: everything before the split index
train_X = X[:split_idx]
train_y = y[:split_idx]

# Define your testing (validation) data: the last val_size samples
test_X = X[split_idx:]
test_y = y[split_idx:]

# Parallel Split Learning

## Establish the virtual workers, their data, their NNs, and their optimizers

In [17]:
# Start the hook
# (presumably this is what gives tensors and models the .send/.get/.move methods used below; see the PySyft docs)
hook = sy.TorchHook(torch)

# Create your virtual workers and our server.
# Three clients run the small client model; one server runs the large server model.
worker1 = sy.VirtualWorker(hook, id="worker1")
worker2 = sy.VirtualWorker(hook, id="worker2")
worker3 = sy.VirtualWorker(hook, id="worker3")
server = sy.VirtualWorker(hook, id="server")

# Put the WORKERS into a list for easier access later on (the server is deliberately not included;
# len(compute_nodes) is used to size the data shards and the training loop)
compute_nodes = [worker1, worker2, worker3]

In [18]:
# Split the training data into three contiguous, equally sized shards, one per worker.
# Shard N covers train_X[int((N-1) * len(train_X)/len(compute_nodes)) : int(N * len(train_X)/len(compute_nodes))].
shard_bounds = [int(k * len(train_X)/len(compute_nodes)) for k in range(4)]
shard_ranges = list(zip(shard_bounds[:-1], shard_bounds[1:]))

# Images, kept in (N, 1, IMG_SIZE, IMG_SIZE) layout
train_X_worker1, train_X_worker2, train_X_worker3 = (
    train_X[lo:hi].view(-1, 1, IMG_SIZE, IMG_SIZE) for lo, hi in shard_ranges
)

# Labels, sliced with the same boundaries so every image keeps its label
train_y_worker1, train_y_worker2, train_y_worker3 = (
    train_y[lo:hi] for lo, hi in shard_ranges
)

In [19]:
# Clear the workers of any objects, just in case you forgot some were still there from a previous run.
# The last call is the cell's final expression, so the notebook displays its return value.
worker1.clear_objects()
worker2.clear_objects()
worker3.clear_objects()
server.clear_objects()

<VirtualWorker id:server #objects:0>

In [20]:
# Establish the NN model for each worker. This is model-centric FL, so it is the same model for all workers:
# every worker starts from its own copy of the client model, and the server gets a copy of the server model.
worker1_model = model_client.copy()
worker2_model = model_client.copy()
worker3_model = model_client.copy()
server_model = model_server.copy()

# Establish the optimizer for each worker (plain SGD with the shared learning rate LR);
# each optimizer is bound to the parameters of exactly one model copy.
worker1_optimizer = optim.SGD(worker1_model.parameters(), lr=LR)
worker2_optimizer = optim.SGD(worker2_model.parameters(), lr=LR)
worker3_optimizer = optim.SGD(worker3_model.parameters(), lr=LR)
server_optimizer = optim.SGD(server_model.parameters(), lr=LR)

In [21]:
# Organize the WORKER models and optimizers into lists. The server stuff must not be mixed with these.
# NOTE(review): in the code visible in this notebook, train() reads the worker*_model / worker*_optimizer
# globals directly, so these three collections are only rebuilt by update_models() and never read.
models = [worker1_model, worker2_model, worker3_model]
optimizers = [worker1_optimizer, worker2_optimizer, worker3_optimizer]

# One (worker, model, optimizer) triple per client
worker_collection = [(worker1, worker1_model, worker1_optimizer), (worker2, worker2_model, worker2_optimizer), 
                    (worker3, worker3_model, worker3_optimizer)]

## Training Sequence

In [22]:
def train():
    """Run one epoch of parallel split learning over the three workers and the server.

    For every batch index, each worker runs the client model on its own batch, the detached
    activations are moved to the server, the server finishes the forward pass and updates
    its model once per client, and the activation gradients are sent back so each client
    can finish back-propagation and update its own model. After the last batch the three
    client models are combined with federated averaging.

    Reads the module-level models, optimizers, data shards, workers, loss_function and
    BATCH_SIZE; nothing is passed in.

    Returns:
        tuple: (avg_model_clients, model_server, total_batch_times, total_train_time)
            avg_model_clients: federated average of the three client models.
            model_server: the trained server model.
            total_batch_times: list with the measured compute time of every batch, in seconds.
            total_train_time: sum of total_batch_times.
    """
    # Resolve the current models and optimizers once: update_models() rebinds these globals
    # between epochs, never during one. Binding them here also keeps the names defined when
    # the loop below runs zero times.
    client_models = [worker1_model, worker2_model, worker3_model]
    client_optimizers = [worker1_optimizer, worker2_optimizer, worker3_optimizer]
    client_inputs = [train_X_worker1, train_X_worker2, train_X_worker3]
    client_labels = [train_y_worker1, train_y_worker2, train_y_worker3]
    model_server = server_model
    opt_server = server_optimizer

    total_batch_times = []
    total_train_time = 0

    for i in tqdm(range(0, int(len(train_X)/len(compute_nodes)), BATCH_SIZE)):
        # Send the models to their respective workers
        for client_model, worker in zip(client_models, compute_nodes):
            client_model.send(worker)
        model_server.send(server)

        # Slice this step's batch from every shard; inputs go to the workers, labels to the
        # server (the loss is computed there)
        batches_X = [shard[i : i + BATCH_SIZE].send(worker)
                     for shard, worker in zip(client_inputs, compute_nodes)]
        batches_y = [shard[i : i + BATCH_SIZE].send(server) for shard in client_labels]

        # Zero the gradients of all models on both workers and server
        for client_opt in client_optimizers:
            client_opt.zero_grad()
        opt_server.zero_grad()

        # Forward pass on the workers. They run one after another here, so the measured
        # time is scaled down to approximate workers running in parallel.
        # NOTE(review): the divisor is len(compute_nodes) - 1, not len(compute_nodes); confirm this is intended.
        client_FP_start_time = time.time()
        interms = [client_model(batch_X)
                   for client_model, batch_X in zip(client_models, batches_X)]
        client_FP_end_time = (time.time() - client_FP_start_time) / (len(compute_nodes)-1)

        # Send detached copies of the activations to the server. requires_grad_() makes them
        # leaf tensors there, so the server-side backward pass fills in their .grad.
        remote_interms = [interm.detach().move(server).requires_grad_() for interm in interms]

        # Complete the forward passes, sequentially, on the server
        server_FP_start_time = time.time()
        preds = [model_server(remote_interm) for remote_interm in remote_interms]
        server_FP_end_time = time.time() - server_FP_start_time

        # Calculate the loss on the server for each client, then back-propagate and update
        # the server model once per client
        server_BP_start_time = time.time()
        losses = [loss_function(pred, batch_y) for pred, batch_y in zip(preds, batches_y)]
        for client_idx, loss in enumerate(losses):
            if client_idx > 0:
                # Without this reset the server gradients keep accumulating, so the first
                # client's gradient would be applied three times and the second one twice.
                opt_server.zero_grad()
            loss.backward()
            opt_server.step()
        server_BP_end_time = time.time() - server_BP_start_time

        # Make sure the original (non-detached) activations are on their workers ...
        for interm, worker in zip(interms, compute_nodes):
            interm.move(worker)

        # ... and send each worker the gradient of the loss with respect to its activations
        grad_interms = [remote_interm.grad.copy().move(worker)
                        for remote_interm, worker in zip(remote_interms, compute_nodes)]

        # Finish back-propagation on the clients, then update the client models
        client_BP_start_time = time.time()
        for interm, grad_interm in zip(interms, grad_interms):
            interm.backward(grad_interm)
        for client_opt in client_optimizers:
            client_opt.step()
        client_BP_end_time = (time.time() - client_BP_start_time) / (len(compute_nodes)-1)

        # Total batch time: compute phases only (transfers between workers are not included)
        total_batch_time = client_FP_end_time + server_FP_end_time + server_BP_end_time + client_BP_end_time
        total_batch_times.append(total_batch_time)
        total_train_time += total_batch_time

        # Get back all models
        for client_model in client_models:
            client_model.get()
        model_server.get()

        # Remove all objects from workers and server before the next batch comes
        for worker in compute_nodes:
            worker.clear_objects()
        server.clear_objects()

    # OUTSIDE the for loop: obtain the averaged model for the clients
    avg_model_clients = utils.federated_avg({
            "worker1": client_models[0],
            "worker2": client_models[1],
            "worker3": client_models[2]
        })
    print("Total training time for this epoch = ", total_train_time, " s \n \n")
    # Return the new models
    return avg_model_clients, model_server, total_batch_times, total_train_time

## Function used for testing

In [23]:
def test(new_client_model, new_server_model):
    """Evaluate the combined client + server model on the held-out validation samples.

    Every sample of the module-level test_X is passed through the client model and then the
    server model; the predicted class (arg-max of the output) is compared with the arg-max
    of the matching one-hot label in test_y.

    Args:
        new_client_model: client half of the network.
        new_server_model: server half of the network.

    Returns:
        float: fraction of correctly classified samples, or NaN when test_X is empty.
    """
    # Calculate the accuracy
    correct = 0
    total = 0

    # Put the models into evaluation mode once, before the loop
    new_client_model.eval()
    new_server_model.eval()

    # Do not track gradients while testing
    with torch.no_grad():
        print("Initiated model testing:")
        for i in tqdm(range(len(test_X))):

            # Obtain the real class for the sample
            real_class = torch.argmax(test_y[i])

            # Obtain our prediction for said sample (not arg_maxed yet)
            output = new_server_model(new_client_model(test_X[i].view(-1, 1, IMG_SIZE, IMG_SIZE)))[0]

            # Obtain our arg_maxed prediction for said sample
            predicted_class = torch.argmax(output)

            # Update counters
            if predicted_class == real_class:
                correct += 1
            total += 1

    # An empty validation set has no defined accuracy; report NaN instead of dividing by zero
    accuracy = correct/total if total else float("nan")
    print("Accuracy of the new model = ", round(accuracy, 3), "\n \n")
    return accuracy

## Reset function

In [24]:
def update_models(new_client_model, new_server_model):
    """Reset the module-level worker/server state from freshly trained models.

    Clears every virtual worker, gives each client its own copy of new_client_model and the
    server a copy of new_server_model, creates a new SGD optimizer (learning rate LR) for
    every copy, and rebuilds the models / optimizers / worker_collection lists.
    """
    global worker1_model, worker2_model, worker3_model, server_model
    global worker1_optimizer, worker2_optimizer, worker3_optimizer, server_optimizer
    global models, optimizers, worker_collection

    # Clear the workers of any objects, just in case some were still there from a previous run
    for node in (worker1, worker2, worker3, server):
        node.clear_objects()

    # Establish the NN model for each worker. This is model-centric FL, so it is the same model for all workers
    worker1_model, worker2_model, worker3_model = [new_client_model.copy() for _ in range(3)]
    server_model = new_server_model.copy()

    # Establish the optimizer for each worker, then for the server
    worker1_optimizer, worker2_optimizer, worker3_optimizer = [
        optim.SGD(client_copy.parameters(), lr=LR)
        for client_copy in (worker1_model, worker2_model, worker3_model)
    ]
    server_optimizer = optim.SGD(server_model.parameters(), lr=LR)

    # Organize the WORKER models and optimizers into lists. The server stuff must not be mixed with these
    models = [worker1_model, worker2_model, worker3_model]
    optimizers = [worker1_optimizer, worker2_optimizer, worker3_optimizer]
    worker_collection = list(zip((worker1, worker2, worker3), models, optimizers))

# RUN THE MODEL

In [25]:
# Define your number of epochs
epochs = 5
epoch_times = []

# Train all workers for the set number of epochs
for epoch in range(epochs):
    
    # Start counting the time for this epoch
#     start_time = time.time()
    print(f"Epoch Number {epoch + 1}")
    
#     # Send the client models to each of the workers
#     for worker, model, optimizer in worker_collection:
#         model.send(worker)
#     # Send the big part of the NN to the server
#     server_model.send(server)
    
    # Train the individual models, and then obtain the federated averaged model
#     train_start_time = time.time()
    new_client_model, new_server_model, batch_times, epoch_time = train()
#     train_total_time = time.time() - train_start_time
#     print("Total TRAIN time for epoch ", epoch, " = ", 
#           round((train_total_time*0.5)/60, 2), " min")
    # Get the full model back to my side
#     if epoch == 0:
#         new_worker_model.get()
#         new_server_model.get()
    
    # Save the epoch time
    epoch_times.append(epoch_time)
    
    # Stop counting the time
#     total_time = time.time() - start_time
#     print('Time for this epoch', round(total_time/60, 2), ' min')
    
    # Test your new model to keep a log of how good we're doing per epoch 
    test(new_client_model, new_server_model)
    
    # Update all models before the next epoch
    update_models(new_client_model, new_server_model)
    
    # Save the batch times
    df_batch = pd.DataFrame(batch_times)
    df_batch.to_csv("./Batch_times/STD_C&D_PSL_epoch_" + str(epoch) + ".csv")

# Save the epoch times
df_epoch = pd.DataFrame(epoch_times)
df_epoch.to_csv("./Epoch_times/STD_C&D_PSL.csv")

# Clean the global namespace after run is done
%reset -f

Epoch Number 1


  1%|          | 1/124 [00:16<32:55, 16.06s/it]

Total batch time =  0.8932  s


  2%|▏         | 2/124 [00:31<32:21, 15.92s/it]

Total batch time =  0.8327  s


  2%|▏         | 3/124 [00:47<31:56, 15.84s/it]

Total batch time =  0.8026  s


  3%|▎         | 4/124 [01:03<31:30, 15.76s/it]

Total batch time =  0.8204  s


  4%|▍         | 5/124 [01:18<30:49, 15.54s/it]

Total batch time =  0.8507  s


  5%|▍         | 6/124 [01:34<30:47, 15.66s/it]

Total batch time =  0.857  s


  6%|▌         | 7/124 [01:49<30:27, 15.62s/it]

Total batch time =  0.8153  s


  6%|▋         | 8/124 [02:04<29:53, 15.46s/it]

Total batch time =  0.8548  s


  7%|▋         | 9/124 [02:19<29:16, 15.28s/it]

Total batch time =  0.795  s


  8%|▊         | 10/124 [02:34<28:53, 15.20s/it]

Total batch time =  0.8314  s


  9%|▉         | 11/124 [02:49<28:26, 15.11s/it]

Total batch time =  0.7737  s


 10%|▉         | 12/124 [03:05<28:23, 15.21s/it]

Total batch time =  0.8684  s


 10%|█         | 13/124 [03:20<28:14, 15.26s/it]

Total batch time =  0.7999  s


 11%|█▏        | 14/124 [03:35<28:01, 15.29s/it]

Total batch time =  0.8258  s


 12%|█▏        | 15/124 [03:51<27:48, 15.30s/it]

Total batch time =  0.8433  s


 13%|█▎        | 16/124 [04:06<27:39, 15.36s/it]

Total batch time =  0.8331  s


 14%|█▎        | 17/124 [04:22<27:31, 15.43s/it]

Total batch time =  0.7863  s


 15%|█▍        | 18/124 [04:37<27:22, 15.49s/it]

Total batch time =  0.8462  s


 15%|█▌        | 19/124 [04:53<27:12, 15.55s/it]

Total batch time =  0.8105  s


 16%|█▌        | 20/124 [05:09<27:04, 15.62s/it]

Total batch time =  0.8277  s


 17%|█▋        | 21/124 [05:25<26:52, 15.65s/it]

Total batch time =  0.9041  s


 18%|█▊        | 22/124 [05:40<26:33, 15.62s/it]

Total batch time =  0.8427  s


 19%|█▊        | 23/124 [05:56<26:15, 15.59s/it]

Total batch time =  0.8294  s


 19%|█▉        | 24/124 [06:12<26:04, 15.65s/it]

Total batch time =  0.8507  s


 19%|█▉        | 24/124 [06:27<26:54, 16.14s/it]


Total batch time =  0.8895  s
Total training time for this epoch =  20.884167551994324  s 
 

Total TRAIN time for epoch  0  =  3.23  min
Time for this epoch 6.46  min
Initiated model testing:


100%|██████████| 6472/6472 [00:27<00:00, 233.89it/s]


Accuracy of the new model =  0.496 
 



NameError: name 'update_models' is not defined