# Import necessary libraries

In [6]:
# Pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from torchvision import datasets, transforms

# General
import numpy as np
from tqdm import tqdm
import time

# General parameters

In [7]:
# Side length, in pixels, of the square input images (images are IMG_SIZE x IMG_SIZE)
IMG_SIZE = 64
# Number of images sliced off per batch in the timing loop
BATCH_SIZE = 100
# Learning rate passed to the optimizer
LR = 1e-3

# Notes:
- From here on you are free to uncomment all print()'s if you wish to see more details
- If not, just modify your NN classes, then RUN ALL CELLS and check your results at the end

# Complete CNN
In the following cell, define your complete CNN as if it were running entirely on a single processor.

## NN STD

In [8]:
class Net(nn.Module):
    """CNN made of four conv + ReLU + max-pool stages followed by three linear layers.

    The network takes single-channel square images and produces a 2-element
    softmax vector per image.

    Args:
        img_size: Side length of the square input images. Defaults to 64. It
            must be large enough for the feature map to survive four unpadded
            3x3 convolutions, each followed by a 2x2 max-pool.
    """

    def __init__(self, img_size=64):
        super().__init__()
        # Four convolutional layers, all with 3x3 kernels (no padding, stride 1).
        # Channels grow 1 -> 32 -> 64 -> 128 -> 256.
        self.conv1 = nn.Conv2d(1, 32, 3)
        self.conv2 = nn.Conv2d(32, 64, 3)
        self.conv3 = nn.Conv2d(64, 128, 3)
        self.conv4 = nn.Conv2d(128, 256, 3)

        # The first fully connected layer needs the flattened size of the last
        # conv stage's output. Rather than computing it by hand, push one dummy
        # image through the conv stack ONCE and let convs() record the size in
        # self._to_linear. no_grad() avoids building a useless autograd graph.
        self._to_linear = None
        with torch.no_grad():
            dummy = torch.randn(img_size, img_size).view(-1, 1, img_size, img_size)
            self.convs(dummy)

        # Fully connected head: flattened conv features -> 512 -> 256 -> 2
        self.fc1 = nn.Linear(self._to_linear, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 2)

    def convs(self, x):
        """Run the convolutional part of the forward pass.

        Each of the four stages is convolution -> ReLU -> 2x2 max-pooling. On
        the first call only, the number of features per sample in the output is
        stored in self._to_linear for sizing the first linear layer.

        Args:
            x: Batch of images shaped (batch, 1, height, width).

        Returns:
            The feature maps produced by the last conv stage.
        """
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            x = F.max_pool2d(F.relu(conv(x)), (2, 2))

        if self._to_linear is None:
            # Features per sample = channels * height * width of one output map
            self._to_linear = x[0].numel()
        return x

    def forward(self, x):
        """Full forward pass: conv stack, flatten, linear head, softmax.

        Args:
            x: Batch of images shaped (batch, 1, height, width).

        Returns:
            Tensor shaped (batch, 2) whose rows sum to 1 (softmax over dim 1).
        """
        # Run the convolutional stages first
        x = self.convs(x)
        # Flatten each sample's feature maps into one vector
        x = x.view(-1, self._to_linear)
        # Hidden fully connected layers
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # Output layer followed by softmax normalisation across the 2 outputs
        x = self.fc3(x)
        x = F.softmax(x, dim = 1)
        return x

net = Net()

Take a look at your CNN and obtain some values

In [9]:
# Take a look at your model
# Bind the instantiated network to the name the optimizer and timing loop use
model_complete = net
# A bare expression on the last line of a cell displays its repr (the layer summary)
model_complete

Net(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (conv4): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=1024, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=256, bias=True)
  (fc3): Linear(in_features=256, out_features=2, bias=True)
)

# Obtain the data to use for the experiments

In [10]:
# Directory the MNIST files are downloaded to / read from
MNIST_ROOT = r"/media/wilfredo/Willie931GB/EURECOM_SLU_Linux/II_SEMESTER/SLU/PAPER_KDD2022/EXPERIMENTS/PySyft/Datasets/MNIST_64"

# Shared preprocessing: resize every image to IMG_SIZE, then convert it to a tensor
resize_to_tensor = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    transforms.ToTensor(),
])

# Training and test splits, downloaded on first use
train = datasets.MNIST(MNIST_ROOT, train=True, download=True, transform=resize_to_tensor)
test = datasets.MNIST(MNIST_ROOT, train=False, download=True, transform=resize_to_tensor)

In [11]:
# Wrap each split in a shuffling loader. NOTE: the batch size is HALF of the split,
# so a single batch drawn from a loader holds half of that split's samples.
training_data = DataLoader(train, batch_size=len(train) // 2, shuffle=True)
test_data = DataLoader(test, batch_size=len(test) // 2, shuffle=True)

In [12]:
# Pull ONE batch from each loader and unpack images and labels from that same batch.
# The loaders shuffle, so every new iter(...) yields a different permutation: fetching
# the images and the labels through two separate next(iter(...)) calls would pair each
# image with the label of some other image (and load every batch twice).
X, y_unformatted = next(iter(training_data))
X_test, y_test_unformatted = next(iter(test_data))

# Make sure the labels are floats
y_unformatted = y_unformatted.type(torch.FloatTensor)
y_test_unformatted = y_test_unformatted.type(torch.FloatTensor)

# The two other cases in this paper use 2-dimensional labels (0, 1), not only (0).
# MNIST by default comes with labels in the format (9) instead of (9, 0). To change this,
# create all-zero tensors of the same size as the labels...
y_unformatted_addition = torch.zeros(y_unformatted.size())
y_test_unformatted_addition = torch.zeros(y_test_unformatted.size())
# ...and stack them along a new last dimension, giving labels shaped (N, 2)
y = torch.stack((y_unformatted, y_unformatted_addition), -1)
y_test = torch.stack((y_test_unformatted, y_test_unformatted_addition), -1)

In [13]:
# Training images and labels
train_X, train_y = X, y

# Testing (validation) images and labels
test_X, test_y = X_test, y_test

In [14]:
# Loss: mean squared error between the network output and the 2-element labels
loss_function = nn.MSELoss()
# Optimizer: plain SGD over every parameter of the complete model, with learning rate LR
optimizer = optim.SGD(params=model_complete.parameters(), lr=LR)

# Experiments
1. Run the forward pass (FP) and the backward pass (BP) over one epoch
2. Record the time the FP and the BP each take for every batch
3. Save the results and average them out

In [15]:
FP_time_list = []
BP_time_list = []
for i in tqdm(range(0, int(len(train_X)), BATCH_SIZE)):
        
    # Get the batches for each of the workers
    batch_X = train_X[i : i + BATCH_SIZE]
    batch_y = train_y[i : i + BATCH_SIZE]

    # Train the models
    # The following in real life would be done in parallel
    FP_start_time = time.time()
    pred = model_complete(batch_X)
    FP_end_time = time.time() - FP_start_time

    # Calculate the loss functions
    BP_start_time = time.time()
    loss = loss_function(pred, batch_y)

    # Do the backwards pass
    loss.backward()
    optimizer.step()
    BP_end_time = time.time() - BP_start_time

    # Accumulate the times
    FP_time_list.append(FP_end_time)
    BP_time_list.append(BP_end_time)
        
# OUTSIDE THE FOR LOOP: After all workers are done training...
avg_FP_time = sum(FP_time_list)/len(FP_time_list)
avg_BP_time = sum(BP_time_list)/len(BP_time_list)
print("avg_FP_time PER BATCH = ", round(avg_FP_time, 4), " s")
# print("avg_BP_time PER BATCH = ", round(avg_BP_time, 4), " s \n")

nn_size = 0.06559962 # GFLOP PER **IMAGE**
nn_size_batch = nn_size * BATCH_SIZE # GFLOP PER **BATCH** OF IMAGES
# Now we have GFLOP/batch and SECONDS/batch, so:
proc_power = nn_size_batch / avg_FP_time # turns to GFLOP/s
print("Your processor power for these operations is: ", proc_power, " GFLOP/s")

# Clean the global namespace
%reset -f

100%|██████████| 300/300 [03:05<00:00,  1.62it/s]

avg_FP_time PER BATCH =  0.2129  s
Your processor power for these operations is:  30.81119543152012  GFLOP/s



