# Import necessary libraries

In [1]:
# For FLOP measurements
from ptflops import get_model_complexity_info

# Pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# General parameters

In [2]:
# Side length Y of the square input images (each image is Y x Y pixels)
IMG_SIZE = 50
# Number of samples per batch; used below as a multiplier in the memory estimates
BATCH_SIZE = 124
# Learning rate (not referenced by any of the measurement cells visible in this notebook)
LR = 0.001

# Memory safety factor: every memory estimate below is multiplied by this value so that the
# result errs on the cautious side and leaves headroom for memory spikes caused by external processes
MEM_SF = 1.3

# Notes:
- From here on you are free to uncomment all print()'s if you wish to see more details
- If not, just modify your NN classes, then RUN ALL CELLS and check your results at the end

# Complete CNN
Define your complete CNN, as if it were running all on a single processor, in the following cell

## NN Mini

In [3]:
class Net(nn.Module):
    """Complete (unsplit) CNN: three conv + ReLU + max-pool stages, then two fully connected layers.

    Args:
        img_size: Side length of the square, single-channel input images. It is only used
            to size the first fully connected layer. Defaults to 50.
    """

    def __init__(self, img_size=50):
        super().__init__()
        # Three convolutional layers with 5x5 kernels (the "window" used to scan for
        # features is 5x5): 1 input channel -> 16 -> 32 -> 64 feature maps
        self.conv1 = nn.Conv2d(1, 16, 5)
        self.conv2 = nn.Conv2d(16, 32, 5)
        self.conv3 = nn.Conv2d(32, 64, 5)

        # The fully connected layers need to know the flattened size of the conv output.
        # Push one dummy image through the conv stack ONCE and let convs() record that
        # size in self._to_linear. No gradients are needed for this shape probe.
        self._to_linear = None
        with torch.no_grad():
            self.convs(torch.randn(1, 1, img_size, img_size))

        # Fully connected classifier head: flattened conv features -> 32 -> 2 outputs
        self.fc1 = nn.Linear(self._to_linear, 32)
        self.fc2 = nn.Linear(32, 2)

    def convs(self, x):
        """Run the convolutional part of the forward pass.

        On the first call (while self._to_linear is still None) this also records the
        flattened per-sample size of the output, which sizes fc1.

        Args:
            x: Input tensor of shape (batch, 1, H, W).

        Returns:
            The feature maps produced by the third conv + ReLU + max-pool stage.
        """
        # Each stage: convolution -> ReLU activation -> 2x2 max pooling
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv3(x)), (2, 2))

        if self._to_linear is None:
            # channels * height * width of a single sample
            self._to_linear = x[0].numel()
        return x

    def forward(self, x):
        """Full forward pass: conv stack, flatten, fully connected layers, softmax.

        Returns:
            Tensor of shape (batch, 2) whose rows sum to 1.
        """
        # Run the conv layers first
        x = self.convs(x)
        # Flatten the conv output so it can be fed to the fully connected layers
        x = x.view(-1, self._to_linear)
        x = F.relu(self.fc1(x))
        # Final layer
        x = self.fc2(x)
        # Softmax turns the two outputs into a probability-like vector.
        # NOTE(review): if this model is trained with nn.CrossEntropyLoss (which applies
        # log-softmax itself), the softmax here would be applied twice - confirm against
        # the training code, which is not part of this notebook.
        x = F.softmax(x, dim = 1)
        return x

net = Net()

## NN Mini

Inspect your CNN, then measure its parameter count, its FLOPs per forward pass and its estimated memory footprint

In [4]:
# Alias the complete network and display its layer structure as the cell output
model = net
model

Net(
  (conv1): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=256, out_features=32, bias=True)
  (fc2): Linear(in_features=32, out_features=2, bias=True)
)

In [5]:
# Measure the complete model: multiply-accumulate (MAC) count of one forward pass (FP)
# and the number of parameters, both returned as plain numbers rather than strings
mac_count, param_count = get_model_complexity_info(
    net,
    (1, IMG_SIZE, IMG_SIZE),
    as_strings=False,
    print_per_layer_stat=False,
    verbose=False,
)
# print('Computational complexity: ', mac_count, " MACs")

# 1 MAC = 1 multiply-accumulate operation = 2 FLOPs
# print('Computational complexity: ', mac_count * 2, " FLOPs")
# 1 GFLOP = 1000000000 FLOPs
G_base = mac_count * 2 / 1_000_000_000
# print('Computational complexity: ', G_base, " GFLOPs")
# print('Number of parameters: ', param_count)

# Memory estimate, converted Params --> Bytes --> kB --> MB --> GB (1024 per step):
#   * every weight is a 32 bit float, i.e. 4 bytes
#   * (1 + 3): the forward pass plus an ADDITIONAL triple of that amount for the backward
#     pass (activations and gradients for neurons, gradients for weights, momentum, etc.)
#   * scaled by the number of images in a batch and by the memory safety factor
M_base = (param_count * 4 / 1024 / 1024 / 1024) * (1 + 3) * BATCH_SIZE * MEM_SF
# print("Total size in memory:  ", round(M_base, 5), " GB")
print("Total parameters in this model: ", param_count)

Total parameters in this model:  72802


# Split NN Mini
Specify your CNN split into Server and Client models

In [6]:
class Net_client(nn.Module):
    """Client-side half of the split CNN: the first two conv + ReLU + max-pool stages."""

    def __init__(self):
        super().__init__()
        # Two convolutional layers with 5x5 kernels: 1 input channel -> 16 -> 32 feature maps
        self.conv1 = nn.Conv2d(1, 16, 5)
        self.conv2 = nn.Conv2d(16, 32, 5)

    def convs(self, x):
        """Apply both conv stages and return the resulting feature maps."""
        # Each stage: convolution -> ReLU activation -> 2x2 max pooling
        for conv in (self.conv1, self.conv2):
            x = F.max_pool2d(F.relu(conv(x)), (2, 2))
        return x

    def forward(self, x):
        """The client's forward pass consists of the conv stages only."""
        return self.convs(x)

net_client = Net_client()

In [7]:
class Net_server(nn.Module):
    """Server-side half of the split CNN: the third conv stage plus the fully connected layers.

    Args:
        to_linear: Flattened per-sample size of the conv3 stage output, i.e. the number of
            input features of fc1. Defaults to 256, the value the complete network reports
            for its fc1 layer (Linear(in_features=256, ...)) earlier in this notebook.
    """

    def __init__(self, to_linear=256):
        super().__init__()

        # Start from the third convolutional layer: 32 -> 64 feature maps, 5x5 kernel
        self.conv3 = nn.Conv2d(32, 64, 5)

        # Unlike the complete network, this size is not probed with a dummy pass;
        # it is supplied by the caller and must match what the client half produces.
        self._to_linear = to_linear
        self.fc1 = nn.Linear(self._to_linear, 32)
        self.fc2 = nn.Linear(32, 2)

    def convs(self, x):
        """Run the server's single conv stage: convolution -> ReLU -> 2x2 max pooling.

        Args:
            x: Feature maps with 32 channels, shape (batch, 32, H, W).
        """
        x = self.conv3(x)
        x = F.relu(x)
        x = F.max_pool2d(x, (2, 2))
        return x

    def forward(self, x):
        """Conv stage, flatten, fully connected layers, softmax.

        Returns:
            Tensor of shape (batch, 2) whose rows sum to 1.
        """
        # Run the conv layer first
        x = self.convs(x)
        # Flatten the conv output so it can be fed to the fully connected layers
        x = x.view(-1, self._to_linear)
        x = F.relu(self.fc1(x))
        # Final layer
        x = self.fc2(x)
        # Softmax turns the two outputs into a probability-like vector
        x = F.softmax(x, dim = 1)
        return x

net_server = Net_server()

In [8]:
# Alias the two halves of the split network so they can be displayed in the next cells
model_client = net_client
model_server = net_server

In [9]:
# Display the layer structure of the client-side model
model_client

Net_client(
  (conv1): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1))
)

In [10]:
# Results for the client-side model
# Measure a fresh Net_client instance: MAC count of one forward pass (FP) and the
# number of parameters, both returned as plain numbers rather than strings
mac_count_client, param_count_client = get_model_complexity_info(
    Net_client(),
    (1, IMG_SIZE, IMG_SIZE),
    as_strings=False,
    print_per_layer_stat=False,
    verbose=False,
)
# print('Computational complexity: ', mac_count_client, " MACs")

# 1 MAC = 1 multiply-accumulate operation = 2 FLOPs
# print('Computational complexity: ', mac_count_client * 2, " FLOPs")
# 1 GFLOP = 1000000000 FLOPs
G_client = mac_count_client * 2 / 1_000_000_000
# print('Computational complexity: ', G_client, " GFLOPs")
# print('Number of parameters: ', param_count_client)

# Memory estimate, converted Params --> Bytes --> kB --> MB --> GB (1024 per step):
#   * every weight is a 32 bit float, i.e. 4 bytes
#   * (1 + 3): the forward pass plus an ADDITIONAL triple of that amount for the backward
#     pass (activations and gradients for neurons, gradients for weights, momentum, etc.)
#   * scaled by the number of images in a batch and by the memory safety factor
M_client = (param_count_client * 4 / 1024 / 1024 / 1024) * (1 + 3) * BATCH_SIZE * MEM_SF
# print("Total size in memory:  ", round(M_client, 5), " GB")

In [11]:
# Display the layer structure of the server-side model
model_server

Net_server(
  (conv3): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=256, out_features=32, bias=True)
  (fc2): Linear(in_features=32, out_features=2, bias=True)
)

In [12]:
# Results for the server-side model
# The figures for the whole network and for the client side are already known, so the
# server-side counts are obtained by simple subtraction instead of a new measurement
mac_count_server = mac_count - mac_count_client
param_count_server = param_count - param_count_client

# print('Computational complexity: ', mac_count_server, " MACs")
# Remember that 1 MAC = 1 Multiply-Accumulation operation = 2 FLOPs, so:
# print('Computational complexity: ', mac_count_server * 2, " FLOPs")
# And one 1 GFLOP = 1000000000 FLOP so:
G_server = mac_count_server * 2 / 1000000000
# print('Computational complexity: ', G_server, " GFLOPs")
# print('Number of parameters: ', param_count_server)

# Convert these: Params --> Bytes --> kB --> MB --> GB
# Remember your weights are 32 bit floats, and each 32 bit float == 4 bytes
# We will need an ADDITIONAL triple of this amount because of the backwards pass
# BP includes: activations and gradients for neurons, gradients for weights, momentum, etc.
# The estimate is then scaled by the batch size and the memory safety factor, so:
M_server = (param_count_server * 4 * 1/1024 * 1/1024 * 1/1024) * (1 + 3) * BATCH_SIZE * MEM_SF
# print("Total size in memory:  ", round(M_server, 5), " GB")

# Measurement Results
The following results are shown with the same variable names as the ones used in the simulator

In [13]:
# Summary of all measurements. The printed labels use the simulator's variable names.
# All "GB" figures were computed with 1024-based conversions in the cells above.

# Model's total parameters:
print("Total parameters in model = ", param_count, " parameters \n")

# print("Total FLOPs for one FP of ONE SAMPLE (NOT one batch, NOT one epoch): ")
print("G_base = ", G_base, " GFLOP")
# print("FLOPs done for one FP on the server side:")
print("G_server = ", G_server, " GFLOP")
# print("FLOPs done for one FP on the client side:")
print("G_client = ", G_client, " GFLOP")
# print("FLOPs needed for averaging and aggregating the models (in specific architectures): ")
# NOTE(review): the 0.001 factor is not derived anywhere in this notebook - confirm its origin
print("G_agg = ", G_base * 0.001, " GFLOP \n")


# print("Total GB in local memory needed for the model:")
print("M_base = ", M_base, " GB")
# print("GBs in local memory needed for the model on the server:")
print("M_server = ", M_server, " GB")
# print("GBs in local memory needed for the model on the clients:")
print("M_client = ", M_client, " GB")
# print("GBs in local memory needed for the aggregation phase on the server:")
print("M_agg = ", M_base, " GB \n") # Reported equal to M_base because ALL parameters are federated-averaged in this phase


# print("Size of weights to be sent to server for federated averaging:") #ALL parameters!
# NOTE(review): M_base includes the (1 + 3) training multiplier, BATCH_SIZE and MEM_SF, so it is
# larger than the raw weight payload (param_count * 4 bytes) - confirm this is what the simulator expects
print("D_weights = ", M_base, " GB")
# print("Size of data to be sent as intermediate results from client to server:") # Size of ALL client params sent!
# NOTE(review): M_client is derived from the client's parameter count, not from the size of the
# client's output activations - confirm this matches the simulator's definition of D_client_out
print("D_client_out = ", M_client, " GB")

Total parameters in model =  72802  parameters 

G_base =  0.013604996  GFLOP
G_server =  0.00257978  GFLOP
G_client =  0.011025216  GFLOP
G_agg =  1.3604996e-05  GFLOP 

M_base =  0.17487529516220093  GB
M_server =  0.14305270910263063  GB
M_client =  0.031822586059570314  GB
M_agg =  0.17487529516220093  GB 

D_weights =  0.17487529516220093  GB
D_client_out =  0.031822586059570314  GB
