# ConvNN Random Sampling Time Complexity Test

- The random-sampling method takes as long as, or longer than, using all samples. Need to figure out why.
- Try using the same random seed for all tests.
- Try n = 1 sample; this should be very quick.

In [1]:
# Torch
import torch 
import torch.nn as nn
import torch.nn.functional as F
from torch import optim 


# Train + Data 
import sys 
sys.path.append('../Layers')
from Conv1d_NN import * 
from Conv2d_NN import * 

sys.path.append('../Data')
from CIFAR10 import * 


sys.path.append('../Train')
from train2d import * 


  from .autonotebook import tqdm as notebook_tqdm


## I. 2D Testing - Time Complexity Test

### i. Conv2d_NN 

In [2]:
# Smoke test: push one random batch through a single Conv2d_NN layer and
# compare the tensor shapes going in and coming out.
ex = torch.rand(32, 1, 28, 28)
print("Input: ", ex.shape)

conv2d_nn = Conv2d_NN(
    in_channels=1,
    out_channels=3,
    K=3,
    stride=3,
    padding=0,
    shuffle_pattern="N/A",
    shuffle_scale=2,
    samples=5,
)
output = conv2d_nn(ex)
print("Output: ", output.shape)
      

Input:  torch.Size([32, 1, 28, 28])
Output:  torch.Size([32, 3, 28, 28])


### ii. Models

In [3]:
# "All samples" baseline: three Conv2d_NN stages (3 -> 5 -> 10 -> 20 channels),
# each built with samples="all" and K = 1, followed by a linear classifier.
conv2d_nn_all = nn.Sequential(
    # The stages differ only in their channel counts, so build them in order
    # from the (in_channels, out_channels) pairs.
    *[
        Conv2d_NN(
            in_channels=c_in,
            out_channels=c_out,
            K=1,
            stride=1,
            padding=0,
            shuffle_pattern="BA",
            shuffle_scale=2,
            samples="all",
        )
        for c_in, c_out in [(3, 5), (5, 10), (10, 20)]
    ],
    nn.Flatten(),
    nn.Linear(20 * 32 * 32, 10),  # 20 channels x 32 x 32 pixels = 20480 features -> 10 classes
).to('cpu')

# Layer-by-layer summary for a single 3 x 32 x 32 input
from torchsummary import summary
summary(conv2d_nn_all, (3, 32, 32))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
           Flatten-1              [-1, 12, 256]               0
            Conv1d-2              [-1, 20, 256]             260
         Conv1d_NN-3              [-1, 20, 256]               0
         Conv2d_NN-4            [-1, 5, 32, 32]               0
           Flatten-5              [-1, 20, 256]               0
            Conv1d-6              [-1, 40, 256]             840
         Conv1d_NN-7              [-1, 40, 256]               0
         Conv2d_NN-8           [-1, 10, 32, 32]               0
           Flatten-9              [-1, 40, 256]               0
           Conv1d-10              [-1, 80, 256]           3,280
        Conv1d_NN-11              [-1, 80, 256]               0
        Conv2d_NN-12           [-1, 20, 32, 32]               0
          Flatten-13                [-1, 20480]               0
           Linear-14                   

In [4]:
# n = 1 model: same three-stage architecture as the all-samples model, but
# every Conv2d_NN layer is built with samples=1 (K = 1).
conv2d_nn_1 = nn.Sequential(
    # Only the channel counts change between stages.
    *[
        Conv2d_NN(
            in_channels=c_in,
            out_channels=c_out,
            K=1,
            stride=1,
            padding=0,
            shuffle_pattern="BA",
            shuffle_scale=2,
            samples=1,
        )
        for c_in, c_out in [(3, 5), (5, 10), (10, 20)]
    ],
    nn.Flatten(),
    nn.Linear(20 * 32 * 32, 10),  # 20480 flattened features -> 10 classes
).to('cpu')

# Layer-by-layer summary for a single 3 x 32 x 32 input
from torchsummary import summary
summary(conv2d_nn_1, (3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
           Flatten-1              [-1, 12, 256]               0
            Conv1d-2              [-1, 20, 256]             260
         Conv1d_NN-3              [-1, 20, 256]               0
         Conv2d_NN-4            [-1, 5, 32, 32]               0
           Flatten-5              [-1, 20, 256]               0
            Conv1d-6              [-1, 40, 256]             840
         Conv1d_NN-7              [-1, 40, 256]               0
         Conv2d_NN-8           [-1, 10, 32, 32]               0
           Flatten-9              [-1, 40, 256]               0
           Conv1d-10              [-1, 80, 256]           3,280
        Conv1d_NN-11              [-1, 80, 256]               0
        Conv2d_NN-12           [-1, 20, 32, 32]               0
          Flatten-13                [-1, 20480]               0
           Linear-14                   

### iii. Data + Training

In [5]:
# CIFAR-10 (not MNIST): instantiate the project's CIFAR10 helper from ../Data.
# The training cells below read `cifar10.train_loader` and `cifar10.test_loader` from it.
cifar10 = CIFAR10()


Files already downloaded and verified
Files already downloaded and verified


In [6]:
# All samples: train the samples="all" model on CIFAR-10 and report test accuracy.
#
# Seed the global RNG first so every timed run in this notebook draws from the
# same random stream (batch shuffling and, presumably, the layers' random
# sampling - verify against Conv2d_NN). The weights were already initialised
# when the model was built, so this seed does not affect them.
torch.manual_seed(0)

conv2d_nn_all.to('mps')

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(conv2d_nn_all.parameters(), lr=0.001)
num_epochs = 10
# train_model prints per-epoch time and loss; evaluate_accuracy prints and returns the test accuracy
train_model(conv2d_nn_all, cifar10.train_loader, criterion, optimizer, num_epochs)
evaluate_accuracy(conv2d_nn_all, cifar10.test_loader)

Epoch 1, Time: 33.16854500770569, Loss: 1.8030061689789032
Epoch 2, Time: 32.9063937664032, Loss: 1.7296879790018282
Epoch 3, Time: 32.79917025566101, Loss: 1.7079878170471972
Epoch 4, Time: 32.795594215393066, Loss: 1.6920543495956284
Epoch 5, Time: 32.94094491004944, Loss: 1.6797637506519132
Epoch 6, Time: 32.8787841796875, Loss: 1.6706648008597782
Epoch 7, Time: 32.99019765853882, Loss: 1.6626758720258923
Epoch 8, Time: 32.972837924957275, Loss: 1.6540381501397818
Epoch 9, Time: 32.849344968795776, Loss: 1.6475171594668532
Epoch 10, Time: 32.607789039611816, Loss: 1.6425090218748888

 Average epoch time: 32.89096019268036
Accuracy on test set: 37.75%


37.75

In [7]:
# 1 sample: train the samples=1 model on CIFAR-10 and report test accuracy.
#
# Seed the global RNG first so this run draws from the same random stream as
# the other timed runs (batch shuffling and, presumably, the layers' random
# sampling - verify against Conv2d_NN). The weights were already initialised
# when the model was built, so this seed does not affect them.
torch.manual_seed(0)

conv2d_nn_1.to('mps')

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(conv2d_nn_1.parameters(), lr=0.001)
num_epochs = 10
# train_model prints per-epoch time and loss; evaluate_accuracy prints and returns the test accuracy
train_model(conv2d_nn_1, cifar10.train_loader, criterion, optimizer, num_epochs)
evaluate_accuracy(conv2d_nn_1, cifar10.test_loader)


Epoch 1, Time: 10.562476873397827, Loss: 1.7877723868850552
Epoch 2, Time: 10.67931079864502, Loss: 1.7135777810345525
Epoch 3, Time: 10.647848129272461, Loss: 1.691183196156836
Epoch 4, Time: 10.451513051986694, Loss: 1.6743735943913765
Epoch 5, Time: 10.505614995956421, Loss: 1.6602925457003173
Epoch 6, Time: 10.404139041900635, Loss: 1.6516371700160033
Epoch 7, Time: 10.531826257705688, Loss: 1.6448648129887593
Epoch 8, Time: 10.623788118362427, Loss: 1.634292572050753
Epoch 9, Time: 10.363701105117798, Loss: 1.630638681104421
Epoch 10, Time: 10.406721115112305, Loss: 1.6238336387802572

 Average epoch time: 10.517693948745727
Accuracy on test set: 39.67%


39.67

## II. 2D Testing - KNN/Sampling Test

### i. Conv2d_NN: 10 samples with K = 1 vs. 3 vs. 5 (KNN)

In [8]:
# CIFAR-10 (not MNIST): re-create the CIFAR10 dataset object for this section.
# The training cells below read `cifar10.train_loader` and `cifar10.test_loader` from it.
cifar10 = CIFAR10()


Files already downloaded and verified
Files already downloaded and verified


#### i. Models

In [10]:
# n = 10 samples, K = 1: three Conv2d_NN stages (3 -> 5 -> 10 -> 20 channels)
# followed by a linear classifier.
conv2d_nn_10_1 = nn.Sequential(
    # Only the channel counts change between stages.
    *[
        Conv2d_NN(
            in_channels=c_in,
            out_channels=c_out,
            K=1,
            stride=1,
            padding=0,
            shuffle_pattern="BA",
            shuffle_scale=2,
            samples=10,
        )
        for c_in, c_out in [(3, 5), (5, 10), (10, 20)]
    ],
    nn.Flatten(),
    nn.Linear(20 * 32 * 32, 10),  # 20480 flattened features -> 10 classes
).to('cpu')

# Layer-by-layer summary for a single 3 x 32 x 32 input
from torchsummary import summary
summary(conv2d_nn_10_1, (3, 32, 32))
    

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
           Flatten-1              [-1, 12, 256]               0
            Conv1d-2              [-1, 20, 256]             260
         Conv1d_NN-3              [-1, 20, 256]               0
         Conv2d_NN-4            [-1, 5, 32, 32]               0
           Flatten-5              [-1, 20, 256]               0
            Conv1d-6              [-1, 40, 256]             840
         Conv1d_NN-7              [-1, 40, 256]               0
         Conv2d_NN-8           [-1, 10, 32, 32]               0
           Flatten-9              [-1, 40, 256]               0
           Conv1d-10              [-1, 80, 256]           3,280
        Conv1d_NN-11              [-1, 80, 256]               0
        Conv2d_NN-12           [-1, 20, 32, 32]               0
          Flatten-13                [-1, 20480]               0
           Linear-14                   

In [11]:
# n = 10 samples, K = 3 (stride = 3): three Conv2d_NN stages
# (3 -> 5 -> 10 -> 20 channels) followed by a linear classifier.
conv2d_nn_10_3 = nn.Sequential(
    # Only the channel counts change between stages.
    *[
        Conv2d_NN(
            in_channels=c_in,
            out_channels=c_out,
            K=3,
            stride=3,
            padding=0,
            shuffle_pattern="BA",
            shuffle_scale=2,
            samples=10,
        )
        for c_in, c_out in [(3, 5), (5, 10), (10, 20)]
    ],
    nn.Flatten(),
    nn.Linear(20 * 32 * 32, 10),  # 20480 flattened features -> 10 classes
).to('cpu')

# Layer-by-layer summary for a single 3 x 32 x 32 input
from torchsummary import summary
summary(conv2d_nn_10_3, (3, 32, 32))
    

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
           Flatten-1              [-1, 12, 256]               0
            Conv1d-2              [-1, 20, 256]             740
         Conv1d_NN-3              [-1, 20, 256]               0
         Conv2d_NN-4            [-1, 5, 32, 32]               0
           Flatten-5              [-1, 20, 256]               0
            Conv1d-6              [-1, 40, 256]           2,440
         Conv1d_NN-7              [-1, 40, 256]               0
         Conv2d_NN-8           [-1, 10, 32, 32]               0
           Flatten-9              [-1, 40, 256]               0
           Conv1d-10              [-1, 80, 256]           9,680
        Conv1d_NN-11              [-1, 80, 256]               0
        Conv2d_NN-12           [-1, 20, 32, 32]               0
          Flatten-13                [-1, 20480]               0
           Linear-14                   

In [12]:
# n = 10 samples, K = 5 (stride = 5): three Conv2d_NN stages
# (3 -> 5 -> 10 -> 20 channels) followed by a linear classifier.
conv2d_nn_10_5 = nn.Sequential(
    # Only the channel counts change between stages.
    *[
        Conv2d_NN(
            in_channels=c_in,
            out_channels=c_out,
            K=5,
            stride=5,
            padding=0,
            shuffle_pattern="BA",
            shuffle_scale=2,
            samples=10,
        )
        for c_in, c_out in [(3, 5), (5, 10), (10, 20)]
    ],
    nn.Flatten(),
    nn.Linear(20 * 32 * 32, 10),  # 20480 flattened features -> 10 classes
).to('cpu')

# Layer-by-layer summary for a single 3 x 32 x 32 input
from torchsummary import summary
summary(conv2d_nn_10_5, (3, 32, 32))
    

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
           Flatten-1              [-1, 12, 256]               0
            Conv1d-2              [-1, 20, 256]           1,220
         Conv1d_NN-3              [-1, 20, 256]               0
         Conv2d_NN-4            [-1, 5, 32, 32]               0
           Flatten-5              [-1, 20, 256]               0
            Conv1d-6              [-1, 40, 256]           4,040
         Conv1d_NN-7              [-1, 40, 256]               0
         Conv2d_NN-8           [-1, 10, 32, 32]               0
           Flatten-9              [-1, 40, 256]               0
           Conv1d-10              [-1, 80, 256]          16,080
        Conv1d_NN-11              [-1, 80, 256]               0
        Conv2d_NN-12           [-1, 20, 32, 32]               0
          Flatten-13                [-1, 20480]               0
           Linear-14                   

#### ii. Data + Training

In [13]:
# n = 10, K = 1: train on CIFAR-10 and report test accuracy.
#
# Seed the global RNG first so this run draws from the same random stream as
# the other timed runs (batch shuffling and, presumably, the layers' random
# sampling - verify against Conv2d_NN). The weights were already initialised
# when the model was built, so this seed does not affect them.
torch.manual_seed(0)

conv2d_nn_10_1.to('mps')

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(conv2d_nn_10_1.parameters(), lr=0.001)
num_epochs = 10
# train_model prints per-epoch time and loss; evaluate_accuracy prints and returns the test accuracy
train_model(conv2d_nn_10_1, cifar10.train_loader, criterion, optimizer, num_epochs)
evaluate_accuracy(conv2d_nn_10_1, cifar10.test_loader)

Epoch 1, Time: 14.625463008880615, Loss: 1.7897454853862753
Epoch 2, Time: 11.854705333709717, Loss: 1.7112682469360663
Epoch 3, Time: 14.57162094116211, Loss: 1.6932422922700263
Epoch 4, Time: 12.636346101760864, Loss: 1.6755355364831208
Epoch 5, Time: 10.969309091567993, Loss: 1.6627411109102352
Epoch 6, Time: 10.975165128707886, Loss: 1.6515161012444655
Epoch 7, Time: 10.115506887435913, Loss: 1.6451355307303426
Epoch 8, Time: 10.190560102462769, Loss: 1.6379867430843051
Epoch 9, Time: 10.10482907295227, Loss: 1.6301509183081215
Epoch 10, Time: 10.044563055038452, Loss: 1.6223057784387827

 Average epoch time: 11.60880687236786
Accuracy on test set: 38.85%


38.85

In [14]:
# n = 10, K = 3: train on CIFAR-10 and report test accuracy.
#
# Seed the global RNG first so this run draws from the same random stream as
# the other timed runs (batch shuffling and, presumably, the layers' random
# sampling - verify against Conv2d_NN). The weights were already initialised
# when the model was built, so this seed does not affect them.
torch.manual_seed(0)

conv2d_nn_10_3.to('mps')

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(conv2d_nn_10_3.parameters(), lr=0.001)
num_epochs = 10
# train_model prints per-epoch time and loss; evaluate_accuracy prints and returns the test accuracy
train_model(conv2d_nn_10_3, cifar10.train_loader, criterion, optimizer, num_epochs)
evaluate_accuracy(conv2d_nn_10_3, cifar10.test_loader)

Epoch 1, Time: 35.69110822677612, Loss: 1.8093836395941731
Epoch 2, Time: 35.04939913749695, Loss: 1.7094615842680188
Epoch 3, Time: 35.04956388473511, Loss: 1.6841153220447433
Epoch 4, Time: 35.036799907684326, Loss: 1.6660403095547804
Epoch 5, Time: 35.05671811103821, Loss: 1.6521561746402165
Epoch 6, Time: 35.151490926742554, Loss: 1.6374492517212773
Epoch 7, Time: 35.043601989746094, Loss: 1.6313051456380683
Epoch 8, Time: 35.201579093933105, Loss: 1.6235981211637902
Epoch 9, Time: 35.040578842163086, Loss: 1.6102069415094908
Epoch 10, Time: 35.08999490737915, Loss: 1.6079818653633526

 Average epoch time: 35.14108350276947
Accuracy on test set: 41.58%


41.58

In [15]:
# n = 10, K = 5: train on CIFAR-10 and report test accuracy.
#
# Seed the global RNG first so this run draws from the same random stream as
# the other timed runs (batch shuffling and, presumably, the layers' random
# sampling - verify against Conv2d_NN). The weights were already initialised
# when the model was built, so this seed does not affect them.
torch.manual_seed(0)

conv2d_nn_10_5.to('mps')

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(conv2d_nn_10_5.parameters(), lr=0.001)
num_epochs = 10
# train_model prints per-epoch time and loss; evaluate_accuracy prints and returns the test accuracy
train_model(conv2d_nn_10_5, cifar10.train_loader, criterion, optimizer, num_epochs)
evaluate_accuracy(conv2d_nn_10_5, cifar10.test_loader)

Epoch 1, Time: 41.270713090896606, Loss: 1.8510341289098307
Epoch 2, Time: 41.752373933792114, Loss: 1.733542690649057
Epoch 3, Time: 41.16184115409851, Loss: 1.7043807192531693
Epoch 4, Time: 41.13418912887573, Loss: 1.6815717723363501
Epoch 5, Time: 41.148539304733276, Loss: 1.6659924874220358
Epoch 6, Time: 41.567811012268066, Loss: 1.6584399645895604
Epoch 7, Time: 41.787107706069946, Loss: 1.6462144927905344
Epoch 8, Time: 41.495346784591675, Loss: 1.637082338638013
Epoch 9, Time: 41.422688007354736, Loss: 1.6256226561868283
Epoch 10, Time: 41.39573001861572, Loss: 1.6279986694340816

 Average epoch time: 41.41363401412964
Accuracy on test set: 40.01%


40.01

## III. 2D Testing - 50 Samples, K = 8

In [29]:
# n = 50 samples, K = 8 (stride = 8): three Conv2d_NN stages
# (3 -> 5 -> 10 -> 20 channels) followed by a linear classifier.
conv2d_nn_50_8 = nn.Sequential(
    # Only the channel counts change between stages.
    *[
        Conv2d_NN(
            in_channels=c_in,
            out_channels=c_out,
            K=8,
            stride=8,
            padding=0,
            shuffle_pattern="BA",
            shuffle_scale=2,
            samples=50,
        )
        for c_in, c_out in [(3, 5), (5, 10), (10, 20)]
    ],
    nn.Flatten(),
    nn.Linear(20 * 32 * 32, 10),  # 20480 flattened features -> 10 classes
).to('cpu')

# Layer-by-layer summary for a single 3 x 32 x 32 input
from torchsummary import summary
summary(conv2d_nn_50_8, (3, 32, 32))
    

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
           Flatten-1              [-1, 12, 256]               0
            Conv1d-2              [-1, 20, 256]           1,940
         Conv1d_NN-3              [-1, 20, 256]               0
         Conv2d_NN-4            [-1, 5, 32, 32]               0
           Flatten-5              [-1, 20, 256]               0
            Conv1d-6              [-1, 40, 256]           6,440
         Conv1d_NN-7              [-1, 40, 256]               0
         Conv2d_NN-8           [-1, 10, 32, 32]               0
           Flatten-9              [-1, 40, 256]               0
           Conv1d-10              [-1, 80, 256]          25,680
        Conv1d_NN-11              [-1, 80, 256]               0
        Conv2d_NN-12           [-1, 20, 32, 32]               0
          Flatten-13                [-1, 20480]               0
           Linear-14                   

In [30]:
# n = 50, K = 8, shuffle_scale = 2 (the value the model was built with):
# train on CIFAR-10 and report test accuracy.
#
# Seed the global RNG first so this run draws from the same random stream as
# the other timed runs (batch shuffling and, presumably, the layers' random
# sampling - verify against Conv2d_NN). The weights were already initialised
# when the model was built, so this seed does not affect them.
torch.manual_seed(0)

conv2d_nn_50_8.to('mps')

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(conv2d_nn_50_8.parameters(), lr=0.001)
num_epochs = 10
# train_model prints per-epoch time and loss; evaluate_accuracy prints and returns the test accuracy
train_model(conv2d_nn_50_8, cifar10.train_loader, criterion, optimizer, num_epochs)
evaluate_accuracy(conv2d_nn_50_8, cifar10.test_loader)

Epoch 1, Time: 48.25150799751282, Loss: 1.8441631289394311
Epoch 2, Time: 47.570520877838135, Loss: 1.7299187328199597
Epoch 3, Time: 47.40331029891968, Loss: 1.6946193028593917
Epoch 4, Time: 46.82696294784546, Loss: 1.6743213398682186
Epoch 5, Time: 48.3626971244812, Loss: 1.6535950577472482
Epoch 6, Time: 46.158265113830566, Loss: 1.6379403567984891
Epoch 7, Time: 47.57890820503235, Loss: 1.6272206266822717
Epoch 8, Time: 45.86710286140442, Loss: 1.6127006884121224
Epoch 9, Time: 46.053531885147095, Loss: 1.6071525716110873
Epoch 10, Time: 46.615870237350464, Loss: 1.597067325926193

 Average epoch time: 47.06886775493622
Accuracy on test set: 40.61%


40.61

## IV. PyTorch Profiler Trials

In [31]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Conv2d
import torchvision.models as models
from torch.profiler import profile, record_function, ProfilerActivity

### i. Simple Conv2d Model Profile

In [32]:
# Baseline for profiling: a plain two-layer Conv2d network (not a ResNet) for
# single-channel 28 x 28 inputs, ending in a 10-way linear classifier.
model = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=3, kernel_size=3, stride=1, padding=1),
    nn.ReLU(),
    nn.Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=1),
    nn.ReLU(),
    nn.Flatten(),
    # kernel 3 / stride 1 / padding 1 keeps the 28 x 28 size, so 6 * 28 * 28 features remain
    nn.Linear(in_features=6 * 28 * 28, out_features=10),
).to('cpu')

# `summary` is the torchsummary function imported by the earlier model cells
summary(model, (1, 28, 28))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 3, 28, 28]              30
              ReLU-2            [-1, 3, 28, 28]               0
            Conv2d-3            [-1, 6, 28, 28]             168
              ReLU-4            [-1, 6, 28, 28]               0
           Flatten-5                 [-1, 4704]               0
            Linear-6                   [-1, 10]          47,050
Total params: 47,248
Trainable params: 47,248
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.14
Params size (MB): 0.18
Estimated Total Size (MB): 0.33
----------------------------------------------------------------


In [33]:
# Profile one CPU forward pass of the simple Conv2d baseline.
# The tensor is deliberately not named `input`: that would shadow Python's
# built-in input() for the rest of the kernel session.
batch = torch.randn(5, 1, 28, 28)  # 5 random single-channel 28 x 28 images

with profile(activities=[ProfilerActivity.CPU], record_shapes=True, profile_memory=True) as prof:
    # Everything inside this labelled region is reported under "model_inference"
    with record_function("model_inference"):
        model(batch)

STAGE:2025-02-03 10:58:28 52846:5848710 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 10:58:28 52846:5848710 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 10:58:28 52846:5848710 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


In [34]:
# Show the ten profiler rows with the largest total CPU time
print(
    prof.key_averages().table(
        sort_by="cpu_time_total",
        row_limit=10,
    )
)

------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                          Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
               model_inference        41.72%       1.619ms       100.00%       3.881ms       3.881ms           0 b    -275.82 Kb             1  
                  aten::conv2d         0.21%       8.000us        31.72%       1.231ms     615.500us     137.81 Kb           0 b             2  
             aten::convolution         0.75%      29.000us        31.51%       1.223ms     611.500us     137.81 Kb           0 b             2  
            aten::_convolution         2.78%     108.000us        30.77%       1.194ms     597.000us     137.81 Kb     -91.88 Kb  

### ii. Conv2d_NN_ALL Model Profile

In [54]:
# All-samples model for profiling on 100 x 100 inputs: K = 5 (stride = 5) and
# magnitude_type='similarity'. This rebinds `conv2d_nn_all`, which was defined
# earlier for 32 x 32 inputs.
conv2d_nn_all = nn.Sequential(
    # Only the channel counts change between stages.
    *[
        Conv2d_NN(
            in_channels=c_in,
            out_channels=c_out,
            K=5,
            stride=5,
            padding=0,
            shuffle_pattern="BA",
            shuffle_scale=2,
            samples="all",
            magnitude_type='similarity',
        )
        for c_in, c_out in [(3, 5), (5, 10), (10, 20)]
    ],
    nn.Flatten(),
    nn.Linear(20 * 100 * 100, 10),  # 20 channels x 100 x 100 pixels = 200000 features -> 10 classes
).to('cpu')

# Layer-by-layer summary for a single 3 x 100 x 100 input
from torchsummary import summary
summary(conv2d_nn_all, (3, 100, 100))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
           Flatten-1             [-1, 12, 2500]               0
            Conv1d-2             [-1, 20, 2500]           1,220
         Conv1d_NN-3             [-1, 20, 2500]               0
         Conv2d_NN-4          [-1, 5, 100, 100]               0
           Flatten-5             [-1, 20, 2500]               0
            Conv1d-6             [-1, 40, 2500]           4,040
         Conv1d_NN-7             [-1, 40, 2500]               0
         Conv2d_NN-8         [-1, 10, 100, 100]               0
           Flatten-9             [-1, 40, 2500]               0
           Conv1d-10             [-1, 80, 2500]          16,080
        Conv1d_NN-11             [-1, 80, 2500]               0
        Conv2d_NN-12         [-1, 20, 100, 100]               0
          Flatten-13               [-1, 200000]               0
           Linear-14                   

In [55]:
# Profile one CPU forward pass of the all-samples Conv2d_NN model.
# The tensor is deliberately not named `input`: that would shadow Python's
# built-in input() for the rest of the kernel session.
batch = torch.randn(32, 3, 100, 100)  # 32 random 3-channel 100 x 100 images

conv2d_nn_all.to('cpu')

with profile(activities=[ProfilerActivity.CPU], record_shapes=True, profile_memory=True) as prof1:
    # Everything inside this labelled region is reported under "model_inference"
    with record_function("model_inference"):
        conv2d_nn_all(batch)

# Full operator table, ordered by total CPU time
print(prof1.key_averages().table(sort_by="cpu_time_total"))

STAGE:2025-02-03 11:29:31 52846:5848710 ActivityProfilerController.cpp:314] Completed Stage: Warm Up


-------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                 Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
-------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                      model_inference         0.80%       8.853ms       100.00%        1.105s        1.105s       2.39 Gb     -97.35 Mb             1  
                            aten::bmm        62.45%     689.725ms        62.58%     691.183ms     230.394ms       2.24 Gb       2.24 Gb             3  
                           aten::topk        15.83%     174.861ms        15.83%     174.861ms      58.287ms      13.73 Mb      13.73 Mb             3  
                         aten::conv1d         0.00%      14.000us        13.42%     148.

STAGE:2025-02-03 11:29:32 52846:5848710 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 11:29:32 52846:5848710 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


In [None]:
# Distance-matrix calculation -> the sub, mul, add and sqrt ops take a lot of time for larger images
# Similarity-matrix calculation -> takes significantly less time because it needs no sub, mul, add or sqrt

### Conv2d_NN (10 samples, K = 5) Model Profile

In [51]:
# n = 10 samples, K = 5 (stride = 5) model for profiling on 100 x 100 inputs.
# This rebinds `conv2d_nn_10_5`, which was defined earlier for 32 x 32 inputs.
conv2d_nn_10_5 = nn.Sequential(
    # Only the channel counts change between stages.
    *[
        Conv2d_NN(
            in_channels=c_in,
            out_channels=c_out,
            K=5,
            stride=5,
            padding=0,
            shuffle_pattern="BA",
            shuffle_scale=2,
            samples=10,
        )
        for c_in, c_out in [(3, 5), (5, 10), (10, 20)]
    ],
    nn.Flatten(),
    nn.Linear(20 * 100 * 100, 10),  # 20 channels x 100 x 100 pixels = 200000 features -> 10 classes
).to('cpu')

# Layer-by-layer summary for a single 3 x 100 x 100 input
from torchsummary import summary
summary(conv2d_nn_10_5, (3, 100, 100))
    

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
           Flatten-1             [-1, 12, 2500]               0
            Conv1d-2             [-1, 20, 2500]           1,220
         Conv1d_NN-3             [-1, 20, 2500]               0
         Conv2d_NN-4          [-1, 5, 100, 100]               0
           Flatten-5             [-1, 20, 2500]               0
            Conv1d-6             [-1, 40, 2500]           4,040
         Conv1d_NN-7             [-1, 40, 2500]               0
         Conv2d_NN-8         [-1, 10, 100, 100]               0
           Flatten-9             [-1, 40, 2500]               0
           Conv1d-10             [-1, 80, 2500]          16,080
        Conv1d_NN-11             [-1, 80, 2500]               0
        Conv2d_NN-12         [-1, 20, 100, 100]               0
          Flatten-13               [-1, 200000]               0
           Linear-14                   

In [52]:
# Profile one CPU forward pass of the 10-sample, K = 5 Conv2d_NN model.
# The tensor is deliberately not named `input`: that would shadow Python's
# built-in input() for the rest of the kernel session.
batch = torch.randn(32, 3, 100, 100)  # 32 random 3-channel 100 x 100 images

conv2d_nn_10_5.to('cpu')

with profile(activities=[ProfilerActivity.CPU], record_shapes=True, profile_memory=True) as prof1:
    # Everything inside this labelled region is reported under "model_inference"
    with record_function("model_inference"):
        conv2d_nn_10_5(batch)

# Full operator table, ordered by total CPU time
print(prof1.key_averages().table(sort_by="cpu_time_total"))

STAGE:2025-02-03 11:24:22 52846:5848710 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-02-03 11:24:22 52846:5848710 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-02-03 11:24:22 52846:5848710 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


-------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                 Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
-------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                      model_inference         2.95%       5.104ms       100.00%     172.777ms     172.777ms     154.49 Mb    -159.16 Mb             1  
                         aten::conv1d         0.01%      14.000us        76.78%     132.652ms      44.217ms      42.72 Mb           0 b             3  
                    aten::convolution         0.02%      39.000us        76.77%     132.638ms      44.213ms      42.72 Mb           0 b             3  
                   aten::_convolution         0.89%       1.537ms        76.75%     132.