## CIFAR10 Performance Test

In [1]:
# Torch
import torch 
import torch.nn as nn
import torch.nn.functional as F
from torch import optim 


# Train + Data 
import sys 
sys.path.append('../Layers')
from Conv1d_NN import *
from Conv2d_NN import *

from Conv1d_NN_spatial import * 
from Conv2d_NN_spatial import * 

sys.path.append('../Data')
from CIFAR10 import * 


sys.path.append('../Train')
from train2d import * 


  from .autonotebook import tqdm as notebook_tqdm


### 1. Models

In [2]:
# Classic CNN 2D Model - Control Model 

CNN_2D = nn.Sequential(
    nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1),
    nn.ReLU(),

    nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
    nn.ReLU(),

    nn.Flatten(),
    nn.Linear(32768, 1024), 
    nn.ReLU(),
    nn.Linear(1024, 10)
).to('cpu')

from torchsummary import summary 
summary(CNN_2D, (3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 32, 32]             448
              ReLU-2           [-1, 16, 32, 32]               0
            Conv2d-3           [-1, 32, 32, 32]           4,640
              ReLU-4           [-1, 32, 32, 32]               0
           Flatten-5                [-1, 32768]               0
            Linear-6                 [-1, 1024]      33,555,456
              ReLU-7                 [-1, 1024]               0
            Linear-8                   [-1, 10]          10,250
Total params: 33,570,794
Trainable params: 33,570,794
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 1.02
Params size (MB): 128.06
Estimated Total Size (MB): 129.09
----------------------------------------------------------------


#### Original

In [3]:
# ConNN 2D All Sample Model with K = 9
Conv2dNN_9_all = nn.Sequential(
   Conv2d_NN(
      in_channels=3,
      out_channels=16,
      K=9,
      stride=9,
      padding=0,
      shuffle_pattern="BA", 
      shuffle_scale=2, 
      samples="all", 
   ), 
   nn.ReLU(),

   Conv2d_NN(
      in_channels=16,
      out_channels=32,
      K=9,
      stride=9,
      padding=0,
      shuffle_pattern="BA", 
      shuffle_scale=2, 
      samples="all", 
   ),
   nn.ReLU(),
   
   nn.Flatten(),
   nn.Linear(32768, 1024),
   nn.ReLU(),
   nn.Linear(1024, 10)
).to('cpu')

from torchsummary import summary
summary(Conv2dNN_9_all, (3, 32, 32))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
           Flatten-1              [-1, 12, 256]               0
            Conv1d-2              [-1, 64, 256]           6,976
         Conv1d_NN-3              [-1, 64, 256]               0
         Conv2d_NN-4           [-1, 16, 32, 32]               0
              ReLU-5           [-1, 16, 32, 32]               0
           Flatten-6              [-1, 64, 256]               0
            Conv1d-7             [-1, 128, 256]          73,856
         Conv1d_NN-8             [-1, 128, 256]               0
         Conv2d_NN-9           [-1, 32, 32, 32]               0
             ReLU-10           [-1, 32, 32, 32]               0
          Flatten-11                [-1, 32768]               0
           Linear-12                 [-1, 1024]      33,555,456
             ReLU-13                 [-1, 1024]               0
           Linear-14                   

In [4]:
# ConNN 2D Random Sample n = 64 Model with K = 9
Conv2dNN_9_64 = nn.Sequential(
   Conv2d_NN(
      in_channels=3,
      out_channels=16,
      K=9,
      stride=9,
      padding=0,
      shuffle_pattern="BA", 
      shuffle_scale=2, 
      samples=64, 
   ), 
   nn.ReLU(),

   Conv2d_NN(
      in_channels=16,
      out_channels=32,
      K=9,
      stride=9,
      padding=0,
      shuffle_pattern="BA", 
      shuffle_scale=2, 
      samples=64, 
   ),
   nn.ReLU(),
   
   nn.Flatten(),
   nn.Linear(32768, 1024),
   nn.ReLU(),
   nn.Linear(1024, 10)
).to('cpu')

from torchsummary import summary
summary(Conv2dNN_9_64, (3, 32, 32))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
           Flatten-1              [-1, 12, 256]               0
            Conv1d-2              [-1, 64, 256]           6,976
         Conv1d_NN-3              [-1, 64, 256]               0
         Conv2d_NN-4           [-1, 16, 32, 32]               0
              ReLU-5           [-1, 16, 32, 32]               0
           Flatten-6              [-1, 64, 256]               0
            Conv1d-7             [-1, 128, 256]          73,856
         Conv1d_NN-8             [-1, 128, 256]               0
         Conv2d_NN-9           [-1, 32, 32, 32]               0
             ReLU-10           [-1, 32, 32, 32]               0
          Flatten-11                [-1, 32768]               0
           Linear-12                 [-1, 1024]      33,555,456
             ReLU-13                 [-1, 1024]               0
           Linear-14                   

#### Optimized

In [5]:
# ConNN 2D All Sample Model with K = 9
Conv2dNN_9_all_optimized = nn.Sequential(
   Conv2d_NN_optimized(
      in_channels=3,
      out_channels=16,
      K=9,
      stride=9,
      padding=0,
      shuffle_pattern="BA", 
      shuffle_scale=2, 
      samples="all", 
   ), 
   nn.ReLU(),

   Conv2d_NN_optimized(
      in_channels=16,
      out_channels=32,
      K=9,
      stride=9,
      padding=0,
      shuffle_pattern="BA", 
      shuffle_scale=2, 
      samples="all", 
   ),
   nn.ReLU(),
   
   nn.Flatten(),
   nn.Linear(32768, 1024),
   nn.ReLU(),
   nn.Linear(1024, 10)
).to('cpu')

from torchsummary import summary
summary(Conv2dNN_9_all, (3, 32, 32))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
           Flatten-1              [-1, 12, 256]               0
            Conv1d-2              [-1, 64, 256]           6,976
         Conv1d_NN-3              [-1, 64, 256]               0
         Conv2d_NN-4           [-1, 16, 32, 32]               0
              ReLU-5           [-1, 16, 32, 32]               0
           Flatten-6              [-1, 64, 256]               0
            Conv1d-7             [-1, 128, 256]          73,856
         Conv1d_NN-8             [-1, 128, 256]               0
         Conv2d_NN-9           [-1, 32, 32, 32]               0
             ReLU-10           [-1, 32, 32, 32]               0
          Flatten-11                [-1, 32768]               0
           Linear-12                 [-1, 1024]      33,555,456
             ReLU-13                 [-1, 1024]               0
           Linear-14                   

In [6]:
# ConNN 2D Random Sample n = 64 Model with K = 9
Conv2dNN_9_64_optimized = nn.Sequential(
   Conv2d_NN_optimized(
      in_channels=3,
      out_channels=16,
      K=9,
      stride=9,
      padding=0,
      shuffle_pattern="BA", 
      shuffle_scale=2, 
      samples=64, 
   ), 
   nn.ReLU(),

   Conv2d_NN_optimized(
      in_channels=16,
      out_channels=32,
      K=9,
      stride=9,
      padding=0,
      shuffle_pattern="BA", 
      shuffle_scale=2, 
      samples=64, 
   ),
   nn.ReLU(),
   
   nn.Flatten(),
   nn.Linear(32768, 1024),
   nn.ReLU(),
   nn.Linear(1024, 10)
).to('cpu')

from torchsummary import summary
summary(Conv2dNN_9_64, (3, 32, 32))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
           Flatten-1              [-1, 12, 256]               0
            Conv1d-2              [-1, 64, 256]           6,976
         Conv1d_NN-3              [-1, 64, 256]               0
         Conv2d_NN-4           [-1, 16, 32, 32]               0
              ReLU-5           [-1, 16, 32, 32]               0
           Flatten-6              [-1, 64, 256]               0
            Conv1d-7             [-1, 128, 256]          73,856
         Conv1d_NN-8             [-1, 128, 256]               0
         Conv2d_NN-9           [-1, 32, 32, 32]               0
             ReLU-10           [-1, 32, 32, 32]               0
          Flatten-11                [-1, 32768]               0
           Linear-12                 [-1, 1024]      33,555,456
             ReLU-13                 [-1, 1024]               0
           Linear-14                   

### 2. Training

In [7]:
# CIFAR10
cifar10 = CIFAR10()

Files already downloaded and verified
Files already downloaded and verified


In [8]:
# Classic CNN 2D Model - Control Model 
CNN_2D.to('mps')

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(CNN_2D.parameters(), lr=0.001)
num_epochs = 2
train_model(CNN_2D, cifar10.train_loader, criterion, optimizer, num_epochs)
evaluate_accuracy(CNN_2D, cifar10.test_loader)

Epoch 1, Time: 20.060369968414307, Loss: 1.3660212851241422
Epoch 2, Time: 19.888731241226196, Loss: 0.8281724258228336

 Average epoch time: 19.97455060482025
Accuracy on test set: 65.71%


65.71

#### All Samples

In [9]:
# Original
Conv2dNN_9_all.to('mps')

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Conv2dNN_9_all.parameters(), lr=0.001)
num_epochs = 2
train_model(Conv2dNN_9_all, cifar10.train_loader, criterion, optimizer, num_epochs)
evaluate_accuracy(Conv2dNN_9_all, cifar10.test_loader)

Epoch 1, Time: 54.91022205352783, Loss: 1.6180734754828237
Epoch 2, Time: 54.43134903907776, Loss: 1.2933059513111553

 Average epoch time: 54.670785546302795
Accuracy on test set: 52.67%


52.67

In [13]:
# Optimized
Conv2dNN_9_all_optimized.to('mps')

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Conv2dNN_9_all_optimized.parameters(), lr=0.001)
num_epochs = 10
train_model(Conv2dNN_9_all_optimized, cifar10.train_loader, criterion, optimizer, num_epochs)
evaluate_accuracy(Conv2dNN_9_all_optimized, cifar10.test_loader)

Epoch 1, Time: 117.15188002586365, Loss: 0.93886028104426
Epoch 2, Time: 133.8014268875122, Loss: 0.709866513224209
Epoch 3, Time: 133.1089551448822, Loss: 0.47900541029546573
Epoch 4, Time: 130.42445611953735, Loss: 0.3163632525564612
Epoch 5, Time: 122.80665302276611, Loss: 0.23354341748082424
Epoch 6, Time: 127.06309008598328, Loss: 0.1686711843504244
Epoch 7, Time: 129.65726494789124, Loss: 0.15646147743925032
Epoch 8, Time: 126.23992824554443, Loss: 0.13481480319915182
Epoch 9, Time: 131.7521688938141, Loss: 0.12596496819730496
Epoch 10, Time: 122.97920107841492, Loss: 0.11563027875266893

 Average epoch time: 127.49850244522095
Accuracy on test set: 54.83%


54.83

#### N Random Samples


In [11]:
# Original
Conv2dNN_9_64.to('mps')

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Conv2dNN_9_64.parameters(), lr=0.001)
num_epochs = 2
train_model(Conv2dNN_9_64, cifar10.train_loader, criterion, optimizer, num_epochs)
evaluate_accuracy(Conv2dNN_9_64, cifar10.test_loader)

Epoch 1, Time: 59.907439947128296, Loss: 1.7384636894516323
Epoch 2, Time: 58.717626094818115, Loss: 1.4707601257907155

 Average epoch time: 59.312533020973206
Accuracy on test set: 47.6%


47.6

In [14]:
# Optimized
Conv2dNN_9_64_optimized.to('mps')

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Conv2dNN_9_64_optimized.parameters(), lr=0.001)
num_epochs = 10
train_model(Conv2dNN_9_64_optimized, cifar10.train_loader, criterion, optimizer, num_epochs)
evaluate_accuracy(Conv2dNN_9_64_optimized, cifar10.test_loader)

Epoch 1, Time: 102.40424609184265, Loss: 1.176160012440913
Epoch 2, Time: 101.678631067276, Loss: 1.0443694508441574
Epoch 3, Time: 92.82319712638855, Loss: 0.9180408567571274
Epoch 4, Time: 58.14507436752319, Loss: 0.7763948482092079
Epoch 5, Time: 59.652820110321045, Loss: 0.6351556695635666
Epoch 6, Time: 57.94316291809082, Loss: 0.4943758460414379
Epoch 7, Time: 59.167937994003296, Loss: 0.3720183279698767
Epoch 8, Time: 58.554649114608765, Loss: 0.2914434125756516
Epoch 9, Time: 57.6046359539032, Loss: 0.2545947561569302
Epoch 10, Time: 57.9154372215271, Loss: 0.2033600853732251

 Average epoch time: 70.58897919654846
Accuracy on test set: 57.76%


57.76

In [16]:

# Optionally, move model and input to CUDA if available.
device = torch.device('mps' if torch.backends.mps.is_available() else 'cpu')

Conv2dNN_9_all_optimized.to(device)
input_tensor = torch.randn(32, 3, 32, 32)
input_tensor = input_tensor.to(device)

# Profile the forward pass
with torch.autograd.profiler.profile(enabled=True) as prof:
    output = Conv2dNN_9_all_optimized(input_tensor)

# Print the profiling results sorted by total CPU time.
print(prof.key_averages().table(sort_by="cpu_time_total"))

--------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                      Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
--------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
               aten::copy_        26.98%      33.401ms        32.96%      40.800ms       4.080ms            10  
              aten::linear        32.37%      40.080ms        32.38%      40.085ms      20.043ms             2  
               aten::index        -0.83%   -1030.000us        27.41%      33.934ms      16.967ms             2  
                  aten::to         1.17%       1.453ms        27.01%      33.441ms       8.360ms             4  
            aten::_to_copy         0.02%      22.000us        27.00%      33.432ms       8.358ms             4  
                aten::relu         6.69%       8.282ms         6.72%       8.316ms       2.772ms

STAGE:2025-03-24 10:55:56 19797:36658667 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2025-03-24 10:55:57 19797:36658667 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2025-03-24 10:55:57 19797:36658667 ActivityProfilerController.cpp:324] Completed Stage: Post Processing
