# ConvNN Attention Test 
## I. 2D Training and Evaluation on the CIFAR-10 Dataset

In [1]:
# Torch
import torch 
import torch.nn as nn
import torch.nn.functional as F
from torch import optim 


# Train + Data 
import sys 
sys.path.append('../Layers')
from Conv1d_NN_spatial import * 
from Conv2d_NN_spatial import * 

sys.path.append('../Data')
from CIFAR10 import * 


sys.path.append('../Models')
from CIFAR_experiment_models.Attention import Attention
from CIFAR_experiment_models.BranchingConvNN import Branching_ConvNN_K_All, Branching_ConvNN_K_N, Branching_ConvNN_Spatial_K_N, Branching_ConvNN_Attention_K_N
from CIFAR_experiment_models.ConvNN import ConvNN_K_All,ConvNN_K_N, ConvNN_Spatial_K_N, ConvNN_Attn_K_N
from CIFAR_experiment_models.CNN_Control import CNN


sys.path.append('../Train')
from train2d import train_eval, evaluate_accuracy




In [2]:
# Build the project's CIFAR10 data wrapper; later cells read its
# `train_loader` and `test_loader` attributes for training and evaluation.
cifar10 = CIFAR10()

Files already downloaded and verified
Files already downloaded and verified


In [3]:
def count_parameters(model):
    """Return the number of trainable scalar parameters in ``model``.

    Only parameters whose ``requires_grad`` flag is set are counted; each
    one contributes its element count (``numel()``).
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total


### 2 Layer Models

In [4]:

# CNN control model, 2 layers
CNN_2 = CNN(num_layers=2, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count.
print(f"Model: {CNN_2.name}")
print(f"Num params: {count_parameters(CNN_2)}")
print()

# Test + Eval
# Training setup: cross-entropy loss, Adam at lr 1e-4, 100 epochs.
# (A stray trailing `0` after `nn.CrossEntropyLoss()` made this cell a
# SyntaxError; it has been removed.)
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(CNN_2.parameters(), lr=1e-4)

# Train (per-epoch loss and test accuracy are logged by train_eval), then
# evaluate once more on the test loader as the cell's final expression.
train_eval(
    CNN_2,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(CNN_2, cifar10.test_loader, device='cuda')


Model: CNN
Num params: 166618

Epoch 1, Time: 9.662012815475464, Loss: 1.6635174585120451
Epoch 1, Accuracy: 49.65%
Epoch 2, Time: 9.309512376785278, Loss: 1.3639852045594578
Epoch 2, Accuracy: 53.19%
Epoch 3, Time: 9.426666259765625, Loss: 1.2611928235386949
Epoch 3, Accuracy: 55.35%
Epoch 4, Time: 9.247191667556763, Loss: 1.2005311547185453
Epoch 4, Accuracy: 55.91%
Epoch 5, Time: 9.367055416107178, Loss: 1.1533419325223664
Epoch 5, Accuracy: 57.52%
Epoch 6, Time: 9.333389282226562, Loss: 1.1146043538285033
Epoch 6, Accuracy: 57.99%
Epoch 7, Time: 9.377770185470581, Loss: 1.074851929836566
Epoch 7, Accuracy: 58.88%
Epoch 8, Time: 9.318773031234741, Loss: 1.037272206276579
Epoch 8, Accuracy: 60.05%
Epoch 9, Time: 9.39230728149414, Loss: 1.0043443461208392
Epoch 9, Accuracy: 60.38%
Epoch 10, Time: 9.339741945266724, Loss: 0.9735180758454306
Epoch 10, Accuracy: 60.19%
Epoch 11, Time: 9.377856254577637, Loss: 0.94539554878269
Epoch 11, Accuracy: 60.78%
Epoch 12, Time: 9.30274486541748, L

57.19

In [5]:
# Attention model, 2 layers
Attention_2 = Attention(num_layers=2, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count.
print(f"Model: {Attention_2.name}")
print(f"Num params: {count_parameters(Attention_2)}")
print()

# Training setup: cross-entropy loss, Adam at lr 1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Attention_2.parameters(), lr=1e-4)

# Train (per-epoch loss and test accuracy are logged by train_eval), then
# evaluate once more on the test loader as the cell's final expression.
train_eval(
    Attention_2,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(Attention_2, cifar10.test_loader, device='cuda')


Model: Attention
Num params: 202730

Epoch 1, Time: 11.061918020248413, Loss: 1.9940525916836145
Epoch 1, Accuracy: 30.23%
Epoch 2, Time: 10.943254470825195, Loss: 1.825660556326132
Epoch 2, Accuracy: 36.33%
Epoch 3, Time: 11.018296480178833, Loss: 1.7103027719670854
Epoch 3, Accuracy: 38.64%
Epoch 4, Time: 10.872601985931396, Loss: 1.6598589397452372
Epoch 4, Accuracy: 39.91%
Epoch 5, Time: 10.95972752571106, Loss: 1.6258888785796397
Epoch 5, Accuracy: 41.29%
Epoch 6, Time: 10.959372758865356, Loss: 1.6000498098790492
Epoch 6, Accuracy: 41.66%
Epoch 7, Time: 10.93392300605774, Loss: 1.5792384848875158
Epoch 7, Accuracy: 42.5%
Epoch 8, Time: 11.009439945220947, Loss: 1.5578012748447525
Epoch 8, Accuracy: 43.46%
Epoch 9, Time: 10.901677370071411, Loss: 1.541174367565633
Epoch 9, Accuracy: 43.43%
Epoch 10, Time: 11.000517845153809, Loss: 1.526697262931053
Epoch 10, Accuracy: 43.44%
Epoch 11, Time: 10.943708896636963, Loss: 1.511678011216166
Epoch 11, Accuracy: 43.93%
Epoch 12, Time: 10.9

41.36

In [6]:
# ConvNN_K_All model, 2 layers
ConvNN_All_2 = ConvNN_K_All(num_layers=2, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count.
print(f"Model: {ConvNN_All_2.name}")
print(f"Num params: {count_parameters(ConvNN_All_2)}")
print()

# Training setup: cross-entropy loss, Adam at lr 1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_All_2.parameters(), lr=1e-4)

# Train (per-epoch loss and test accuracy are logged by train_eval), then
# evaluate once more on the test loader as the cell's final expression.
train_eval(
    ConvNN_All_2,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(ConvNN_All_2, cifar10.test_loader, device='cuda')


Model: ConvNN_K_All
Num params: 208362

Epoch 1, Time: 10.358115196228027, Loss: 1.6443319145370932
Epoch 1, Accuracy: 48.38%
Epoch 2, Time: 10.188043594360352, Loss: 1.3735060944886464
Epoch 2, Accuracy: 52.48%
Epoch 3, Time: 10.300394058227539, Loss: 1.2678037924534828
Epoch 3, Accuracy: 53.16%
Epoch 4, Time: 10.25177812576294, Loss: 1.1960426196265403
Epoch 4, Accuracy: 54.81%
Epoch 5, Time: 10.320242404937744, Loss: 1.1337337948172295
Epoch 5, Accuracy: 56.33%
Epoch 6, Time: 10.214428186416626, Loss: 1.0816891618701807
Epoch 6, Accuracy: 56.52%
Epoch 7, Time: 10.193755388259888, Loss: 1.036666795649492
Epoch 7, Accuracy: 57.97%
Epoch 8, Time: 10.31055235862732, Loss: 0.9977014887973171
Epoch 8, Accuracy: 58.45%
Epoch 9, Time: 10.214206218719482, Loss: 0.9610510722298147
Epoch 9, Accuracy: 58.67%
Epoch 10, Time: 10.222529172897339, Loss: 0.9257728132750372
Epoch 10, Accuracy: 59.05%
Epoch 11, Time: 10.142736196517944, Loss: 0.8931420953834758
Epoch 11, Accuracy: 58.18%
Epoch 12, Tim

53.38

In [7]:
# ConvNN_K_N model, 2 layers
ConvNN_N_2 = ConvNN_K_N(num_layers=2, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count.
print(f"Model: {ConvNN_N_2.name}")
print(f"Num params: {count_parameters(ConvNN_N_2)}")
print()

# Training setup: cross-entropy loss, Adam at lr 1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_N_2.parameters(), lr=1e-4)

# Train (per-epoch loss and test accuracy are logged by train_eval), then
# evaluate once more on the test loader as the cell's final expression.
train_eval(
    ConvNN_N_2,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(ConvNN_N_2, cifar10.test_loader, device='cuda')


Model: ConvNN_K_N
Num params: 208362

Epoch 1, Time: 10.419870853424072, Loss: 1.745406957995861
Epoch 1, Accuracy: 44.47%
Epoch 2, Time: 10.384969711303711, Loss: 1.524671682311446
Epoch 2, Accuracy: 48.02%
Epoch 3, Time: 10.348228454589844, Loss: 1.4186399723867626
Epoch 3, Accuracy: 49.28%
Epoch 4, Time: 10.47769832611084, Loss: 1.3446311648849332
Epoch 4, Accuracy: 50.88%
Epoch 5, Time: 10.405449867248535, Loss: 1.2910581763900455
Epoch 5, Accuracy: 52.02%
Epoch 6, Time: 10.456016063690186, Loss: 1.2439845411673836
Epoch 6, Accuracy: 52.82%
Epoch 7, Time: 10.412107467651367, Loss: 1.2055831461611306
Epoch 7, Accuracy: 54.16%
Epoch 8, Time: 10.477323770523071, Loss: 1.1716104713089937
Epoch 8, Accuracy: 54.36%
Epoch 9, Time: 10.414530277252197, Loss: 1.1386871770824618
Epoch 9, Accuracy: 55.42%
Epoch 10, Time: 10.404435157775879, Loss: 1.1125293445709112
Epoch 10, Accuracy: 55.9%
Epoch 11, Time: 10.427421569824219, Loss: 1.0843719702090144
Epoch 11, Accuracy: 56.6%
Epoch 12, Time: 1

56.93

In [8]:
# ConvNN_Spatial_K_N model, 2 layers
ConvNN_Spatial_N_2 = ConvNN_Spatial_K_N(num_layers=2, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count.
print(f"Model: {ConvNN_Spatial_N_2.name}")
print(f"Num params: {count_parameters(ConvNN_Spatial_N_2)}")
print()

# Training setup: cross-entropy loss, Adam at lr 1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_Spatial_N_2.parameters(), lr=1e-4)

# Train (per-epoch loss and test accuracy are logged by train_eval), then
# evaluate once more on the test loader as the cell's final expression.
train_eval(
    ConvNN_Spatial_N_2,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(ConvNN_Spatial_N_2, cifar10.test_loader, device='cuda')


Model: ConvNN_Spatial_K_N
Num params: 208362

Epoch 1, Time: 12.75902509689331, Loss: 1.7399301435941321
Epoch 1, Accuracy: 45.38%
Epoch 2, Time: 12.706775426864624, Loss: 1.506068321445104
Epoch 2, Accuracy: 48.57%
Epoch 3, Time: 12.632580757141113, Loss: 1.3930085551403368
Epoch 3, Accuracy: 50.09%
Epoch 4, Time: 12.656312942504883, Loss: 1.315006207855766
Epoch 4, Accuracy: 51.73%
Epoch 5, Time: 12.737659931182861, Loss: 1.2537417560434707
Epoch 5, Accuracy: 51.93%
Epoch 6, Time: 12.61144471168518, Loss: 1.1977359485595733
Epoch 6, Accuracy: 53.21%
Epoch 7, Time: 12.701889514923096, Loss: 1.149713974474641
Epoch 7, Accuracy: 53.16%
Epoch 8, Time: 12.667410373687744, Loss: 1.1022647059024753
Epoch 8, Accuracy: 54.17%
Epoch 9, Time: 12.633203983306885, Loss: 1.0581556085278006
Epoch 9, Accuracy: 54.24%
Epoch 10, Time: 12.639528036117554, Loss: 1.0143020486892642
Epoch 10, Accuracy: 54.59%
Epoch 11, Time: 12.6745445728302, Loss: 0.9755284955434482
Epoch 11, Accuracy: 53.97%
Epoch 12, T

50.22

In [9]:
# ConvNN_Attn_K_N model, 2 layers
ConvNN_Attn_N_2 = ConvNN_Attn_K_N(num_layers=2, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count.
print(f"Model: {ConvNN_Attn_N_2.name}")
print(f"Num params: {count_parameters(ConvNN_Attn_N_2)}")
print()

# Training setup: cross-entropy loss, Adam at lr 1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_Attn_N_2.parameters(), lr=1e-4)

# Train (per-epoch loss and test accuracy are logged by train_eval), then
# evaluate once more on the test loader as the cell's final expression.
train_eval(
    ConvNN_Attn_N_2,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(ConvNN_Attn_N_2, cifar10.test_loader, device='cuda')


Model: ConvNN_Attn_K_N
Num params: 601578

Epoch 1, Time: 10.826727628707886, Loss: 1.9230630413040786
Epoch 1, Accuracy: 39.46%
Epoch 2, Time: 10.870772361755371, Loss: 1.5911518234730986
Epoch 2, Accuracy: 47.28%
Epoch 3, Time: 10.820856094360352, Loss: 1.462294616205308
Epoch 3, Accuracy: 49.58%
Epoch 4, Time: 10.780829429626465, Loss: 1.3754689827599489
Epoch 4, Accuracy: 52.33%
Epoch 5, Time: 10.787330627441406, Loss: 1.2998386577267171
Epoch 5, Accuracy: 54.56%
Epoch 6, Time: 10.875243186950684, Loss: 1.2368368786161819
Epoch 6, Accuracy: 55.43%
Epoch 7, Time: 10.827059745788574, Loss: 1.1867086781412743
Epoch 7, Accuracy: 56.48%
Epoch 8, Time: 10.841125011444092, Loss: 1.1421585523563882
Epoch 8, Accuracy: 57.41%
Epoch 9, Time: 10.896265745162964, Loss: 1.0991033296603376
Epoch 9, Accuracy: 57.73%
Epoch 10, Time: 10.81866717338562, Loss: 1.0627162044158067
Epoch 10, Accuracy: 57.82%
Epoch 11, Time: 10.950514078140259, Loss: 1.0248320551632006
Epoch 11, Accuracy: 58.57%
Epoch 12,

55.94

#### ii. Branching

In [10]:

# Branching_ConvNN_K_All model, 2 layers
Branching_ConvNN_All_2 = Branching_ConvNN_K_All(num_layers=2, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count.
print(f"Model: {Branching_ConvNN_All_2.name}")
print(f"Num params: {count_parameters(Branching_ConvNN_All_2)}")
print()

# Training setup: cross-entropy loss, Adam at lr 1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Branching_ConvNN_All_2.parameters(), lr=1e-4)

# Train (per-epoch loss and test accuracy are logged by train_eval), then
# evaluate once more on the test loader as the cell's final expression.
train_eval(
    Branching_ConvNN_All_2,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(Branching_ConvNN_All_2, cifar10.test_loader, device='cuda')


Model: Branching_ConvNN_K_All
Num params: 212186

Epoch 1, Time: 10.82050895690918, Loss: 1.7413599814295464
Epoch 1, Accuracy: 45.78%
Epoch 2, Time: 10.813725709915161, Loss: 1.4776905856626419
Epoch 2, Accuracy: 49.1%
Epoch 3, Time: 10.80051565170288, Loss: 1.362445660488075
Epoch 3, Accuracy: 51.84%
Epoch 4, Time: 10.82760500907898, Loss: 1.2730943357853024
Epoch 4, Accuracy: 54.46%
Epoch 5, Time: 10.808266401290894, Loss: 1.2057095336182344
Epoch 5, Accuracy: 56.11%
Epoch 6, Time: 10.777215242385864, Loss: 1.156128653479964
Epoch 6, Accuracy: 56.54%
Epoch 7, Time: 10.82368540763855, Loss: 1.1116102197590996
Epoch 7, Accuracy: 57.54%
Epoch 8, Time: 10.829699754714966, Loss: 1.0783728329879243
Epoch 8, Accuracy: 58.17%
Epoch 9, Time: 10.827310562133789, Loss: 1.0429955469372938
Epoch 9, Accuracy: 58.88%
Epoch 10, Time: 10.849241495132446, Loss: 1.0106586867281238
Epoch 10, Accuracy: 59.46%
Epoch 11, Time: 10.857288122177124, Loss: 0.9811244676332644
Epoch 11, Accuracy: 59.61%
Epoch 1

55.48

In [11]:

# Branching_ConvNN_K_N model, 2 layers
Branching_ConvNN_N_2 = Branching_ConvNN_K_N(num_layers=2, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count.
print(f"Model: {Branching_ConvNN_N_2.name}")
print(f"Num params: {count_parameters(Branching_ConvNN_N_2)}")
print()

# Training setup: cross-entropy loss, Adam at lr 1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Branching_ConvNN_N_2.parameters(), lr=1e-4)

# Train (per-epoch loss and test accuracy are logged by train_eval), then
# evaluate once more on the test loader as the cell's final expression.
train_eval(
    Branching_ConvNN_N_2,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(Branching_ConvNN_N_2, cifar10.test_loader, device='cuda')


Model: Branching_ConvNN_K_N
Num params: 212186

Epoch 1, Time: 11.044896364212036, Loss: 1.7639362112335537
Epoch 1, Accuracy: 44.18%
Epoch 2, Time: 11.035870552062988, Loss: 1.529974512584374
Epoch 2, Accuracy: 47.28%
Epoch 3, Time: 10.997245073318481, Loss: 1.4305426821379406
Epoch 3, Accuracy: 50.37%
Epoch 4, Time: 11.0909583568573, Loss: 1.3660620479175196
Epoch 4, Accuracy: 51.73%
Epoch 5, Time: 10.999750852584839, Loss: 1.3082415066716615
Epoch 5, Accuracy: 52.77%
Epoch 6, Time: 11.056133031845093, Loss: 1.2584676125165446
Epoch 6, Accuracy: 53.82%
Epoch 7, Time: 11.006056785583496, Loss: 1.2103961908146548
Epoch 7, Accuracy: 54.3%
Epoch 8, Time: 11.034668445587158, Loss: 1.168401128038421
Epoch 8, Accuracy: 55.18%
Epoch 9, Time: 10.99562954902649, Loss: 1.1286981198031578
Epoch 9, Accuracy: 56.36%
Epoch 10, Time: 10.976665258407593, Loss: 1.1013295151236113
Epoch 10, Accuracy: 57.68%
Epoch 11, Time: 11.042697429656982, Loss: 1.0657090834339562
Epoch 11, Accuracy: 56.8%
Epoch 12,

57.14

In [12]:

# Branching_ConvNN_Spatial_K_N model, 2 layers
Branching_ConvNN_Spatial_N_2 = Branching_ConvNN_Spatial_K_N(num_layers=2, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count.
print(f"Model: {Branching_ConvNN_Spatial_N_2.name}")
print(f"Num params: {count_parameters(Branching_ConvNN_Spatial_N_2)}")
print()

# Training setup: cross-entropy loss, Adam at lr 1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Branching_ConvNN_Spatial_N_2.parameters(), lr=1e-4)

# Train (per-epoch loss and test accuracy are logged by train_eval), then
# evaluate once more on the test loader as the cell's final expression.
train_eval(
    Branching_ConvNN_Spatial_N_2,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(Branching_ConvNN_Spatial_N_2, cifar10.test_loader, device='cuda')


Model: Branching_ConvNN_Spatial_K_N
Num params: 212186

Epoch 1, Time: 13.305484294891357, Loss: 1.7321468114548022
Epoch 1, Accuracy: 45.42%
Epoch 2, Time: 13.363604307174683, Loss: 1.4630243301086718
Epoch 2, Accuracy: 47.81%
Epoch 3, Time: 13.3135085105896, Loss: 1.3804193151271558
Epoch 3, Accuracy: 50.1%
Epoch 4, Time: 13.305439233779907, Loss: 1.3205021426958197
Epoch 4, Accuracy: 52.55%
Epoch 5, Time: 13.31656289100647, Loss: 1.2715706716260642
Epoch 5, Accuracy: 52.92%
Epoch 6, Time: 13.357956409454346, Loss: 1.2298422435970258
Epoch 6, Accuracy: 54.41%
Epoch 7, Time: 13.296377658843994, Loss: 1.1902100700704032
Epoch 7, Accuracy: 55.02%
Epoch 8, Time: 13.307124376296997, Loss: 1.1535810612504134
Epoch 8, Accuracy: 54.65%
Epoch 9, Time: 13.319891929626465, Loss: 1.121829318497187
Epoch 9, Accuracy: 56.13%
Epoch 10, Time: 13.323537826538086, Loss: 1.087837820696404
Epoch 10, Accuracy: 56.27%
Epoch 11, Time: 13.41297698020935, Loss: 1.0557402770994875
Epoch 11, Accuracy: 56.99%
E

54.57

In [13]:

# Branching_ConvNN_Attention_K_N model, 2 layers
Branching_ConvNN_Attn_N_2 = Branching_ConvNN_Attention_K_N(num_layers=2, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count.
print(f"Model: {Branching_ConvNN_Attn_N_2.name}")
print(f"Num params: {count_parameters(Branching_ConvNN_Attn_N_2)}")
print()

# Training setup: cross-entropy loss, Adam at lr 1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Branching_ConvNN_Attn_N_2.parameters(), lr=1e-4)

# Train (per-epoch loss and test accuracy are logged by train_eval), then
# evaluate once more on the test loader as the cell's final expression.
train_eval(
    Branching_ConvNN_Attn_N_2,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(Branching_ConvNN_Attn_N_2, cifar10.test_loader, device='cuda')


Model: Branching_ConvNN_Attention_K_N
Num params: 605402

Epoch 1, Time: 11.534321784973145, Loss: 1.8501152007476143
Epoch 1, Accuracy: 42.4%
Epoch 2, Time: 11.569469213485718, Loss: 1.5256061244498738
Epoch 2, Accuracy: 48.37%
Epoch 3, Time: 11.614109754562378, Loss: 1.3859810594402615
Epoch 3, Accuracy: 52.34%
Epoch 4, Time: 11.518414974212646, Loss: 1.293138159739087
Epoch 4, Accuracy: 53.6%
Epoch 5, Time: 11.577592134475708, Loss: 1.224854966563642
Epoch 5, Accuracy: 54.62%
Epoch 6, Time: 11.532033205032349, Loss: 1.1735991206772798
Epoch 6, Accuracy: 56.54%
Epoch 7, Time: 11.515662670135498, Loss: 1.1292454381580548
Epoch 7, Accuracy: 57.92%
Epoch 8, Time: 11.60501480102539, Loss: 1.0846991555770036
Epoch 8, Accuracy: 58.78%
Epoch 9, Time: 11.518913984298706, Loss: 1.0411738232730905
Epoch 9, Accuracy: 59.16%
Epoch 10, Time: 11.52650260925293, Loss: 1.0015871463071964
Epoch 10, Accuracy: 60.15%
Epoch 11, Time: 11.568985223770142, Loss: 0.9664529164886231
Epoch 11, Accuracy: 61.16

61.39

### 4 Layer Models

In [14]:

# CNN control model, 4 layers
CNN_4 = CNN(num_layers=4, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count.
print(f"Model: {CNN_4.name}")
print(f"Num params: {count_parameters(CNN_4)}")
print()

# Training setup: cross-entropy loss, Adam at lr 1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(CNN_4.parameters(), lr=1e-4)

# Train (per-epoch loss and test accuracy are logged by train_eval), then
# evaluate once more on the test loader as the cell's final expression.
train_eval(
    CNN_4,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(CNN_4, cifar10.test_loader, device='cuda')


Model: CNN
Num params: 171258

Epoch 1, Time: 9.411397695541382, Loss: 1.7285240246816669
Epoch 1, Accuracy: 46.45%
Epoch 2, Time: 9.29889726638794, Loss: 1.45574289758492
Epoch 2, Accuracy: 49.89%
Epoch 3, Time: 9.36499285697937, Loss: 1.3486643751411487
Epoch 3, Accuracy: 52.2%
Epoch 4, Time: 9.255890130996704, Loss: 1.2855684489697752
Epoch 4, Accuracy: 54.0%
Epoch 5, Time: 9.35406494140625, Loss: 1.239130633109061
Epoch 5, Accuracy: 55.07%
Epoch 6, Time: 9.295070886611938, Loss: 1.2012974799746443
Epoch 6, Accuracy: 55.9%
Epoch 7, Time: 9.704035758972168, Loss: 1.1706987409792897
Epoch 7, Accuracy: 56.21%
Epoch 8, Time: 9.375449180603027, Loss: 1.1411116902175766
Epoch 8, Accuracy: 57.04%
Epoch 9, Time: 9.335826873779297, Loss: 1.1129299653003284
Epoch 9, Accuracy: 57.41%
Epoch 10, Time: 9.313803434371948, Loss: 1.0914931456603663
Epoch 10, Accuracy: 58.57%
Epoch 11, Time: 9.405291318893433, Loss: 1.0629782725478072
Epoch 11, Accuracy: 58.71%
Epoch 12, Time: 9.311059713363647, Loss

55.56

In [15]:

# Attention model, 4 layers
Attention_4 = Attention(num_layers=4, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count.
print(f"Model: {Attention_4.name}")
print(f"Num params: {count_parameters(Attention_4)}")
print()

# Training setup: cross-entropy loss, Adam at lr 1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Attention_4.parameters(), lr=1e-4)

# Train (per-epoch loss and test accuracy are logged by train_eval), then
# evaluate once more on the test loader as the cell's final expression.
train_eval(
    Attention_4,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(Attention_4, cifar10.test_loader, device='cuda')


Model: Attention
Num params: 244938

Epoch 1, Time: 13.060575485229492, Loss: 2.081164107145861
Epoch 1, Accuracy: 23.7%
Epoch 2, Time: 13.077224016189575, Loss: 1.9815512613567245
Epoch 2, Accuracy: 25.79%
Epoch 3, Time: 13.007584571838379, Loss: 1.9269939404924203
Epoch 3, Accuracy: 27.02%
Epoch 4, Time: 13.058099508285522, Loss: 1.883537719469241
Epoch 4, Accuracy: 28.65%
Epoch 5, Time: 13.059804916381836, Loss: 1.8440610225243337
Epoch 5, Accuracy: 31.45%
Epoch 6, Time: 12.988163232803345, Loss: 1.812133391190063
Epoch 6, Accuracy: 32.89%
Epoch 7, Time: 13.101476669311523, Loss: 1.7916435795976682
Epoch 7, Accuracy: 32.43%
Epoch 8, Time: 13.128085613250732, Loss: 1.77718859224978
Epoch 8, Accuracy: 33.84%
Epoch 9, Time: 12.998828172683716, Loss: 1.764094646019704
Epoch 9, Accuracy: 34.59%
Epoch 10, Time: 13.05693793296814, Loss: 1.7536747018089685
Epoch 10, Accuracy: 34.68%
Epoch 11, Time: 13.110954999923706, Loss: 1.7425747195168224
Epoch 11, Accuracy: 35.96%
Epoch 12, Time: 12.99

43.84

In [16]:
# ConvNN_K_All model, 4 layers
ConvNN_All_4 = ConvNN_K_All(num_layers=4, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count.
print(f"Model: {ConvNN_All_4.name}")
print(f"Num params: {count_parameters(ConvNN_All_4)}")
print()

# Training setup: cross-entropy loss, Adam at lr 1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_All_4.parameters(), lr=1e-4)

# Train (per-epoch loss and test accuracy are logged by train_eval), then
# evaluate once more on the test loader as the cell's final expression.
train_eval(
    ConvNN_All_4,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(ConvNN_All_4, cifar10.test_loader, device='cuda')


Model: ConvNN_K_All
Num params: 282826

Epoch 1, Time: 12.519649505615234, Loss: 1.7236269515798526
Epoch 1, Accuracy: 44.2%
Epoch 2, Time: 12.497701644897461, Loss: 1.4481888546053405
Epoch 2, Accuracy: 51.18%
Epoch 3, Time: 12.46299147605896, Loss: 1.2966134138881702
Epoch 3, Accuracy: 52.6%
Epoch 4, Time: 12.404221534729004, Loss: 1.2037022249473026
Epoch 4, Accuracy: 55.1%
Epoch 5, Time: 12.492671012878418, Loss: 1.1312220725409514
Epoch 5, Accuracy: 55.94%
Epoch 6, Time: 12.434356689453125, Loss: 1.0749836006890172
Epoch 6, Accuracy: 56.15%
Epoch 7, Time: 12.407952547073364, Loss: 1.0283911443884721
Epoch 7, Accuracy: 57.32%
Epoch 8, Time: 12.482184886932373, Loss: 0.9846481774621607
Epoch 8, Accuracy: 56.63%
Epoch 9, Time: 12.370888710021973, Loss: 0.9380636863086534
Epoch 9, Accuracy: 57.87%
Epoch 10, Time: 12.417500495910645, Loss: 0.8988052477007327
Epoch 10, Accuracy: 56.98%
Epoch 11, Time: 12.47435736656189, Loss: 0.8532449073346374
Epoch 11, Accuracy: 57.64%
Epoch 12, Time:

51.16

In [17]:
# ConvNN_K_N model, 4 layers
ConvNN_N_4 = ConvNN_K_N(num_layers=4, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count.
print(f"Model: {ConvNN_N_4.name}")
print(f"Num params: {count_parameters(ConvNN_N_4)}")
print()

# Training setup: cross-entropy loss, Adam at lr 1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_N_4.parameters(), lr=1e-4)

# Train (per-epoch loss and test accuracy are logged by train_eval), then
# evaluate once more on the test loader as the cell's final expression.
train_eval(
    ConvNN_N_4,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(ConvNN_N_4, cifar10.test_loader, device='cuda')


Model: ConvNN_K_N
Num params: 282826

Epoch 1, Time: 12.37149977684021, Loss: 1.7944607896268214
Epoch 1, Accuracy: 43.06%
Epoch 2, Time: 12.313930034637451, Loss: 1.588069195485176
Epoch 2, Accuracy: 45.05%
Epoch 3, Time: 12.379019975662231, Loss: 1.4946583783839975
Epoch 3, Accuracy: 48.0%
Epoch 4, Time: 12.336661577224731, Loss: 1.4239532636559529
Epoch 4, Accuracy: 48.57%
Epoch 5, Time: 12.299131631851196, Loss: 1.3677063050020077
Epoch 5, Accuracy: 50.31%
Epoch 6, Time: 12.327710628509521, Loss: 1.3253652733914993
Epoch 6, Accuracy: 51.72%
Epoch 7, Time: 12.301263093948364, Loss: 1.2773237854928312
Epoch 7, Accuracy: 51.01%
Epoch 8, Time: 12.338500738143921, Loss: 1.2445908529526741
Epoch 8, Accuracy: 52.91%
Epoch 9, Time: 12.380045175552368, Loss: 1.2106961067527762
Epoch 9, Accuracy: 53.55%
Epoch 10, Time: 12.259198904037476, Loss: 1.1744412517608585
Epoch 10, Accuracy: 54.14%
Epoch 11, Time: 12.424313068389893, Loss: 1.1482155627911659
Epoch 11, Accuracy: 55.06%
Epoch 12, Time:

56.34

In [18]:
# ConvNN_Spatial_K_N model, 4 layers
ConvNN_Spatial_N_4 = ConvNN_Spatial_K_N(num_layers=4, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count.
print(f"Model: {ConvNN_Spatial_N_4.name}")
print(f"Num params: {count_parameters(ConvNN_Spatial_N_4)}")
print()

# Training setup: cross-entropy loss, Adam at lr 1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_Spatial_N_4.parameters(), lr=1e-4)

# Train (per-epoch loss and test accuracy are logged by train_eval), then
# evaluate once more on the test loader as the cell's final expression.
train_eval(
    ConvNN_Spatial_N_4,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(ConvNN_Spatial_N_4, cifar10.test_loader, device='cuda')


Model: ConvNN_Spatial_K_N
Num params: 282826

Epoch 1, Time: 17.09542202949524, Loss: 1.7831205394871705
Epoch 1, Accuracy: 43.67%
Epoch 2, Time: 17.049602270126343, Loss: 1.5427683832700296
Epoch 2, Accuracy: 46.74%
Epoch 3, Time: 17.07875418663025, Loss: 1.4338646528056211
Epoch 3, Accuracy: 48.47%
Epoch 4, Time: 17.047104120254517, Loss: 1.3677962753169066
Epoch 4, Accuracy: 49.72%
Epoch 5, Time: 17.06481432914734, Loss: 1.3087958421396173
Epoch 5, Accuracy: 50.17%
Epoch 6, Time: 17.03537130355835, Loss: 1.2646293406138944
Epoch 6, Accuracy: 50.5%
Epoch 7, Time: 17.04889750480652, Loss: 1.2245055116960764
Epoch 7, Accuracy: 51.88%
Epoch 8, Time: 17.06704592704773, Loss: 1.186126531771077
Epoch 8, Accuracy: 52.07%
Epoch 9, Time: 17.095964431762695, Loss: 1.1489176427007026
Epoch 9, Accuracy: 52.5%
Epoch 10, Time: 17.096144437789917, Loss: 1.1086640888467774
Epoch 10, Accuracy: 52.39%
Epoch 11, Time: 17.001723051071167, Loss: 1.0739461226993814
Epoch 11, Accuracy: 53.06%
Epoch 12, Tim

47.94

In [19]:
# ConvNN_Attn_K_N model, 4 layers
ConvNN_Attn_N_4 = ConvNN_Attn_K_N(num_layers=4, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count.
print(f"Model: {ConvNN_Attn_N_4.name}")
print(f"Num params: {count_parameters(ConvNN_Attn_N_4)}")
print()

# Training setup: cross-entropy loss, Adam at lr 1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_Attn_N_4.parameters(), lr=1e-4)

# Train (per-epoch loss and test accuracy are logged by train_eval), then
# evaluate once more on the test loader as the cell's final expression.
train_eval(
    ConvNN_Attn_N_4,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(ConvNN_Attn_N_4, cifar10.test_loader, device='cuda')


Model: ConvNN_Attn_K_N
Num params: 1069258

Epoch 1, Time: 12.975575923919678, Loss: 2.1052265080344648
Epoch 1, Accuracy: 26.16%
Epoch 2, Time: 13.045248031616211, Loss: 1.854600947226405
Epoch 2, Accuracy: 30.85%
Epoch 3, Time: 13.044166803359985, Loss: 1.785134963824621
Epoch 3, Accuracy: 35.2%
Epoch 4, Time: 12.978342056274414, Loss: 1.7278149917607417
Epoch 4, Accuracy: 38.01%
Epoch 5, Time: 13.053700923919678, Loss: 1.6595890329926826
Epoch 5, Accuracy: 39.84%
Epoch 6, Time: 13.044067859649658, Loss: 1.6063258721090643
Epoch 6, Accuracy: 42.7%
Epoch 7, Time: 13.004730939865112, Loss: 1.5477413602192382
Epoch 7, Accuracy: 44.46%
Epoch 8, Time: 13.063765048980713, Loss: 1.4972705838015623
Epoch 8, Accuracy: 45.15%
Epoch 9, Time: 13.080734968185425, Loss: 1.4506422026687875
Epoch 9, Accuracy: 47.6%
Epoch 10, Time: 12.966824769973755, Loss: 1.4051055833506767
Epoch 10, Accuracy: 48.68%
Epoch 11, Time: 13.001868724822998, Loss: 1.3643979324251794
Epoch 11, Accuracy: 50.22%
Epoch 12, T

55.26

#### ii. Branching

In [20]:
# Branching_ConvNN_K_All model, 4 layers
Branching_ConvNN_All_4 = Branching_ConvNN_K_All(num_layers=4, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count.
print(f"Model: {Branching_ConvNN_All_4.name}")
print(f"Num params: {count_parameters(Branching_ConvNN_All_4)}")
print()

# Training setup: cross-entropy loss, Adam at lr 1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Branching_ConvNN_All_4.parameters(), lr=1e-4)

# Train (per-epoch loss and test accuracy are logged by train_eval), then
# evaluate once more on the test loader as the cell's final expression.
train_eval(
    Branching_ConvNN_All_4,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(Branching_ConvNN_All_4, cifar10.test_loader, device='cuda')


Model: Branching_ConvNN_K_All
Num params: 292346

Epoch 1, Time: 13.5658278465271, Loss: 1.852853455506932
Epoch 1, Accuracy: 40.27%
Epoch 2, Time: 13.522315740585327, Loss: 1.58052790607028
Epoch 2, Accuracy: 46.43%
Epoch 3, Time: 13.53363561630249, Loss: 1.4461782387150524
Epoch 3, Accuracy: 49.22%
Epoch 4, Time: 13.571393966674805, Loss: 1.3684175534321523
Epoch 4, Accuracy: 50.83%
Epoch 5, Time: 13.485316753387451, Loss: 1.3063685163055234
Epoch 5, Accuracy: 51.68%
Epoch 6, Time: 13.500327110290527, Loss: 1.2569658266156531
Epoch 6, Accuracy: 52.66%
Epoch 7, Time: 13.47579836845398, Loss: 1.2112999177344925
Epoch 7, Accuracy: 53.7%
Epoch 8, Time: 13.46775197982788, Loss: 1.171588484054941
Epoch 8, Accuracy: 54.44%
Epoch 9, Time: 13.477111339569092, Loss: 1.128583028798213
Epoch 9, Accuracy: 55.86%
Epoch 10, Time: 13.48945426940918, Loss: 1.088348383641304
Epoch 10, Accuracy: 56.5%
Epoch 11, Time: 13.509267330169678, Loss: 1.0484675775708445
Epoch 11, Accuracy: 57.27%
Epoch 12, Time

54.15

In [21]:
# Branching_ConvNN_K_N model, 4 layers
Branching_ConvNN_N_4 = Branching_ConvNN_K_N(num_layers=4, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count.
print(f"Model: {Branching_ConvNN_N_4.name}")
print(f"Num params: {count_parameters(Branching_ConvNN_N_4)}")
print()

# Training setup: cross-entropy loss, Adam at lr 1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Branching_ConvNN_N_4.parameters(), lr=1e-4)

# Train (per-epoch loss and test accuracy are logged by train_eval), then
# evaluate once more on the test loader as the cell's final expression.
train_eval(
    Branching_ConvNN_N_4,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(Branching_ConvNN_N_4, cifar10.test_loader, device='cuda')


Model: Branching_ConvNN_K_N
Num params: 292346

Epoch 1, Time: 13.5924232006073, Loss: 1.9020026007576671
Epoch 1, Accuracy: 37.81%
Epoch 2, Time: 13.660791635513306, Loss: 1.6756119087833883
Epoch 2, Accuracy: 43.12%
Epoch 3, Time: 13.677706003189087, Loss: 1.5618313053989654
Epoch 3, Accuracy: 46.43%
Epoch 4, Time: 13.560994625091553, Loss: 1.480591014370589
Epoch 4, Accuracy: 46.89%
Epoch 5, Time: 13.655873537063599, Loss: 1.4157902606003119
Epoch 5, Accuracy: 50.25%
Epoch 6, Time: 13.537368297576904, Loss: 1.3539513525603069
Epoch 6, Accuracy: 51.51%
Epoch 7, Time: 13.565556049346924, Loss: 1.3040816962261639
Epoch 7, Accuracy: 53.13%
Epoch 8, Time: 13.546173572540283, Loss: 1.2485992765944938
Epoch 8, Accuracy: 53.91%
Epoch 9, Time: 13.640750408172607, Loss: 1.2080453567187805
Epoch 9, Accuracy: 54.28%
Epoch 10, Time: 13.603673219680786, Loss: 1.1660564075345579
Epoch 10, Accuracy: 54.29%
Epoch 11, Time: 13.586263418197632, Loss: 1.1313960380718837
Epoch 11, Accuracy: 55.69%
Epoch

57.38

In [22]:
# Branching_ConvNN_Spatial_K_N model, 4 layers
Branching_ConvNN_Spatial_N_4 = Branching_ConvNN_Spatial_K_N(num_layers=4, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count.
print(f"Model: {Branching_ConvNN_Spatial_N_4.name}")
print(f"Num params: {count_parameters(Branching_ConvNN_Spatial_N_4)}")
print()

# Training setup: cross-entropy loss, Adam at lr 1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Branching_ConvNN_Spatial_N_4.parameters(), lr=1e-4)

# Train (per-epoch loss and test accuracy are logged by train_eval), then
# evaluate once more on the test loader as the cell's final expression.
train_eval(
    Branching_ConvNN_Spatial_N_4,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(Branching_ConvNN_Spatial_N_4, cifar10.test_loader, device='cuda')


Model: Branching_ConvNN_Spatial_K_N
Num params: 292346

Epoch 1, Time: 18.56782054901123, Loss: 1.8955026723234856
Epoch 1, Accuracy: 39.52%
Epoch 2, Time: 18.4909245967865, Loss: 1.6414695151931489
Epoch 2, Accuracy: 44.02%
Epoch 3, Time: 18.476143836975098, Loss: 1.5228545926416013
Epoch 3, Accuracy: 48.02%
Epoch 4, Time: 18.505168437957764, Loss: 1.4282489951004458
Epoch 4, Accuracy: 49.32%
Epoch 5, Time: 18.49952244758606, Loss: 1.360961976334872
Epoch 5, Accuracy: 51.54%
Epoch 6, Time: 18.44394588470459, Loss: 1.3028453134209907
Epoch 6, Accuracy: 50.31%
Epoch 7, Time: 18.458922386169434, Loss: 1.253053999389224
Epoch 7, Accuracy: 52.76%
Epoch 8, Time: 18.484856128692627, Loss: 1.2060706773987206
Epoch 8, Accuracy: 54.11%
Epoch 9, Time: 18.47229528427124, Loss: 1.166204820584763
Epoch 9, Accuracy: 54.27%
Epoch 10, Time: 18.45161509513855, Loss: 1.1290935888467237
Epoch 10, Accuracy: 55.19%
Epoch 11, Time: 18.47714138031006, Loss: 1.0963238809267273
Epoch 11, Accuracy: 55.85%
Epoch

54.68

In [23]:
# Branching_ConvNN_Attention_K_N model, 4 layers
Branching_ConvNN_Attn_N_4 = Branching_ConvNN_Attention_K_N(num_layers=4, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count.
print(f"Model: {Branching_ConvNN_Attn_N_4.name}")
print(f"Num params: {count_parameters(Branching_ConvNN_Attn_N_4)}")
print()

# Training setup: cross-entropy loss, Adam at lr 1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Branching_ConvNN_Attn_N_4.parameters(), lr=1e-4)

# Train (per-epoch loss and test accuracy are logged by train_eval), then
# evaluate once more on the test loader as the cell's final expression.
train_eval(
    Branching_ConvNN_Attn_N_4,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(Branching_ConvNN_Attn_N_4, cifar10.test_loader, device='cuda')


Model: Branching_ConvNN_Attention_K_N
Num params: 1078778

Epoch 1, Time: 14.401794195175171, Loss: 1.9758076556503315
Epoch 1, Accuracy: 40.47%
Epoch 2, Time: 14.355144262313843, Loss: 1.6081038181434202
Epoch 2, Accuracy: 46.43%
Epoch 3, Time: 14.38712477684021, Loss: 1.4642822038182213
Epoch 3, Accuracy: 49.32%
Epoch 4, Time: 14.395912408828735, Loss: 1.3680021703395697
Epoch 4, Accuracy: 51.44%
Epoch 5, Time: 14.379359006881714, Loss: 1.296820192538259
Epoch 5, Accuracy: 53.03%
Epoch 6, Time: 14.3293776512146, Loss: 1.2363335794347632
Epoch 6, Accuracy: 54.58%
Epoch 7, Time: 14.34569764137268, Loss: 1.1835466347387076
Epoch 7, Accuracy: 54.87%
Epoch 8, Time: 14.416313171386719, Loss: 1.1323179428839623
Epoch 8, Accuracy: 55.28%
Epoch 9, Time: 14.406411409378052, Loss: 1.0858629595897997
Epoch 9, Accuracy: 55.74%
Epoch 10, Time: 14.398784637451172, Loss: 1.0368567590823259
Epoch 10, Accuracy: 56.65%
Epoch 11, Time: 14.34460711479187, Loss: 0.9894329128820268
Epoch 11, Accuracy: 56.7

56.06

### 8 Layer Models

In [24]:

# CNN control model -- 8-layer variant
CNN_8 = CNN(
    num_layers=8,
    num_classes=10,
    device='cuda',
)

# Print the model name and its trainable-parameter count, followed by a blank line.
print(f"Model: {CNN_8.name}")
print(f"Num params: {count_parameters(CNN_8)}\n")

# Test + Eval: cross-entropy loss, Adam at lr=1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(CNN_8.parameters(), lr=1e-4)
train_eval(
    CNN_8,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
# Last expression of the cell: its value is shown as the cell output.
evaluate_accuracy(CNN_8, cifar10.test_loader, device='cuda')


Model: CNN
Num params: 180538

Epoch 1, Time: 9.855607986450195, Loss: 1.9025056793561677
Epoch 1, Accuracy: 44.25%
Epoch 2, Time: 9.918110132217407, Loss: 1.481844261784078
Epoch 2, Accuracy: 49.06%
Epoch 3, Time: 9.869308471679688, Loss: 1.3579369602758256
Epoch 3, Accuracy: 52.34%
Epoch 4, Time: 9.89134669303894, Loss: 1.272978605974056
Epoch 4, Accuracy: 53.95%
Epoch 5, Time: 9.836044788360596, Loss: 1.206574300594647
Epoch 5, Accuracy: 56.44%
Epoch 6, Time: 9.838967561721802, Loss: 1.1541531600458237
Epoch 6, Accuracy: 57.25%
Epoch 7, Time: 9.814980268478394, Loss: 1.1063483911554526
Epoch 7, Accuracy: 58.22%
Epoch 8, Time: 9.847747802734375, Loss: 1.0666150791413338
Epoch 8, Accuracy: 58.5%
Epoch 9, Time: 9.845415830612183, Loss: 1.0301593171666041
Epoch 9, Accuracy: 59.6%
Epoch 10, Time: 9.832460880279541, Loss: 0.9986855423511447
Epoch 10, Accuracy: 59.78%
Epoch 11, Time: 9.822374105453491, Loss: 0.9623058598364711
Epoch 11, Accuracy: 59.69%
Epoch 12, Time: 9.812793254852295, L

54.16

In [25]:

# Attention model -- 8-layer variant
Attention_8 = Attention(
    num_layers=8,
    num_classes=10,
    device='cuda',
)

# Print the model name and its trainable-parameter count, followed by a blank line.
print(f"Model: {Attention_8.name}")
print(f"Num params: {count_parameters(Attention_8)}\n")

# Test + Eval: cross-entropy loss, Adam at lr=1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Attention_8.parameters(), lr=1e-4)
train_eval(
    Attention_8,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
# Last expression of the cell: its value is shown as the cell output.
evaluate_accuracy(Attention_8, cifar10.test_loader, device='cuda')


Model: Attention
Num params: 329354

Epoch 1, Time: 17.08330798149109, Loss: 2.1498796273680294
Epoch 1, Accuracy: 24.13%
Epoch 2, Time: 17.089473962783813, Loss: 1.9964493298164718
Epoch 2, Accuracy: 26.27%
Epoch 3, Time: 17.066333293914795, Loss: 1.9677907844333697
Epoch 3, Accuracy: 25.65%
Epoch 4, Time: 17.094459056854248, Loss: 1.9566807342917107
Epoch 4, Accuracy: 27.26%
Epoch 5, Time: 17.045637845993042, Loss: 1.9457023229135577
Epoch 5, Accuracy: 26.78%
Epoch 6, Time: 17.037859439849854, Loss: 1.9391471229855666
Epoch 6, Accuracy: 27.28%
Epoch 7, Time: 17.047289848327637, Loss: 1.9349905208248617
Epoch 7, Accuracy: 27.87%
Epoch 8, Time: 17.037007093429565, Loss: 1.9328029259391453
Epoch 8, Accuracy: 27.19%
Epoch 9, Time: 17.03754711151123, Loss: 1.9275074581356
Epoch 9, Accuracy: 27.9%
Epoch 10, Time: 17.032230854034424, Loss: 1.9223835114627847
Epoch 10, Accuracy: 27.11%
Epoch 11, Time: 17.085395336151123, Loss: 1.9161288643736973
Epoch 11, Accuracy: 27.44%
Epoch 12, Time: 17.

39.47

In [26]:
# ConvNN All -- 8-layer variant
ConvNN_All_8 = ConvNN_K_All(
    num_layers=8,
    num_classes=10,
    device='cuda',
)

# Print the model name and its trainable-parameter count, followed by a blank line.
print(f"Model: {ConvNN_All_8.name}")
print(f"Num params: {count_parameters(ConvNN_All_8)}\n")

# Test + Eval: cross-entropy loss, Adam at lr=1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_All_8.parameters(), lr=1e-4)
train_eval(
    ConvNN_All_8,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
# Last expression of the cell: its value is shown as the cell output.
evaluate_accuracy(ConvNN_All_8, cifar10.test_loader, device='cuda')


Model: ConvNN_K_All
Num params: 431754

Epoch 1, Time: 17.332510232925415, Loss: 1.8383276948843466
Epoch 1, Accuracy: 43.05%
Epoch 2, Time: 17.189846515655518, Loss: 1.5251811762599994
Epoch 2, Accuracy: 48.48%
Epoch 3, Time: 17.157214879989624, Loss: 1.3639150504261026
Epoch 3, Accuracy: 51.34%
Epoch 4, Time: 17.160196781158447, Loss: 1.279626542802357
Epoch 4, Accuracy: 52.87%
Epoch 5, Time: 17.188408374786377, Loss: 1.2180892401339147
Epoch 5, Accuracy: 53.7%
Epoch 6, Time: 17.18293595314026, Loss: 1.1699190233522059
Epoch 6, Accuracy: 53.88%
Epoch 7, Time: 17.147879123687744, Loss: 1.1342971792916203
Epoch 7, Accuracy: 54.17%
Epoch 8, Time: 17.15701127052307, Loss: 1.097041347981109
Epoch 8, Accuracy: 54.1%
Epoch 9, Time: 17.131885766983032, Loss: 1.0658443102141475
Epoch 9, Accuracy: 55.46%
Epoch 10, Time: 17.102141618728638, Loss: 1.0326122119451118
Epoch 10, Accuracy: 55.76%
Epoch 11, Time: 17.147902965545654, Loss: 0.9975586811752271
Epoch 11, Accuracy: 55.71%
Epoch 12, Time: 

49.89

In [27]:
# ConvNN N -- 8-layer variant
ConvNN_N_8 = ConvNN_K_N(
    num_layers=8,
    num_classes=10,
    device='cuda',
)

# Print the model name and its trainable-parameter count, followed by a blank line.
print(f"Model: {ConvNN_N_8.name}")
print(f"Num params: {count_parameters(ConvNN_N_8)}\n")

# Test + Eval: cross-entropy loss, Adam at lr=1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_N_8.parameters(), lr=1e-4)
train_eval(
    ConvNN_N_8,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
# Last expression of the cell: its value is shown as the cell output.
evaluate_accuracy(ConvNN_N_8, cifar10.test_loader, device='cuda')


Model: ConvNN_K_N
Num params: 431754

Epoch 1, Time: 16.20113492012024, Loss: 1.906471816932454
Epoch 1, Accuracy: 38.88%
Epoch 2, Time: 16.223448991775513, Loss: 1.6765319759888417
Epoch 2, Accuracy: 40.87%
Epoch 3, Time: 16.2391414642334, Loss: 1.5756457669045918
Epoch 3, Accuracy: 44.48%
Epoch 4, Time: 16.280445337295532, Loss: 1.5076517314862108
Epoch 4, Accuracy: 45.02%
Epoch 5, Time: 16.254319667816162, Loss: 1.4589817311300342
Epoch 5, Accuracy: 48.2%
Epoch 6, Time: 16.230822801589966, Loss: 1.4203247781604758
Epoch 6, Accuracy: 47.88%
Epoch 7, Time: 16.21392250061035, Loss: 1.383765828960082
Epoch 7, Accuracy: 49.14%
Epoch 8, Time: 16.138346433639526, Loss: 1.3585506541954586
Epoch 8, Accuracy: 49.54%
Epoch 9, Time: 16.21010661125183, Loss: 1.3288691926490315
Epoch 9, Accuracy: 50.73%
Epoch 10, Time: 16.23798418045044, Loss: 1.3038517888397207
Epoch 10, Accuracy: 51.11%
Epoch 11, Time: 16.205057859420776, Loss: 1.284060780654478
Epoch 11, Accuracy: 50.23%
Epoch 12, Time: 16.234

56.95

In [28]:
# ConvNN Spatial N -- 8-layer variant
ConvNN_Spatial_N_8 = ConvNN_Spatial_K_N(
    num_layers=8,
    num_classes=10,
    device='cuda',
)

# Print the model name and its trainable-parameter count, followed by a blank line.
print(f"Model: {ConvNN_Spatial_N_8.name}")
print(f"Num params: {count_parameters(ConvNN_Spatial_N_8)}\n")

# Test + Eval: cross-entropy loss, Adam at lr=1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_Spatial_N_8.parameters(), lr=1e-4)
train_eval(
    ConvNN_Spatial_N_8,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
# Last expression of the cell: its value is shown as the cell output.
evaluate_accuracy(ConvNN_Spatial_N_8, cifar10.test_loader, device='cuda')


Model: ConvNN_Spatial_K_N
Num params: 431754

Epoch 1, Time: 26.43557047843933, Loss: 1.8975181395135572
Epoch 1, Accuracy: 38.43%
Epoch 2, Time: 26.35677218437195, Loss: 1.6821536418726988
Epoch 2, Accuracy: 42.01%
Epoch 3, Time: 26.401371240615845, Loss: 1.5824027044693831
Epoch 3, Accuracy: 45.7%
Epoch 4, Time: 26.377134084701538, Loss: 1.5000085589830832
Epoch 4, Accuracy: 44.89%
Epoch 5, Time: 26.334394216537476, Loss: 1.4411777512496695
Epoch 5, Accuracy: 47.32%
Epoch 6, Time: 26.291471004486084, Loss: 1.3912783930521182
Epoch 6, Accuracy: 48.58%
Epoch 7, Time: 26.32463026046753, Loss: 1.3501555345704794
Epoch 7, Accuracy: 49.09%
Epoch 8, Time: 26.294374465942383, Loss: 1.3092825934862542
Epoch 8, Accuracy: 50.07%
Epoch 9, Time: 26.36677384376526, Loss: 1.2772374834550921
Epoch 9, Accuracy: 50.24%
Epoch 10, Time: 26.27069616317749, Loss: 1.2424285345522643
Epoch 10, Accuracy: 50.31%
Epoch 11, Time: 26.23786759376526, Loss: 1.211384786897913
Epoch 11, Accuracy: 51.92%
Epoch 12, Ti

46.88

In [29]:
# ConvNN Attention N -- 8-layer variant
ConvNN_Attn_N_8 = ConvNN_Attn_K_N(
    num_layers=8,
    num_classes=10,
    device='cuda',
)

# Print the model name and its trainable-parameter count, followed by a blank line.
print(f"Model: {ConvNN_Attn_N_8.name}")
print(f"Num params: {count_parameters(ConvNN_Attn_N_8)}\n")

# Test + Eval: cross-entropy loss, Adam at lr=1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_Attn_N_8.parameters(), lr=1e-4)
train_eval(
    ConvNN_Attn_N_8,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
# Last expression of the cell: its value is shown as the cell output.
evaluate_accuracy(ConvNN_Attn_N_8, cifar10.test_loader, device='cuda')


Model: ConvNN_Attn_K_N
Num params: 2004618

Epoch 1, Time: 17.730464458465576, Loss: 2.303244354779763
Epoch 1, Accuracy: 9.89%
Epoch 2, Time: 17.74323558807373, Loss: 2.3026200752429036
Epoch 2, Accuracy: 10.35%
Epoch 3, Time: 17.722782611846924, Loss: 2.3026442783872794
Epoch 3, Accuracy: 10.14%
Epoch 4, Time: 17.704108238220215, Loss: 2.302634682191912
Epoch 4, Accuracy: 9.74%
Epoch 5, Time: 17.711208820343018, Loss: 2.302614453503543
Epoch 5, Accuracy: 10.18%
Epoch 6, Time: 17.644155502319336, Loss: 2.1172068823329018
Epoch 6, Accuracy: 21.39%
Epoch 7, Time: 17.67440152168274, Loss: 2.0077957940833344
Epoch 7, Accuracy: 24.11%
Epoch 8, Time: 17.691233158111572, Loss: 1.954183894959862
Epoch 8, Accuracy: 25.11%
Epoch 9, Time: 17.682640075683594, Loss: 1.901059565641691
Epoch 9, Accuracy: 26.4%
Epoch 10, Time: 17.674365043640137, Loss: 1.8621844170648423
Epoch 10, Accuracy: 28.51%
Epoch 11, Time: 17.650773763656616, Loss: 1.8310270211885653
Epoch 11, Accuracy: 29.74%
Epoch 12, Time: 

51.74

#### ii. Branching

In [30]:
# Branching ConvNN All -- 8-layer variant
Branching_ConvNN_All_8 = Branching_ConvNN_K_All(
    num_layers=8,
    num_classes=10,
    device='cuda',
)

# Print the model name and its trainable-parameter count, followed by a blank line.
print(f"Model: {Branching_ConvNN_All_8.name}")
print(f"Num params: {count_parameters(Branching_ConvNN_All_8)}\n")

# Test + Eval: cross-entropy loss, Adam at lr=1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Branching_ConvNN_All_8.parameters(), lr=1e-4)
train_eval(
    Branching_ConvNN_All_8,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
# Last expression of the cell: its value is shown as the cell output.
evaluate_accuracy(Branching_ConvNN_All_8, cifar10.test_loader, device='cuda')


Model: Branching_ConvNN_K_All
Num params: 452666

Epoch 1, Time: 19.412723541259766, Loss: 1.9567141563386259
Epoch 1, Accuracy: 40.47%
Epoch 2, Time: 19.239327430725098, Loss: 1.6197774611470643
Epoch 2, Accuracy: 43.75%
Epoch 3, Time: 19.23990249633789, Loss: 1.5048373425403214
Epoch 3, Accuracy: 46.46%
Epoch 4, Time: 19.188838481903076, Loss: 1.4182720306279408
Epoch 4, Accuracy: 48.68%
Epoch 5, Time: 19.18712091445923, Loss: 1.338855461010238
Epoch 5, Accuracy: 49.77%
Epoch 6, Time: 19.19215750694275, Loss: 1.2625008780328209
Epoch 6, Accuracy: 52.13%
Epoch 7, Time: 19.163695812225342, Loss: 1.1969298641852406
Epoch 7, Accuracy: 53.46%
Epoch 8, Time: 19.104652881622314, Loss: 1.136238768887337
Epoch 8, Accuracy: 54.07%
Epoch 9, Time: 19.113155841827393, Loss: 1.0820027354275783
Epoch 9, Accuracy: 55.28%
Epoch 10, Time: 19.079179048538208, Loss: 1.0337450375489872
Epoch 10, Accuracy: 54.54%
Epoch 11, Time: 19.11572790145874, Loss: 0.9903117330635295
Epoch 11, Accuracy: 54.97%
Epoch 

51.56

In [31]:
# Branching ConvNN N -- 8-layer variant
Branching_ConvNN_N_8 = Branching_ConvNN_K_N(
    num_layers=8,
    num_classes=10,
    device='cuda',
)

# Print the model name and its trainable-parameter count, followed by a blank line.
print(f"Model: {Branching_ConvNN_N_8.name}")
print(f"Num params: {count_parameters(Branching_ConvNN_N_8)}\n")

# Test + Eval: cross-entropy loss, Adam at lr=1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Branching_ConvNN_N_8.parameters(), lr=1e-4)
train_eval(
    Branching_ConvNN_N_8,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
# Last expression of the cell: its value is shown as the cell output.
evaluate_accuracy(Branching_ConvNN_N_8, cifar10.test_loader, device='cuda')


Model: Branching_ConvNN_K_N
Num params: 452666

Epoch 1, Time: 18.900405406951904, Loss: 2.059544919244469
Epoch 1, Accuracy: 33.35%
Epoch 2, Time: 18.98857808113098, Loss: 1.7815393782637614
Epoch 2, Accuracy: 40.28%
Epoch 3, Time: 18.841235876083374, Loss: 1.607883637518529
Epoch 3, Accuracy: 44.58%
Epoch 4, Time: 18.938177585601807, Loss: 1.5127042370379125
Epoch 4, Accuracy: 46.63%
Epoch 5, Time: 18.94485092163086, Loss: 1.4447479708420345
Epoch 5, Accuracy: 48.84%
Epoch 6, Time: 18.966708183288574, Loss: 1.377014492173939
Epoch 6, Accuracy: 49.54%
Epoch 7, Time: 18.856157064437866, Loss: 1.3229994729656698
Epoch 7, Accuracy: 51.91%
Epoch 8, Time: 18.85072922706604, Loss: 1.2722017912151258
Epoch 8, Accuracy: 53.32%
Epoch 9, Time: 18.927858591079712, Loss: 1.2140401738226567
Epoch 9, Accuracy: 53.77%
Epoch 10, Time: 18.927599668502808, Loss: 1.1745766797638915
Epoch 10, Accuracy: 55.0%
Epoch 11, Time: 18.94946527481079, Loss: 1.1239151932546854
Epoch 11, Accuracy: 55.94%
Epoch 12, 

53.5

In [32]:
# Branching ConvNN Spatial N -- 8-layer variant
Branching_ConvNN_Spatial_N_8 = Branching_ConvNN_Spatial_K_N(
    num_layers=8,
    num_classes=10,
    device='cuda',
)

# Print the model name and its trainable-parameter count, followed by a blank line.
print(f"Model: {Branching_ConvNN_Spatial_N_8.name}")
print(f"Num params: {count_parameters(Branching_ConvNN_Spatial_N_8)}\n")

# Test + Eval: cross-entropy loss, Adam at lr=1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Branching_ConvNN_Spatial_N_8.parameters(), lr=1e-4)
train_eval(
    Branching_ConvNN_Spatial_N_8,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
# Last expression of the cell: its value is shown as the cell output.
evaluate_accuracy(Branching_ConvNN_Spatial_N_8, cifar10.test_loader, device='cuda')


Model: Branching_ConvNN_Spatial_K_N
Num params: 452666

Epoch 1, Time: 29.301878690719604, Loss: 2.0140679865846853
Epoch 1, Accuracy: 37.26%
Epoch 2, Time: 29.275882959365845, Loss: 1.7108328993363149
Epoch 2, Accuracy: 41.6%
Epoch 3, Time: 29.219414472579956, Loss: 1.5867718490188385
Epoch 3, Accuracy: 44.18%
Epoch 4, Time: 29.15677261352539, Loss: 1.4968896605779447
Epoch 4, Accuracy: 45.51%
Epoch 5, Time: 29.152565240859985, Loss: 1.43032564257112
Epoch 5, Accuracy: 47.55%
Epoch 6, Time: 29.175868272781372, Loss: 1.3744162188466553
Epoch 6, Accuracy: 49.99%
Epoch 7, Time: 29.171180963516235, Loss: 1.3196497183779012
Epoch 7, Accuracy: 51.16%
Epoch 8, Time: 29.266093730926514, Loss: 1.2681402685239798
Epoch 8, Accuracy: 51.55%
Epoch 9, Time: 29.17015314102173, Loss: 1.2151921255814144
Epoch 9, Accuracy: 52.25%
Epoch 10, Time: 29.218697786331177, Loss: 1.169948922779859
Epoch 10, Accuracy: 53.69%
Epoch 11, Time: 29.457860231399536, Loss: 1.1180735875273604
Epoch 11, Accuracy: 53.28%


53.67

In [33]:
# Branching ConvNN Attention N -- 8-layer variant
Branching_ConvNN_Attn_N_8 = Branching_ConvNN_Attention_K_N(
    num_layers=8,
    num_classes=10,
    device='cuda',
)

# Print the model name and its trainable-parameter count, followed by a blank line.
print(f"Model: {Branching_ConvNN_Attn_N_8.name}")
print(f"Num params: {count_parameters(Branching_ConvNN_Attn_N_8)}\n")

# Test + Eval: cross-entropy loss, Adam at lr=1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Branching_ConvNN_Attn_N_8.parameters(), lr=1e-4)
train_eval(
    Branching_ConvNN_Attn_N_8,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
# Last expression of the cell: its value is shown as the cell output.
evaluate_accuracy(Branching_ConvNN_Attn_N_8, cifar10.test_loader, device='cuda')


Model: Branching_ConvNN_Attention_K_N
Num params: 2025530

Epoch 1, Time: 20.42871642112732, Loss: 2.191744473126843
Epoch 1, Accuracy: 21.03%
Epoch 2, Time: 20.425170421600342, Loss: 1.9678860535402127
Epoch 2, Accuracy: 23.65%
Epoch 3, Time: 20.45176339149475, Loss: 1.9127227982596668
Epoch 3, Accuracy: 25.12%
Epoch 4, Time: 20.414900541305542, Loss: 1.8592942561334966
Epoch 4, Accuracy: 28.61%
Epoch 5, Time: 20.369629621505737, Loss: 1.80834724302487
Epoch 5, Accuracy: 31.16%
Epoch 6, Time: 20.38279914855957, Loss: 1.76850943126337
Epoch 6, Accuracy: 34.12%
Epoch 7, Time: 20.38697385787964, Loss: 1.71662865453364
Epoch 7, Accuracy: 36.66%
Epoch 8, Time: 20.431525945663452, Loss: 1.644626538924244
Epoch 8, Accuracy: 37.93%
Epoch 9, Time: 20.407214403152466, Loss: 1.5875407564060768
Epoch 9, Accuracy: 41.06%
Epoch 10, Time: 20.373979330062866, Loss: 1.5349118915360298
Epoch 10, Accuracy: 43.4%
Epoch 11, Time: 20.432059288024902, Loss: 1.489105942304177
Epoch 11, Accuracy: 43.39%
Epoch

58.78