# ConvNN Attention Test (Table 2 in LaTeX)
## I. 2D Training and Testing on the CIFAR10 Dataset

In [1]:
# Torch
import torch 
import torch.nn as nn
import torch.nn.functional as F
from torch import optim 


# Train + Data 
import sys 
sys.path.append('../Layers')
from Conv1d_NN_spatial import * 
from Conv2d_NN_spatial import * 

sys.path.append('../Data')
from CIFAR10 import * 


sys.path.append('../Models')
from CIFAR_experiment_models.Attention import Attention
from CIFAR_experiment_models.Branching import B_Conv2d_ConvNN_K_All, B_Conv2d_ConvNN_K_N, B_Conv2d_ConvNN_Spatial_K_N, B_Conv2d_ConvNN_Attn_K_N, B_Conv2d_ConvNN_Attn_Spatial_K_N, B_Attention_ConvNN_K_All, B_Attention_ConvNN_K_N, B_Attention_ConvNN_Spatial_K_N, B_Attention_ConvNN_Attn_K_N, B_Attention_ConvNN_Attn_Spatial_K_N, B_Attention_Conv2d

from CIFAR_experiment_models.ConvNN import ConvNN_K_All,ConvNN_K_N, ConvNN_Spatial_K_N, ConvNN_Attn_K_N, ConvNN_Attn_Spatial_K_N
from CIFAR_experiment_models.CNN_Control import CNN


sys.path.append('../Train')
from train2d import train_eval, evaluate_accuracy




In [2]:
# Build the project's CIFAR10 data wrapper once; the cells below read its
# `train_loader` and `test_loader` attributes.
cifar10 = CIFAR10()

Files already downloaded and verified
Files already downloaded and verified


In [3]:
def count_parameters(model):
    """Return the number of trainable scalar parameters in `model`.

    Iterates over `model.parameters()` and adds up `numel()` for every
    parameter whose `requires_grad` flag is set; parameters with the flag
    cleared are skipped.
    """
    trainable_total = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable_total += param.numel()
    return trainable_total


### 2 Layer Models

In [4]:

# CNN: 2-layer model
CNN_2 = CNN(num_layers=2, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count
print(f"Model: {CNN_2.name}")
print(f"Num params: {count_parameters(CNN_2)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(CNN_2.parameters(), lr=1e-4)

# Train (the run logs loss and accuracy per epoch), then score on the test loader.
# The final call stays the last expression so its value is shown as the cell result.
train_eval(
    CNN_2,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(CNN_2, cifar10.test_loader, device='cuda')


Model: CNN
Num params: 166618

Epoch 1, Time: 9.929912090301514, Loss: 1.6776375930632472
Epoch 1, Accuracy: 47.56%
Epoch 2, Time: 9.433855056762695, Loss: 1.3934514211571736
Epoch 2, Accuracy: 52.85%
Epoch 3, Time: 9.521843433380127, Loss: 1.2868897377530022
Epoch 3, Accuracy: 54.05%
Epoch 4, Time: 9.436091184616089, Loss: 1.2206181579691064
Epoch 4, Accuracy: 55.74%
Epoch 5, Time: 9.513452529907227, Loss: 1.1733492061762554
Epoch 5, Accuracy: 56.8%
Epoch 6, Time: 9.444585084915161, Loss: 1.129993718222279
Epoch 6, Accuracy: 57.88%
Epoch 7, Time: 9.529327869415283, Loss: 1.0921663059602917
Epoch 7, Accuracy: 57.88%
Epoch 8, Time: 9.509526252746582, Loss: 1.055980640908946
Epoch 8, Accuracy: 59.03%
Epoch 9, Time: 9.513754606246948, Loss: 1.0217629460727466
Epoch 9, Accuracy: 59.57%
Epoch 10, Time: 9.379053592681885, Loss: 0.9903096131351597
Epoch 10, Accuracy: 60.29%
Epoch 11, Time: 9.44761610031128, Loss: 0.9623515347538092
Epoch 11, Accuracy: 59.73%
Epoch 12, Time: 9.544854402542114,

56.71

In [5]:
# Attention: 2-layer model
Attention_2 = Attention(num_layers=2, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count
print(f"Model: {Attention_2.name}")
print(f"Num params: {count_parameters(Attention_2)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Attention_2.parameters(), lr=1e-4)

# Train (the run logs loss and accuracy per epoch), then score on the test loader.
# The final call stays the last expression so its value is shown as the cell result.
train_eval(
    Attention_2,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(Attention_2, cifar10.test_loader, device='cuda')


Model: Attention
Num params: 202730

Epoch 1, Time: 13.62110948562622, Loss: 1.976603442903065
Epoch 1, Accuracy: 30.83%
Epoch 2, Time: 13.645280599594116, Loss: 1.8180294597850126
Epoch 2, Accuracy: 36.66%
Epoch 3, Time: 13.633504390716553, Loss: 1.6766212299046919
Epoch 3, Accuracy: 41.6%
Epoch 4, Time: 13.523088932037354, Loss: 1.5842826503621952
Epoch 4, Accuracy: 43.94%
Epoch 5, Time: 13.576640367507935, Loss: 1.51634542259109
Epoch 5, Accuracy: 46.01%
Epoch 6, Time: 13.602166175842285, Loss: 1.4653882382775816
Epoch 6, Accuracy: 47.5%
Epoch 7, Time: 13.557263851165771, Loss: 1.4283329603617148
Epoch 7, Accuracy: 47.96%
Epoch 8, Time: 13.584348201751709, Loss: 1.3941730000173953
Epoch 8, Accuracy: 47.68%
Epoch 9, Time: 13.645338773727417, Loss: 1.368415098437263
Epoch 9, Accuracy: 49.88%
Epoch 10, Time: 13.625293016433716, Loss: 1.342715308489397
Epoch 10, Accuracy: 50.03%
Epoch 11, Time: 13.792454481124878, Loss: 1.3203978341101381
Epoch 11, Accuracy: 49.85%
Epoch 12, Time: 13.61

45.72

In [6]:
# ConvNN All: 2-layer model
ConvNN_All_2 = ConvNN_K_All(num_layers=2, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count
print(f"Model: {ConvNN_All_2.name}")
print(f"Num params: {count_parameters(ConvNN_All_2)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_All_2.parameters(), lr=1e-4)

# Train (the run logs loss and accuracy per epoch), then score on the test loader.
# The final call stays the last expression so its value is shown as the cell result.
train_eval(
    ConvNN_All_2,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(ConvNN_All_2, cifar10.test_loader, device='cuda')


Model: ConvNN_K_All
Num params: 208362

Epoch 1, Time: 10.990770816802979, Loss: 1.6772492738331066
Epoch 1, Accuracy: 47.39%
Epoch 2, Time: 10.88559603691101, Loss: 1.388473522022862
Epoch 2, Accuracy: 51.23%
Epoch 3, Time: 10.917341709136963, Loss: 1.2670931001301007
Epoch 3, Accuracy: 53.88%
Epoch 4, Time: 10.88235878944397, Loss: 1.1897382931331235
Epoch 4, Accuracy: 55.42%
Epoch 5, Time: 10.981065034866333, Loss: 1.1296552351825988
Epoch 5, Accuracy: 56.61%
Epoch 6, Time: 10.900222063064575, Loss: 1.0829751890181276
Epoch 6, Accuracy: 56.11%
Epoch 7, Time: 10.94907832145691, Loss: 1.0350721820693491
Epoch 7, Accuracy: 57.78%
Epoch 8, Time: 10.904703378677368, Loss: 0.9918388762437474
Epoch 8, Accuracy: 58.19%
Epoch 9, Time: 10.922194004058838, Loss: 0.9551750334632366
Epoch 9, Accuracy: 59.02%
Epoch 10, Time: 10.958358526229858, Loss: 0.9172295960776337
Epoch 10, Accuracy: 58.3%
Epoch 11, Time: 10.904813289642334, Loss: 0.8835436752080308
Epoch 11, Accuracy: 58.91%
Epoch 12, Time:

53.12

In [7]:
# ConvNN N: 2-layer model
ConvNN_N_2 = ConvNN_K_N(num_layers=2, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count
print(f"Model: {ConvNN_N_2.name}")
print(f"Num params: {count_parameters(ConvNN_N_2)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_N_2.parameters(), lr=1e-4)

# Train (the run logs loss and accuracy per epoch), then score on the test loader.
# The final call stays the last expression so its value is shown as the cell result.
train_eval(
    ConvNN_N_2,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(ConvNN_N_2, cifar10.test_loader, device='cuda')


Model: ConvNN_K_N
Num params: 208362

Epoch 1, Time: 10.869735717773438, Loss: 1.753354160986898
Epoch 1, Accuracy: 43.09%
Epoch 2, Time: 10.774229049682617, Loss: 1.5489984055614228
Epoch 2, Accuracy: 46.56%
Epoch 3, Time: 10.800350904464722, Loss: 1.4586215295145273
Epoch 3, Accuracy: 47.86%
Epoch 4, Time: 10.83734130859375, Loss: 1.3851885158387596
Epoch 4, Accuracy: 50.36%
Epoch 5, Time: 10.774821758270264, Loss: 1.3269096881989628
Epoch 5, Accuracy: 52.07%
Epoch 6, Time: 10.813143491744995, Loss: 1.277229668005653
Epoch 6, Accuracy: 52.31%
Epoch 7, Time: 10.789751291275024, Loss: 1.2349835208919653
Epoch 7, Accuracy: 53.38%
Epoch 8, Time: 10.782684087753296, Loss: 1.1977486844410372
Epoch 8, Accuracy: 54.04%
Epoch 9, Time: 10.857162475585938, Loss: 1.1579350159143853
Epoch 9, Accuracy: 55.22%
Epoch 10, Time: 10.807456731796265, Loss: 1.128784360254512
Epoch 10, Accuracy: 55.98%
Epoch 11, Time: 10.75136947631836, Loss: 1.0973646821420822
Epoch 11, Accuracy: 56.44%
Epoch 12, Time: 1

56.81

In [8]:
# ConvNN Spatial N: 2-layer model
ConvNN_Spatial_N_2 = ConvNN_Spatial_K_N(num_layers=2, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count
print(f"Model: {ConvNN_Spatial_N_2.name}")
print(f"Num params: {count_parameters(ConvNN_Spatial_N_2)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_Spatial_N_2.parameters(), lr=1e-4)

# Train (the run logs loss and accuracy per epoch), then score on the test loader.
# The final call stays the last expression so its value is shown as the cell result.
train_eval(
    ConvNN_Spatial_N_2,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(ConvNN_Spatial_N_2, cifar10.test_loader, device='cuda')


Model: ConvNN_Spatial_K_N
Num params: 208362

Epoch 1, Time: 12.89163613319397, Loss: 1.7338953621857
Epoch 1, Accuracy: 43.0%
Epoch 2, Time: 12.954474687576294, Loss: 1.5122488231000388
Epoch 2, Accuracy: 47.0%
Epoch 3, Time: 12.95904803276062, Loss: 1.411323207418632
Epoch 3, Accuracy: 49.31%
Epoch 4, Time: 12.86198091506958, Loss: 1.3350789267998522
Epoch 4, Accuracy: 50.84%
Epoch 5, Time: 12.92915153503418, Loss: 1.2711789317600561
Epoch 5, Accuracy: 50.65%
Epoch 6, Time: 12.837065935134888, Loss: 1.2155742242817988
Epoch 6, Accuracy: 53.04%
Epoch 7, Time: 12.981858730316162, Loss: 1.1685967964437
Epoch 7, Accuracy: 53.55%
Epoch 8, Time: 12.885660648345947, Loss: 1.1248726557435282
Epoch 8, Accuracy: 53.83%
Epoch 9, Time: 12.752533197402954, Loss: 1.0836800138663758
Epoch 9, Accuracy: 54.59%
Epoch 10, Time: 12.792362928390503, Loss: 1.0399947652731405
Epoch 10, Accuracy: 54.67%
Epoch 11, Time: 12.821062803268433, Loss: 1.001406570072369
Epoch 11, Accuracy: 54.83%
Epoch 12, Time: 12

49.92

In [9]:
# ConvNN Attention N: 2-layer model
ConvNN_Attn_N_2 = ConvNN_Attn_K_N(num_layers=2, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count
print(f"Model: {ConvNN_Attn_N_2.name}")
print(f"Num params: {count_parameters(ConvNN_Attn_N_2)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_Attn_N_2.parameters(), lr=1e-4)

# Train (the run logs loss and accuracy per epoch), then score on the test loader.
# The final call stays the last expression so its value is shown as the cell result.
train_eval(
    ConvNN_Attn_N_2,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(ConvNN_Attn_N_2, cifar10.test_loader, device='cuda')


Model: ConvNN_Attn_K_N
Num params: 601578

Epoch 1, Time: 11.313341617584229, Loss: 1.9540260652142107
Epoch 1, Accuracy: 38.47%
Epoch 2, Time: 11.286083459854126, Loss: 1.6209518859148635
Epoch 2, Accuracy: 46.0%
Epoch 3, Time: 11.264748573303223, Loss: 1.4829907638337605
Epoch 3, Accuracy: 49.19%
Epoch 4, Time: 11.309548377990723, Loss: 1.3927658479232008
Epoch 4, Accuracy: 51.2%
Epoch 5, Time: 11.22412395477295, Loss: 1.3130912792957044
Epoch 5, Accuracy: 54.0%
Epoch 6, Time: 11.359054327011108, Loss: 1.2477152731717396
Epoch 6, Accuracy: 54.97%
Epoch 7, Time: 11.317809581756592, Loss: 1.1895526133839736
Epoch 7, Accuracy: 55.61%
Epoch 8, Time: 11.252662897109985, Loss: 1.1384979173198075
Epoch 8, Accuracy: 56.45%
Epoch 9, Time: 11.203557252883911, Loss: 1.0912957274547928
Epoch 9, Accuracy: 56.97%
Epoch 10, Time: 11.144129753112793, Loss: 1.0505669541523586
Epoch 10, Accuracy: 57.56%
Epoch 11, Time: 11.238753080368042, Loss: 1.0107298329510652
Epoch 11, Accuracy: 57.49%
Epoch 12, T

54.66

In [12]:
# ConvNN Attention Spatial N: 2-layer model
ConvNN_Attn_Spatial_N_2 = ConvNN_Attn_Spatial_K_N(num_layers=2, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count
print(f"Model: {ConvNN_Attn_Spatial_N_2.name}")
print(f"Num params: {count_parameters(ConvNN_Attn_Spatial_N_2)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_Attn_Spatial_N_2.parameters(), lr=1e-4)

# Train (the run logs loss and accuracy per epoch), then score on the test loader.
# The final call stays the last expression so its value is shown as the cell result.
train_eval(
    ConvNN_Attn_Spatial_N_2,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(ConvNN_Attn_Spatial_N_2, cifar10.test_loader, device='cuda')


Model: ConvNN_Attn_Spatial_K_N
Num params: 478698

Epoch 1, Time: 11.457454204559326, Loss: 1.8507177485224535
Epoch 1, Accuracy: 42.93%
Epoch 2, Time: 11.507236957550049, Loss: 1.5454313909001363
Epoch 2, Accuracy: 48.35%
Epoch 3, Time: 11.430986881256104, Loss: 1.4176900635290024
Epoch 3, Accuracy: 51.39%
Epoch 4, Time: 11.611697673797607, Loss: 1.3300903050033637
Epoch 4, Accuracy: 52.77%
Epoch 5, Time: 11.539352655410767, Loss: 1.2592125179822489
Epoch 5, Accuracy: 53.66%
Epoch 6, Time: 11.44853138923645, Loss: 1.201928383325372
Epoch 6, Accuracy: 54.77%
Epoch 7, Time: 11.511327981948853, Loss: 1.152803932157014
Epoch 7, Accuracy: 55.47%
Epoch 8, Time: 11.42148494720459, Loss: 1.1063422707797925
Epoch 8, Accuracy: 56.13%
Epoch 9, Time: 11.396982908248901, Loss: 1.0655339585088404
Epoch 9, Accuracy: 55.79%
Epoch 10, Time: 11.452541589736938, Loss: 1.0250346793238159
Epoch 10, Accuracy: 56.7%
Epoch 11, Time: 11.398571968078613, Loss: 0.9892121517597257
Epoch 11, Accuracy: 57.45%
Epoc

53.75

#### ii. Branching

In [10]:

# Branching: Conv2d + ConvNN All, 2-layer model
B_Conv2d_ConvNN_All_2 = B_Conv2d_ConvNN_K_All(num_layers=2, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count
print(f"Model: {B_Conv2d_ConvNN_All_2.name}")
print(f"Num params: {count_parameters(B_Conv2d_ConvNN_All_2)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(B_Conv2d_ConvNN_All_2.parameters(), lr=1e-4)

# Train (the run logs loss and accuracy per epoch), then score on the test loader.
# The final call stays the last expression so its value is shown as the cell result.
train_eval(
    B_Conv2d_ConvNN_All_2,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(B_Conv2d_ConvNN_All_2, cifar10.test_loader, device='cuda')


Model: B_Conv2d_ConvNN_K_All
Num params: 212186

Epoch 1, Time: 11.66477370262146, Loss: 1.7442329350639791
Epoch 1, Accuracy: 45.38%
Epoch 2, Time: 11.554768323898315, Loss: 1.4624716647140814
Epoch 2, Accuracy: 49.8%
Epoch 3, Time: 11.563044786453247, Loss: 1.3546515252736524
Epoch 3, Accuracy: 50.12%
Epoch 4, Time: 11.57263731956482, Loss: 1.2888805833466523
Epoch 4, Accuracy: 53.86%
Epoch 5, Time: 11.4683096408844, Loss: 1.231405520149509
Epoch 5, Accuracy: 54.56%
Epoch 6, Time: 11.449414730072021, Loss: 1.1795091064994598
Epoch 6, Accuracy: 56.1%
Epoch 7, Time: 11.529018878936768, Loss: 1.134068252332985
Epoch 7, Accuracy: 56.71%
Epoch 8, Time: 11.501482725143433, Loss: 1.0895875224372005
Epoch 8, Accuracy: 56.97%
Epoch 9, Time: 11.56770658493042, Loss: 1.0546800114614578
Epoch 9, Accuracy: 57.62%
Epoch 10, Time: 11.53633189201355, Loss: 1.0162915876301963
Epoch 10, Accuracy: 57.97%
Epoch 11, Time: 11.514337539672852, Loss: 0.9854496107686816
Epoch 11, Accuracy: 58.77%
Epoch 12, T

56.14

In [11]:

# Branching: Conv2d + ConvNN N, 2-layer model
B_Conv2d_ConvNN_N_2 = B_Conv2d_ConvNN_K_N(num_layers=2, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count
print(f"Model: {B_Conv2d_ConvNN_N_2.name}")
print(f"Num params: {count_parameters(B_Conv2d_ConvNN_N_2)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(B_Conv2d_ConvNN_N_2.parameters(), lr=1e-4)

# Train (the run logs loss and accuracy per epoch), then score on the test loader.
# The final call stays the last expression so its value is shown as the cell result.
train_eval(
    B_Conv2d_ConvNN_N_2,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(B_Conv2d_ConvNN_N_2, cifar10.test_loader, device='cuda')


Model: B_Conv2d_ConvNN_K_N
Num params: 212186

Epoch 1, Time: 11.479115724563599, Loss: 1.7902001421469862
Epoch 1, Accuracy: 43.68%
Epoch 2, Time: 11.429724216461182, Loss: 1.5345327584334956
Epoch 2, Accuracy: 47.69%
Epoch 3, Time: 11.48944091796875, Loss: 1.4189601778374303
Epoch 3, Accuracy: 50.62%
Epoch 4, Time: 11.454773902893066, Loss: 1.3310941123901425
Epoch 4, Accuracy: 51.87%
Epoch 5, Time: 11.377359867095947, Loss: 1.2652079096383146
Epoch 5, Accuracy: 54.11%
Epoch 6, Time: 11.515221118927002, Loss: 1.205287548358483
Epoch 6, Accuracy: 54.92%
Epoch 7, Time: 11.439621925354004, Loss: 1.1611496364064229
Epoch 7, Accuracy: 56.18%
Epoch 8, Time: 11.509211301803589, Loss: 1.124665157874222
Epoch 8, Accuracy: 56.45%
Epoch 9, Time: 11.43850564956665, Loss: 1.0912109671346366
Epoch 9, Accuracy: 57.46%
Epoch 10, Time: 11.462551593780518, Loss: 1.0601530419591139
Epoch 10, Accuracy: 57.63%
Epoch 11, Time: 11.501789331436157, Loss: 1.0317389862921538
Epoch 11, Accuracy: 58.65%
Epoch 1

56.9

In [12]:

# Branching: Conv2d + ConvNN Spatial N, 2-layer model
B_Conv2d_ConvNN_Spatial_N_2 = B_Conv2d_ConvNN_Spatial_K_N(num_layers=2, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count
print(f"Model: {B_Conv2d_ConvNN_Spatial_N_2.name}")
print(f"Num params: {count_parameters(B_Conv2d_ConvNN_Spatial_N_2)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(B_Conv2d_ConvNN_Spatial_N_2.parameters(), lr=1e-4)

# Train (the run logs loss and accuracy per epoch), then score on the test loader.
# The final call stays the last expression so its value is shown as the cell result.
train_eval(
    B_Conv2d_ConvNN_Spatial_N_2,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(B_Conv2d_ConvNN_Spatial_N_2, cifar10.test_loader, device='cuda')


Model: B_Conv2d_ConvNN_Spatial_K_N
Num params: 212186

Epoch 1, Time: 13.720765113830566, Loss: 1.7781474573532943
Epoch 1, Accuracy: 42.62%
Epoch 2, Time: 13.427390336990356, Loss: 1.558278109563891
Epoch 2, Accuracy: 46.17%
Epoch 3, Time: 13.7702317237854, Loss: 1.4595780401583522
Epoch 3, Accuracy: 49.07%
Epoch 4, Time: 13.950456380844116, Loss: 1.382107056925059
Epoch 4, Accuracy: 50.94%
Epoch 5, Time: 13.305539608001709, Loss: 1.3288475158421889
Epoch 5, Accuracy: 52.81%
Epoch 6, Time: 13.614088535308838, Loss: 1.276314022900808
Epoch 6, Accuracy: 53.12%
Epoch 7, Time: 13.68719220161438, Loss: 1.233759952018328
Epoch 7, Accuracy: 53.86%
Epoch 8, Time: 13.61751389503479, Loss: 1.1929427344170982
Epoch 8, Accuracy: 55.56%
Epoch 9, Time: 13.714960813522339, Loss: 1.1565806774989418
Epoch 9, Accuracy: 55.77%
Epoch 10, Time: 13.613940238952637, Loss: 1.1180710967849283
Epoch 10, Accuracy: 56.51%
Epoch 11, Time: 13.411194562911987, Loss: 1.0856712649545401
Epoch 11, Accuracy: 56.59%
Epo

55.1

In [13]:

# Branching: Conv2d + ConvNN Attention N, 2-layer model
B_Conv2d_ConvNN_Attn_N_2 = B_Conv2d_ConvNN_Attn_K_N(num_layers=2, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count
print(f"Model: {B_Conv2d_ConvNN_Attn_N_2.name}")
print(f"Num params: {count_parameters(B_Conv2d_ConvNN_Attn_N_2)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(B_Conv2d_ConvNN_Attn_N_2.parameters(), lr=1e-4)

# Train (the run logs loss and accuracy per epoch), then score on the test loader.
# The final call stays the last expression so its value is shown as the cell result.
train_eval(
    B_Conv2d_ConvNN_Attn_N_2,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(B_Conv2d_ConvNN_Attn_N_2, cifar10.test_loader, device='cuda')


Model: B_Conv2d_ConvNN_Attn_K_N
Num params: 605402

Epoch 1, Time: 12.02936315536499, Loss: 1.8405163571657732
Epoch 1, Accuracy: 42.46%
Epoch 2, Time: 12.092821598052979, Loss: 1.5406736080603831
Epoch 2, Accuracy: 46.42%
Epoch 3, Time: 12.052278518676758, Loss: 1.4061094583452816
Epoch 3, Accuracy: 49.99%
Epoch 4, Time: 11.93317461013794, Loss: 1.3228882835496723
Epoch 4, Accuracy: 52.36%
Epoch 5, Time: 12.025632381439209, Loss: 1.262867131349071
Epoch 5, Accuracy: 54.23%
Epoch 6, Time: 12.009690284729004, Loss: 1.2098530930326419
Epoch 6, Accuracy: 55.03%
Epoch 7, Time: 11.997647523880005, Loss: 1.1639350822667027
Epoch 7, Accuracy: 56.99%
Epoch 8, Time: 12.109815120697021, Loss: 1.1221536099910736
Epoch 8, Accuracy: 57.48%
Epoch 9, Time: 12.045375108718872, Loss: 1.0883479949153598
Epoch 9, Accuracy: 57.94%
Epoch 10, Time: 12.03829550743103, Loss: 1.0483716666850897
Epoch 10, Accuracy: 58.31%
Epoch 11, Time: 12.02396297454834, Loss: 1.0168574111693351
Epoch 11, Accuracy: 58.96%
Epo

57.36

In [13]:

# Branching: Conv2d + ConvNN Attention Spatial N, 2-layer model
B_Conv2d_ConvNN_Attn_Spatial_N_2 = B_Conv2d_ConvNN_Attn_Spatial_K_N(num_layers=2, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count
print(f"Model: {B_Conv2d_ConvNN_Attn_Spatial_N_2.name}")
print(f"Num params: {count_parameters(B_Conv2d_ConvNN_Attn_Spatial_N_2)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(B_Conv2d_ConvNN_Attn_Spatial_N_2.parameters(), lr=1e-4)

# Train (the run logs loss and accuracy per epoch), then score on the test loader.
# The final call stays the last expression so its value is shown as the cell result.
train_eval(
    B_Conv2d_ConvNN_Attn_Spatial_N_2,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(B_Conv2d_ConvNN_Attn_Spatial_N_2, cifar10.test_loader, device='cuda')


Model: B_Conv2d_ConvNN_Attn_Spatial_K_N
Num params: 482522

Epoch 1, Time: 12.42453908920288, Loss: 1.8000488252286106
Epoch 1, Accuracy: 42.99%
Epoch 2, Time: 12.444100379943848, Loss: 1.4971873496499513
Epoch 2, Accuracy: 48.33%
Epoch 3, Time: 12.266646146774292, Loss: 1.3661172287848296
Epoch 3, Accuracy: 51.57%
Epoch 4, Time: 12.580313920974731, Loss: 1.2748286068591925
Epoch 4, Accuracy: 53.56%
Epoch 5, Time: 12.47283124923706, Loss: 1.2109876966384976
Epoch 5, Accuracy: 55.29%
Epoch 6, Time: 12.429862976074219, Loss: 1.1629685761068789
Epoch 6, Accuracy: 54.93%
Epoch 7, Time: 12.50183653831482, Loss: 1.1135176303594008
Epoch 7, Accuracy: 57.04%
Epoch 8, Time: 12.388922929763794, Loss: 1.0708790405479538
Epoch 8, Accuracy: 57.4%
Epoch 9, Time: 12.38380765914917, Loss: 1.0305916711954815
Epoch 9, Accuracy: 58.38%
Epoch 10, Time: 12.41169810295105, Loss: 0.9921375956681683
Epoch 10, Accuracy: 58.56%
Epoch 11, Time: 12.49501657485962, Loss: 0.9536405817017226
Epoch 11, Accuracy: 59.2

58.46

In [14]:

# Branching: Attention + ConvNN All Samples, 2-layer model
B_Attention_ConvNN_All_2 = B_Attention_ConvNN_K_All(num_layers=2, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count
print(f"Model: {B_Attention_ConvNN_All_2.name}")
print(f"Num params: {count_parameters(B_Attention_ConvNN_All_2)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(B_Attention_ConvNN_All_2.parameters(), lr=1e-4)

# Train (the run logs loss and accuracy per epoch), then score on the test loader.
# The final call stays the last expression so its value is shown as the cell result.
train_eval(
    B_Attention_ConvNN_All_2,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(B_Attention_ConvNN_All_2, cifar10.test_loader, device='cuda')


Model: B_Attention_ConvNN_K_All
Num params: 248298

Epoch 1, Time: 15.7055983543396, Loss: 1.7752687237451754
Epoch 1, Accuracy: 43.47%
Epoch 2, Time: 15.701957941055298, Loss: 1.509267544654934
Epoch 2, Accuracy: 48.86%
Epoch 3, Time: 15.754456043243408, Loss: 1.3568705811982265
Epoch 3, Accuracy: 52.35%
Epoch 4, Time: 15.695666790008545, Loss: 1.2608320315170776
Epoch 4, Accuracy: 53.84%
Epoch 5, Time: 15.726603507995605, Loss: 1.1973406901140042
Epoch 5, Accuracy: 55.59%
Epoch 6, Time: 15.769630670547485, Loss: 1.143841531041943
Epoch 6, Accuracy: 55.7%
Epoch 7, Time: 15.809056520462036, Loss: 1.103570012409059
Epoch 7, Accuracy: 55.55%
Epoch 8, Time: 15.71822190284729, Loss: 1.068947273980626
Epoch 8, Accuracy: 56.93%
Epoch 9, Time: 15.767165899276733, Loss: 1.0384969094677654
Epoch 9, Accuracy: 57.76%
Epoch 10, Time: 15.739661455154419, Loss: 1.0067507277059433
Epoch 10, Accuracy: 58.06%
Epoch 11, Time: 15.751143455505371, Loss: 0.980204692658256
Epoch 11, Accuracy: 58.55%
Epoch 1

53.83

In [15]:

# Branching: Attention + ConvNN N Samples, 2-layer model
B_Attention_ConvNN_N_2 = B_Attention_ConvNN_K_N(num_layers=2, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count
print(f"Model: {B_Attention_ConvNN_N_2.name}")
print(f"Num params: {count_parameters(B_Attention_ConvNN_N_2)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(B_Attention_ConvNN_N_2.parameters(), lr=1e-4)

# Train (the run logs loss and accuracy per epoch), then score on the test loader.
# The final call stays the last expression so its value is shown as the cell result.
train_eval(
    B_Attention_ConvNN_N_2,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(B_Attention_ConvNN_N_2, cifar10.test_loader, device='cuda')


Model: B_Attention_ConvNN_K_N
Num params: 248298

Epoch 1, Time: 15.77831768989563, Loss: 1.791998118238376
Epoch 1, Accuracy: 42.32%
Epoch 2, Time: 15.729645252227783, Loss: 1.5765835600131004
Epoch 2, Accuracy: 46.16%
Epoch 3, Time: 15.760147333145142, Loss: 1.4890358060827036
Epoch 3, Accuracy: 48.72%
Epoch 4, Time: 15.775424718856812, Loss: 1.425425607987377
Epoch 4, Accuracy: 50.15%
Epoch 5, Time: 16.447328090667725, Loss: 1.3745536720356368
Epoch 5, Accuracy: 49.78%
Epoch 6, Time: 15.72526478767395, Loss: 1.3359193275956547
Epoch 6, Accuracy: 50.73%
Epoch 7, Time: 15.731736183166504, Loss: 1.295138097099026
Epoch 7, Accuracy: 51.71%
Epoch 8, Time: 15.741304636001587, Loss: 1.2613746412574787
Epoch 8, Accuracy: 52.68%
Epoch 9, Time: 15.72561764717102, Loss: 1.2283992907580208
Epoch 9, Accuracy: 53.53%
Epoch 10, Time: 15.758282899856567, Loss: 1.1959225640577429
Epoch 10, Accuracy: 53.67%
Epoch 11, Time: 15.711442232131958, Loss: 1.1654878630662513
Epoch 11, Accuracy: 54.57%
Epoch 

54.6

In [16]:

# Branching: Attention + ConvNN Spatial Samples, 2-layer model
B_Attention_ConvNN_Spatial_N_2 = B_Attention_ConvNN_Spatial_K_N(num_layers=2, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count
print(f"Model: {B_Attention_ConvNN_Spatial_N_2.name}")
print(f"Num params: {count_parameters(B_Attention_ConvNN_Spatial_N_2)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(B_Attention_ConvNN_Spatial_N_2.parameters(), lr=1e-4)

# Train (the run logs loss and accuracy per epoch), then score on the test loader.
# The final call stays the last expression so its value is shown as the cell result.
train_eval(
    B_Attention_ConvNN_Spatial_N_2,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(B_Attention_ConvNN_Spatial_N_2, cifar10.test_loader, device='cuda')


Model: B_Attention_ConvNN_Spatial_K_N
Num params: 248298

Epoch 1, Time: 17.89390754699707, Loss: 1.8044623689883201
Epoch 1, Accuracy: 42.58%
Epoch 2, Time: 17.92329239845276, Loss: 1.5739785490743339
Epoch 2, Accuracy: 44.35%
Epoch 3, Time: 17.937607526779175, Loss: 1.4735418724282014
Epoch 3, Accuracy: 48.79%
Epoch 4, Time: 18.339150190353394, Loss: 1.4093378757881692
Epoch 4, Accuracy: 49.62%
Epoch 5, Time: 18.23250389099121, Loss: 1.3574875385864922
Epoch 5, Accuracy: 50.97%
Epoch 6, Time: 18.10436749458313, Loss: 1.3106578461197027
Epoch 6, Accuracy: 50.81%
Epoch 7, Time: 17.7756130695343, Loss: 1.2687903491737287
Epoch 7, Accuracy: 52.45%
Epoch 8, Time: 17.696356534957886, Loss: 1.2282359600067139
Epoch 8, Accuracy: 53.23%
Epoch 9, Time: 17.87688159942627, Loss: 1.1934844198281809
Epoch 9, Accuracy: 53.2%
Epoch 10, Time: 17.979543447494507, Loss: 1.159934325824918
Epoch 10, Accuracy: 54.16%
Epoch 11, Time: 17.86279273033142, Loss: 1.1310613705678974
Epoch 11, Accuracy: 54.85%
Ep

50.91

In [17]:

# Branching: Attention + ConvNN Attn N Samples, 2-layer model
B_Attention_ConvNN_Attn_N_2 = B_Attention_ConvNN_Attn_K_N(num_layers=2, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count
print(f"Model: {B_Attention_ConvNN_Attn_N_2.name}")
print(f"Num params: {count_parameters(B_Attention_ConvNN_Attn_N_2)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(B_Attention_ConvNN_Attn_N_2.parameters(), lr=1e-4)

# Train (the run logs loss and accuracy per epoch), then score on the test loader.
# The final call stays the last expression so its value is shown as the cell result.
train_eval(
    B_Attention_ConvNN_Attn_N_2,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(B_Attention_ConvNN_Attn_N_2, cifar10.test_loader, device='cuda')


Model: B_Attention_ConvNN_Attn_K_N
Num params: 641514

Epoch 1, Time: 16.08865523338318, Loss: 1.9545401269212708
Epoch 1, Accuracy: 33.8%
Epoch 2, Time: 16.110215425491333, Loss: 1.7135569922759404
Epoch 2, Accuracy: 40.07%
Epoch 3, Time: 16.16624665260315, Loss: 1.6128847102070099
Epoch 3, Accuracy: 43.05%
Epoch 4, Time: 16.17288613319397, Loss: 1.5410765372883632
Epoch 4, Accuracy: 45.8%
Epoch 5, Time: 16.163732528686523, Loss: 1.4757294506977892
Epoch 5, Accuracy: 47.14%
Epoch 6, Time: 16.166686058044434, Loss: 1.4233460668712625
Epoch 6, Accuracy: 48.68%
Epoch 7, Time: 16.143619298934937, Loss: 1.3776961551297962
Epoch 7, Accuracy: 50.6%
Epoch 8, Time: 16.129433631896973, Loss: 1.3414061964320405
Epoch 8, Accuracy: 51.59%
Epoch 9, Time: 16.18224334716797, Loss: 1.3083420536097359
Epoch 9, Accuracy: 52.45%
Epoch 10, Time: 16.0955548286438, Loss: 1.2769837437383353
Epoch 10, Accuracy: 52.89%
Epoch 11, Time: 16.15980625152588, Loss: 1.2508116266916476
Epoch 11, Accuracy: 53.48%
Epoch

54.4

In [14]:

# Branching: Attention + ConvNN Attn Spatial N Samples, 2-layer model
B_Attention_ConvNN_Attn_Spatial_N_2 = B_Attention_ConvNN_Attn_Spatial_K_N(num_layers=2, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count
print(f"Model: {B_Attention_ConvNN_Attn_Spatial_N_2.name}")
print(f"Num params: {count_parameters(B_Attention_ConvNN_Attn_Spatial_N_2)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(B_Attention_ConvNN_Attn_Spatial_N_2.parameters(), lr=1e-4)

# Train (the run logs loss and accuracy per epoch), then score on the test loader.
# The final call stays the last expression so its value is shown as the cell result.
train_eval(
    B_Attention_ConvNN_Attn_Spatial_N_2,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(B_Attention_ConvNN_Attn_Spatial_N_2, cifar10.test_loader, device='cuda')


Model: B_Attention_ConvNN_Attn_Spatial_K_N
Num params: 518634

Epoch 1, Time: 20.56518268585205, Loss: 1.9001243335511677
Epoch 1, Accuracy: 38.44%
Epoch 2, Time: 20.557051420211792, Loss: 1.6150945630829658
Epoch 2, Accuracy: 44.15%
Epoch 3, Time: 19.59791135787964, Loss: 1.4938314363474736
Epoch 3, Accuracy: 48.29%
Epoch 4, Time: 17.736446142196655, Loss: 1.397563331267413
Epoch 4, Accuracy: 50.1%
Epoch 5, Time: 18.28118920326233, Loss: 1.3330501630483076
Epoch 5, Accuracy: 51.95%
Epoch 6, Time: 20.55126118659973, Loss: 1.285283558204046
Epoch 6, Accuracy: 52.54%
Epoch 7, Time: 18.10776376724243, Loss: 1.2427171087630875
Epoch 7, Accuracy: 52.87%
Epoch 8, Time: 16.52337622642517, Loss: 1.2090809872693113
Epoch 8, Accuracy: 53.78%
Epoch 9, Time: 16.652496337890625, Loss: 1.1712870004841738
Epoch 9, Accuracy: 55.15%
Epoch 10, Time: 18.274088859558105, Loss: 1.1390099145872208
Epoch 10, Accuracy: 55.35%
Epoch 11, Time: 20.437202215194702, Loss: 1.1076731937163322
Epoch 11, Accuracy: 54.

54.92

In [18]:

# Branching: Attention + Conv2d, 2-layer model
B_Attention_Conv2d_2 = B_Attention_Conv2d(num_layers=2, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count
print(f"Model: {B_Attention_Conv2d_2.name}")
print(f"Num params: {count_parameters(B_Attention_Conv2d_2)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(B_Attention_Conv2d_2.parameters(), lr=1e-4)

# Train (the run logs loss and accuracy per epoch), then score on the test loader.
# The final call stays the last expression so its value is shown as the cell result.
train_eval(
    B_Attention_Conv2d_2,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(B_Attention_Conv2d_2, cifar10.test_loader, device='cuda')



Model: B_Attention_Conv2d
Num params: 206554

Epoch 1, Time: 14.100418090820312, Loss: 1.8220332074348273
Epoch 1, Accuracy: 43.77%
Epoch 2, Time: 14.228880405426025, Loss: 1.5450673325897177
Epoch 2, Accuracy: 47.0%
Epoch 3, Time: 14.159112453460693, Loss: 1.451904360138242
Epoch 3, Accuracy: 50.02%
Epoch 4, Time: 14.213355302810669, Loss: 1.3661977043542106
Epoch 4, Accuracy: 52.78%
Epoch 5, Time: 14.110468626022339, Loss: 1.2942766541105402
Epoch 5, Accuracy: 54.0%
Epoch 6, Time: 14.02746057510376, Loss: 1.237797254872749
Epoch 6, Accuracy: 55.42%
Epoch 7, Time: 14.233490705490112, Loss: 1.1884083608379754
Epoch 7, Accuracy: 56.09%
Epoch 8, Time: 14.007227182388306, Loss: 1.15470355726264
Epoch 8, Accuracy: 57.67%
Epoch 9, Time: 14.201476573944092, Loss: 1.1234882390864975
Epoch 9, Accuracy: 59.16%
Epoch 10, Time: 14.156533002853394, Loss: 1.0967370798368283
Epoch 10, Accuracy: 58.61%
Epoch 11, Time: 14.084997177124023, Loss: 1.0707558239512431
Epoch 11, Accuracy: 58.74%
Epoch 12, T

59.23

### 4 Layer Models

In [5]:

# CNN: 4-layer model
CNN_4 = CNN(num_layers=4, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count
print(f"Model: {CNN_4.name}")
print(f"Num params: {count_parameters(CNN_4)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(CNN_4.parameters(), lr=1e-4)

# Train (the run logs loss and accuracy per epoch), then score on the test loader.
# The final call stays the last expression so its value is shown as the cell result.
train_eval(
    CNN_4,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(CNN_4, cifar10.test_loader, device='cuda')


Model: CNN
Num params: 171258

Epoch 1, Time: 13.512456893920898, Loss: 1.7363582177235342
Epoch 1, Accuracy: 45.55%
Epoch 2, Time: 12.213690519332886, Loss: 1.4432498439193686
Epoch 2, Accuracy: 50.41%
Epoch 3, Time: 9.832875967025757, Loss: 1.3312438883439963
Epoch 3, Accuracy: 52.49%
Epoch 4, Time: 9.632941961288452, Loss: 1.2535796317907855
Epoch 4, Accuracy: 54.56%
Epoch 5, Time: 9.61450481414795, Loss: 1.1984308138680275
Epoch 5, Accuracy: 56.51%
Epoch 6, Time: 20.979419946670532, Loss: 1.1493343152963291
Epoch 6, Accuracy: 56.17%
Epoch 7, Time: 9.735760927200317, Loss: 1.10862077639231
Epoch 7, Accuracy: 57.92%
Epoch 8, Time: 9.69012188911438, Loss: 1.0696126238616837
Epoch 8, Accuracy: 59.31%
Epoch 9, Time: 9.748415470123291, Loss: 1.0366809385664322
Epoch 9, Accuracy: 59.62%
Epoch 10, Time: 9.724631309509277, Loss: 1.0033417498059285
Epoch 10, Accuracy: 60.8%
Epoch 11, Time: 9.66859745979309, Loss: 0.9769828138906328
Epoch 11, Accuracy: 61.07%
Epoch 12, Time: 9.782186269760132

56.88

In [6]:

# Attention: 4-layer model
Attention_4 = Attention(num_layers=4, num_classes=10, device='cuda')

# Report the model's name and its trainable-parameter count
print(f"Model: {Attention_4.name}")
print(f"Num params: {count_parameters(Attention_4)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Attention_4.parameters(), lr=1e-4)

# Train (the run logs loss and accuracy per epoch), then score on the test loader.
# The final call stays the last expression so its value is shown as the cell result.
train_eval(
    Attention_4,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(Attention_4, cifar10.test_loader, device='cuda')


Model: Attention
Num params: 244938

Epoch 1, Time: 18.906435251235962, Loss: 2.0552867154026275
Epoch 1, Accuracy: 25.29%
Epoch 2, Time: 18.35961151123047, Loss: 1.903042280613004
Epoch 2, Accuracy: 29.08%
Epoch 3, Time: 18.389500379562378, Loss: 1.8302611922059218
Epoch 3, Accuracy: 30.57%
Epoch 4, Time: 18.46805238723755, Loss: 1.7922319041188721
Epoch 4, Accuracy: 33.05%
Epoch 5, Time: 18.48231029510498, Loss: 1.7616777723402623
Epoch 5, Accuracy: 34.01%
Epoch 6, Time: 18.984187364578247, Loss: 1.7389654843398676
Epoch 6, Accuracy: 34.56%
Epoch 7, Time: 18.600693941116333, Loss: 1.7196172463619495
Epoch 7, Accuracy: 34.95%
Epoch 8, Time: 18.47274613380432, Loss: 1.6951753792860318
Epoch 8, Accuracy: 36.43%
Epoch 9, Time: 19.047625064849854, Loss: 1.668821575696511
Epoch 9, Accuracy: 38.09%
Epoch 10, Time: 18.60256028175354, Loss: 1.6477475959016843
Epoch 10, Accuracy: 38.25%
Epoch 11, Time: 18.749018907546997, Loss: 1.6315738070956276
Epoch 11, Accuracy: 38.47%
Epoch 12, Time: 18.5

52.15

In [7]:
# ConvNN (K_All variant), 4 layers
ConvNN_All_4 = ConvNN_K_All(num_layers=4, num_classes=10, device='cuda')

# Report the model name and its trainable-parameter count
print(f"Model: {ConvNN_All_4.name}")
print(f"Num params: {count_parameters(ConvNN_All_4)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_All_4.parameters(), lr=1e-4)

# Train, then evaluate on the test loader (last expression, so its value is displayed)
train_eval(
    ConvNN_All_4,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(ConvNN_All_4, cifar10.test_loader, device='cuda')


Model: ConvNN_K_All
Num params: 282826

Epoch 1, Time: 14.341137170791626, Loss: 1.6769683768072396
Epoch 1, Accuracy: 45.37%
Epoch 2, Time: 14.601525068283081, Loss: 1.4055886892101648
Epoch 2, Accuracy: 50.71%
Epoch 3, Time: 14.063429832458496, Loss: 1.2938502573448678
Epoch 3, Accuracy: 53.24%
Epoch 4, Time: 15.619990348815918, Loss: 1.21819140844028
Epoch 4, Accuracy: 54.88%
Epoch 5, Time: 14.342267513275146, Loss: 1.1473159870070875
Epoch 5, Accuracy: 55.2%
Epoch 6, Time: 14.368480682373047, Loss: 1.091538448391668
Epoch 6, Accuracy: 57.2%
Epoch 7, Time: 14.10326886177063, Loss: 1.0445676728740068
Epoch 7, Accuracy: 57.63%
Epoch 8, Time: 14.03005838394165, Loss: 1.0009944705707032
Epoch 8, Accuracy: 58.15%
Epoch 9, Time: 14.104018211364746, Loss: 0.9581754567373134
Epoch 9, Accuracy: 57.69%
Epoch 10, Time: 14.41249418258667, Loss: 0.917463857423314
Epoch 10, Accuracy: 57.28%
Epoch 11, Time: 14.01971960067749, Loss: 0.8791479036173857
Epoch 11, Accuracy: 57.48%
Epoch 12, Time: 15.6

51.47

In [8]:
# ConvNN (K_N variant), 4 layers
ConvNN_N_4 = ConvNN_K_N(num_layers=4, num_classes=10, device='cuda')

# Report the model name and its trainable-parameter count
print(f"Model: {ConvNN_N_4.name}")
print(f"Num params: {count_parameters(ConvNN_N_4)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_N_4.parameters(), lr=1e-4)

# Train, then evaluate on the test loader (last expression, so its value is displayed)
train_eval(
    ConvNN_N_4,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(ConvNN_N_4, cifar10.test_loader, device='cuda')


Model: ConvNN_K_N
Num params: 282826

Epoch 1, Time: 13.731082677841187, Loss: 1.7893762628135779
Epoch 1, Accuracy: 42.72%
Epoch 2, Time: 13.72454309463501, Loss: 1.5794377116595997
Epoch 2, Accuracy: 45.33%
Epoch 3, Time: 14.188150405883789, Loss: 1.4781426474871233
Epoch 3, Accuracy: 48.04%
Epoch 4, Time: 13.80013108253479, Loss: 1.4044881048409834
Epoch 4, Accuracy: 49.11%
Epoch 5, Time: 13.746007680892944, Loss: 1.3524691488431848
Epoch 5, Accuracy: 49.92%
Epoch 6, Time: 13.987950563430786, Loss: 1.3129168149760313
Epoch 6, Accuracy: 51.72%
Epoch 7, Time: 13.865897178649902, Loss: 1.2747173298655263
Epoch 7, Accuracy: 52.79%
Epoch 8, Time: 14.072409868240356, Loss: 1.23975011782573
Epoch 8, Accuracy: 53.23%
Epoch 9, Time: 13.693495035171509, Loss: 1.2136398289362182
Epoch 9, Accuracy: 53.42%
Epoch 10, Time: 14.101994752883911, Loss: 1.1876782233757741
Epoch 10, Accuracy: 53.65%
Epoch 11, Time: 13.784587621688843, Loss: 1.1575237943998078
Epoch 11, Accuracy: 54.21%
Epoch 12, Time: 

55.58

In [9]:
# ConvNN (Spatial K_N variant), 4 layers
ConvNN_Spatial_N_4 = ConvNN_Spatial_K_N(num_layers=4, num_classes=10, device='cuda')

# Report the model name and its trainable-parameter count
print(f"Model: {ConvNN_Spatial_N_4.name}")
print(f"Num params: {count_parameters(ConvNN_Spatial_N_4)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_Spatial_N_4.parameters(), lr=1e-4)

# Train, then evaluate on the test loader (last expression, so its value is displayed)
train_eval(
    ConvNN_Spatial_N_4,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(ConvNN_Spatial_N_4, cifar10.test_loader, device='cuda')


Model: ConvNN_Spatial_K_N
Num params: 282826

Epoch 1, Time: 17.72710418701172, Loss: 1.774576862144958
Epoch 1, Accuracy: 42.74%
Epoch 2, Time: 18.388045072555542, Loss: 1.5438247242242173
Epoch 2, Accuracy: 46.68%
Epoch 3, Time: 17.792329788208008, Loss: 1.4395392120189374
Epoch 3, Accuracy: 48.6%
Epoch 4, Time: 17.731676816940308, Loss: 1.3717431999228495
Epoch 4, Accuracy: 50.12%
Epoch 5, Time: 17.484087467193604, Loss: 1.3179255928987128
Epoch 5, Accuracy: 50.67%
Epoch 6, Time: 17.52597141265869, Loss: 1.2675838939978947
Epoch 6, Accuracy: 51.03%
Epoch 7, Time: 18.21599006652832, Loss: 1.2250832473225606
Epoch 7, Accuracy: 51.17%
Epoch 8, Time: 18.466261625289917, Loss: 1.1838960184160705
Epoch 8, Accuracy: 52.45%
Epoch 9, Time: 18.51888680458069, Loss: 1.1429411242989933
Epoch 9, Accuracy: 53.0%
Epoch 10, Time: 18.062358856201172, Loss: 1.103382740941499
Epoch 10, Accuracy: 52.38%
Epoch 11, Time: 19.223039150238037, Loss: 1.0673276171507433
Epoch 11, Accuracy: 52.76%
Epoch 12, Ti

47.92

In [10]:
# ConvNN (Attention K_N variant), 4 layers
ConvNN_Attn_N_4 = ConvNN_Attn_K_N(num_layers=4, num_classes=10, device='cuda')

# Report the model name and its trainable-parameter count
print(f"Model: {ConvNN_Attn_N_4.name}")
print(f"Num params: {count_parameters(ConvNN_Attn_N_4)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_Attn_N_4.parameters(), lr=1e-4)

# Train, then evaluate on the test loader (last expression, so its value is displayed)
train_eval(
    ConvNN_Attn_N_4,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(ConvNN_Attn_N_4, cifar10.test_loader, device='cuda')


Model: ConvNN_Attn_K_N
Num params: 1069258

Epoch 1, Time: 14.480890035629272, Loss: 2.1023478431774834
Epoch 1, Accuracy: 23.97%
Epoch 2, Time: 14.401057958602905, Loss: 1.8792488782302192
Epoch 2, Accuracy: 28.82%
Epoch 3, Time: 14.44253420829773, Loss: 1.7997485784923328
Epoch 3, Accuracy: 32.99%
Epoch 4, Time: 14.406382083892822, Loss: 1.7306088195432483
Epoch 4, Accuracy: 37.95%
Epoch 5, Time: 14.494624614715576, Loss: 1.6470243171657748
Epoch 5, Accuracy: 40.82%
Epoch 6, Time: 14.319504022598267, Loss: 1.5714040907752482
Epoch 6, Accuracy: 44.32%
Epoch 7, Time: 14.382968664169312, Loss: 1.4990419256107888
Epoch 7, Accuracy: 45.96%
Epoch 8, Time: 14.433298587799072, Loss: 1.4288982849596712
Epoch 8, Accuracy: 48.52%
Epoch 9, Time: 14.440179109573364, Loss: 1.3792317878559728
Epoch 9, Accuracy: 49.89%
Epoch 10, Time: 14.481025457382202, Loss: 1.3433445959597292
Epoch 10, Accuracy: 51.05%
Epoch 11, Time: 14.409048080444336, Loss: 1.3143029873785765
Epoch 11, Accuracy: 50.84%
Epoch 1

56.42

In [15]:
# ConvNN (Attention Spatial K_N variant), 4 layers
ConvNN_Attn_Spatial_N_4 = ConvNN_Attn_Spatial_K_N(num_layers=4, num_classes=10, device='cuda')

# Report the model name and its trainable-parameter count
print(f"Model: {ConvNN_Attn_Spatial_N_4.name}")
print(f"Num params: {count_parameters(ConvNN_Attn_Spatial_N_4)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_Attn_Spatial_N_4.parameters(), lr=1e-4)

# Train, then evaluate on the test loader (last expression, so its value is displayed)
train_eval(
    ConvNN_Attn_Spatial_N_4,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(ConvNN_Attn_Spatial_N_4, cifar10.test_loader, device='cuda')


Model: ConvNN_Attn_Spatial_K_N
Num params: 823498

Epoch 1, Time: 17.843294143676758, Loss: 1.9925470047289757
Epoch 1, Accuracy: 32.41%
Epoch 2, Time: 16.74996256828308, Loss: 1.7307800453947024
Epoch 2, Accuracy: 38.65%
Epoch 3, Time: 14.596435785293579, Loss: 1.5837917812644977
Epoch 3, Accuracy: 44.64%
Epoch 4, Time: 15.221187591552734, Loss: 1.4683142310518134
Epoch 4, Accuracy: 47.06%
Epoch 5, Time: 14.61793065071106, Loss: 1.3834312079507676
Epoch 5, Accuracy: 49.41%
Epoch 6, Time: 14.840885162353516, Loss: 1.3122290227266833
Epoch 6, Accuracy: 50.49%
Epoch 7, Time: 17.132341623306274, Loss: 1.2483409544086213
Epoch 7, Accuracy: 52.51%
Epoch 8, Time: 16.03181552886963, Loss: 1.1981237724309077
Epoch 8, Accuracy: 53.15%
Epoch 9, Time: 15.29961109161377, Loss: 1.1517157022605466
Epoch 9, Accuracy: 53.59%
Epoch 10, Time: 14.597798109054565, Loss: 1.1132803047861894
Epoch 10, Accuracy: 54.32%
Epoch 11, Time: 14.78703498840332, Loss: 1.0731401379455996
Epoch 11, Accuracy: 54.26%
Epoc

51.18

#### ii. Branching

In [11]:
# Branching model: Conv2d + ConvNN (K_All variant), 4 layers
B_Conv2d_ConvNN_All_4 = B_Conv2d_ConvNN_K_All(num_layers=4, num_classes=10, device='cuda')

# Report the model name and its trainable-parameter count
print(f"Model: {B_Conv2d_ConvNN_All_4.name}")
print(f"Num params: {count_parameters(B_Conv2d_ConvNN_All_4)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(B_Conv2d_ConvNN_All_4.parameters(), lr=1e-4)

# Train, then evaluate on the test loader (last expression, so its value is displayed)
train_eval(
    B_Conv2d_ConvNN_All_4,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(B_Conv2d_ConvNN_All_4, cifar10.test_loader, device='cuda')


Model: B_Conv2d_ConvNN_K_All
Num params: 292346

Epoch 1, Time: 15.40282940864563, Loss: 1.845730258528229
Epoch 1, Accuracy: 41.44%
Epoch 2, Time: 15.394604682922363, Loss: 1.5596463643681362
Epoch 2, Accuracy: 47.33%
Epoch 3, Time: 15.325387001037598, Loss: 1.4259966188074682
Epoch 3, Accuracy: 49.98%
Epoch 4, Time: 15.257440328598022, Loss: 1.3338941528516657
Epoch 4, Accuracy: 51.56%
Epoch 5, Time: 15.511467456817627, Loss: 1.2699903341967735
Epoch 5, Accuracy: 53.17%
Epoch 6, Time: 15.34885025024414, Loss: 1.2139268708046136
Epoch 6, Accuracy: 54.01%
Epoch 7, Time: 15.36905026435852, Loss: 1.1668979265653263
Epoch 7, Accuracy: 55.43%
Epoch 8, Time: 16.60437273979187, Loss: 1.1221146807646203
Epoch 8, Accuracy: 55.97%
Epoch 9, Time: 15.28474473953247, Loss: 1.0896054957528858
Epoch 9, Accuracy: 56.02%
Epoch 10, Time: 15.315227746963501, Loss: 1.054974775027741
Epoch 10, Accuracy: 56.52%
Epoch 11, Time: 15.320003032684326, Loss: 1.023929354327414
Epoch 11, Accuracy: 56.5%
Epoch 12, 

54.51

In [12]:
# Branching model: Conv2d + ConvNN (K_N variant), 4 layers
B_Conv2d_ConvNN_N_4 = B_Conv2d_ConvNN_K_N(num_layers=4, num_classes=10, device='cuda')

# Report the model name and its trainable-parameter count
print(f"Model: {B_Conv2d_ConvNN_N_4.name}")
print(f"Num params: {count_parameters(B_Conv2d_ConvNN_N_4)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(B_Conv2d_ConvNN_N_4.parameters(), lr=1e-4)

# Train, then evaluate on the test loader (last expression, so its value is displayed)
train_eval(
    B_Conv2d_ConvNN_N_4,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(B_Conv2d_ConvNN_N_4, cifar10.test_loader, device='cuda')


Model: B_Conv2d_ConvNN_K_N
Num params: 292346

Epoch 1, Time: 15.26199746131897, Loss: 1.8876872146525956
Epoch 1, Accuracy: 40.19%
Epoch 2, Time: 15.226144790649414, Loss: 1.6216855218343418
Epoch 2, Accuracy: 45.68%
Epoch 3, Time: 15.240885019302368, Loss: 1.4965965293557442
Epoch 3, Accuracy: 47.46%
Epoch 4, Time: 15.131094932556152, Loss: 1.414128805365404
Epoch 4, Accuracy: 49.47%
Epoch 5, Time: 15.075191974639893, Loss: 1.3611875831166191
Epoch 5, Accuracy: 50.19%
Epoch 6, Time: 15.333395719528198, Loss: 1.3186462148833458
Epoch 6, Accuracy: 51.1%
Epoch 7, Time: 15.13015365600586, Loss: 1.282922518299059
Epoch 7, Accuracy: 52.47%
Epoch 8, Time: 14.99138879776001, Loss: 1.2465299851906575
Epoch 8, Accuracy: 52.94%
Epoch 9, Time: 15.16837739944458, Loss: 1.214747181710075
Epoch 9, Accuracy: 54.77%
Epoch 10, Time: 15.178504467010498, Loss: 1.1841507704971392
Epoch 10, Accuracy: 55.55%
Epoch 11, Time: 15.270206928253174, Loss: 1.1583536492894069
Epoch 11, Accuracy: 56.03%
Epoch 12, T

57.46

In [13]:
# Branching model: Conv2d + ConvNN (Spatial K_N variant), 4 layers
B_Conv2d_ConvNN_Spatial_N_4 = B_Conv2d_ConvNN_Spatial_K_N(num_layers=4, num_classes=10, device='cuda')

# Report the model name and its trainable-parameter count
print(f"Model: {B_Conv2d_ConvNN_Spatial_N_4.name}")
print(f"Num params: {count_parameters(B_Conv2d_ConvNN_Spatial_N_4)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(B_Conv2d_ConvNN_Spatial_N_4.parameters(), lr=1e-4)

# Train, then evaluate on the test loader (last expression, so its value is displayed)
train_eval(
    B_Conv2d_ConvNN_Spatial_N_4,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(B_Conv2d_ConvNN_Spatial_N_4, cifar10.test_loader, device='cuda')


Model: B_Conv2d_ConvNN_Spatial_K_N
Num params: 292346

Epoch 1, Time: 19.690129280090332, Loss: 1.8831509778566677
Epoch 1, Accuracy: 39.79%
Epoch 2, Time: 19.60747265815735, Loss: 1.615970454709914
Epoch 2, Accuracy: 45.47%
Epoch 3, Time: 19.191784381866455, Loss: 1.4950582048167353
Epoch 3, Accuracy: 47.61%
Epoch 4, Time: 19.362842321395874, Loss: 1.4232411703185353
Epoch 4, Accuracy: 48.77%
Epoch 5, Time: 18.949430227279663, Loss: 1.3651953476774112
Epoch 5, Accuracy: 50.55%
Epoch 6, Time: 19.80279040336609, Loss: 1.3123024748566816
Epoch 6, Accuracy: 52.12%
Epoch 7, Time: 21.46120262145996, Loss: 1.2703899115400241
Epoch 7, Accuracy: 53.38%
Epoch 8, Time: 19.91799521446228, Loss: 1.227029134169259
Epoch 8, Accuracy: 53.4%
Epoch 9, Time: 19.782876014709473, Loss: 1.1923050985616797
Epoch 9, Accuracy: 54.28%
Epoch 10, Time: 19.694787979125977, Loss: 1.1571989961902198
Epoch 10, Accuracy: 54.8%
Epoch 11, Time: 19.5934579372406, Loss: 1.1244626751031412
Epoch 11, Accuracy: 55.55%
Epoch

53.51

In [14]:
# Branching model: Conv2d + ConvNN (Attn K_N variant), 4 layers
B_Conv2d_ConvNN_Attn_N_4 = B_Conv2d_ConvNN_Attn_K_N(num_layers=4, num_classes=10, device='cuda')

# Report the model name and its trainable-parameter count
print(f"Model: {B_Conv2d_ConvNN_Attn_N_4.name}")
print(f"Num params: {count_parameters(B_Conv2d_ConvNN_Attn_N_4)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(B_Conv2d_ConvNN_Attn_N_4.parameters(), lr=1e-4)

# Train, then evaluate on the test loader (last expression, so its value is displayed)
train_eval(
    B_Conv2d_ConvNN_Attn_N_4,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(B_Conv2d_ConvNN_Attn_N_4, cifar10.test_loader, device='cuda')


Model: B_Conv2d_ConvNN_Attn_K_N
Num params: 1078778

Epoch 1, Time: 16.54823637008667, Loss: 2.033364650995835
Epoch 1, Accuracy: 34.64%
Epoch 2, Time: 19.04072642326355, Loss: 1.7254458056081592
Epoch 2, Accuracy: 41.32%
Epoch 3, Time: 19.41620683670044, Loss: 1.5446672703299071
Epoch 3, Accuracy: 46.63%
Epoch 4, Time: 19.307297229766846, Loss: 1.44293741253026
Epoch 4, Accuracy: 49.59%
Epoch 5, Time: 19.940260648727417, Loss: 1.3687809828449697
Epoch 5, Accuracy: 51.01%
Epoch 6, Time: 18.457438707351685, Loss: 1.3103186466047525
Epoch 6, Accuracy: 52.21%
Epoch 7, Time: 18.063579320907593, Loss: 1.2590563411602889
Epoch 7, Accuracy: 53.2%
Epoch 8, Time: 18.91383409500122, Loss: 1.213734247190568
Epoch 8, Accuracy: 54.13%
Epoch 9, Time: 17.818350315093994, Loss: 1.1710384305175918
Epoch 9, Accuracy: 54.82%
Epoch 10, Time: 18.811686277389526, Loss: 1.134411526762921
Epoch 10, Accuracy: 54.32%
Epoch 11, Time: 18.519883632659912, Loss: 1.0939760221849621
Epoch 11, Accuracy: 55.53%
Epoch 1

53.97

In [17]:
# Branching model: Conv2d + ConvNN (Attn Spatial K_N variant), 4 layers
B_Conv2d_ConvNN_Attn_Spatial_N_4 = B_Conv2d_ConvNN_Attn_Spatial_K_N(num_layers=4, num_classes=10, device='cuda')

# Report the model name and its trainable-parameter count
print(f"Model: {B_Conv2d_ConvNN_Attn_Spatial_N_4.name}")
print(f"Num params: {count_parameters(B_Conv2d_ConvNN_Attn_Spatial_N_4)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(B_Conv2d_ConvNN_Attn_Spatial_N_4.parameters(), lr=1e-4)

# Train, then evaluate on the test loader (last expression, so its value is displayed)
train_eval(
    B_Conv2d_ConvNN_Attn_Spatial_N_4,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(B_Conv2d_ConvNN_Attn_Spatial_N_4, cifar10.test_loader, device='cuda')


Model: B_Conv2d_ConvNN_Attn_Spatial_K_N
Num params: 833018

Epoch 1, Time: 17.198922395706177, Loss: 2.0029403564265316
Epoch 1, Accuracy: 34.91%
Epoch 2, Time: 17.963162422180176, Loss: 1.6804913924173321
Epoch 2, Accuracy: 40.3%
Epoch 3, Time: 16.057100772857666, Loss: 1.566323329268209
Epoch 3, Accuracy: 43.77%
Epoch 4, Time: 15.927003622055054, Loss: 1.472268793893897
Epoch 4, Accuracy: 46.84%
Epoch 5, Time: 15.877135038375854, Loss: 1.4000726660804066
Epoch 5, Accuracy: 48.86%
Epoch 6, Time: 15.92756175994873, Loss: 1.3434963972519731
Epoch 6, Accuracy: 50.66%
Epoch 7, Time: 16.267938137054443, Loss: 1.2991935298266009
Epoch 7, Accuracy: 50.96%
Epoch 8, Time: 16.674750089645386, Loss: 1.2567405033751826
Epoch 8, Accuracy: 53.19%
Epoch 9, Time: 17.094624280929565, Loss: 1.217875548717006
Epoch 9, Accuracy: 52.87%
Epoch 10, Time: 15.85529351234436, Loss: 1.1847493912252929
Epoch 10, Accuracy: 54.23%
Epoch 11, Time: 16.550307512283325, Loss: 1.1501582710029523
Epoch 11, Accuracy: 55.

57.55

In [18]:

# Branching model: Attention + ConvNN (K_All variant), 4 layers
B_Attention_ConvNN_All_4 = B_Attention_ConvNN_K_All(num_layers=4, num_classes=10, device='cuda')

# Report the model name and its trainable-parameter count
print(f"Model: {B_Attention_ConvNN_All_4.name}")
print(f"Num params: {count_parameters(B_Attention_ConvNN_All_4)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(B_Attention_ConvNN_All_4.parameters(), lr=1e-4)

# Train, then evaluate on the test loader (last expression, so its value is displayed)
train_eval(
    B_Attention_ConvNN_All_4,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(B_Attention_ConvNN_All_4, cifar10.test_loader, device='cuda')


Model: B_Attention_ConvNN_K_All
Num params: 366026

Epoch 1, Time: 24.680163621902466, Loss: 1.8628804900152298
Epoch 1, Accuracy: 42.61%
Epoch 2, Time: 24.426358222961426, Loss: 1.5684952081926644
Epoch 2, Accuracy: 46.32%
Epoch 3, Time: 25.00921368598938, Loss: 1.4558910855551814
Epoch 3, Accuracy: 48.12%
Epoch 4, Time: 23.918203353881836, Loss: 1.3791113512595292
Epoch 4, Accuracy: 50.81%
Epoch 5, Time: 24.31944227218628, Loss: 1.3150133825171635
Epoch 5, Accuracy: 51.2%
Epoch 6, Time: 25.696349382400513, Loss: 1.2545974736323442
Epoch 6, Accuracy: 53.81%
Epoch 7, Time: 24.391780614852905, Loss: 1.2023593605021992
Epoch 7, Accuracy: 54.2%
Epoch 8, Time: 24.685472011566162, Loss: 1.1612648615599288
Epoch 8, Accuracy: 55.09%
Epoch 9, Time: 25.333788633346558, Loss: 1.1271527525409104
Epoch 9, Accuracy: 55.51%
Epoch 10, Time: 24.67674946784973, Loss: 1.0890105923118494
Epoch 10, Accuracy: 56.73%
Epoch 11, Time: 23.881394386291504, Loss: 1.057657334658191
Epoch 11, Accuracy: 56.31%
Epoc

51.32

In [19]:

# Branching model: Attention + ConvNN (K_N variant), 4 layers
B_Attention_ConvNN_N_4 = B_Attention_ConvNN_K_N(num_layers=4, num_classes=10, device='cuda')

# Report the model name and its trainable-parameter count
print(f"Model: {B_Attention_ConvNN_N_4.name}")
print(f"Num params: {count_parameters(B_Attention_ConvNN_N_4)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(B_Attention_ConvNN_N_4.parameters(), lr=1e-4)

# Train, then evaluate on the test loader (last expression, so its value is displayed)
train_eval(
    B_Attention_ConvNN_N_4,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(B_Attention_ConvNN_N_4, cifar10.test_loader, device='cuda')


Model: B_Attention_ConvNN_K_N
Num params: 366026

Epoch 1, Time: 23.256299018859863, Loss: 1.9042067681736958
Epoch 1, Accuracy: 39.32%
Epoch 2, Time: 24.510672330856323, Loss: 1.6392910632940814
Epoch 2, Accuracy: 43.82%
Epoch 3, Time: 25.5377254486084, Loss: 1.5557253264710116
Epoch 3, Accuracy: 44.6%
Epoch 4, Time: 23.68681049346924, Loss: 1.4950113790419401
Epoch 4, Accuracy: 47.95%
Epoch 5, Time: 23.980528354644775, Loss: 1.4506384853816703
Epoch 5, Accuracy: 48.54%
Epoch 6, Time: 24.573429346084595, Loss: 1.414393612643337
Epoch 6, Accuracy: 49.5%
Epoch 7, Time: 24.992807149887085, Loss: 1.3802857338010197
Epoch 7, Accuracy: 49.3%
Epoch 8, Time: 25.512392044067383, Loss: 1.3478979235872284
Epoch 8, Accuracy: 49.36%
Epoch 9, Time: 24.717648029327393, Loss: 1.3159321956622325
Epoch 9, Accuracy: 51.92%
Epoch 10, Time: 24.605496883392334, Loss: 1.2927573462733832
Epoch 10, Accuracy: 51.79%
Epoch 11, Time: 23.698153018951416, Loss: 1.262801988655344
Epoch 11, Accuracy: 52.52%
Epoch 12

49.46

### REDO: Branching Attention + ConvNN Spatial K_N (4 layers)

In [4]:

# Branching model: Attention + ConvNN (Spatial K_N variant), 4 layers
B_Attention_ConvNN_Spatial_N_4 = B_Attention_ConvNN_Spatial_K_N(num_layers=4, num_classes=10, device='cuda')

# Report the model name and its trainable-parameter count
print(f"Model: {B_Attention_ConvNN_Spatial_N_4.name}")
print(f"Num params: {count_parameters(B_Attention_ConvNN_Spatial_N_4)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(B_Attention_ConvNN_Spatial_N_4.parameters(), lr=1e-4)

# Train, then evaluate on the test loader (last expression, so its value is displayed)
train_eval(
    B_Attention_ConvNN_Spatial_N_4,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(B_Attention_ConvNN_Spatial_N_4, cifar10.test_loader, device='cuda')


Model: B_Attention_ConvNN_Spatial_K_N
Num params: 366026

Epoch 1, Time: 32.83654189109802, Loss: 1.918828506603875
Epoch 1, Accuracy: 39.75%
Epoch 2, Time: 27.062032461166382, Loss: 1.6637188100144076
Epoch 2, Accuracy: 43.77%
Epoch 3, Time: 27.053757190704346, Loss: 1.575149332166023
Epoch 3, Accuracy: 45.59%
Epoch 4, Time: 27.033091068267822, Loss: 1.502392826177885
Epoch 4, Accuracy: 47.0%
Epoch 5, Time: 26.62640929222107, Loss: 1.4469835556986388
Epoch 5, Accuracy: 48.08%
Epoch 6, Time: 26.930038452148438, Loss: 1.40500650366249
Epoch 6, Accuracy: 48.42%
Epoch 7, Time: 26.957622528076172, Loss: 1.366987978253523
Epoch 7, Accuracy: 49.36%
Epoch 8, Time: 26.990817308425903, Loss: 1.3334819381041905
Epoch 8, Accuracy: 50.1%
Epoch 9, Time: 26.880933046340942, Loss: 1.3004971824186233
Epoch 9, Accuracy: 50.41%
Epoch 10, Time: 27.105823755264282, Loss: 1.2686404400621838
Epoch 10, Accuracy: 50.59%
Epoch 11, Time: 26.932446479797363, Loss: 1.2400453910803246
Epoch 11, Accuracy: 50.76%
Ep

47.92

In [21]:

# Branching model: Attention + ConvNN (Attn K_N variant), 4 layers
B_Attention_ConvNN_Attn_N_4 = B_Attention_ConvNN_Attn_K_N(num_layers=4, num_classes=10, device='cuda')

# Report the model name and its trainable-parameter count
print(f"Model: {B_Attention_ConvNN_Attn_N_4.name}")
print(f"Num params: {count_parameters(B_Attention_ConvNN_Attn_N_4)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(B_Attention_ConvNN_Attn_N_4.parameters(), lr=1e-4)

# Train, then evaluate on the test loader (last expression, so its value is displayed)
train_eval(
    B_Attention_ConvNN_Attn_N_4,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(B_Attention_ConvNN_Attn_N_4, cifar10.test_loader, device='cuda')


Model: B_Attention_ConvNN_Attn_K_N
Num params: 1152458

Epoch 1, Time: 24.35545778274536, Loss: 2.122382764621159
Epoch 1, Accuracy: 23.23%
Epoch 2, Time: 24.213001012802124, Loss: 1.868332574587039
Epoch 2, Accuracy: 31.43%
Epoch 3, Time: 24.19591236114502, Loss: 1.7450683744972015
Epoch 3, Accuracy: 35.57%
Epoch 4, Time: 24.20915389060974, Loss: 1.6650548307487116
Epoch 4, Accuracy: 39.84%
Epoch 5, Time: 26.46943688392639, Loss: 1.595127745662504
Epoch 5, Accuracy: 42.55%
Epoch 6, Time: 26.667376041412354, Loss: 1.5364344255698612
Epoch 6, Accuracy: 44.24%
Epoch 7, Time: 24.283058881759644, Loss: 1.495460039819293
Epoch 7, Accuracy: 45.9%
Epoch 8, Time: 25.27608633041382, Loss: 1.4578679522589955
Epoch 8, Accuracy: 46.77%
Epoch 9, Time: 25.266207218170166, Loss: 1.4319164554786195
Epoch 9, Accuracy: 48.28%
Epoch 10, Time: 26.633779525756836, Loss: 1.40902983898397
Epoch 10, Accuracy: 49.36%
Epoch 11, Time: 24.27699375152588, Loss: 1.3835239363143512
Epoch 11, Accuracy: 47.93%
Epoch 1

53.06

In [22]:

# Branching model: Attention + ConvNN (Attn Spatial K_N variant), 4 layers
B_Attention_ConvNN_Attn_Spatial_N_4 = B_Attention_ConvNN_Attn_Spatial_K_N(num_layers=4, num_classes=10, device='cuda')

# Report the model name and its trainable-parameter count
print(f"Model: {B_Attention_ConvNN_Attn_Spatial_N_4.name}")
print(f"Num params: {count_parameters(B_Attention_ConvNN_Attn_Spatial_N_4)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(B_Attention_ConvNN_Attn_Spatial_N_4.parameters(), lr=1e-4)

# Train, then evaluate on the test loader (last expression, so its value is displayed)
train_eval(
    B_Attention_ConvNN_Attn_Spatial_N_4,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(B_Attention_ConvNN_Attn_Spatial_N_4, cifar10.test_loader, device='cuda')


Model: B_Attention_ConvNN_Attn_Spatial_K_N
Num params: 906698

Epoch 1, Time: 25.39358878135681, Loss: 2.054299613703852
Epoch 1, Accuracy: 29.32%
Epoch 2, Time: 27.2640380859375, Loss: 1.7875938446015653
Epoch 2, Accuracy: 34.49%
Epoch 3, Time: 25.804102897644043, Loss: 1.681485202916138
Epoch 3, Accuracy: 39.64%
Epoch 4, Time: 27.29999828338623, Loss: 1.5741274265377112
Epoch 4, Accuracy: 43.2%
Epoch 5, Time: 25.986742734909058, Loss: 1.495676044003128
Epoch 5, Accuracy: 46.03%
Epoch 6, Time: 26.643325567245483, Loss: 1.431808000635308
Epoch 6, Accuracy: 47.51%
Epoch 7, Time: 27.089059352874756, Loss: 1.3865119682248597
Epoch 7, Accuracy: 49.04%
Epoch 8, Time: 25.56437397003174, Loss: 1.348711145046117
Epoch 8, Accuracy: 49.51%
Epoch 9, Time: 24.59324836730957, Loss: 1.30987567060134
Epoch 9, Accuracy: 49.71%
Epoch 10, Time: 24.587559461593628, Loss: 1.2778306089703688
Epoch 10, Accuracy: 51.04%
Epoch 11, Time: 24.598538398742676, Loss: 1.245504358083086
Epoch 11, Accuracy: 51.79%
Ep

48.84

In [23]:

# Branching model: Attention + Conv2d, 4 layers
B_Attention_Conv2d_4 = B_Attention_Conv2d(num_layers=4, num_classes=10, device='cuda')

# Report the model name and its trainable-parameter count
print(f"Model: {B_Attention_Conv2d_4.name}")
print(f"Num params: {count_parameters(B_Attention_Conv2d_4)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(B_Attention_Conv2d_4.parameters(), lr=1e-4)

# Train, then evaluate on the test loader (last expression, so its value is displayed)
train_eval(
    B_Attention_Conv2d_4,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(B_Attention_Conv2d_4, cifar10.test_loader, device='cuda')



Model: B_Attention_Conv2d
Num params: 254458

Epoch 1, Time: 19.476115226745605, Loss: 1.9902635652695775
Epoch 1, Accuracy: 35.97%
Epoch 2, Time: 19.515493154525757, Loss: 1.7202519786632275
Epoch 2, Accuracy: 40.04%
Epoch 3, Time: 19.3989737033844, Loss: 1.6015176765449213
Epoch 3, Accuracy: 44.41%
Epoch 4, Time: 19.57919669151306, Loss: 1.498610862838033
Epoch 4, Accuracy: 48.2%
Epoch 5, Time: 19.580225229263306, Loss: 1.4115373478521167
Epoch 5, Accuracy: 48.78%
Epoch 6, Time: 19.530154943466187, Loss: 1.3532210682206751
Epoch 6, Accuracy: 51.66%
Epoch 7, Time: 20.20791006088257, Loss: 1.3079091446174076
Epoch 7, Accuracy: 52.15%
Epoch 8, Time: 19.428335428237915, Loss: 1.2679602051025156
Epoch 8, Accuracy: 53.15%
Epoch 9, Time: 19.522911310195923, Loss: 1.2328407631048461
Epoch 9, Accuracy: 53.82%
Epoch 10, Time: 19.586276292800903, Loss: 1.2012693677716852
Epoch 10, Accuracy: 54.78%
Epoch 11, Time: 19.60258722305298, Loss: 1.1721871556985715
Epoch 11, Accuracy: 55.96%
Epoch 12, T

56.18

### 8 Layer Models

In [24]:

# CNN control model, 8 layers
CNN_8 = CNN(num_layers=8, num_classes=10, device='cuda')

# Report the model name and its trainable-parameter count
print(f"Model: {CNN_8.name}")
print(f"Num params: {count_parameters(CNN_8)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(CNN_8.parameters(), lr=1e-4)

# Train, then evaluate on the test loader (last expression, so its value is displayed)
train_eval(
    CNN_8,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(CNN_8, cifar10.test_loader, device='cuda')


Model: CNN
Num params: 180538

Epoch 1, Time: 10.503341674804688, Loss: 1.8731899381903432
Epoch 1, Accuracy: 40.62%
Epoch 2, Time: 11.347060441970825, Loss: 1.6082673415808422
Epoch 2, Accuracy: 45.73%
Epoch 3, Time: 11.034446716308594, Loss: 1.4709576871389014
Epoch 3, Accuracy: 48.59%
Epoch 4, Time: 11.693096160888672, Loss: 1.3853170636974637
Epoch 4, Accuracy: 49.87%
Epoch 5, Time: 11.925751686096191, Loss: 1.3279923023775106
Epoch 5, Accuracy: 53.1%
Epoch 6, Time: 12.101538181304932, Loss: 1.2848098614179264
Epoch 6, Accuracy: 53.2%
Epoch 7, Time: 12.071012735366821, Loss: 1.2447210195119425
Epoch 7, Accuracy: 55.02%
Epoch 8, Time: 12.097263813018799, Loss: 1.208370460802332
Epoch 8, Accuracy: 55.54%
Epoch 9, Time: 12.128664255142212, Loss: 1.1737226124309823
Epoch 9, Accuracy: 55.26%
Epoch 10, Time: 12.091695547103882, Loss: 1.1410445229476676
Epoch 10, Accuracy: 56.72%
Epoch 11, Time: 10.918102025985718, Loss: 1.1079266045404517
Epoch 11, Accuracy: 57.4%
Epoch 12, Time: 11.1627

56.23

In [25]:

# Attention model, 8 layers
Attention_8 = Attention(num_layers=8, num_classes=10, device='cuda')

# Report the model name and its trainable-parameter count
print(f"Model: {Attention_8.name}")
print(f"Num params: {count_parameters(Attention_8)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Attention_8.parameters(), lr=1e-4)

# Train, then evaluate on the test loader (last expression, so its value is displayed)
train_eval(
    Attention_8,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(Attention_8, cifar10.test_loader, device='cuda')


Model: Attention
Num params: 329354

Epoch 1, Time: 28.465176820755005, Loss: 2.0901532997865506
Epoch 1, Accuracy: 24.08%
Epoch 2, Time: 28.17267942428589, Loss: 1.9779397424529581
Epoch 2, Accuracy: 25.91%
Epoch 3, Time: 27.506897926330566, Loss: 1.9624706785697157
Epoch 3, Accuracy: 26.14%
Epoch 4, Time: 28.240234375, Loss: 1.9510867726772338
Epoch 4, Accuracy: 26.05%
Epoch 5, Time: 28.827619791030884, Loss: 1.9374645541391105
Epoch 5, Accuracy: 27.21%
Epoch 6, Time: 27.89975357055664, Loss: 1.925481708305876
Epoch 6, Accuracy: 27.3%
Epoch 7, Time: 28.212062120437622, Loss: 1.9137998607457447
Epoch 7, Accuracy: 27.71%
Epoch 8, Time: 27.78653860092163, Loss: 1.902946403111948
Epoch 8, Accuracy: 28.0%
Epoch 9, Time: 28.0503191947937, Loss: 1.8913125679316118
Epoch 9, Accuracy: 28.39%
Epoch 10, Time: 28.527089834213257, Loss: 1.8793913130565068
Epoch 10, Accuracy: 29.2%
Epoch 11, Time: 28.86582326889038, Loss: 1.8693106511364812
Epoch 11, Accuracy: 29.6%
Epoch 12, Time: 27.957439661026

47.0

In [26]:
# ConvNN (K_All variant), 8 layers
ConvNN_All_8 = ConvNN_K_All(num_layers=8, num_classes=10, device='cuda')

# Report the model name and its trainable-parameter count
print(f"Model: {ConvNN_All_8.name}")
print(f"Num params: {count_parameters(ConvNN_All_8)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_All_8.parameters(), lr=1e-4)

# Train, then evaluate on the test loader (last expression, so its value is displayed)
train_eval(
    ConvNN_All_8,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(ConvNN_All_8, cifar10.test_loader, device='cuda')


Model: ConvNN_K_All
Num params: 431754

Epoch 1, Time: 21.866807222366333, Loss: 1.818881847821843
Epoch 1, Accuracy: 41.13%
Epoch 2, Time: 20.3162784576416, Loss: 1.5715558679817279
Epoch 2, Accuracy: 44.79%
Epoch 3, Time: 20.02136754989624, Loss: 1.4389215184904425
Epoch 3, Accuracy: 49.63%
Epoch 4, Time: 20.27530527114868, Loss: 1.3236606787232792
Epoch 4, Accuracy: 52.16%
Epoch 5, Time: 20.872575521469116, Loss: 1.2556759398764052
Epoch 5, Accuracy: 53.45%
Epoch 6, Time: 19.98073101043701, Loss: 1.2010141284874334
Epoch 6, Accuracy: 53.49%
Epoch 7, Time: 20.266287803649902, Loss: 1.1579070878608146
Epoch 7, Accuracy: 55.5%
Epoch 8, Time: 20.623693704605103, Loss: 1.1251882386329535
Epoch 8, Accuracy: 55.99%
Epoch 9, Time: 20.48232388496399, Loss: 1.0898458475956831
Epoch 9, Accuracy: 54.51%
Epoch 10, Time: 21.323194980621338, Loss: 1.060254229609009
Epoch 10, Accuracy: 55.57%
Epoch 11, Time: 20.340587615966797, Loss: 1.027592051898122
Epoch 11, Accuracy: 56.0%
Epoch 12, Time: 20.83

50.67

In [27]:
# ConvNN (K_N variant), 8 layers
ConvNN_N_8 = ConvNN_K_N(num_layers=8, num_classes=10, device='cuda')

# Report the model name and its trainable-parameter count
print(f"Model: {ConvNN_N_8.name}")
print(f"Num params: {count_parameters(ConvNN_N_8)}")
print()

# Training setup: cross-entropy loss, Adam (lr = 1e-4), 100 epochs
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_N_8.parameters(), lr=1e-4)

# Train, then evaluate on the test loader (last expression, so its value is displayed)
train_eval(
    ConvNN_N_8,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device='cuda',
)
evaluate_accuracy(ConvNN_N_8, cifar10.test_loader, device='cuda')


Model: ConvNN_K_N
Num params: 431754

Epoch 1, Time: 21.170989751815796, Loss: 1.900117299288435
Epoch 1, Accuracy: 39.76%
Epoch 2, Time: 20.408852577209473, Loss: 1.6605273687930973
Epoch 2, Accuracy: 43.17%
Epoch 3, Time: 20.737834692001343, Loss: 1.5581386098471444
Epoch 3, Accuracy: 45.29%
Epoch 4, Time: 19.34747076034546, Loss: 1.4977221062116306
Epoch 4, Accuracy: 46.82%
Epoch 5, Time: 19.388168334960938, Loss: 1.453626374302008
Epoch 5, Accuracy: 47.31%
Epoch 6, Time: 19.618134021759033, Loss: 1.4168170097538881
Epoch 6, Accuracy: 49.48%
Epoch 7, Time: 19.58099102973938, Loss: 1.3719632621006588
Epoch 7, Accuracy: 48.39%
Epoch 8, Time: 19.550334215164185, Loss: 1.3447583608920006
Epoch 8, Accuracy: 50.11%
Epoch 9, Time: 20.503564834594727, Loss: 1.312529999276866
Epoch 9, Accuracy: 50.41%
Epoch 10, Time: 20.32954216003418, Loss: 1.2919689602101856
Epoch 10, Accuracy: 51.25%
Epoch 11, Time: 18.815550327301025, Loss: 1.2635241592936504
Epoch 11, Accuracy: 50.96%
Epoch 12, Time: 19

56.61

In [28]:
# ConvNN Spatial (K = N): ConvNN_Spatial_K_N with 8 layers and 10 output classes, built for the GPU.
ConvNN_Spatial_N_8 = ConvNN_Spatial_K_N(num_layers=8, num_classes=10, device="cuda")

# Print the model's name and trainable-parameter count, then a blank line.
print(f"Model: {ConvNN_Spatial_N_8.name}")
print(f"Num params: {count_parameters(ConvNN_Spatial_N_8)}")
print()

# Training setup: cross-entropy loss, Adam with lr=1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_Spatial_N_8.parameters(), lr=1e-4)

# Train on the CIFAR10 loaders; the final accuracy check stays last in the cell.
train_eval(
    ConvNN_Spatial_N_8,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device="cuda",
)
evaluate_accuracy(ConvNN_Spatial_N_8, cifar10.test_loader, device="cuda")


Model: ConvNN_Spatial_K_N
Num params: 431754

Epoch 1, Time: 28.467828273773193, Loss: 1.8745100450942584
Epoch 1, Accuracy: 40.24%
Epoch 2, Time: 27.833685398101807, Loss: 1.662054753516946
Epoch 2, Accuracy: 42.95%
Epoch 3, Time: 27.930328130722046, Loss: 1.5492268265665645
Epoch 3, Accuracy: 45.66%
Epoch 4, Time: 26.5835382938385, Loss: 1.4793426627698152
Epoch 4, Accuracy: 46.29%
Epoch 5, Time: 27.098397731781006, Loss: 1.4282698797447908
Epoch 5, Accuracy: 46.95%
Epoch 6, Time: 26.79739284515381, Loss: 1.388130119465806
Epoch 6, Accuracy: 48.7%
Epoch 7, Time: 26.41672706604004, Loss: 1.3464277440019885
Epoch 7, Accuracy: 49.27%
Epoch 8, Time: 26.326907873153687, Loss: 1.3127722369740382
Epoch 8, Accuracy: 49.37%
Epoch 9, Time: 26.42294144630432, Loss: 1.2769979468696868
Epoch 9, Accuracy: 49.75%
Epoch 10, Time: 26.376021146774292, Loss: 1.247377635344215
Epoch 10, Accuracy: 49.68%
Epoch 11, Time: 26.430574655532837, Loss: 1.215200235852805
Epoch 11, Accuracy: 51.29%
Epoch 12, Time

47.04

In [29]:
# ConvNN Attention (K = N): ConvNN_Attn_K_N with 8 layers and 10 output classes, built for the GPU.
ConvNN_Attn_N_8 = ConvNN_Attn_K_N(num_layers=8, num_classes=10, device="cuda")

# Print the model's name and trainable-parameter count, then a blank line.
print(f"Model: {ConvNN_Attn_N_8.name}")
print(f"Num params: {count_parameters(ConvNN_Attn_N_8)}")
print()

# Training setup: cross-entropy loss, Adam with lr=1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_Attn_N_8.parameters(), lr=1e-4)

# Train on the CIFAR10 loaders; the final accuracy check stays last in the cell.
train_eval(
    ConvNN_Attn_N_8,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device="cuda",
)
evaluate_accuracy(ConvNN_Attn_N_8, cifar10.test_loader, device="cuda")


Model: ConvNN_Attn_K_N
Num params: 2004618

Epoch 1, Time: 19.856836318969727, Loss: 2.3030953038379054
Epoch 1, Accuracy: 10.24%
Epoch 2, Time: 20.227174520492554, Loss: 2.3026443046072256
Epoch 2, Accuracy: 10.09%
Epoch 3, Time: 20.453636646270752, Loss: 2.3021632289642566
Epoch 3, Accuracy: 11.27%
Epoch 4, Time: 21.639930963516235, Loss: 2.05396867499632
Epoch 4, Accuracy: 20.63%
Epoch 5, Time: 22.44516921043396, Loss: 1.9626496031766048
Epoch 5, Accuracy: 23.18%
Epoch 6, Time: 20.883638620376587, Loss: 1.929457457626567
Epoch 6, Accuracy: 23.03%
Epoch 7, Time: 20.731147050857544, Loss: 1.9111997355585513
Epoch 7, Accuracy: 24.01%
Epoch 8, Time: 20.754812240600586, Loss: 1.8826782863463283
Epoch 8, Accuracy: 27.61%
Epoch 9, Time: 21.12500548362732, Loss: 1.830667580485039
Epoch 9, Accuracy: 28.59%
Epoch 10, Time: 21.150618076324463, Loss: 1.791327392048848
Epoch 10, Accuracy: 31.6%
Epoch 11, Time: 22.152966260910034, Loss: 1.7607254291434422
Epoch 11, Accuracy: 32.22%
Epoch 12, Time

56.4

In [30]:
# ConvNN Attention Spatial (K = N): ConvNN_Attn_Spatial_K_N with 8 layers and 10 output classes, built for the GPU.
ConvNN_Attn_Spatial_N_8 = ConvNN_Attn_Spatial_K_N(num_layers=8, num_classes=10, device="cuda")

# Print the model's name and trainable-parameter count, then a blank line.
print(f"Model: {ConvNN_Attn_Spatial_N_8.name}")
print(f"Num params: {count_parameters(ConvNN_Attn_Spatial_N_8)}")
print()

# Training setup: cross-entropy loss, Adam with lr=1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_Attn_Spatial_N_8.parameters(), lr=1e-4)

# Train on the CIFAR10 loaders; the final accuracy check stays last in the cell.
train_eval(
    ConvNN_Attn_Spatial_N_8,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device="cuda",
)
evaluate_accuracy(ConvNN_Attn_Spatial_N_8, cifar10.test_loader, device="cuda")


Model: ConvNN_Attn_Spatial_K_N
Num params: 1513098

Epoch 1, Time: 21.244386672973633, Loss: 2.297133834770573
Epoch 1, Accuracy: 16.34%
Epoch 2, Time: 22.12599515914917, Loss: 1.9840390139528552
Epoch 2, Accuracy: 22.06%
Epoch 3, Time: 22.056809663772583, Loss: 1.9106064670531036
Epoch 3, Accuracy: 22.7%
Epoch 4, Time: 20.38740301132202, Loss: 1.870162048913024
Epoch 4, Accuracy: 25.25%
Epoch 5, Time: 20.922776699066162, Loss: 1.828619052839401
Epoch 5, Accuracy: 26.55%
Epoch 6, Time: 20.645878791809082, Loss: 1.7506686060324959
Epoch 6, Accuracy: 31.42%
Epoch 7, Time: 22.114463806152344, Loss: 1.6528410926804213
Epoch 7, Accuracy: 37.0%
Epoch 8, Time: 21.799664974212646, Loss: 1.5530846353686985
Epoch 8, Accuracy: 41.0%
Epoch 9, Time: 22.54451060295105, Loss: 1.4729390595575123
Epoch 9, Accuracy: 44.16%
Epoch 10, Time: 20.638683795928955, Loss: 1.4006762798789822
Epoch 10, Accuracy: 46.23%
Epoch 11, Time: 21.478391647338867, Loss: 1.3328692275087546
Epoch 11, Accuracy: 47.74%
Epoch 1

51.08

#### ii. Branching

In [31]:

# Branching Conv2d + ConvNN (K = All): B_Conv2d_ConvNN_K_All with 8 layers and 10 output classes, built for the GPU.
B_Conv2d_ConvNN_All_8 = B_Conv2d_ConvNN_K_All(num_layers=8, num_classes=10, device="cuda")

# Print the model's name and trainable-parameter count, then a blank line.
print(f"Model: {B_Conv2d_ConvNN_All_8.name}")
print(f"Num params: {count_parameters(B_Conv2d_ConvNN_All_8)}")
print()

# Training setup: cross-entropy loss, Adam with lr=1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(B_Conv2d_ConvNN_All_8.parameters(), lr=1e-4)

# Train on the CIFAR10 loaders; the final accuracy check stays last in the cell.
train_eval(
    B_Conv2d_ConvNN_All_8,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device="cuda",
)
evaluate_accuracy(B_Conv2d_ConvNN_All_8, cifar10.test_loader, device="cuda")


Model: B_Conv2d_ConvNN_K_All
Num params: 452666

Epoch 1, Time: 23.7456157207489, Loss: 2.150988338548509
Epoch 1, Accuracy: 33.15%
Epoch 2, Time: 23.350743532180786, Loss: 1.6966400855337567
Epoch 2, Accuracy: 41.06%
Epoch 3, Time: 23.578864097595215, Loss: 1.541703917333842
Epoch 3, Accuracy: 44.78%
Epoch 4, Time: 23.014971017837524, Loss: 1.450459602848648
Epoch 4, Accuracy: 47.86%
Epoch 5, Time: 23.587159156799316, Loss: 1.3842385219186164
Epoch 5, Accuracy: 48.99%
Epoch 6, Time: 23.566535234451294, Loss: 1.3252477164920944
Epoch 6, Accuracy: 50.53%
Epoch 7, Time: 22.561113834381104, Loss: 1.2695258904600997
Epoch 7, Accuracy: 51.15%
Epoch 8, Time: 23.31946897506714, Loss: 1.2222061163324225
Epoch 8, Accuracy: 51.98%
Epoch 9, Time: 22.43649935722351, Loss: 1.1704294950608403
Epoch 9, Accuracy: 52.23%
Epoch 10, Time: 23.07009267807007, Loss: 1.1255197053217827
Epoch 10, Accuracy: 54.1%
Epoch 11, Time: 22.634994506835938, Loss: 1.0807230238566923
Epoch 11, Accuracy: 55.1%
Epoch 12, T

51.92

In [32]:

# Branching Conv2d + ConvNN (K = N): B_Conv2d_ConvNN_K_N with 8 layers and 10 output classes, built for the GPU.
B_Conv2d_ConvNN_N_8 = B_Conv2d_ConvNN_K_N(num_layers=8, num_classes=10, device="cuda")

# Print the model's name and trainable-parameter count, then a blank line.
print(f"Model: {B_Conv2d_ConvNN_N_8.name}")
print(f"Num params: {count_parameters(B_Conv2d_ConvNN_N_8)}")
print()

# Training setup: cross-entropy loss, Adam with lr=1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(B_Conv2d_ConvNN_N_8.parameters(), lr=1e-4)

# Train on the CIFAR10 loaders; the final accuracy check stays last in the cell.
train_eval(
    B_Conv2d_ConvNN_N_8,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device="cuda",
)
evaluate_accuracy(B_Conv2d_ConvNN_N_8, cifar10.test_loader, device="cuda")


Model: B_Conv2d_ConvNN_K_N
Num params: 452666

Epoch 1, Time: 21.92768359184265, Loss: 2.050667579064284
Epoch 1, Accuracy: 36.05%
Epoch 2, Time: 21.729592323303223, Loss: 1.6749284546393568
Epoch 2, Accuracy: 43.31%
Epoch 3, Time: 21.358815908432007, Loss: 1.5114260947003084
Epoch 3, Accuracy: 46.06%
Epoch 4, Time: 21.336442947387695, Loss: 1.4231789253861702
Epoch 4, Accuracy: 48.62%
Epoch 5, Time: 22.320770025253296, Loss: 1.3643580479999942
Epoch 5, Accuracy: 50.57%
Epoch 6, Time: 21.900883674621582, Loss: 1.3132619810531208
Epoch 6, Accuracy: 50.55%
Epoch 7, Time: 22.324220895767212, Loss: 1.2667123176862516
Epoch 7, Accuracy: 52.69%
Epoch 8, Time: 23.30625867843628, Loss: 1.2179978592773837
Epoch 8, Accuracy: 54.29%
Epoch 9, Time: 21.451145887374878, Loss: 1.1679529317504609
Epoch 9, Accuracy: 54.93%
Epoch 10, Time: 21.41544270515442, Loss: 1.1225325064281064
Epoch 10, Accuracy: 55.7%
Epoch 11, Time: 23.104021549224854, Loss: 1.0746998690125886
Epoch 11, Accuracy: 55.86%
Epoch 12

54.37

In [33]:

# Branching Conv2d + ConvNN Spatial (K = N): B_Conv2d_ConvNN_Spatial_K_N with 8 layers and 10 output classes, built for the GPU.
B_Conv2d_ConvNN_Spatial_N_8 = B_Conv2d_ConvNN_Spatial_K_N(num_layers=8, num_classes=10, device="cuda")

# Print the model's name and trainable-parameter count, then a blank line.
print(f"Model: {B_Conv2d_ConvNN_Spatial_N_8.name}")
print(f"Num params: {count_parameters(B_Conv2d_ConvNN_Spatial_N_8)}")
print()

# Training setup: cross-entropy loss, Adam with lr=1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(B_Conv2d_ConvNN_Spatial_N_8.parameters(), lr=1e-4)

# Train on the CIFAR10 loaders; the final accuracy check stays last in the cell.
train_eval(
    B_Conv2d_ConvNN_Spatial_N_8,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device="cuda",
)
evaluate_accuracy(B_Conv2d_ConvNN_Spatial_N_8, cifar10.test_loader, device="cuda")


Model: B_Conv2d_ConvNN_Spatial_K_N
Num params: 452666

Epoch 1, Time: 30.907642602920532, Loss: 2.05662426756471
Epoch 1, Accuracy: 35.73%
Epoch 2, Time: 30.618302583694458, Loss: 1.6833376876838373
Epoch 2, Accuracy: 41.4%
Epoch 3, Time: 31.45012855529785, Loss: 1.528865475331426
Epoch 3, Accuracy: 45.46%
Epoch 4, Time: 30.20979380607605, Loss: 1.4338414304701568
Epoch 4, Accuracy: 46.1%
Epoch 5, Time: 29.924201488494873, Loss: 1.364659415638965
Epoch 5, Accuracy: 49.15%
Epoch 6, Time: 30.4558048248291, Loss: 1.2992572453625673
Epoch 6, Accuracy: 51.06%
Epoch 7, Time: 30.809762001037598, Loss: 1.243393709973606
Epoch 7, Accuracy: 48.94%
Epoch 8, Time: 30.656265020370483, Loss: 1.1931574022221139
Epoch 8, Accuracy: 53.03%
Epoch 9, Time: 30.222825288772583, Loss: 1.1493084966526617
Epoch 9, Accuracy: 54.07%
Epoch 10, Time: 30.65789031982422, Loss: 1.1036248182701638
Epoch 10, Accuracy: 54.13%
Epoch 11, Time: 30.74436926841736, Loss: 1.0631423313599413
Epoch 11, Accuracy: 54.66%
Epoch 12

53.57

In [34]:

# Branching Conv2d + ConvNN Attention (K = N): B_Conv2d_ConvNN_Attn_K_N with 8 layers and 10 output classes, built for the GPU.
B_Conv2d_ConvNN_Attn_N_8 = B_Conv2d_ConvNN_Attn_K_N(num_layers=8, num_classes=10, device="cuda")

# Print the model's name and trainable-parameter count, then a blank line.
print(f"Model: {B_Conv2d_ConvNN_Attn_N_8.name}")
print(f"Num params: {count_parameters(B_Conv2d_ConvNN_Attn_N_8)}")
print()

# Training setup: cross-entropy loss, Adam with lr=1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(B_Conv2d_ConvNN_Attn_N_8.parameters(), lr=1e-4)

# Train on the CIFAR10 loaders; the final accuracy check stays last in the cell.
train_eval(
    B_Conv2d_ConvNN_Attn_N_8,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device="cuda",
)
evaluate_accuracy(B_Conv2d_ConvNN_Attn_N_8, cifar10.test_loader, device="cuda")


Model: B_Conv2d_ConvNN_Attn_K_N
Num params: 2025530

Epoch 1, Time: 23.443065881729126, Loss: 2.1301698489567205
Epoch 1, Accuracy: 21.24%
Epoch 2, Time: 24.56094789505005, Loss: 1.9151030126435067
Epoch 2, Accuracy: 24.5%
Epoch 3, Time: 24.436293125152588, Loss: 1.8415818549787906
Epoch 3, Accuracy: 28.63%
Epoch 4, Time: 22.958101749420166, Loss: 1.7884496137919024
Epoch 4, Accuracy: 32.13%
Epoch 5, Time: 24.579517602920532, Loss: 1.7248712355828346
Epoch 5, Accuracy: 34.74%
Epoch 6, Time: 24.650315523147583, Loss: 1.6344227835040568
Epoch 6, Accuracy: 37.88%
Epoch 7, Time: 23.769559621810913, Loss: 1.5820932488917085
Epoch 7, Accuracy: 41.02%
Epoch 8, Time: 22.93578791618347, Loss: 1.521399438686078
Epoch 8, Accuracy: 44.06%
Epoch 9, Time: 23.321557760238647, Loss: 1.4476244135585892
Epoch 9, Accuracy: 46.65%
Epoch 10, Time: 22.45690894126892, Loss: 1.38378264745483
Epoch 10, Accuracy: 49.54%
Epoch 11, Time: 24.550788402557373, Loss: 1.328538876284114
Epoch 11, Accuracy: 50.69%
Epoch

58.58

In [35]:

# Branching Conv2d + ConvNN Attention Spatial (K = N): B_Conv2d_ConvNN_Attn_Spatial_K_N with 8 layers and 10 output classes, built for the GPU.
B_Conv2d_ConvNN_Attn_Spatial_N_8 = B_Conv2d_ConvNN_Attn_Spatial_K_N(num_layers=8, num_classes=10, device="cuda")

# Print the model's name and trainable-parameter count, then a blank line.
print(f"Model: {B_Conv2d_ConvNN_Attn_Spatial_N_8.name}")
print(f"Num params: {count_parameters(B_Conv2d_ConvNN_Attn_Spatial_N_8)}")
print()

# Training setup: cross-entropy loss, Adam with lr=1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(B_Conv2d_ConvNN_Attn_Spatial_N_8.parameters(), lr=1e-4)

# Train on the CIFAR10 loaders; the final accuracy check stays last in the cell.
train_eval(
    B_Conv2d_ConvNN_Attn_Spatial_N_8,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device="cuda",
)
evaluate_accuracy(B_Conv2d_ConvNN_Attn_Spatial_N_8, cifar10.test_loader, device="cuda")


Model: B_Conv2d_ConvNN_Attn_Spatial_K_N
Num params: 1534010

Epoch 1, Time: 24.943262338638306, Loss: 2.303102976220953
Epoch 1, Accuracy: 9.8%
Epoch 2, Time: 23.63971972465515, Loss: 2.302629789732911
Epoch 2, Accuracy: 9.88%
Epoch 3, Time: 23.65147590637207, Loss: 2.302633606259475
Epoch 3, Accuracy: 9.97%
Epoch 4, Time: 25.75595736503601, Loss: 2.302625444539063
Epoch 4, Accuracy: 10.15%
Epoch 5, Time: 23.161121606826782, Loss: 2.3026405649112007
Epoch 5, Accuracy: 10.16%
Epoch 6, Time: 24.733394384384155, Loss: 2.3026224530261494
Epoch 6, Accuracy: 9.78%
Epoch 7, Time: 24.87606954574585, Loss: 2.3026113958309984
Epoch 7, Accuracy: 10.12%
Epoch 8, Time: 25.041768550872803, Loss: 2.302617092266717
Epoch 8, Accuracy: 9.98%
Epoch 9, Time: 23.629510641098022, Loss: 2.3026069792945063
Epoch 9, Accuracy: 10.05%
Epoch 10, Time: 23.970806121826172, Loss: 2.3026091589037416
Epoch 10, Accuracy: 10.2%
Epoch 11, Time: 23.93545937538147, Loss: 2.3026331333857972
Epoch 11, Accuracy: 9.84%
Epoch 1

54.18

In [36]:

# Branching Attention + ConvNN (K = All): B_Attention_ConvNN_K_All with 8 layers and 10 output classes, built for the GPU.
B_Attention_ConvNN_All_8 = B_Attention_ConvNN_K_All(num_layers=8, num_classes=10, device="cuda")

# Print the model's name and trainable-parameter count, then a blank line.
print(f"Model: {B_Attention_ConvNN_All_8.name}")
print(f"Num params: {count_parameters(B_Attention_ConvNN_All_8)}")
print()

# Training setup: cross-entropy loss, Adam with lr=1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(B_Attention_ConvNN_All_8.parameters(), lr=1e-4)

# Train on the CIFAR10 loaders; the final accuracy check stays last in the cell.
train_eval(
    B_Attention_ConvNN_All_8,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device="cuda",
)
evaluate_accuracy(B_Attention_ConvNN_All_8, cifar10.test_loader, device="cuda")


Model: B_Attention_ConvNN_K_All
Num params: 601482

Epoch 1, Time: 42.17299771308899, Loss: 2.020188313311018
Epoch 1, Accuracy: 38.25%
Epoch 2, Time: 40.89118146896362, Loss: 1.6785629060872072
Epoch 2, Accuracy: 42.88%
Epoch 3, Time: 40.99366235733032, Loss: 1.5809627114354496
Epoch 3, Accuracy: 43.72%
Epoch 4, Time: 41.99638772010803, Loss: 1.5283869390597429
Epoch 4, Accuracy: 45.07%
Epoch 5, Time: 42.09677982330322, Loss: 1.4874201354468266
Epoch 5, Accuracy: 45.01%
Epoch 6, Time: 41.16128587722778, Loss: 1.455062948834256
Epoch 6, Accuracy: 46.61%
Epoch 7, Time: 41.533963441848755, Loss: 1.427334093224362
Epoch 7, Accuracy: 47.16%
Epoch 8, Time: 41.303019762039185, Loss: 1.40133465098603
Epoch 8, Accuracy: 47.9%
Epoch 9, Time: 41.48978567123413, Loss: 1.3811532823021149
Epoch 9, Accuracy: 48.51%
Epoch 10, Time: 41.49040746688843, Loss: 1.3600367075189606
Epoch 10, Accuracy: 47.92%
Epoch 11, Time: 41.2498300075531, Loss: 1.3419502002503865
Epoch 11, Accuracy: 48.44%
Epoch 12, Time

46.9

In [37]:

# Branching Attention + ConvNN (K = N): B_Attention_ConvNN_K_N with 8 layers and 10 output classes, built for the GPU.
B_Attention_ConvNN_N_8 = B_Attention_ConvNN_K_N(num_layers=8, num_classes=10, device="cuda")

# Print the model's name and trainable-parameter count, then a blank line.
print(f"Model: {B_Attention_ConvNN_N_8.name}")
print(f"Num params: {count_parameters(B_Attention_ConvNN_N_8)}")
print()

# Training setup: cross-entropy loss, Adam with lr=1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(B_Attention_ConvNN_N_8.parameters(), lr=1e-4)

# Train on the CIFAR10 loaders; the final accuracy check stays last in the cell.
train_eval(
    B_Attention_ConvNN_N_8,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device="cuda",
)
evaluate_accuracy(B_Attention_ConvNN_N_8, cifar10.test_loader, device="cuda")


Model: B_Attention_ConvNN_K_N
Num params: 601482

Epoch 1, Time: 40.87515687942505, Loss: 2.050107745410841
Epoch 1, Accuracy: 35.79%
Epoch 2, Time: 39.188175439834595, Loss: 1.7565324547345682
Epoch 2, Accuracy: 38.5%
Epoch 3, Time: 39.94305181503296, Loss: 1.6760695058366526
Epoch 3, Accuracy: 41.23%
Epoch 4, Time: 39.420931816101074, Loss: 1.6256242203895392
Epoch 4, Accuracy: 40.68%
Epoch 5, Time: 39.22078466415405, Loss: 1.5886529007226304
Epoch 5, Accuracy: 42.9%
Epoch 6, Time: 39.94586229324341, Loss: 1.556121855440652
Epoch 6, Accuracy: 43.44%
Epoch 7, Time: 38.508994340896606, Loss: 1.52938704447978
Epoch 7, Accuracy: 44.32%
Epoch 8, Time: 38.59583115577698, Loss: 1.5027686075481308
Epoch 8, Accuracy: 44.39%
Epoch 9, Time: 40.574804067611694, Loss: 1.4804151413385824
Epoch 9, Accuracy: 45.31%
Epoch 10, Time: 41.03872108459473, Loss: 1.459612638444242
Epoch 10, Accuracy: 45.33%
Epoch 11, Time: 41.233426094055176, Loss: 1.4367778970457403
Epoch 11, Accuracy: 44.88%
Epoch 12, Tim

45.56

In [38]:

# Branching Attention + ConvNN Spatial (K = N): B_Attention_ConvNN_Spatial_K_N with 8 layers and 10 output classes, built for the GPU.
B_Attention_ConvNN_Spatial_N_8 = B_Attention_ConvNN_Spatial_K_N(num_layers=8, num_classes=10, device="cuda")

# Print the model's name and trainable-parameter count, then a blank line.
print(f"Model: {B_Attention_ConvNN_Spatial_N_8.name}")
print(f"Num params: {count_parameters(B_Attention_ConvNN_Spatial_N_8)}")
print()

# Training setup: cross-entropy loss, Adam with lr=1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(B_Attention_ConvNN_Spatial_N_8.parameters(), lr=1e-4)

# Train on the CIFAR10 loaders; the final accuracy check stays last in the cell.
train_eval(
    B_Attention_ConvNN_Spatial_N_8,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device="cuda",
)
evaluate_accuracy(B_Attention_ConvNN_Spatial_N_8, cifar10.test_loader, device="cuda")


Model: B_Attention_ConvNN_Spatial_K_N
Num params: 601482

Epoch 1, Time: 46.61168050765991, Loss: 2.152542359231378
Epoch 1, Accuracy: 33.92%
Epoch 2, Time: 46.810083627700806, Loss: 1.7863254355042792
Epoch 2, Accuracy: 38.85%
Epoch 3, Time: 46.27152442932129, Loss: 1.6925937637038853
Epoch 3, Accuracy: 41.02%
Epoch 4, Time: 46.17511582374573, Loss: 1.6251941038214641
Epoch 4, Accuracy: 42.31%
Epoch 5, Time: 46.6887092590332, Loss: 1.5763017000139827
Epoch 5, Accuracy: 43.57%
Epoch 6, Time: 46.22275400161743, Loss: 1.5420930986209294
Epoch 6, Accuracy: 43.54%
Epoch 7, Time: 46.35326290130615, Loss: 1.5097484568805646
Epoch 7, Accuracy: 44.51%
Epoch 8, Time: 46.35043787956238, Loss: 1.4808877845249517
Epoch 8, Accuracy: 45.53%
Epoch 9, Time: 46.43386697769165, Loss: 1.4519924779072442
Epoch 9, Accuracy: 45.19%
Epoch 10, Time: 46.71322560310364, Loss: 1.43020269312822
Epoch 10, Accuracy: 46.03%
Epoch 11, Time: 46.16123294830322, Loss: 1.4080666071923493
Epoch 11, Accuracy: 45.76%
Epoch 

42.57

In [43]:

# Branching Attention + ConvNN Attention (K = N): B_Attention_ConvNN_Attn_K_N with 8 layers and 10 output classes, built for the GPU.
B_Attention_ConvNN_Attn_N_8 = B_Attention_ConvNN_Attn_K_N(num_layers=8, num_classes=10, device="cuda")

# Print the model's name and trainable-parameter count, then a blank line.
print(f"Model: {B_Attention_ConvNN_Attn_N_8.name}")
print(f"Num params: {count_parameters(B_Attention_ConvNN_Attn_N_8)}")
print()

# Training setup: cross-entropy loss, Adam with lr=1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(B_Attention_ConvNN_Attn_N_8.parameters(), lr=1e-4)

# Train on the CIFAR10 loaders; the final accuracy check stays last in the cell.
train_eval(
    B_Attention_ConvNN_Attn_N_8,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device="cuda",
)
evaluate_accuracy(B_Attention_ConvNN_Attn_N_8, cifar10.test_loader, device="cuda")


Model: B_Attention_ConvNN_Attn_K_N
Num params: 2174346

Epoch 1, Time: 40.27579998970032, Loss: 2.3037165821055927
Epoch 1, Accuracy: 10.48%
Epoch 2, Time: 41.69819521903992, Loss: 2.3026855848634336
Epoch 2, Accuracy: 10.03%
Epoch 3, Time: 40.334468841552734, Loss: 2.302626049122237
Epoch 3, Accuracy: 10.22%
Epoch 4, Time: 41.16828727722168, Loss: 2.302619026445062
Epoch 4, Accuracy: 9.36%
Epoch 5, Time: 40.74784445762634, Loss: 2.302617128852688
Epoch 5, Accuracy: 10.0%
Epoch 6, Time: 40.188294887542725, Loss: 2.3026162919485964
Epoch 6, Accuracy: 10.36%
Epoch 7, Time: 41.084755659103394, Loss: 2.302608427184317
Epoch 7, Accuracy: 9.79%
Epoch 8, Time: 42.48478364944458, Loss: 2.3026073488128156
Epoch 8, Accuracy: 10.07%
Epoch 9, Time: 42.359257221221924, Loss: 2.3026369889373974
Epoch 9, Accuracy: 10.08%
Epoch 10, Time: 40.10337710380554, Loss: 2.3026071957615026
Epoch 10, Accuracy: 10.35%
Epoch 11, Time: 40.475887298583984, Loss: 2.302619202972373
Epoch 11, Accuracy: 9.92%
Epoch 12,

10.0

In [42]:

# Branching Attention + ConvNN Attention Spatial (K = N): B_Attention_ConvNN_Attn_Spatial_K_N with 8 layers and 10 output classes, built for the GPU.
B_Attention_ConvNN_Attn_Spatial_N_8 = B_Attention_ConvNN_Attn_Spatial_K_N(num_layers=8, num_classes=10, device="cuda")

# Print the model's name and trainable-parameter count, then a blank line.
print(f"Model: {B_Attention_ConvNN_Attn_Spatial_N_8.name}")
print(f"Num params: {count_parameters(B_Attention_ConvNN_Attn_Spatial_N_8)}")
print()

# Training setup: cross-entropy loss, Adam with lr=1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(B_Attention_ConvNN_Attn_Spatial_N_8.parameters(), lr=1e-4)

# Train on the CIFAR10 loaders; the final accuracy check stays last in the cell.
train_eval(
    B_Attention_ConvNN_Attn_Spatial_N_8,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device="cuda",
)
evaluate_accuracy(B_Attention_ConvNN_Attn_Spatial_N_8, cifar10.test_loader, device="cuda")


Model: B_Attention_ConvNN_Attn_Spatial_K_N
Num params: 1682826

Epoch 1, Time: 40.78361248970032, Loss: 2.3035642003159387
Epoch 1, Accuracy: 9.79%
Epoch 2, Time: 41.52797317504883, Loss: 2.3026363675856527
Epoch 2, Accuracy: 9.96%
Epoch 3, Time: 40.6495897769928, Loss: 2.3026207020825438
Epoch 3, Accuracy: 10.0%
Epoch 4, Time: 42.88854670524597, Loss: 2.3026382127381346
Epoch 4, Accuracy: 9.74%
Epoch 5, Time: 43.37523126602173, Loss: 2.302619264863641
Epoch 5, Accuracy: 10.29%
Epoch 6, Time: 42.25349760055542, Loss: 2.302642874705517
Epoch 6, Accuracy: 10.19%
Epoch 7, Time: 40.803550243377686, Loss: 2.302609513787662
Epoch 7, Accuracy: 9.42%
Epoch 8, Time: 42.46760129928589, Loss: 2.302605216765343
Epoch 8, Accuracy: 9.81%
Epoch 9, Time: 42.685943603515625, Loss: 2.302606567092564
Epoch 9, Accuracy: 9.98%
Epoch 10, Time: 41.5747492313385, Loss: 2.302636400513027
Epoch 10, Accuracy: 10.29%
Epoch 11, Time: 41.72130560874939, Loss: 2.3026066411791555
Epoch 11, Accuracy: 10.05%
Epoch 12, 

10.0

In [41]:

# Branching Attention + Conv2d: B_Attention_Conv2d with 8 layers and 10 output classes, built for the GPU.
B_Attention_Conv2d_8 = B_Attention_Conv2d(num_layers=8, num_classes=10, device="cuda")

# Print the model's name and trainable-parameter count, then a blank line.
print(f"Model: {B_Attention_Conv2d_8.name}")
print(f"Num params: {count_parameters(B_Attention_Conv2d_8)}")
print()

# Training setup: cross-entropy loss, Adam with lr=1e-4, 100 epochs.
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(B_Attention_Conv2d_8.parameters(), lr=1e-4)

# Train on the CIFAR10 loaders; the final accuracy check stays last in the cell.
train_eval(
    B_Attention_Conv2d_8,
    cifar10.train_loader,
    cifar10.test_loader,
    criterion,
    optimizer,
    num_epochs,
    device="cuda",
)
evaluate_accuracy(B_Attention_Conv2d_8, cifar10.test_loader, device="cuda")



Model: B_Attention_Conv2d
Num params: 350266

Epoch 1, Time: 31.2140052318573, Loss: 2.1532357909795268
Epoch 1, Accuracy: 23.23%
Epoch 2, Time: 31.040987014770508, Loss: 1.9692856245638464
Epoch 2, Accuracy: 26.36%
Epoch 3, Time: 30.357370853424072, Loss: 1.843303451300277
Epoch 3, Accuracy: 37.76%
Epoch 4, Time: 30.377668142318726, Loss: 1.67559206653434
Epoch 4, Accuracy: 40.33%
Epoch 5, Time: 30.467301845550537, Loss: 1.6106748190682258
Epoch 5, Accuracy: 41.96%
Epoch 6, Time: 30.426469326019287, Loss: 1.5696987422835795
Epoch 6, Accuracy: 43.41%
Epoch 7, Time: 31.10246729850769, Loss: 1.5358229579827976
Epoch 7, Accuracy: 44.67%
Epoch 8, Time: 30.98026132583618, Loss: 1.5097383015296038
Epoch 8, Accuracy: 46.04%
Epoch 9, Time: 32.13800764083862, Loss: 1.4883151171762314
Epoch 9, Accuracy: 46.18%
Epoch 10, Time: 32.24999260902405, Loss: 1.4671714918692704
Epoch 10, Accuracy: 47.57%
Epoch 11, Time: 30.484927892684937, Loss: 1.4459010481529528
Epoch 11, Accuracy: 47.38%
Epoch 12, Tim

54.15