# ConvNN Attention Test 
## I. 2D Training for testing with CIFAR10 Dataset

In [None]:
# Torch
import torch 
import torch.nn as nn
import torch.nn.functional as F
from torch import optim 


# Train + Data 
import sys 
sys.path.append('../Layers')
from Conv1d_NN_spatial import * 
from Conv2d_NN_spatial import * 

sys.path.append('../Data')
from CIFAR10 import * 


sys.path.append('../Models')
from CIFAR_experiment_models.Attention import Attention
from CIFAR_experiment_models.BranchingConvNN import Branching_ConvNN_K_All, Branching_ConvNN_K_N, Branching_ConvNN_Spatial_K_N, Branching_ConvNN_Attention_K_N
from CIFAR_experiment_models.ConvNN import ConvNN_K_All,ConvNN_K_N, ConvNN_Spatial_K_N, ConvNN_Attn_K_N
from CIFAR_experiment_models.CNN_Control import CNN


sys.path.append('../Train')
from train2d import train_eval, evaluate_accuracy


In [16]:
cifar10 = CIFAR10()

In [None]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


### 2 Layer Models

In [None]:

# CNN
CNN_2 = CNN(num_layers=2, num_classes=10, device='cuda')

print("Model: " + CNN_2.name)
print("Num params: " + str(count_parameters(CNN_2)))
print("Num layers: " + str(CNN_2.num_layers))
print()
# Test + Eval
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(CNN_2.parameters(), lr=0.0001)
num_epochs = 100 
train_eval(CNN_2, cifar10.train_loader, cifar10.test_loader, criterion, optimizer, num_epochs, device='cuda')
evaluate_accuracy(CNN_2, cifar10.test_loader, device='cuda')


AssertionError: Torch not compiled with CUDA enabled

In [None]:
# Attention
Attention_2 = Attention(num_layers=2, num_classes=10, device='cuda')

print("Model: " + Attention_2.name)
print("Num params: " + str(count_parameters(Attention_2)))
print("Num layers: " + str(Attention_2.num_layers))
print()

# Test + Eval
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Attention_2.parameters(), lr=0.0001)
num_epochs = 100 
train_eval(Attention_2, cifar10.train_loader, cifar10.test_loader, criterion, optimizer, num_epochs, device='cuda')
evaluate_accuracy(Attention_2, cifar10.test_loader, device='cuda')


In [None]:
# ConvNN All 
ConvNN_All_2 = ConvNN_K_All(num_layers=2, num_classes=10, device='cuda')

print("Model: " + ConvNN_All_2.name)
print("Num params: " + str(count_parameters(ConvNN_All_2)))
print("Num layers: " + str(ConvNN_All_2.num_layers))
print()

# Test + Eval
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_All_2.parameters(), lr=0.0001)
num_epochs = 100 
train_eval(ConvNN_All_2, cifar10.train_loader, cifar10.test_loader, criterion, optimizer, num_epochs, device='cuda')
evaluate_accuracy(ConvNN_All_2, cifar10.test_loader, device='cuda')


In [None]:
# ConvNN N 
ConvNN_N_2 = ConvNN_K_N(num_layers=2, num_classes=10, device='cuda')

print("Model: " + ConvNN_N_2.name)
print("Num params: " + str(count_parameters(ConvNN_N_2)))
print("Num layers: " + str(ConvNN_N_2.num_layers))
print()

# Test + Eval
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_N_2.parameters(), lr=0.0001)
num_epochs = 100 
train_eval(ConvNN_N_2, cifar10.train_loader, cifar10.test_loader, criterion, optimizer, num_epochs, device='cuda')
evaluate_accuracy(ConvNN_N_2, cifar10.test_loader, device='cuda')


In [None]:
# ConvNN Spatial N
ConvNN_Spatial_N_2 = ConvNN_Spatial_K_N(num_layers=2, num_classes=10, device='cuda')

print("Model: " + ConvNN_Spatial_N_2.name)
print("Num params: " + str(count_parameters(ConvNN_Spatial_N_2)))
print("Num layers: " + str(ConvNN_Spatial_N_2.num_layers))
print()

# Test + Eval
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_Spatial_N_2.parameters(), lr=0.0001)
num_epochs = 100 
train_eval(ConvNN_Spatial_N_2, cifar10.train_loader, cifar10.test_loader, criterion, optimizer, num_epochs, device='cuda')
evaluate_accuracy(ConvNN_Spatial_N_2, cifar10.test_loader, device='cuda')


In [None]:
# ConvNN Attention N
ConvNN_Attn_N_2 = ConvNN_Attn_K_N(num_layers=2, num_classes=10, device='cuda')

print("Model: " + ConvNN_Attn_N_2.name)
print("Num params: " + str(count_parameters(ConvNN_Attn_N_2)))
print("Num layers: " + str(ConvNN_Attn_N_2.num_layers))
print()


# Test + Eval
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_Attn_N_2.parameters(), lr=0.0001)
num_epochs = 100 
train_eval(ConvNN_Attn_N_2, cifar10.train_loader, cifar10.test_loader, criterion, optimizer, num_epochs, device='cuda')
evaluate_accuracy(ConvNN_Attn_N_2, cifar10.test_loader, device='cuda')


#### ii. Branching

In [None]:

# Branching ConvNN All
Branching_ConvNN_All_2 = Branching_ConvNN_K_All(num_layers=2, num_classes=10, device='cuda')

print("Model: " + Branching_ConvNN_All_2.name)
print("Num params: " + str(count_parameters(Branching_ConvNN_All_2)))
print("Num layers: " + str(Branching_ConvNN_All_2.num_layers))
print()

# Test + Eval
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Branching_ConvNN_All_2.parameters(), lr=0.0001)
num_epochs = 100 
train_eval(Branching_ConvNN_All_2, cifar10.train_loader, cifar10.test_loader, criterion, optimizer, num_epochs, device='cuda')
evaluate_accuracy(Branching_ConvNN_All_2, cifar10.test_loader, device='cuda')


In [None]:

# Branching ConvNN N
Branching_ConvNN_N_2 = Branching_ConvNN_K_N(num_layers=2, num_classes=10, device='cuda')

print("Model: " + Branching_ConvNN_N_2.name)
print("Num params: " + str(count_parameters(Branching_ConvNN_N_2)))
print("Num layers: " + str(Branching_ConvNN_N_2.num_layers))
print()

# Test + Eval
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Branching_ConvNN_N_2.parameters(), lr=0.0001)
num_epochs = 100 
train_eval(Branching_ConvNN_N_2, cifar10.train_loader, cifar10.test_loader, criterion, optimizer, num_epochs, device='cuda')
evaluate_accuracy(Branching_ConvNN_N_2, cifar10.test_loader, device='cuda')


In [None]:

# Branching ConvNN Spatial N
Branching_ConvNN_Spatial_N_2 = Branching_ConvNN_Spatial_K_N(num_layers=2, num_classes=10, device='cuda')

print("Model: " + Branching_ConvNN_Spatial_N_2.name)
print("Num params: " + str(count_parameters(Branching_ConvNN_Spatial_N_2)))
print("Num layers: " + str(Branching_ConvNN_Spatial_N_2.num_layers))
print()

# Test + Eval
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Branching_ConvNN_Spatial_N_2.parameters(), lr=0.0001)
num_epochs = 100 
train_eval(Branching_ConvNN_Spatial_N_2, cifar10.train_loader, cifar10.test_loader, criterion, optimizer, num_epochs, device='cuda')
evaluate_accuracy(Branching_ConvNN_Spatial_N_2, cifar10.test_loader, device='cuda')


In [None]:

# Branching ConvNN Attention N 
Branching_ConvNN_Attn_N_2 = Branching_ConvNN_Attention_K_N(num_layers=2, num_classes=10, device='cuda')

print("Model: " + Branching_ConvNN_Attn_N_2.name)
print("Num params: " + str(count_parameters(Branching_ConvNN_Attn_N_2)))
print("Num layers: " + str(Branching_ConvNN_Attn_N_2.num_layers))
print()

# Test + Eval
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Branching_ConvNN_Attn_N_2.parameters(), lr=0.0001)
num_epochs = 100 
train_eval(Branching_ConvNN_Attn_N_2, cifar10.train_loader, cifar10.test_loader, criterion, optimizer, num_epochs, device='cuda')
evaluate_accuracy(Branching_ConvNN_Attn_N_2, cifar10.test_loader, device='cuda')


### 4 Layer Models

In [None]:

# CNN
CNN_4 = CNN(num_layers=4, num_classes=10, device='cuda')

print("Model: " + CNN_4.name)
print("Num params: " + str(count_parameters(CNN_4)))
print("Num layers: " + str(CNN_4.num_layers))
print()

# Test + Eval
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(CNN_4.parameters(), lr=0.0001)
num_epochs = 100 
train_eval(CNN_4, cifar10.train_loader, cifar10.test_loader, criterion, optimizer, num_epochs, device='cuda')
evaluate_accuracy(CNN_4, cifar10.test_loader, device='cuda')


In [None]:

# Attention
Attention_4 = Attention(num_layers=4, num_classes=10, device='cuda')

print("Model: " + Attention_4.name)
print("Num params: " + str(count_parameters(Attention_4)))
print("Num layers: " + str(Attention_4.num_layers))
print()

# Test + Eval
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Attention_4.parameters(), lr=0.0001)
num_epochs = 100 
train_eval(Attention_4, cifar10.train_loader, cifar10.test_loader, criterion, optimizer, num_epochs, device='cuda')
evaluate_accuracy(Attention_4, cifar10.test_loader, device='cuda')


In [None]:
# ConvNN All 
ConvNN_All_4 = ConvNN_K_All(num_layers=4, num_classes=10, device='cuda')

print("Model: " + ConvNN_All_4.name)
print("Num params: " + str(count_parameters(ConvNN_All_4)))
print("Num layers: " + str(ConvNN_All_4.num_layers))
print()


# Test + Eval
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_All_4.parameters(), lr=0.0001)
num_epochs = 100 
train_eval(ConvNN_All_4, cifar10.train_loader, cifar10.test_loader, criterion, optimizer, num_epochs, device='cuda')
evaluate_accuracy(ConvNN_All_4, cifar10.test_loader, device='cuda')


In [None]:
# ConvNN N 
ConvNN_N_4 = ConvNN_K_N(num_layers=4, num_classes=10, device='cuda')

print("Model: " + ConvNN_N_4.name)
print("Num params: " + str(count_parameters(ConvNN_N_4)))
print("Num layers: " + str(ConvNN_N_4.num_layers))
print()

# Test + Eval
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_N_4.parameters(), lr=0.0001)
num_epochs = 100 
train_eval(ConvNN_N_4, cifar10.train_loader, cifar10.test_loader, criterion, optimizer, num_epochs, device='cuda')
evaluate_accuracy(ConvNN_N_4, cifar10.test_loader, device='cuda')


In [None]:
# ConvNN Spatial N
ConvNN_Spatial_N_4 = ConvNN_Spatial_K_N(num_layers=4, num_classes=10, device='cuda')

print("Model: " + ConvNN_Spatial_N_4.name)
print("Num params: " + str(count_parameters(ConvNN_Spatial_N_4)))
print("Num layers: " + str(ConvNN_Spatial_N_4.num_layers))
print()

# Test + Eval
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_Spatial_N_4.parameters(), lr=0.0001)
num_epochs = 100 
train_eval(ConvNN_Spatial_N_4, cifar10.train_loader, cifar10.test_loader, criterion, optimizer, num_epochs, device='cuda')
evaluate_accuracy(ConvNN_Spatial_N_4, cifar10.test_loader, device='cuda')


In [None]:
# ConvNN Attention N
ConvNN_Attn_N_4 = ConvNN_Attn_K_N(num_layers=4, num_classes=10, device='cuda')

print("Model: " + ConvNN_Attn_N_4.name)
print("Num params: " + str(count_parameters(ConvNN_Attn_N_4)))
print("Num layers: " + str(ConvNN_Attn_N_4.num_layers))
print()

# Test + Eval
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_Attn_N_4.parameters(), lr=0.0001)
num_epochs = 100 
train_eval(ConvNN_Attn_N_4, cifar10.train_loader, cifar10.test_loader, criterion, optimizer, num_epochs, device='cuda')
evaluate_accuracy(ConvNN_Attn_N_4, cifar10.test_loader, device='cuda')


#### ii. Branching

In [None]:
# Branching ConvNN All
Branching_ConvNN_All_4 = Branching_ConvNN_K_All(num_layers=4, num_classes=10, device='cuda')

print("Model: " + Branching_ConvNN_All_4.name)
print("Num params: " + str(count_parameters(Branching_ConvNN_All_4)))
print("Num layers: " + str(Branching_ConvNN_All_4.num_layers))
print()

# Test + Eval
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Branching_ConvNN_All_4.parameters(), lr=0.0001)
num_epochs = 100 
train_eval(Branching_ConvNN_All_4, cifar10.train_loader, cifar10.test_loader, criterion, optimizer, num_epochs, device='cuda')
evaluate_accuracy(Branching_ConvNN_All_4, cifar10.test_loader, device='cuda')


In [None]:
# Branching ConvNN N
Branching_ConvNN_N_4 = Branching_ConvNN_K_N(num_layers=4, num_classes=10, device='cuda')

print("Model: " + Branching_ConvNN_N_4.name)
print("Num params: " + str(count_parameters(Branching_ConvNN_N_4)))
print("Num layers: " + str(Branching_ConvNN_N_4.num_layers))
print()

# Test + Eval
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Branching_ConvNN_N_4.parameters(), lr=0.0001)
num_epochs = 100 
train_eval(Branching_ConvNN_N_4, cifar10.train_loader, cifar10.test_loader, criterion, optimizer, num_epochs, device='cuda')
evaluate_accuracy(Branching_ConvNN_N_4, cifar10.test_loader, device='cuda')


In [None]:
# Branching ConvNN Spatial N
Branching_ConvNN_Spatial_N_4 = Branching_ConvNN_Spatial_K_N(num_layers=4, num_classes=10, device='cuda')

print("Model: " + Branching_ConvNN_Spatial_N_4.name)
print("Num params: " + str(count_parameters(Branching_ConvNN_Spatial_N_4)))
print("Num layers: " + str(Branching_ConvNN_Spatial_N_4.num_layers))
print()

# Test + Eval
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Branching_ConvNN_Spatial_N_4.parameters(), lr=0.0001)
num_epochs = 100 
train_eval(Branching_ConvNN_Spatial_N_4, cifar10.train_loader, cifar10.test_loader, criterion, optimizer, num_epochs, device='cuda')
evaluate_accuracy(Branching_ConvNN_Spatial_N_4, cifar10.test_loader, device='cuda')


In [None]:
# Branching ConvNN Attention N 
Branching_ConvNN_Attn_N_4 = Branching_ConvNN_Attention_K_N(num_layers=4, num_classes=10, device='cuda')

print("Model: " + Branching_ConvNN_Attn_N_4.name)
print("Num params: " + str(count_parameters(Branching_ConvNN_Attn_N_4)))
print("Num layers: " + str(Branching_ConvNN_Attn_N_4.num_layers))
print()

# Test + Eval
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Branching_ConvNN_Attn_N_4.parameters(), lr=0.0001)
num_epochs = 100 
train_eval(Branching_ConvNN_Attn_N_4, cifar10.train_loader, cifar10.test_loader, criterion, optimizer, num_epochs, device='cuda')
evaluate_accuracy(Branching_ConvNN_Attn_N_4, cifar10.test_loader, device='cuda')


### 8 Layer Models

In [None]:

# CNN
CNN_8 = CNN(num_layers=8, num_classes=10, device='cuda')

print("Model: " + CNN_8.name)
print("Num params: " + str(count_parameters(CNN_8)))
print("Num layers: " + str(CNN_8.num_layers))
print()

# Test + Eval
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(CNN_8.parameters(), lr=0.0001)
num_epochs = 100 
train_eval(CNN_8, cifar10.train_loader, cifar10.test_loader, criterion, optimizer, num_epochs, device='cuda')
evaluate_accuracy(CNN_8, cifar10.test_loader, device='cuda')


In [None]:

# Attention
Attention_8 = Attention(num_layers=8, num_classes=10, device='cuda')

print("Model: " + Attention_8.name)
print("Num params: " + str(count_parameters(Attention_8)))
print("Num layers: " + str(Attention_8.num_layers))
print()

# Test + Eval
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Attention_8.parameters(), lr=0.0001)
num_epochs = 100 
train_eval(Attention_8, cifar10.train_loader, cifar10.test_loader, criterion, optimizer, num_epochs, device='cuda')
evaluate_accuracy(Attention_8, cifar10.test_loader, device='cuda')


In [None]:
# ConvNN All 
ConvNN_All_8 = ConvNN_K_All(num_layers=8, num_classes=10, device='cuda')

print("Model: " + ConvNN_All_8.name)
print("Num params: " + str(count_parameters(ConvNN_All_8)))
print("Num layers: " + str(ConvNN_All_8.num_layers))
print()

# Test + Eval
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_All_8.parameters(), lr=0.0001)
num_epochs = 100 
train_eval(ConvNN_All_8, cifar10.train_loader, cifar10.test_loader, criterion, optimizer, num_epochs, device='cuda')
evaluate_accuracy(ConvNN_All_8, cifar10.test_loader, device='cuda')


In [None]:
# ConvNN N 
ConvNN_N_8 = ConvNN_K_N(num_layers=8, num_classes=10, device='cuda')

print("Model: " + ConvNN_N_8.name)
print("Num params: " + str(count_parameters(ConvNN_N_8)))
print("Num layers: " + str(ConvNN_N_8.num_layers))
print()

# Test + Eval
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_N_8.parameters(), lr=0.0001)
num_epochs = 100 
train_eval(ConvNN_N_8, cifar10.train_loader, cifar10.test_loader, criterion, optimizer, num_epochs, device='cuda')
evaluate_accuracy(ConvNN_N_8, cifar10.test_loader, device='cuda')


In [None]:
# ConvNN Spatial N
ConvNN_Spatial_N_8 = ConvNN_Spatial_K_N(num_layers=8, num_classes=10, device='cuda')

print("Model: " + ConvNN_Spatial_N_8.name)
print("Num params: " + str(count_parameters(ConvNN_Spatial_N_8)))
print("Num layers: " + str(ConvNN_Spatial_N_8.num_layers))
print()

# Test + Eval
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_Spatial_N_8.parameters(), lr=0.0001)
num_epochs = 100 
train_eval(ConvNN_Spatial_N_8, cifar10.train_loader, cifar10.test_loader, criterion, optimizer, num_epochs, device='cuda')
evaluate_accuracy(ConvNN_Spatial_N_8, cifar10.test_loader, device='cuda')


In [None]:
# ConvNN Attention N
ConvNN_Attn_N_8 = ConvNN_Attn_K_N(num_layers=8, num_classes=10, device='cuda')

print("Model: " + ConvNN_Attn_N_8.name)
print("Num params: " + str(count_parameters(ConvNN_Attn_N_8)))  
print("Num layers: " + str(ConvNN_Attn_N_8.num_layers))
print()

# Test + Eval
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(ConvNN_Attn_N_8.parameters(), lr=0.0001)
num_epochs = 100 
train_eval(ConvNN_Attn_N_8, cifar10.train_loader, cifar10.test_loader, criterion, optimizer, num_epochs, device='cuda')
evaluate_accuracy(ConvNN_Attn_N_8, cifar10.test_loader, device='cuda')


#### ii. Branching

In [None]:
# Branching ConvNN All
Branching_ConvNN_All_8 = Branching_ConvNN_K_All(num_layers=8, num_classes=10, device='cuda')

print("Model: " + Branching_ConvNN_All_8.name)
print("Num params: " + str(count_parameters(Branching_ConvNN_All_8)))
print("Num layers: " + str(Branching_ConvNN_All_8.num_layers))
print()

# Test + Eval
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Branching_ConvNN_All_8.parameters(), lr=0.0001)
num_epochs = 100 
train_eval(Branching_ConvNN_All_8, cifar10.train_loader, cifar10.test_loader, criterion, optimizer, num_epochs, device='cuda')
evaluate_accuracy(Branching_ConvNN_All_8, cifar10.test_loader, device='cuda')


In [None]:
# Branching ConvNN N
Branching_ConvNN_N_8 = Branching_ConvNN_K_N(num_layers=8, num_classes=10, device='cuda')

print("Model: " + Branching_ConvNN_N_8.name)
print("Num params: " + str(count_parameters(Branching_ConvNN_N_8)))
print("Num layers: " + str(Branching_ConvNN_N_8.num_layers))
print()

# Test + Eval
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Branching_ConvNN_N_8.parameters(), lr=0.0001)
num_epochs = 100 
train_eval(Branching_ConvNN_N_8, cifar10.train_loader, cifar10.test_loader, criterion, optimizer, num_epochs, device='cuda')
evaluate_accuracy(Branching_ConvNN_N_8, cifar10.test_loader, device='cuda')


In [None]:
# Branching ConvNN Spatial N
Branching_ConvNN_Spatial_N_8 = Branching_ConvNN_Spatial_K_N(num_layers=8, num_classes=10, device='cuda')

print("Model: " + Branching_ConvNN_Spatial_N_8.name)
print("Num params: " + str(count_parameters(Branching_ConvNN_Spatial_N_8)))
print("Num layers: " + str(Branching_ConvNN_Spatial_N_8.num_layers))
print()

# Test + Eval
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Branching_ConvNN_Spatial_N_8.parameters(), lr=0.0001)
num_epochs = 100 
train_eval(Branching_ConvNN_Spatial_N_8, cifar10.train_loader, cifar10.test_loader, criterion, optimizer, num_epochs, device='cuda')
evaluate_accuracy(Branching_ConvNN_Spatial_N_8, cifar10.test_loader, device='cuda')


In [None]:
# Branching ConvNN Attention N 
Branching_ConvNN_Attn_N_8 = Branching_ConvNN_Attention_K_N(num_layers=8, num_classes=10, device='cuda')

print("Model: " + Branching_ConvNN_Attn_N_8.name)
print("Num params: " + str(count_parameters(Branching_ConvNN_Attn_N_8)))
print("Num layers: " + str(Branching_ConvNN_Attn_N_8.num_layers))
print()

# Test + Eval
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Branching_ConvNN_Attn_N_8.parameters(), lr=0.0001)
num_epochs = 100 
train_eval(Branching_ConvNN_Attn_N_8, cifar10.train_loader, cifar10.test_loader, criterion, optimizer, num_epochs, device='cuda')
evaluate_accuracy(Branching_ConvNN_Attn_N_8, cifar10.test_loader, device='cuda')
