In [1]:
import torch
import torch.nn as nn                 
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

from model import *
# Define the Vision Transformer model

device = torch.device("cuda:3" if torch.cuda.is_available() else "cpu")

from tqdm import tqdm

import pandas as pd
# auto reload
%load_ext autoreload
%autoreload 2


In [2]:

    
# Define the Vision Transformer model
class VisionTransformer(nn.Module):
    def __init__(self, num_classes, embed_dim, dim, num_heads, img_size, patch_size, in_channels=3):
        super(VisionTransformer, self).__init__()
        self.num_patches = (img_size // patch_size) ** 2
        self.patch_embedding = nn.Conv2d(in_channels, embed_dim, kernel_size=patch_size, stride=patch_size)
        self.positional_embedding = nn.Parameter(torch.randn(1, self.num_patches, embed_dim))
        self.hopfield = KHopfield(N=dim, n=embed_dim * self.num_patches)
        self.fc = nn.Linear(embed_dim * self.num_patches, num_classes)
        self.num_heads = num_heads

    def forward(self, x):
        x1 = self.patch_embedding(x)  # (batch_size, embed_dim, num_patches_h, num_patches_w)
        x2 = x1.permute(0, 2, 3, 1)  # (batch_size, num_patches_h, num_patches_w, embed_dim)
        x3 = x2.reshape(x2.size(0), -1, x2.size(-1))  # (batch_size, num_patches, embed_dim)
        
        x4 = x3 + self.positional_embedding  # Add positional embedding
        # combine second and third dimension
        x5 = x4.flatten(1, 2)
        x6 = self.hopfield(x5, self.num_heads)
        x7 = x6.mean(dim=2)  # Global average pooling
        x8 = self.fc(x7)
        return x8
    
    def to(self, device):
        super(VisionTransformer, self).to(device)
        self.hopfield = self.hopfield.to(device)
        return self
    
# Uses #num_heads k=1 hopfield networks, rather than k=num_heads-hopfield networks
class VisionTransformerV(nn.Module):
    def __init__(self, num_classes, embed_dim, dim, num_heads, img_size, patch_size, in_channels=3):
        super(VisionTransformerV, self).__init__()
        self.num_patches = (img_size // patch_size) ** 2
        self.patch_embedding = nn.Conv2d(in_channels, embed_dim, kernel_size=patch_size, stride=patch_size)
        self.positional_embedding = nn.Parameter(torch.randn(1, self.num_patches, embed_dim))

        self.hopfields = nn.ModuleList([KHopfield(N=dim, n=embed_dim * self.num_patches) for _ in range(num_heads)])
        self.fc = nn.Linear(embed_dim * self.num_patches, num_classes)
        self.num_heads = num_heads

    def forward(self, x):
        x1 = self.patch_embedding(x)  # (batch_size, embed_dim, num_patches_h, num_patches_w)
        x2 = x1.permute(0, 2, 3, 1)  # (batch_size, num_patches_h, num_patches_w, embed_dim)
        x3 = x2.reshape(x2.size(0), -1, x2.size(-1))  # (batch_size, num_patches, embed_dim)
        
        x4 = x3 + self.positional_embedding  # Add positional embedding
        # combine second and third dimension
        x5 = x4.flatten(1, 2)
        x6 = [self.hopfields[i](x5, 1) for i in range(self.num_heads)]
        # take average of all heads
        x6 = torch.stack(x6, dim=2).squeeze()
        x7 = x6.mean(dim=2)  # Global average pooling
        x8 = self.fc(x7)
        return x8
    


In [3]:
# # Define the Vision Transformer model
# class VisionTransformer(nn.Module):
#     def __init__(self, num_classes, embed_dim, num_heads, num_layers, img_size, patch_size):
#         super(VisionTransformer, self).__init__()
#         num_patches = (img_size // patch_size) ** 2
#         self.patch_embedding = nn.Conv2d(3, embed_dim, kernel_size=patch_size, stride=patch_size)
#         self.positional_embedding = nn.Parameter(torch.randn(1, num_patches + 1, embed_dim))
#         self.transformer = nn.Transformer(
#             d_model=embed_dim,
#             nhead=num_heads,
#             num_encoder_layers=num_layers,
#             dim_feedforward=2048,
#             dropout=0.1,
#         )
#         self.fc = nn.Linear(embed_dim, num_classes)

#     def forward(self, x):
#         x = self.patch_embedding(x)  # (batch_size, embed_dim, num_patches_h, num_patches_w)
#         x = x.permute(0, 2, 3, 1)  # (batch_size, num_patches_h, num_patches_w, embed_dim)
#         x = x.reshape(x.size(0), -1, x.size(-1))  # (batch_size, num_patches, embed_dim)
#         x = torch.cat([self.positional_embedding, x], dim=1)
#         x = self.transformer(x)
#         x = x.mean(dim=1)  # Global average pooling
#         x = self.fc(x)
#         return x


In [5]:
# Hyperparameters
batch_size = 256
num_epochs = 10
learning_rate = 1e-3
num_classes = 10
img_size = 32  # Assuming CIFAR-10 image size

#mnist

patch_size = 16  # Adjust this based on your preference

datast = 'mnist'
dataset = 'cifar'
num_heads = 4

if dataset == 'mnist':
    transform = transforms.Compose([transforms.ToTensor()])
    train_dataset = torchvision.datasets.MNIST(root='~/data', train=True, transform=transforms.ToTensor(), download=True)
    test_dataset = torchvision.datasets.MNIST(root='~/data', train=False, transform=transforms.ToTensor(), download=True)
    img_size = 28
    in_channels = 1
    num_classes = 10
else:
    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    train_dataset = torchvision.datasets.CIFAR10(root='~/data', train=True, transform=transform, download=True)
    test_dataset = torchvision.datasets.CIFAR10(root='~/data', train=False, transform=transform, download=True)
    img_size = 32
    in_channels = 3
    num_classes = 10

# Data preprocessing
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)


test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


# Initialize the model and optimizer
model = VisionTransformer(
    num_classes = num_classes, 
    embed_dim = 256, 
    dim  = 1024,
    num_heads = num_heads, 
    img_size = img_size, 
    patch_size = patch_size, 
    in_channels = in_channels,
)
modelV= VisionTransformerV(
    num_classes = num_classes, 
    embed_dim = 256, 
    dim  = 1024,
    num_heads = num_heads, 
    img_size = img_size, 
    patch_size = patch_size, 
    in_channels = in_channels,
)
modelS = SimpleViT(
    image_size = img_size,
    patch_size = patch_size,
    num_classes = num_classes,
    dim = 1024,
    depth = 1,
    heads = 4,
    mlp_dim = 2048
)


optimizer = optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

model = model.to(device)
modelV = modelV.to(device)
print(device)






Files already downloaded and verified
Files already downloaded and verified
transformer 1
cuda:3


In [3]:
def validate(model, val_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in tqdm(val_loader):
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    return 100 * correct / total

In [7]:
# Training loop
for epoch in range(num_epochs):

    # show loss in tqdm
    model.train()
    pbar = tqdm(enumerate(train_loader), total=len(train_loader))
    for i, (images, labels) in pbar:
        images = images.to(device)
        labels = labels.to(device)

    
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        pbar.set_description(f'Epoch [{epoch + 1}/{num_epochs}] Loss: {loss.item():.4f}')

    # validate
    accuracy = validate(model, test_loader)

    print(f'Epoch [{epoch + 1}/{num_epochs}] Loss: {loss.item():.4f}, Val Accuracy: {accuracy:.4f}')
    

print('Training finished!')

  0%|          | 0/196 [00:00<?, ?it/s]

Epoch [1/10] Loss: 2.2988:   1%|          | 2/196 [00:00<00:29,  6.64it/s]

shapes
torch.Size([256, 1024, 4])
torch.Size([256, 1024])
shapes
torch.Size([256, 1024, 4])
torch.Size([256, 1024])


Epoch [1/10] Loss: 2.2893:   2%|▏         | 4/196 [00:00<00:28,  6.76it/s]

shapes
torch.Size([256, 1024, 4])
torch.Size([256, 1024])
shapes
torch.Size([256, 1024, 4])
torch.Size([256, 1024])


Epoch [1/10] Loss: 2.2714:   3%|▎         | 6/196 [00:00<00:27,  6.92it/s]

shapes
torch.Size([256, 1024, 4])
torch.Size([256, 1024])
shapes
torch.Size([256, 1024, 4])
torch.Size([256, 1024])


Epoch [1/10] Loss: 2.2638:   4%|▍         | 8/196 [00:01<00:25,  7.30it/s]

shapes
torch.Size([256, 1024, 4])
torch.Size([256, 1024])
shapes
torch.Size([256, 1024, 4])
torch.Size([256, 1024])


Epoch [1/10] Loss: 2.2408:   5%|▌         | 10/196 [00:01<00:24,  7.54it/s]

shapes
torch.Size([256, 1024, 4])
torch.Size([256, 1024])
shapes
torch.Size([256, 1024, 4])
torch.Size([256, 1024])


Epoch [1/10] Loss: 2.2058:   6%|▌         | 12/196 [00:01<00:23,  7.72it/s]

shapes
torch.Size([256, 1024, 4])
torch.Size([256, 1024])
shapes
torch.Size([256, 1024, 4])
torch.Size([256, 1024])


Epoch [1/10] Loss: 2.1973:   7%|▋         | 14/196 [00:01<00:23,  7.81it/s]

shapes
torch.Size([256, 1024, 4])
torch.Size([256, 1024])
shapes
torch.Size([256, 1024, 4])
torch.Size([256, 1024])


Epoch [1/10] Loss: 2.1753:   8%|▊         | 16/196 [00:02<00:23,  7.79it/s]

shapes
torch.Size([256, 1024, 4])
torch.Size([256, 1024])
shapes
torch.Size([256, 1024, 4])
torch.Size([256, 1024])


Epoch [1/10] Loss: 2.1454:   9%|▉         | 18/196 [00:02<00:23,  7.59it/s]

shapes
torch.Size([256, 1024, 4])
torch.Size([256, 1024])
shapes
torch.Size([256, 1024, 4])
torch.Size([256, 1024])


Epoch [1/10] Loss: 2.1325:  10%|█         | 20/196 [00:02<00:22,  7.71it/s]

shapes
torch.Size([256, 1024, 4])
torch.Size([256, 1024])
shapes
torch.Size([256, 1024, 4])
torch.Size([256, 1024])


Epoch [1/10] Loss: 2.1310:  11%|█         | 22/196 [00:02<00:22,  7.73it/s]

shapes
torch.Size([256, 1024, 4])
torch.Size([256, 1024])
shapes
torch.Size([256, 1024, 4])
torch.Size([256, 1024])


Epoch [1/10] Loss: 2.0804:  12%|█▏        | 24/196 [00:03<00:21,  7.85it/s]

shapes
torch.Size([256, 1024, 4])
torch.Size([256, 1024])
shapes
torch.Size([256, 1024, 4])
torch.Size([256, 1024])


Epoch [1/10] Loss: 2.0545:  13%|█▎        | 26/196 [00:03<00:22,  7.65it/s]

shapes
torch.Size([256, 1024, 4])
torch.Size([256, 1024])
shapes
torch.Size([256, 1024, 4])
torch.Size([256, 1024])


Epoch [1/10] Loss: 2.0068:  14%|█▍        | 28/196 [00:03<00:22,  7.61it/s]

shapes
torch.Size([256, 1024, 4])
torch.Size([256, 1024])
shapes
torch.Size([256, 1024, 4])
torch.Size([256, 1024])


Epoch [1/10] Loss: 2.0279:  15%|█▌        | 30/196 [00:03<00:21,  7.71it/s]

shapes
torch.Size([256, 1024, 4])
torch.Size([256, 1024])
shapes
torch.Size([256, 1024, 4])
torch.Size([256, 1024])


Epoch [1/10] Loss: 2.0463:  16%|█▋        | 32/196 [00:04<00:21,  7.75it/s]

shapes
torch.Size([256, 1024, 4])
torch.Size([256, 1024])
shapes
torch.Size([256, 1024, 4])
torch.Size([256, 1024])


Epoch [1/10] Loss: 1.9906:  17%|█▋        | 33/196 [00:04<00:21,  7.45it/s]


shapes
torch.Size([256, 1024, 4])
torch.Size([256, 1024])


KeyboardInterrupt: 

In [8]:
X = torch.nn.Parameter(torch.randn(59, 16))

hopfield = KHopfield(N=100, n=16 )

# optimize the hopfield network
optimizer = optim.Adam(hopfield.parameters(), lr=learning_rate)

for i in range(10):
    print(i)
    Y = hopfield(X, 4)
    loss = torch.norm(Y)
    loss.backward()
    print(loss)
    optimizer.step()

0
tensor(0.8274, grad_fn=<CopyBackwards>)
1
tensor(0.7943, grad_fn=<CopyBackwards>)
2
tensor(0.7643, grad_fn=<CopyBackwards>)
3
tensor(0.7368, grad_fn=<CopyBackwards>)
4
tensor(0.7115, grad_fn=<CopyBackwards>)
5
tensor(0.6887, grad_fn=<CopyBackwards>)
6
tensor(0.6683, grad_fn=<CopyBackwards>)
7
tensor(0.6504, grad_fn=<CopyBackwards>)
8
tensor(0.6348, grad_fn=<CopyBackwards>)
9
tensor(0.6215, grad_fn=<CopyBackwards>)


In [33]:
# Initialize the model and optimizer
num_heads = 3


model = VisionTransformer(
    num_classes = num_classes, 
    embed_dim = 1024, 
    num_heads = num_heads, 
    img_size = img_size, 
    patch_size = patch_size, 
    in_channels = in_channels,
)
model = SimpleViT(
    image_size = img_size,
    patch_size = patch_size,
    num_classes = num_classes,
    dim = 1024,
    depth = 1,
    heads = 4,
    mlp_dim = 2048
)

transformer 1


In [4]:
def get_model_and_data(
        data = 'mnist',
        model = 'hopfield',
        batch_size = 256,
        heads = 4,
        dim=256,
        embed_dim=1024):
    if data == 'mnist':
        transform = transforms.Compose([transforms.ToTensor()])
        train_dataset = torchvision.datasets.MNIST(root='~/data', train=True, transform=transforms.ToTensor(), download=True)
        test_dataset = torchvision.datasets.MNIST(root='~/data', train=False, transform=transforms.ToTensor(), download=True)
        img_size = 28
        in_channels = 1
        num_classes = 10

    elif data  == 'cifar10':
        transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

        train_dataset = torchvision.datasets.CIFAR10(root='~/data', train=True, transform=transform, download=True)
        test_dataset = torchvision.datasets.CIFAR10(root='~/data', train=False, transform=transform, download=True)
        img_size = 32
        in_channels = 3
        num_classes = 10
    else:
        raise Exception('data not found')
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    patch_size = 16

    if model == 'hopfield':
        model = VisionTransformer(
            num_classes = num_classes, 
            embed_dim = embed_dim, 
            dim = dim,
            num_heads = heads, 
            img_size = img_size, 
            patch_size = patch_size, 
            in_channels = in_channels,
        )
    elif model == 'hopfieldV':
        model = VisionTransformerV(
            num_classes = num_classes, 
            embed_dim = embed_dim, 
            dim = dim,
            num_heads = heads, 
            img_size = img_size, 
            patch_size = patch_size, 
            in_channels = in_channels,
        )
    elif model == 'vit':
        model = SimpleViT(
            image_size = img_size,
            patch_size = patch_size,
            num_classes = num_classes,
            dim = dim,
            depth = 1,
            heads = heads,
            mlp_dim = 1024
        )
    return model, train_loader, test_loader

In [5]:
def run_experiment(train_loader, test_loader, model, num_epochs=30, k=1):
    # Initialize the model and optimizer
    model = model.to(device)
    learning_rate = 1e-3
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()

    val_accuracy = []
    # Training loop
    for epoch in range(num_epochs):

        # show loss in tqdm
        model.train()
        pbar = tqdm(enumerate(train_loader), total=len(train_loader))
        for i, (images, labels) in pbar:
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            pbar.set_description(f'K: {k}, Epoch [{epoch + 1}/{num_epochs}] Loss: {loss.item():.4f}')

        # validate
        accuracy = validate(model, test_loader)

        print(f'Epoch [{epoch + 1}/{num_epochs}] Loss: {loss.item():.4f}, Val Accuracy: {accuracy:.4f}')
        val_accuracy.append(accuracy)

    print('Training finished!')

    return val_accuracy

In [11]:
model, train_loader, test_loader = get_model_and_data(data = 'cifar10', model = 'hopfieldV', batch_size = 256, heads = 4, dim=256)

Files already downloaded and verified
Files already downloaded and verified


In [12]:
val_accuracy  = run_experiment(train_loader, test_loader, model)

  0%|          | 0/196 [00:00<?, ?it/s]

Epoch [1/10] Loss: 1.7703:  69%|██████▉   | 135/196 [00:28<00:12,  4.78it/s]



Epoch [1/10] Loss: 1.7718:  81%|████████  | 158/196 [00:33<00:07,  4.80it/s]



Epoch [1/10] Loss: 1.7759: 100%|██████████| 196/196 [00:42<00:00,  4.66it/s]
  0%|          | 0/40 [00:00<?, ?it/s]



  2%|▎         | 1/40 [00:00<00:27,  1.43it/s]



  5%|▌         | 2/40 [00:01<00:26,  1.43it/s]



 10%|█         | 4/40 [00:01<00:11,  3.27it/s]



 12%|█▎        | 5/40 [00:02<00:16,  2.18it/s]



 15%|█▌        | 6/40 [00:02<00:11,  2.93it/s]



 18%|█▊        | 7/40 [00:03<00:15,  2.17it/s]



 32%|███▎      | 13/40 [00:03<00:04,  6.43it/s]



 35%|███▌      | 14/40 [00:04<00:08,  3.11it/s]



 38%|███▊      | 15/40 [00:05<00:10,  2.30it/s]



 42%|████▎     | 17/40 [00:05<00:06,  3.69it/s]



 45%|████▌     | 18/40 [00:06<00:08,  2.51it/s]



 65%|██████▌   | 26/40 [00:07<00:01,  7.78it/s]



 68%|██████▊   | 27/40 [00:07<00:03,  3.30it/s]



 70%|███████   | 28/40 [00:08<00:05,  2.37it/s]



 75%|███████▌  | 30/40 [00:08<00:02,  3.79it/s]



 78%|███████▊  | 31/40 [00:09<00:03,  2.50it/s]



 85%|████████▌ | 34/40 [00:09<00:01,  4.74it/s]



 88%|████████▊ | 35/40 [00:10<00:01,  2.79it/s]



 90%|█████████ | 36/40 [00:10<00:01,  3.49it/s]



 92%|█████████▎| 37/40 [00:11<00:01,  2.43it/s]



 95%|█████████▌| 38/40 [00:11<00:00,  3.10it/s]



100%|██████████| 40/40 [00:12<00:00,  3.28it/s]


Epoch [1/10] Loss: 1.7759, Val Accuracy: 37.1900


  0%|          | 0/196 [00:00<?, ?it/s]



Epoch [2/10] Loss: 1.7572:   1%|          | 2/196 [00:01<01:27,  2.21it/s]



Epoch [2/10] Loss: 1.6693:   4%|▎         | 7/196 [00:02<00:52,  3.61it/s]



Epoch [2/10] Loss: 1.6940:   6%|▌         | 11/196 [00:04<00:52,  3.50it/s]



Epoch [2/10] Loss: 1.7229:   7%|▋         | 13/196 [00:05<01:07,  2.69it/s]



Epoch [2/10] Loss: 1.6652:   7%|▋         | 14/196 [00:05<01:30,  2.01it/s]



Epoch [2/10] Loss: 1.7735:   8%|▊         | 15/196 [00:06<01:46,  1.70it/s]



Epoch [2/10] Loss: 1.7192:  10%|▉         | 19/196 [00:08<01:03,  2.79it/s]



Epoch [2/10] Loss: 1.7291:  11%|█         | 21/196 [00:09<01:10,  2.46it/s]



Epoch [2/10] Loss: 1.8094:  12%|█▏        | 23/196 [00:10<01:14,  2.33it/s]



Epoch [2/10] Loss: 1.7165:  13%|█▎        | 26/196 [00:11<01:02,  2.71it/s]



Epoch [2/10] Loss: 1.7220:  14%|█▍        | 27/196 [00:12<01:23,  2.01it/s]



Epoch [2/10] Loss: 1.6341:  15%|█▍        | 29/196 [00:13<01:19,  2.11it/s]



Epoch [2/10] Loss: 1.6674:  16%|█▌        | 31/196 [00:14<01:16,  2.15it/s]



Epoch [2/10] Loss: 1.7895:  17%|█▋        | 34/196 [00:15<01:13,  2.22it/s]


KeyboardInterrupt: 

In [20]:
ks = [1, 4, 8, 16]

df = pd.DataFrame(columns = ['k', 'accuracy', 'model', 'epoch'])
data = 'cifar10'
model = 'hopfield'
for k in ks:
    model, train_loader, test_loader = get_model_and_data(data = data, model = model, batch_size = 256, heads = k, dim=100)
    val_accuracy  = run_experiment(train_loader, test_loader, model, num_epochs=30, k =k )
    # add every val accuracy to dataframe
    for i, acc in enumerate(val_accuracy):
        df = df.append({'k': k, 'accuracy': acc, 'model': 'hopfield', 'epoch': i}, ignore_index=True)
    
    # save dataframe
    df.to_csv(f'./results/{data}_{model}_heads.csv')

Files already downloaded and verified
Files already downloaded and verified


K: 1, Epoch [1/30] Loss: 1.9899:  76%|███████▌  | 148/196 [00:12<00:04, 10.00it/s]



K: 1, Epoch [1/30] Loss: 1.8530:  93%|█████████▎| 182/196 [00:16<00:01,  9.84it/s]



K: 1, Epoch [1/30] Loss: 1.8990:  95%|█████████▍| 186/196 [00:16<00:01,  9.06it/s]



K: 1, Epoch [1/30] Loss: 1.7691:  97%|█████████▋| 190/196 [00:17<00:00, 10.29it/s]



K: 1, Epoch [1/30] Loss: 1.7936:  98%|█████████▊| 192/196 [00:17<00:00,  7.22it/s]



K: 1, Epoch [1/30] Loss: 1.8571:  99%|█████████▉| 195/196 [00:17<00:00,  7.60it/s]



K: 1, Epoch [1/30] Loss: 1.8542: 100%|██████████| 196/196 [00:17<00:00, 11.03it/s]
  2%|▎         | 1/40 [00:00<00:08,  4.48it/s]



 10%|█         | 4/40 [00:00<00:04,  7.76it/s]



 12%|█▎        | 5/40 [00:00<00:05,  6.33it/s]



 22%|██▎       | 9/40 [00:01<00:03,  8.47it/s]



 30%|███       | 12/40 [00:01<00:03,  8.88it/s]



 35%|███▌      | 14/40 [00:02<00:03,  6.66it/s]



 42%|████▎     | 17/40 [00:02<00:02,  7.71it/s]



 50%|█████     | 20/40 [00:02<00:02,  8.38it/s]



 65%|██████▌   | 26/40 [00:03<00:01,  9.90it/s]



 75%|███████▌  | 30/40 [00:03<00:01,  8.62it/s]



 80%|████████  | 32/40 [00:04<00:00,  8.03it/s]



 82%|████████▎ | 33/40 [00:04<00:00,  7.05it/s]



 88%|████████▊ | 35/40 [00:04<00:00,  7.02it/s]



 92%|█████████▎| 37/40 [00:04<00:00,  7.00it/s]



100%|██████████| 40/40 [00:05<00:00,  7.58it/s]


Epoch [1/30] Loss: 1.8542, Val Accuracy: 33.0700


K: 1, Epoch [2/30] Loss: 1.8611:   1%|          | 1/196 [00:00<00:49,  3.98it/s]



K: 1, Epoch [2/30] Loss: 1.8520:   2%|▏         | 4/196 [00:00<00:27,  7.00it/s]



K: 1, Epoch [2/30] Loss: 1.9077:   4%|▍         | 8/196 [00:01<00:19,  9.75it/s]



K: 1, Epoch [2/30] Loss: 1.9280:   6%|▌         | 12/196 [00:01<00:23,  7.89it/s]



K: 1, Epoch [2/30] Loss: 1.8902:   7%|▋         | 14/196 [00:02<00:25,  7.28it/s]



K: 1, Epoch [2/30] Loss: 1.9837:   9%|▊         | 17/196 [00:02<00:24,  7.34it/s]



K: 1, Epoch [2/30] Loss: 1.8473:   9%|▉         | 18/196 [00:02<00:27,  6.37it/s]



K: 1, Epoch [2/30] Loss: 1.8863:  10%|█         | 20/196 [00:02<00:27,  6.32it/s]



K: 1, Epoch [2/30] Loss: 1.7815:  11%|█         | 21/196 [00:03<00:30,  5.76it/s]



K: 1, Epoch [2/30] Loss: 1.8763:  12%|█▏        | 23/196 [00:03<00:29,  5.93it/s]



K: 1, Epoch [2/30] Loss: 1.9251:  14%|█▍        | 27/196 [00:03<00:23,  7.24it/s]



K: 1, Epoch [2/30] Loss: 1.9585:  16%|█▋        | 32/196 [00:04<00:21,  7.60it/s]



K: 1, Epoch [2/30] Loss: 1.8533:  18%|█▊        | 36/196 [00:05<00:19,  8.19it/s]



K: 1, Epoch [2/30] Loss: 1.7738:  19%|█▉        | 37/196 [00:05<00:22,  6.97it/s]



K: 1, Epoch [2/30] Loss: 1.9169:  19%|█▉        | 38/196 [00:05<00:25,  6.13it/s]



K: 1, Epoch [2/30] Loss: 1.8123:  20%|█▉        | 39/196 [00:05<00:28,  5.58it/s]



K: 1, Epoch [2/30] Loss: 1.7795:  20%|██        | 40/196 [00:06<00:30,  5.18it/s]



K: 1, Epoch [2/30] Loss: 1.8319:  21%|██▏       | 42/196 [00:06<00:27,  5.60it/s]



K: 1, Epoch [2/30] Loss: 1.8156:  22%|██▏       | 43/196 [00:06<00:29,  5.22it/s]



K: 1, Epoch [2/30] Loss: 1.8617:  23%|██▎       | 46/196 [00:06<00:23,  6.51it/s]



K: 1, Epoch [2/30] Loss: 1.8907:  24%|██▍       | 47/196 [00:07<00:26,  5.69it/s]



K: 1, Epoch [2/30] Loss: 1.9927:  24%|██▍       | 48/196 [00:07<00:28,  5.25it/s]



K: 1, Epoch [2/30] Loss: 1.8434:  26%|██▌       | 50/196 [00:07<00:26,  5.59it/s]



K: 1, Epoch [2/30] Loss: 1.8015:  27%|██▋       | 52/196 [00:08<00:24,  5.84it/s]



K: 1, Epoch [2/30] Loss: 1.7920:  27%|██▋       | 53/196 [00:08<00:26,  5.42it/s]



K: 1, Epoch [2/30] Loss: 1.8738:  28%|██▊       | 54/196 [00:08<00:27,  5.11it/s]



K: 1, Epoch [2/30] Loss: 1.8394:  28%|██▊       | 55/196 [00:08<00:29,  4.84it/s]



K: 1, Epoch [2/30] Loss: 1.8135:  29%|██▊       | 56/196 [00:08<00:30,  4.63it/s]



K: 1, Epoch [2/30] Loss: 1.8603:  29%|██▉       | 57/196 [00:09<00:30,  4.52it/s]



K: 1, Epoch [2/30] Loss: 1.8803:  30%|██▉       | 58/196 [00:09<00:31,  4.45it/s]



K: 1, Epoch [2/30] Loss: 1.7925:  30%|███       | 59/196 [00:09<00:31,  4.40it/s]



K: 1, Epoch [2/30] Loss: 1.8380:  31%|███       | 60/196 [00:10<00:31,  4.34it/s]



K: 1, Epoch [2/30] Loss: 1.7775:  32%|███▏      | 62/196 [00:10<00:26,  5.05it/s]



K: 1, Epoch [2/30] Loss: 1.8727:  32%|███▏      | 63/196 [00:10<00:27,  4.80it/s]



K: 1, Epoch [2/30] Loss: 1.8561:  33%|███▎      | 64/196 [00:10<00:28,  4.65it/s]



K: 1, Epoch [2/30] Loss: 1.8545:  33%|███▎      | 65/196 [00:11<00:29,  4.50it/s]



K: 1, Epoch [2/30] Loss: 1.8048:  34%|███▍      | 67/196 [00:11<00:25,  5.13it/s]



K: 1, Epoch [2/30] Loss: 1.8164:  35%|███▍      | 68/196 [00:11<00:26,  4.87it/s]



K: 1, Epoch [2/30] Loss: 1.8352:  35%|███▌      | 69/196 [00:11<00:27,  4.66it/s]



K: 1, Epoch [2/30] Loss: 1.7518:  36%|███▌      | 71/196 [00:12<00:23,  5.22it/s]



K: 1, Epoch [2/30] Loss: 1.7588:  37%|███▋      | 72/196 [00:12<00:25,  4.95it/s]



K: 1, Epoch [2/30] Loss: 1.8408:  38%|███▊      | 74/196 [00:12<00:22,  5.40it/s]



K: 1, Epoch [2/30] Loss: 1.8350:  38%|███▊      | 75/196 [00:12<00:23,  5.05it/s]



K: 1, Epoch [2/30] Loss: 1.8158:  39%|███▉      | 76/196 [00:13<00:24,  4.84it/s]



K: 1, Epoch [2/30] Loss: 1.8571:  39%|███▉      | 77/196 [00:13<00:25,  4.65it/s]



K: 1, Epoch [2/30] Loss: 1.7974:  40%|███▉      | 78/196 [00:13<00:26,  4.52it/s]



K: 1, Epoch [2/30] Loss: 1.7491:  41%|████▏     | 81/196 [00:13<00:18,  6.11it/s]



K: 1, Epoch [2/30] Loss: 1.7525:  42%|████▏     | 82/196 [00:14<00:20,  5.52it/s]



K: 1, Epoch [2/30] Loss: 1.8710:  42%|████▏     | 83/196 [00:14<00:22,  5.13it/s]



K: 1, Epoch [2/30] Loss: 1.8553:  43%|████▎     | 85/196 [00:14<00:20,  5.48it/s]



K: 1, Epoch [2/30] Loss: 1.7969:  44%|████▍     | 86/196 [00:15<00:21,  5.14it/s]



K: 1, Epoch [2/30] Loss: 1.8126:  44%|████▍     | 87/196 [00:15<00:22,  4.88it/s]



K: 1, Epoch [2/30] Loss: 1.8169:  45%|████▍     | 88/196 [00:15<00:23,  4.68it/s]



K: 1, Epoch [2/30] Loss: 1.9193:  45%|████▌     | 89/196 [00:15<00:23,  4.47it/s]



K: 1, Epoch [2/30] Loss: 1.8370:  46%|████▌     | 90/196 [00:15<00:24,  4.41it/s]



K: 1, Epoch [2/30] Loss: 1.8569:  46%|████▋     | 91/196 [00:16<00:24,  4.35it/s]



K: 1, Epoch [2/30] Loss: 1.7152:  47%|████▋     | 92/196 [00:16<00:24,  4.26it/s]



K: 1, Epoch [2/30] Loss: 1.8830:  47%|████▋     | 93/196 [00:16<00:24,  4.25it/s]



K: 1, Epoch [2/30] Loss: 1.8585:  48%|████▊     | 94/196 [00:16<00:24,  4.25it/s]



K: 1, Epoch [2/30] Loss: 1.9286:  48%|████▊     | 95/196 [00:17<00:23,  4.23it/s]



K: 1, Epoch [2/30] Loss: 1.8968:  49%|████▉     | 96/196 [00:17<00:23,  4.24it/s]



K: 1, Epoch [2/30] Loss: 1.7920:  49%|████▉     | 97/196 [00:17<00:23,  4.24it/s]



K: 1, Epoch [2/30] Loss: 1.8342:  50%|█████     | 98/196 [00:17<00:23,  4.21it/s]



K: 1, Epoch [2/30] Loss: 1.7740:  51%|█████     | 100/196 [00:18<00:19,  4.99it/s]



K: 1, Epoch [2/30] Loss: 1.8821:  52%|█████▏    | 101/196 [00:18<00:19,  4.80it/s]



K: 1, Epoch [2/30] Loss: 1.9005:  52%|█████▏    | 102/196 [00:18<00:20,  4.65it/s]



K: 1, Epoch [2/30] Loss: 1.8364:  53%|█████▎    | 104/196 [00:19<00:17,  5.19it/s]



K: 1, Epoch [2/30] Loss: 1.8242:  54%|█████▎    | 105/196 [00:19<00:18,  4.93it/s]



K: 1, Epoch [2/30] Loss: 1.8387:  54%|█████▍    | 106/196 [00:19<00:19,  4.68it/s]



K: 1, Epoch [2/30] Loss: 1.8926:  55%|█████▍    | 107/196 [00:19<00:19,  4.56it/s]



K: 1, Epoch [2/30] Loss: 1.9492:  55%|█████▌    | 108/196 [00:19<00:19,  4.46it/s]



K: 1, Epoch [2/30] Loss: 1.9252:  56%|█████▌    | 109/196 [00:20<00:19,  4.41it/s]



K: 1, Epoch [2/30] Loss: 1.8338:  56%|█████▌    | 110/196 [00:20<00:19,  4.36it/s]



K: 1, Epoch [2/30] Loss: 1.8259:  57%|█████▋    | 111/196 [00:20<00:19,  4.32it/s]



K: 1, Epoch [2/30] Loss: 1.8646:  57%|█████▋    | 112/196 [00:20<00:19,  4.28it/s]



K: 1, Epoch [2/30] Loss: 1.8612:  58%|█████▊    | 113/196 [00:21<00:19,  4.27it/s]



K: 1, Epoch [2/30] Loss: 1.8612:  58%|█████▊    | 114/196 [00:21<00:19,  4.18it/s]



K: 1, Epoch [2/30] Loss: 1.8925:  59%|█████▊    | 115/196 [00:21<00:19,  4.18it/s]



K: 1, Epoch [2/30] Loss: 1.8544:  59%|█████▉    | 116/196 [00:21<00:19,  4.20it/s]



K: 1, Epoch [2/30] Loss: 1.8717:  60%|█████▉    | 117/196 [00:22<00:18,  4.21it/s]



K: 1, Epoch [2/30] Loss: 1.8034:  60%|██████    | 118/196 [00:22<00:18,  4.16it/s]



K: 1, Epoch [2/30] Loss: 1.9321:  61%|██████    | 119/196 [00:22<00:18,  4.19it/s]



K: 1, Epoch [2/30] Loss: 1.9134:  61%|██████    | 120/196 [00:22<00:18,  4.21it/s]



K: 1, Epoch [2/30] Loss: 1.8338:  62%|██████▏   | 121/196 [00:23<00:17,  4.23it/s]



K: 1, Epoch [2/30] Loss: 1.8800:  62%|██████▏   | 122/196 [00:23<00:17,  4.21it/s]



K: 1, Epoch [2/30] Loss: 1.8492:  63%|██████▎   | 123/196 [00:23<00:17,  4.23it/s]



K: 1, Epoch [2/30] Loss: 1.8678:  63%|██████▎   | 124/196 [00:23<00:17,  4.23it/s]



K: 1, Epoch [2/30] Loss: 1.7722:  64%|██████▍   | 125/196 [00:23<00:16,  4.21it/s]



K: 1, Epoch [2/30] Loss: 1.8888:  64%|██████▍   | 126/196 [00:24<00:16,  4.22it/s]



K: 1, Epoch [2/30] Loss: 1.8694:  65%|██████▍   | 127/196 [00:24<00:16,  4.23it/s]



K: 1, Epoch [2/30] Loss: 1.9014:  65%|██████▌   | 128/196 [00:24<00:16,  4.24it/s]



K: 1, Epoch [2/30] Loss: 1.9203:  66%|██████▌   | 129/196 [00:24<00:15,  4.24it/s]



K: 1, Epoch [2/30] Loss: 1.7996:  66%|██████▋   | 130/196 [00:25<00:15,  4.24it/s]



K: 1, Epoch [2/30] Loss: 1.8399:  67%|██████▋   | 131/196 [00:25<00:15,  4.23it/s]



K: 1, Epoch [2/30] Loss: 1.9247:  67%|██████▋   | 132/196 [00:25<00:15,  4.23it/s]



K: 1, Epoch [2/30] Loss: 1.8926:  68%|██████▊   | 133/196 [00:25<00:14,  4.20it/s]



K: 1, Epoch [2/30] Loss: 1.8266:  68%|██████▊   | 134/196 [00:26<00:14,  4.22it/s]



K: 1, Epoch [2/30] Loss: 1.8527:  69%|██████▉   | 135/196 [00:26<00:14,  4.22it/s]



K: 1, Epoch [2/30] Loss: 1.9292:  69%|██████▉   | 136/196 [00:26<00:14,  4.21it/s]



K: 1, Epoch [2/30] Loss: 1.7759:  70%|██████▉   | 137/196 [00:26<00:13,  4.21it/s]



K: 1, Epoch [2/30] Loss: 1.9032:  70%|███████   | 138/196 [00:27<00:13,  4.22it/s]



K: 1, Epoch [2/30] Loss: 1.8769:  71%|███████   | 139/196 [00:27<00:13,  4.23it/s]



K: 1, Epoch [2/30] Loss: 1.8090:  71%|███████▏  | 140/196 [00:27<00:13,  4.23it/s]



K: 1, Epoch [2/30] Loss: 1.9021:  72%|███████▏  | 141/196 [00:27<00:12,  4.23it/s]



K: 1, Epoch [2/30] Loss: 1.9189:  72%|███████▏  | 142/196 [00:28<00:12,  4.25it/s]



K: 1, Epoch [2/30] Loss: 1.8498:  73%|███████▎  | 143/196 [00:28<00:12,  4.24it/s]



K: 1, Epoch [2/30] Loss: 1.8952:  73%|███████▎  | 144/196 [00:28<00:12,  4.16it/s]



K: 1, Epoch [2/30] Loss: 1.7782:  74%|███████▍  | 145/196 [00:28<00:12,  4.19it/s]



K: 1, Epoch [2/30] Loss: 1.8329:  74%|███████▍  | 146/196 [00:28<00:11,  4.20it/s]



K: 1, Epoch [2/30] Loss: 1.8795:  75%|███████▌  | 147/196 [00:29<00:11,  4.20it/s]



K: 1, Epoch [2/30] Loss: 1.8600:  76%|███████▌  | 148/196 [00:29<00:11,  4.15it/s]



K: 1, Epoch [2/30] Loss: 1.8186:  76%|███████▌  | 149/196 [00:29<00:11,  4.18it/s]



K: 1, Epoch [2/30] Loss: 1.8434:  77%|███████▋  | 150/196 [00:29<00:11,  4.17it/s]



K: 1, Epoch [2/30] Loss: 1.8396:  77%|███████▋  | 151/196 [00:30<00:10,  4.18it/s]



K: 1, Epoch [2/30] Loss: 1.8249:  78%|███████▊  | 152/196 [00:30<00:10,  4.20it/s]



K: 1, Epoch [2/30] Loss: 1.8487:  78%|███████▊  | 153/196 [00:30<00:10,  4.19it/s]



K: 1, Epoch [2/30] Loss: 1.9165:  79%|███████▊  | 154/196 [00:30<00:09,  4.22it/s]



K: 1, Epoch [2/30] Loss: 1.8733:  79%|███████▉  | 155/196 [00:31<00:09,  4.16it/s]



K: 1, Epoch [2/30] Loss: 1.7991:  80%|███████▉  | 156/196 [00:31<00:09,  4.17it/s]



K: 1, Epoch [2/30] Loss: 1.8263:  80%|████████  | 157/196 [00:31<00:09,  4.18it/s]



K: 1, Epoch [2/30] Loss: 1.7862:  81%|████████  | 158/196 [00:31<00:09,  4.19it/s]



K: 1, Epoch [2/30] Loss: 1.8723:  81%|████████  | 159/196 [00:32<00:08,  4.19it/s]



K: 1, Epoch [2/30] Loss: 1.9020:  82%|████████▏ | 160/196 [00:32<00:08,  4.20it/s]



K: 1, Epoch [2/30] Loss: 1.8715:  82%|████████▏ | 161/196 [00:32<00:08,  4.20it/s]



K: 1, Epoch [2/30] Loss: 1.8353:  83%|████████▎ | 162/196 [00:32<00:08,  4.20it/s]



K: 1, Epoch [2/30] Loss: 1.9178:  83%|████████▎ | 163/196 [00:33<00:07,  4.20it/s]



K: 1, Epoch [2/30] Loss: 1.9013:  84%|████████▎ | 164/196 [00:33<00:07,  4.20it/s]



K: 1, Epoch [2/30] Loss: 1.8487:  84%|████████▍ | 165/196 [00:33<00:07,  4.20it/s]



K: 1, Epoch [2/30] Loss: 1.7987:  85%|████████▍ | 166/196 [00:33<00:07,  4.18it/s]



K: 1, Epoch [2/30] Loss: 1.8996:  85%|████████▌ | 167/196 [00:33<00:06,  4.16it/s]



K: 1, Epoch [2/30] Loss: 1.8788:  86%|████████▌ | 168/196 [00:34<00:06,  4.17it/s]



K: 1, Epoch [2/30] Loss: 1.8384:  86%|████████▌ | 169/196 [00:34<00:06,  4.17it/s]



K: 1, Epoch [2/30] Loss: 1.8326:  87%|████████▋ | 170/196 [00:34<00:06,  4.18it/s]



K: 1, Epoch [2/30] Loss: 1.7811:  87%|████████▋ | 171/196 [00:34<00:05,  4.19it/s]



K: 1, Epoch [2/30] Loss: 1.8271:  88%|████████▊ | 172/196 [00:35<00:05,  4.19it/s]



K: 1, Epoch [2/30] Loss: 1.8874:  88%|████████▊ | 173/196 [00:35<00:05,  4.20it/s]



K: 1, Epoch [2/30] Loss: 1.8218:  89%|████████▉ | 174/196 [00:35<00:05,  4.21it/s]



K: 1, Epoch [2/30] Loss: 1.9350:  89%|████████▉ | 175/196 [00:35<00:04,  4.21it/s]



K: 1, Epoch [2/30] Loss: 1.7313:  90%|████████▉ | 176/196 [00:36<00:04,  4.21it/s]



K: 1, Epoch [2/30] Loss: 1.8227:  90%|█████████ | 177/196 [00:36<00:04,  4.21it/s]



K: 1, Epoch [2/30] Loss: 1.9246:  91%|█████████ | 178/196 [00:36<00:04,  4.21it/s]



K: 1, Epoch [2/30] Loss: 1.8428:  91%|█████████▏| 179/196 [00:36<00:04,  4.21it/s]



K: 1, Epoch [2/30] Loss: 1.9930:  92%|█████████▏| 180/196 [00:37<00:03,  4.21it/s]



K: 1, Epoch [2/30] Loss: 1.8473:  92%|█████████▏| 181/196 [00:37<00:03,  4.19it/s]



K: 1, Epoch [2/30] Loss: 1.7693:  93%|█████████▎| 182/196 [00:37<00:03,  4.20it/s]



K: 1, Epoch [2/30] Loss: 1.8232:  93%|█████████▎| 183/196 [00:37<00:03,  4.20it/s]



K: 1, Epoch [2/30] Loss: 1.7130:  94%|█████████▍| 184/196 [00:38<00:02,  4.20it/s]



K: 1, Epoch [2/30] Loss: 1.7489:  94%|█████████▍| 185/196 [00:38<00:02,  4.19it/s]



K: 1, Epoch [2/30] Loss: 1.9188:  95%|█████████▍| 186/196 [00:38<00:02,  4.18it/s]



K: 1, Epoch [2/30] Loss: 1.8220:  95%|█████████▌| 187/196 [00:38<00:02,  4.19it/s]



K: 1, Epoch [2/30] Loss: 1.8067:  96%|█████████▌| 188/196 [00:38<00:01,  4.20it/s]



K: 1, Epoch [2/30] Loss: 1.8143:  96%|█████████▋| 189/196 [00:39<00:01,  4.20it/s]



K: 1, Epoch [2/30] Loss: 1.8594:  97%|█████████▋| 190/196 [00:39<00:01,  4.20it/s]



K: 1, Epoch [2/30] Loss: 1.7935:  97%|█████████▋| 191/196 [00:39<00:01,  4.19it/s]



K: 1, Epoch [2/30] Loss: 1.8764:  98%|█████████▊| 192/196 [00:39<00:00,  4.19it/s]



K: 1, Epoch [2/30] Loss: 1.7929:  98%|█████████▊| 193/196 [00:40<00:00,  4.17it/s]



K: 1, Epoch [2/30] Loss: 1.8925:  99%|█████████▉| 194/196 [00:40<00:00,  4.18it/s]



K: 1, Epoch [2/30] Loss: 1.9319: 100%|██████████| 196/196 [00:40<00:00,  4.47it/s]



K: 1, Epoch [2/30] Loss: 1.9319: 100%|██████████| 196/196 [00:40<00:00,  4.80it/s]
  2%|▎         | 1/40 [00:00<00:08,  4.37it/s]



  5%|▌         | 2/40 [00:00<00:08,  4.37it/s]



  8%|▊         | 3/40 [00:00<00:08,  4.39it/s]



 10%|█         | 4/40 [00:00<00:08,  4.40it/s]



 12%|█▎        | 5/40 [00:01<00:07,  4.41it/s]



 15%|█▌        | 6/40 [00:01<00:07,  4.42it/s]



 18%|█▊        | 7/40 [00:01<00:07,  4.41it/s]



 20%|██        | 8/40 [00:01<00:07,  4.39it/s]



 22%|██▎       | 9/40 [00:02<00:07,  4.39it/s]



 25%|██▌       | 10/40 [00:02<00:06,  4.38it/s]



 28%|██▊       | 11/40 [00:02<00:06,  4.39it/s]



 30%|███       | 12/40 [00:02<00:06,  4.40it/s]



 32%|███▎      | 13/40 [00:02<00:06,  4.39it/s]



 35%|███▌      | 14/40 [00:03<00:05,  4.41it/s]



 38%|███▊      | 15/40 [00:03<00:05,  4.42it/s]



 40%|████      | 16/40 [00:03<00:05,  4.44it/s]



 42%|████▎     | 17/40 [00:03<00:05,  4.41it/s]



 45%|████▌     | 18/40 [00:04<00:04,  4.40it/s]



 48%|████▊     | 19/40 [00:04<00:04,  4.41it/s]



 50%|█████     | 20/40 [00:04<00:04,  4.39it/s]



 52%|█████▎    | 21/40 [00:04<00:04,  4.39it/s]



 55%|█████▌    | 22/40 [00:04<00:04,  4.40it/s]



 57%|█████▊    | 23/40 [00:05<00:03,  4.41it/s]



 60%|██████    | 24/40 [00:05<00:03,  4.40it/s]



 62%|██████▎   | 25/40 [00:05<00:03,  4.41it/s]



 65%|██████▌   | 26/40 [00:05<00:03,  4.43it/s]



 68%|██████▊   | 27/40 [00:06<00:02,  4.42it/s]



 70%|███████   | 28/40 [00:06<00:02,  4.42it/s]



 72%|███████▎  | 29/40 [00:06<00:02,  4.43it/s]



 75%|███████▌  | 30/40 [00:06<00:02,  4.45it/s]



 78%|███████▊  | 31/40 [00:07<00:02,  4.44it/s]



 80%|████████  | 32/40 [00:07<00:01,  4.44it/s]



 82%|████████▎ | 33/40 [00:07<00:01,  4.37it/s]



 85%|████████▌ | 34/40 [00:07<00:01,  4.40it/s]



 88%|████████▊ | 35/40 [00:07<00:01,  4.42it/s]



 90%|█████████ | 36/40 [00:08<00:00,  4.44it/s]



 92%|█████████▎| 37/40 [00:08<00:00,  4.46it/s]



 95%|█████████▌| 38/40 [00:08<00:00,  4.47it/s]



100%|██████████| 40/40 [00:08<00:00,  4.52it/s]


Epoch [2/30] Loss: 1.9319, Val Accuracy: 33.0100


K: 1, Epoch [3/30] Loss: 1.8537:   1%|          | 1/196 [00:00<00:47,  4.14it/s]



K: 1, Epoch [3/30] Loss: 1.7299:   1%|          | 2/196 [00:00<00:45,  4.23it/s]



K: 1, Epoch [3/30] Loss: 1.7777:   2%|▏         | 3/196 [00:00<00:45,  4.25it/s]



K: 1, Epoch [3/30] Loss: 1.8558:   2%|▏         | 4/196 [00:00<00:45,  4.22it/s]



K: 1, Epoch [3/30] Loss: 1.8022:   3%|▎         | 5/196 [00:01<00:45,  4.19it/s]



K: 1, Epoch [3/30] Loss: 1.7879:   3%|▎         | 6/196 [00:01<00:45,  4.19it/s]



K: 1, Epoch [3/30] Loss: 1.7184:   4%|▎         | 7/196 [00:01<00:44,  4.22it/s]



K: 1, Epoch [3/30] Loss: 1.8222:   4%|▍         | 8/196 [00:01<00:44,  4.24it/s]



K: 1, Epoch [3/30] Loss: 1.7229:   5%|▍         | 9/196 [00:02<00:43,  4.25it/s]



K: 1, Epoch [3/30] Loss: 1.8068:   5%|▌         | 10/196 [00:02<00:43,  4.26it/s]



K: 1, Epoch [3/30] Loss: 1.7273:   6%|▌         | 11/196 [00:02<00:43,  4.27it/s]



K: 1, Epoch [3/30] Loss: 1.8521:   6%|▌         | 12/196 [00:02<00:43,  4.28it/s]



K: 1, Epoch [3/30] Loss: 1.8538:   7%|▋         | 13/196 [00:03<00:42,  4.29it/s]



K: 1, Epoch [3/30] Loss: 1.8310:   7%|▋         | 14/196 [00:03<00:42,  4.30it/s]



K: 1, Epoch [3/30] Loss: 1.7933:   8%|▊         | 15/196 [00:03<00:42,  4.22it/s]



K: 1, Epoch [3/30] Loss: 1.8064:   8%|▊         | 16/196 [00:03<00:42,  4.24it/s]



K: 1, Epoch [3/30] Loss: 1.7806:   9%|▊         | 17/196 [00:04<00:42,  4.25it/s]



K: 1, Epoch [3/30] Loss: 1.8541:   9%|▉         | 18/196 [00:04<00:41,  4.26it/s]



K: 1, Epoch [3/30] Loss: 1.8876:  10%|▉         | 19/196 [00:04<00:41,  4.26it/s]



K: 1, Epoch [3/30] Loss: 1.8501:  10%|█         | 20/196 [00:04<00:41,  4.27it/s]



K: 1, Epoch [3/30] Loss: 1.9309:  11%|█         | 21/196 [00:04<00:40,  4.28it/s]



K: 1, Epoch [3/30] Loss: 1.8526:  11%|█         | 22/196 [00:05<00:40,  4.28it/s]



K: 1, Epoch [3/30] Loss: 1.8252:  12%|█▏        | 23/196 [00:05<00:40,  4.29it/s]



K: 1, Epoch [3/30] Loss: 1.7535:  12%|█▏        | 24/196 [00:05<00:40,  4.27it/s]



K: 1, Epoch [3/30] Loss: 1.8136:  13%|█▎        | 25/196 [00:05<00:40,  4.25it/s]



K: 1, Epoch [3/30] Loss: 1.7855:  13%|█▎        | 26/196 [00:06<00:39,  4.26it/s]



K: 1, Epoch [3/30] Loss: 1.8781:  14%|█▍        | 27/196 [00:06<00:39,  4.28it/s]



K: 1, Epoch [3/30] Loss: 1.8440:  14%|█▍        | 28/196 [00:06<00:39,  4.27it/s]



K: 1, Epoch [3/30] Loss: 1.8570:  15%|█▍        | 29/196 [00:06<00:39,  4.27it/s]



K: 1, Epoch [3/30] Loss: 1.8347:  15%|█▌        | 30/196 [00:07<00:38,  4.29it/s]



K: 1, Epoch [3/30] Loss: 1.8313:  16%|█▌        | 31/196 [00:07<00:38,  4.29it/s]



K: 1, Epoch [3/30] Loss: 1.7427:  16%|█▋        | 32/196 [00:07<00:38,  4.27it/s]



K: 1, Epoch [3/30] Loss: 1.7586:  17%|█▋        | 33/196 [00:07<00:38,  4.25it/s]



K: 1, Epoch [3/30] Loss: 1.7227:  17%|█▋        | 34/196 [00:07<00:38,  4.26it/s]



K: 1, Epoch [3/30] Loss: 1.7584:  18%|█▊        | 35/196 [00:08<00:37,  4.27it/s]



K: 1, Epoch [3/30] Loss: 1.7656:  18%|█▊        | 36/196 [00:08<00:37,  4.27it/s]



K: 1, Epoch [3/30] Loss: 1.7956:  19%|█▉        | 37/196 [00:08<00:37,  4.23it/s]



K: 1, Epoch [3/30] Loss: 1.7679:  19%|█▉        | 38/196 [00:08<00:37,  4.21it/s]



K: 1, Epoch [3/30] Loss: 1.7764:  20%|█▉        | 39/196 [00:09<00:37,  4.23it/s]



K: 1, Epoch [3/30] Loss: 1.8579:  20%|██        | 40/196 [00:09<00:36,  4.23it/s]



K: 1, Epoch [3/30] Loss: 1.7116:  21%|██        | 41/196 [00:09<00:36,  4.22it/s]



K: 1, Epoch [3/30] Loss: 1.9185:  21%|██▏       | 42/196 [00:09<00:36,  4.21it/s]



K: 1, Epoch [3/30] Loss: 1.7778:  22%|██▏       | 43/196 [00:10<00:36,  4.15it/s]



K: 1, Epoch [3/30] Loss: 1.7657:  22%|██▏       | 44/196 [00:10<00:37,  4.06it/s]



K: 1, Epoch [3/30] Loss: 1.8380:  23%|██▎       | 45/196 [00:10<00:36,  4.08it/s]



K: 1, Epoch [3/30] Loss: 1.6845:  23%|██▎       | 46/196 [00:10<00:36,  4.13it/s]



K: 1, Epoch [3/30] Loss: 1.8373:  24%|██▍       | 47/196 [00:11<00:36,  4.14it/s]



K: 1, Epoch [3/30] Loss: 1.8499:  24%|██▍       | 48/196 [00:11<00:35,  4.13it/s]



K: 1, Epoch [3/30] Loss: 1.8513:  25%|██▌       | 49/196 [00:11<00:35,  4.16it/s]



K: 1, Epoch [3/30] Loss: 1.8232:  26%|██▌       | 50/196 [00:11<00:35,  4.17it/s]



K: 1, Epoch [3/30] Loss: 1.8406:  26%|██▌       | 51/196 [00:12<00:34,  4.17it/s]



K: 1, Epoch [3/30] Loss: 1.8447:  27%|██▋       | 52/196 [00:12<00:34,  4.20it/s]



K: 1, Epoch [3/30] Loss: 1.8409:  27%|██▋       | 53/196 [00:12<00:33,  4.21it/s]



K: 1, Epoch [3/30] Loss: 1.7476:  28%|██▊       | 54/196 [00:12<00:33,  4.20it/s]



K: 1, Epoch [3/30] Loss: 1.7478:  28%|██▊       | 55/196 [00:13<00:33,  4.21it/s]



K: 1, Epoch [3/30] Loss: 1.7915:  29%|██▊       | 56/196 [00:13<00:33,  4.22it/s]



K: 1, Epoch [3/30] Loss: 1.8429:  29%|██▉       | 57/196 [00:13<00:33,  4.21it/s]



K: 1, Epoch [3/30] Loss: 1.7891:  30%|██▉       | 58/196 [00:13<00:33,  4.18it/s]



K: 1, Epoch [3/30] Loss: 1.9827:  30%|███       | 59/196 [00:13<00:32,  4.19it/s]



K: 1, Epoch [3/30] Loss: 1.8593:  31%|███       | 60/196 [00:14<00:32,  4.21it/s]



K: 1, Epoch [3/30] Loss: 1.7693:  31%|███       | 61/196 [00:14<00:32,  4.21it/s]



K: 1, Epoch [3/30] Loss: 1.9221:  32%|███▏      | 62/196 [00:14<00:31,  4.22it/s]



K: 1, Epoch [3/30] Loss: 1.8728:  32%|███▏      | 63/196 [00:14<00:31,  4.23it/s]



K: 1, Epoch [3/30] Loss: 1.8274:  33%|███▎      | 64/196 [00:15<00:31,  4.21it/s]



K: 1, Epoch [3/30] Loss: 1.7984:  33%|███▎      | 65/196 [00:15<00:35,  3.73it/s]



K: 1, Epoch [3/30] Loss: 1.8449:  34%|███▎      | 66/196 [00:15<00:33,  3.84it/s]



K: 1, Epoch [3/30] Loss: 1.7503:  34%|███▍      | 67/196 [00:15<00:32,  3.97it/s]



K: 1, Epoch [3/30] Loss: 1.7989:  35%|███▍      | 68/196 [00:16<00:31,  4.02it/s]



K: 1, Epoch [3/30] Loss: 1.7256:  35%|███▌      | 69/196 [00:16<00:31,  4.04it/s]



K: 1, Epoch [3/30] Loss: 1.7261:  36%|███▌      | 70/196 [00:16<00:30,  4.11it/s]



K: 1, Epoch [3/30] Loss: 1.8229:  36%|███▌      | 71/196 [00:16<00:30,  4.10it/s]



K: 1, Epoch [3/30] Loss: 1.7543:  37%|███▋      | 72/196 [00:17<00:29,  4.17it/s]



K: 1, Epoch [3/30] Loss: 1.7199:  37%|███▋      | 73/196 [00:17<00:29,  4.20it/s]



K: 1, Epoch [3/30] Loss: 1.8722:  38%|███▊      | 74/196 [00:17<00:29,  4.17it/s]



K: 1, Epoch [3/30] Loss: 1.6632:  38%|███▊      | 75/196 [00:17<00:28,  4.18it/s]



K: 1, Epoch [3/30] Loss: 1.7836:  39%|███▉      | 76/196 [00:18<00:28,  4.17it/s]



K: 1, Epoch [3/30] Loss: 1.7513:  39%|███▉      | 77/196 [00:18<00:28,  4.20it/s]



K: 1, Epoch [3/30] Loss: 1.7688:  40%|███▉      | 78/196 [00:18<00:28,  4.20it/s]



K: 1, Epoch [3/30] Loss: 1.8924:  40%|████      | 79/196 [00:18<00:27,  4.20it/s]



K: 1, Epoch [3/30] Loss: 1.7598:  41%|████      | 80/196 [00:19<00:27,  4.20it/s]



K: 1, Epoch [3/30] Loss: 1.7841:  41%|████▏     | 81/196 [00:19<00:27,  4.20it/s]



K: 1, Epoch [3/30] Loss: 1.8420:  42%|████▏     | 82/196 [00:19<00:27,  4.20it/s]



K: 1, Epoch [3/30] Loss: 1.7326:  42%|████▏     | 83/196 [00:19<00:27,  4.13it/s]



K: 1, Epoch [3/30] Loss: 1.7380:  43%|████▎     | 84/196 [00:20<00:26,  4.16it/s]



K: 1, Epoch [3/30] Loss: 1.8334:  43%|████▎     | 85/196 [00:20<00:26,  4.17it/s]



K: 1, Epoch [3/30] Loss: 1.7611:  44%|████▍     | 86/196 [00:20<00:26,  4.18it/s]



K: 1, Epoch [3/30] Loss: 1.8490:  44%|████▍     | 87/196 [00:20<00:26,  4.19it/s]



K: 1, Epoch [3/30] Loss: 1.8900:  45%|████▍     | 88/196 [00:20<00:25,  4.19it/s]



K: 1, Epoch [3/30] Loss: 1.8322:  45%|████▌     | 89/196 [00:21<00:25,  4.21it/s]



K: 1, Epoch [3/30] Loss: 1.8066:  46%|████▌     | 90/196 [00:21<00:25,  4.22it/s]



K: 1, Epoch [3/30] Loss: 1.8004:  46%|████▋     | 91/196 [00:21<00:24,  4.23it/s]



K: 1, Epoch [3/30] Loss: 1.8723:  47%|████▋     | 92/196 [00:21<00:24,  4.21it/s]



K: 1, Epoch [3/30] Loss: 1.8025:  47%|████▋     | 93/196 [00:22<00:24,  4.22it/s]



K: 1, Epoch [3/30] Loss: 1.8582:  48%|████▊     | 94/196 [00:22<00:24,  4.21it/s]



K: 1, Epoch [3/30] Loss: 1.8161:  48%|████▊     | 95/196 [00:22<00:23,  4.22it/s]



K: 1, Epoch [3/30] Loss: 1.7092:  49%|████▉     | 96/196 [00:22<00:23,  4.21it/s]



K: 1, Epoch [3/30] Loss: 1.8911:  49%|████▉     | 97/196 [00:23<00:23,  4.21it/s]



K: 1, Epoch [3/30] Loss: 1.6908:  50%|█████     | 98/196 [00:23<00:23,  4.20it/s]



K: 1, Epoch [3/30] Loss: 1.7806:  51%|█████     | 99/196 [00:23<00:23,  4.20it/s]



K: 1, Epoch [3/30] Loss: 1.8236:  51%|█████     | 100/196 [00:23<00:22,  4.19it/s]



K: 1, Epoch [3/30] Loss: 1.6625:  52%|█████▏    | 101/196 [00:24<00:22,  4.21it/s]



K: 1, Epoch [3/30] Loss: 1.7806:  52%|█████▏    | 102/196 [00:24<00:22,  4.22it/s]



K: 1, Epoch [3/30] Loss: 1.8300:  53%|█████▎    | 103/196 [00:24<00:22,  4.23it/s]



K: 1, Epoch [3/30] Loss: 1.7447:  53%|█████▎    | 104/196 [00:24<00:21,  4.24it/s]



K: 1, Epoch [3/30] Loss: 1.8436:  54%|█████▎    | 105/196 [00:25<00:21,  4.25it/s]



K: 1, Epoch [3/30] Loss: 1.7803:  54%|█████▍    | 106/196 [00:25<00:21,  4.26it/s]



K: 1, Epoch [3/30] Loss: 1.8212:  55%|█████▍    | 107/196 [00:25<00:20,  4.27it/s]



K: 1, Epoch [3/30] Loss: 1.7398:  55%|█████▌    | 108/196 [00:25<00:20,  4.27it/s]

In [15]:
# get df rows
df = pd.read_csv(f'./results/{data}_{model}_heads.csv')

In [19]:
df.iloc[0].item()

ValueError: can only convert an array of size 1 to a Python scalar