In [13]:
import datetime
import os
import json
from comet_ml import Experiment

import torch
import torch.nn as nn
from torch import optim
from torchvision import datasets
from torchvision import transforms

from sklearn.metrics import confusion_matrix, accuracy_score

# Load the Comet credentials / project settings from the local JSON config file.
with open('config.json', 'r') as f:
    env = json.loads(f.read())

# Create the Comet experiment for this run (code logging on, simple output
# capture) and give it a human-readable name.
experiment = Experiment(
    api_key=env["COMET_API_KEY"],
    workspace=env["COMET_WORKSPACE"],
    project_name=env["COMET_PROJECT_NAME"],
    log_code=True,
    auto_output_logging="simple",
)
experiment.set_name("ViT FMNIST")

def get_loader(args):
    """Build the Fashion-MNIST train and test data loaders.

    The dataset is stored under ``./data/fmnist`` (created if missing) and
    downloaded on demand.

    Args:
        args: mapping providing ``image_size``, ``batch_size`` and
            ``num_workers``.

    Returns:
        ``(train_loader, test_loader)``. The train loader shuffles and drops
        the last incomplete batch; the test loader keeps order, keeps every
        sample and uses twice the training batch size.
    """
    data_path = os.path.join('./data/', 'fmnist')
    os.makedirs(data_path, exist_ok=True)

    side = args['image_size']
    workers = args['num_workers']

    # Training pipeline: padded random crop + random horizontal flip, then
    # tensor conversion and normalisation with mean 0.5 / std 0.5.
    train_steps = [
        transforms.RandomCrop(side, padding=2, padding_mode='edge'),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5]),
    ]
    # Evaluation pipeline: deterministic resize, same tensor conversion/normalisation.
    test_steps = [
        transforms.Resize([side, side]),
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5]),
    ]

    train = datasets.FashionMNIST(
        data_path, train=True, download=True,
        transform=transforms.Compose(train_steps),
    )
    test = datasets.FashionMNIST(
        data_path, train=False, download=True,
        transform=transforms.Compose(test_steps),
    )

    train_loader = torch.utils.data.DataLoader(
        dataset=train,
        batch_size=args['batch_size'],
        shuffle=True,
        num_workers=workers,
        drop_last=True,
    )
    test_loader = torch.utils.data.DataLoader(
        dataset=test,
        batch_size=args['batch_size'] * 2,
        shuffle=False,
        num_workers=workers,
        drop_last=False,
    )
    return train_loader, test_loader


class EmbedLayer(nn.Module):
    """Turn an image batch into a token sequence for the transformer.

    A strided convolution (kernel size == stride == ``patch_size``) embeds each
    patch; a learnable class token is prepended and a learnable positional
    embedding is added. Both learnable tensors start at zero.

    Args:
        n_channels: number of input image channels.
        embed_dim: size of each token embedding.
        image_size: image side length used to size the positional embedding.
        patch_size: side length of each patch.
    """

    def __init__(self, n_channels, embed_dim, image_size, patch_size):
        super().__init__()
        # One position per patch plus one for the class token.
        n_tokens = (image_size // patch_size) ** 2 + 1
        self.conv1 = nn.Conv2d(n_channels, embed_dim, kernel_size=patch_size, stride=patch_size)
        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim), requires_grad=True)
        self.pos_embedding = nn.Parameter(torch.zeros(1, n_tokens, embed_dim), requires_grad=True)

    def forward(self, x):
        """Return tokens shaped (batch, n_patches + 1, embed_dim)."""
        # Conv output is (batch, embed_dim, H', W'); merge the spatial dims and
        # move the embedding dimension last.
        patches = self.conv1(x).flatten(2).transpose(1, 2)
        # Replicate the single class token once per sample and put it first.
        cls = self.cls_token.repeat_interleave(patches.shape[0], dim=0)
        tokens = torch.cat((cls, patches), dim=1)
        return tokens + self.pos_embedding

class SelfAttention(nn.Module):
    """Multi-head scaled dot-product self-attention.

    Queries, keys and values are each produced by a linear layer of size
    ``embed_dim -> embed_dim`` and split into ``num_heads`` heads. The head
    outputs are concatenated and returned directly; there is no output
    projection layer.

    Args:
        embed_dim: size of the last dimension of the input.
        num_heads: number of attention heads; must divide ``embed_dim``.

    Raises:
        ValueError: if ``num_heads`` is not positive or does not divide
            ``embed_dim`` (previously this only surfaced as a reshape error
            inside ``forward``).
    """

    def __init__(self, embed_dim, num_heads):
        super().__init__()
        if num_heads <= 0 or embed_dim % num_heads != 0:
            raise ValueError(
                f"embed_dim ({embed_dim}) must be divisible by a positive "
                f"num_heads ({num_heads})"
            )
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.head_dim = embed_dim // num_heads

        self.wq = nn.Linear(self.embed_dim, self.embed_dim)
        self.wk = nn.Linear(self.embed_dim, self.embed_dim)
        self.wv = nn.Linear(self.embed_dim, self.embed_dim)

    def forward(self, x, mask=None):
        """Apply self-attention to ``x``.

        Args:
            x: tensor of shape (B, S, E) with E == ``embed_dim``.
            mask: optional tensor broadcastable to the (B, heads, S, S) score
                tensor. It is multiplied by -1e4 and added to the scores, so
                non-zero entries suppress the corresponding positions.

        Returns:
            Tensor of shape (B, S, E).
        """
        B, S, E = x.shape

        xq = self.wq(x)
        xk = self.wk(x)
        xv = self.wv(x)

        # Split the embedding into heads and move the head axis forward:
        # (B, S, E) -> (B, S, H, HE) -> (B, H, S, HE).
        xq = xq.view(B, S, self.num_heads, self.head_dim).transpose(1, 2)
        xk = xk.view(B, S, self.num_heads, self.head_dim).transpose(1, 2)
        xv = xv.view(B, S, self.num_heads, self.head_dim).transpose(1, 2)

        # Scaled dot-product scores, shape (B, H, S, S).
        xk = xk.transpose(-1, -2)
        x_attn = torch.matmul(xq, xk)
        x_attn /= float(self.head_dim) ** 0.5
        if mask is not None:
            # Large negative bias drives masked positions to ~0 after softmax.
            x_attn += mask.to(x_attn.dtype) * x_attn.new_tensor(-1e4)
        x_attn = torch.softmax(x_attn, dim=-1)
        x = torch.matmul(x_attn, xv)

        # Merge the heads back: (B, H, S, HE) -> (B, S, H, HE) -> (B, S, E).
        x = x.transpose(1, 2)
        x = x.reshape(B, S, E)
        return x


class Encoder(nn.Module):
    """One pre-norm transformer encoder block.

    Each of the two sub-layers (self-attention, then a two-layer GELU MLP)
    is applied to a layer-normalised copy of its input and added back
    through a residual connection.

    Args:
        embed_dim: token embedding size.
        n_attention_heads: number of heads for the self-attention sub-layer.
        forward_mul: MLP hidden size as a multiple of ``embed_dim``.
    """

    def __init__(self, embed_dim, n_attention_heads, forward_mul):
        super().__init__()
        hidden_dim = embed_dim * forward_mul
        self.attention = SelfAttention(embed_dim, n_attention_heads)
        self.fc1 = nn.Linear(embed_dim, hidden_dim)
        self.activation = nn.GELU()
        self.fc2 = nn.Linear(hidden_dim, embed_dim)
        self.norm1 = nn.LayerNorm(embed_dim)
        self.norm2 = nn.LayerNorm(embed_dim)

    def forward(self, x):
        """Return the block output; same shape as ``x``."""
        # Attention sub-layer with residual connection.
        attended = self.attention(self.norm1(x))
        x = x + attended
        # Feed-forward sub-layer with residual connection.
        hidden = self.activation(self.fc1(self.norm2(x)))
        return x + self.fc2(hidden)


class Classifier(nn.Module):
    """Classification head that reads only the first token of the sequence.

    Args:
        embed_dim: token embedding size.
        n_classes: number of output logits.
    """

    def __init__(self, embed_dim, n_classes):
        super().__init__()
        self.fc1 = nn.Linear(embed_dim, embed_dim)
        self.activation = nn.Tanh()
        self.fc2 = nn.Linear(embed_dim, n_classes)

    def forward(self, x):
        """Map (batch, seq, embed_dim) tokens to (batch, n_classes) logits."""
        # Position 0 along the sequence axis is the token used for classification.
        first_token = x[:, 0, :]
        hidden = self.activation(self.fc1(first_token))
        return self.fc2(hidden)


class VisionTransformer(nn.Module):
    """Vision Transformer: patch embedding -> encoder stack -> norm -> classifier.

    Args:
        n_channels: number of input image channels.
        embed_dim: token embedding size.
        n_layers: number of encoder blocks.
        n_attention_heads: attention heads per encoder block.
        forward_mul: MLP hidden size multiplier inside each encoder block.
        image_size: input image side length.
        patch_size: patch side length.
        n_classes: number of output logits.
    """

    def __init__(self, n_channels, embed_dim, n_layers, n_attention_heads, forward_mul, image_size, patch_size, n_classes):
        super().__init__()
        self.embedding = EmbedLayer(n_channels, embed_dim, image_size, patch_size)

        # The encoder stack ends with its own LayerNorm; ``self.norm`` below is
        # applied on top of it in ``forward``. Both are kept so that parameter
        # names in saved checkpoints stay unchanged.
        stack = [Encoder(embed_dim, n_attention_heads, forward_mul) for _ in range(n_layers)]
        stack.append(nn.LayerNorm(embed_dim))
        self.encoder = nn.Sequential(*stack)

        self.norm = nn.LayerNorm(embed_dim)
        self.classifier = Classifier(embed_dim, n_classes)

    def forward(self, x):
        """Return class logits for the image batch ``x``."""
        tokens = self.embedding(x)
        encoded = self.encoder(tokens)
        return self.classifier(self.norm(encoded))


class Solver(object):
    """Build, train and evaluate a VisionTransformer on Fashion-MNIST.

    Args:
        args: mapping of hyper-parameters. Keys read here: ``model_path``,
            ``image_size``, ``patch_size``, ``n_channels``, ``embed_dim``,
            ``n_layers``, ``n_attention_heads``, ``forward_mul``,
            ``n_classes``, ``load_model``, ``lr``, ``epochs`` and
            ``warmup_epochs`` (plus whatever ``get_loader`` reads).

    The mapping is shallow-copied: derived entries (``model_path`` with the
    ``fmnist`` suffix, ``n_patches``, ``is_cuda``) are stored on
    ``self.args`` only, so constructing a second Solver from the same
    mapping does not see a path that was already suffixed.
    """

    def __init__(self, args):
        # Copy so the caller's dict is never mutated by the derived entries below.
        self.args = dict(args)

        self.args['model_path'] = os.path.join(args['model_path'], 'fmnist')
        os.makedirs(self.args['model_path'], exist_ok=True)
        print('Model path: ', self.args['model_path'])
        self.args['n_patches'] = (args['image_size'] // args['patch_size']) ** 2

        self.train_loader, self.test_loader = get_loader(self.args)

        self.model = VisionTransformer(n_channels=self.args['n_channels'], embed_dim=self.args['embed_dim'],
                                        n_layers=self.args['n_layers'], n_attention_heads=self.args['n_attention_heads'],
                                        forward_mul=self.args['forward_mul'], image_size=self.args['image_size'],
                                        patch_size=self.args['patch_size'], n_classes=self.args['n_classes'])

        # Pick the device once and remember it for every later tensor transfer.
        self.args['is_cuda'] = torch.cuda.is_available()
        self.device = torch.device("cuda:0" if self.args['is_cuda'] else "cpu")
        self.model = self.model.to(self.device)
        print(f"Using device {self.device}")

        if self.args['load_model']:
            print("Using pretrained model")
            checkpoint_path = os.path.join(self.args['model_path'], 'ViT_model.pt')
            # map_location lets a checkpoint saved on GPU be loaded on a
            # CPU-only machine (and vice versa).
            self.model.load_state_dict(torch.load(checkpoint_path, map_location=self.device))

        self.ce = nn.CrossEntropyLoss()

    def test_dataset(self, loader):
        """Evaluate the model on every batch of ``loader``.

        Returns:
            ``(accuracy, confusion_matrix)`` computed over all samples the
            loader yields, with confusion-matrix labels ``0..n_classes-1``.
        """
        self.model.eval()

        actual = []
        pred = []

        with torch.no_grad():
            for (x, y) in loader:
                x = x.to(self.device)

                logits = self.model(x)
                predicted = torch.max(logits, 1)[1]

                actual += y.tolist()
                pred += predicted.tolist()

        acc = accuracy_score(y_true=actual, y_pred=pred)
        cm = confusion_matrix(y_true=actual, y_pred=pred, labels=range(self.args['n_classes']))

        return acc, cm

    def test(self, train=True):
        """Print accuracy and confusion matrix for the test set.

        Args:
            train: when true, the same report is printed for the training
                loader first.

        Returns:
            The test-set accuracy.
        """
        if train:
            acc, cm = self.test_dataset(self.train_loader)
            print(f"Train acc: {acc:.2%}\nTrain Confusion Matrix:")
            print(cm)

        acc, cm = self.test_dataset(self.test_loader)
        print(f"Test acc: {acc:.2%}\nTest Confusion Matrix:")
        print(cm)

        return acc

    def train(self):
        """Run the full training loop.

        Uses AdamW with a linear warm-up for ``warmup_epochs`` epochs followed
        by cosine annealing down to 1e-5. After every epoch the test set is
        evaluated (the training set too on every 25th epoch) and the current
        weights are written to ``ViT_model.pt`` in the model directory; the
        file always holds the latest epoch, not the best one.
        """
        iter_per_epoch = len(self.train_loader)
        epochs = self.args['epochs']
        warmup_epochs = self.args['warmup_epochs']

        optimizer = optim.AdamW(self.model.parameters(), lr=self.args['lr'], weight_decay=1e-3)
        # With no warm-up requested, skip the warm-up scheduler entirely
        # instead of dividing by zero in ``start_factor``.
        linear_warmup = None
        if warmup_epochs > 0:
            linear_warmup = optim.lr_scheduler.LinearLR(optimizer, start_factor=1/warmup_epochs, end_factor=1.0, total_iters=warmup_epochs, last_epoch=-1, verbose=True)
        cos_decay = optim.lr_scheduler.CosineAnnealingLR(optimizer=optimizer, T_max=epochs-warmup_epochs, eta_min=1e-5, verbose=True)

        best_acc = 0
        for epoch in range(epochs):

            self.model.train()

            for i, (x, y) in enumerate(self.train_loader):
                x, y = x.to(self.device), y.to(self.device)

                logits = self.model(x)
                loss = self.ce(logits, y)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # Log every 50th iteration and the last one of the epoch.
                if i % 50 == 0 or i == (iter_per_epoch - 1):
                    print(f'Ep: {epoch+1}/{self.args["epochs"]}, It: {i+1}/{iter_per_epoch}, loss: {loss:.4f}')

            test_acc = self.test(train=((epoch+1)%25==0)) # Test training set every 25 epochs
            best_acc = max(test_acc, best_acc)
            print(f"Best test acc: {best_acc:.2%}\n")

            torch.save(self.model.state_dict(), os.path.join(self.args['model_path'], "ViT_model.pt"))

            # Step exactly one scheduler per epoch: warm-up first, cosine afterwards.
            if epoch < warmup_epochs:
                linear_warmup.step()
            else:
                cos_decay.step()

def main(args):
    """Train a model with the given settings, then report final accuracy.

    Ensures ``args['model_path']`` exists, trains via ``Solver`` and finishes
    with an evaluation on both the training and the test loader.
    """
    os.makedirs(args['model_path'], exist_ok=True)

    runner = Solver(args)
    runner.train()
    # Final report covers the training set as well as the test set.
    runner.test(train=True)

if __name__ == '__main__':
    # Fixed seed so weight initialisation and shuffling start from the same state.
    torch.manual_seed(0)

    # Hyper-parameters and paths for this run.
    model_args = {
        "epochs": 200,
        "batch_size": 256,
        "num_workers": 4,
        "lr": 5e-4,
        "n_classes": 10,
        "warmup_epochs": 10,
        "image_size": 28,
        "n_channels": 1,
        "embed_dim": 64,
        "n_attention_heads": 4,
        "patch_size": 4,
        "forward_mul": 2,
        "n_layers": 6,
        "model_path": './model',
        "load_model": False,
    }

    # Time the whole run and print start, end and elapsed wall-clock time.
    start_time = datetime.datetime.now()
    print(f"Started at {start_time:%Y-%m-%d %H:%M:%S}")
    main(model_args)

    end_time = datetime.datetime.now()
    duration = end_time - start_time
    print(f"Ended at {end_time:%Y-%m-%d %H:%M:%S}")
    print(f"Duration: {duration}")

# Close the Comet experiment created at module level (runs regardless of the
# __main__ guard above).
experiment.end()

[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m Comet.ml Experiment Summary
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m   Data:
[1;38;5;39mCOMET INFO:[0m     display_summary_level : 1
[1;38;5;39mCOMET INFO:[0m     name                  : ViT FMNIST
[1;38;5;39mCOMET INFO:[0m     url                   : https://www.comet.com/snrdrg/vit-fmnist/bdb9548ec97042f0a963b3e7bc00f3e7
[1;38;5;39mCOMET INFO:[0m   Metrics [count] (min, max):
[1;38;5;39mCOMET INFO:[0m     loss [24] : (1.5326045751571655, 2.334679365158081)
[1;38;5;39mCOMET INFO:[0m   Others:
[1;38;5;39mCOMET INFO:[0m     Name : ViT FMNIST
[1;38;5;39mCOMET INFO:[0m   Uploads:
[1;38;5;39mCOMET INFO:[0m     environment details : 1
[1;38;5;39mCOMET INFO:[0m     filename            : 1
[1;38;5;39mCOMET INFO:[0m     installed

Started at 2024-12-18 11:07:49
Model path:  ./model\fmnist
Using device cuda:0
Ep: 1/200, It: 1/234, loss: 2.3347
Ep: 1/200, It: 51/234, loss: 2.0297
Ep: 1/200, It: 101/234, loss: 1.7983
Ep: 1/200, It: 151/234, loss: 1.6063
Ep: 1/200, It: 201/234, loss: 1.5553
Ep: 1/200, It: 234/234, loss: 1.4870
Test acc: 48.29%
Test Confusion Matrix:
[[ 45   8 134  43 105  30 172  10 368  85]
 [  1 886  22  61   9   1  12   0   5   3]
 [ 37  18 463  15 161   5 210   1  80  10]
 [ 13 330  20 354  57  36  82   4  36  68]
 [ 41  21 332  83 324  10  89   1  78  21]
 [  0  18   0   8   0 676   6 220   9  63]
 [ 43  16 356  34 120  25 255   3 125  23]
 [  1   0   0   0   0 163   1 686   7 142]
 [  9  14 148  58  26  19  35  26 453 212]
 [  8   3   7  21  24   5   3  97 145 687]]
Best test acc: 48.29%

Ep: 2/200, It: 1/234, loss: 1.4708
Ep: 2/200, It: 51/234, loss: 1.4515
Ep: 2/200, It: 101/234, loss: 1.3156
Ep: 2/200, It: 151/234, loss: 1.2787
Ep: 2/200, It: 201/234, loss: 1.1685
Ep: 2/200, It: 234/234, lo

Ep: 13/200, It: 1/234, loss: 0.4509
Ep: 13/200, It: 51/234, loss: 0.5818
Ep: 13/200, It: 101/234, loss: 0.4438
Ep: 13/200, It: 151/234, loss: 0.5121
Ep: 13/200, It: 201/234, loss: 0.5124
Ep: 13/200, It: 234/234, loss: 0.3669
Test acc: 81.80%
Test Confusion Matrix:
[[819   0  23  72   7   2  70   0   7   0]
 [  1 933  10  50   5   0   0   0   1   0]
 [ 10   1 775  14 125   0  72   0   3   0]
 [ 25   3   8 894  27   2  40   0   1   0]
 [  1   0 173  50 727   0  46   0   3   0]
 [  0   0   0   0   0 900   0  66   6  28]
 [217   0 199  70 140   0 354   0  20   0]
 [  0   0   0   0   0  34   0 849   1 116]
 [  0   1  13   3   8   6   7   2 956   4]
 [  0   0   0   1   0   6   0  19   1 973]]
Best test acc: 81.80%

Ep: 14/200, It: 1/234, loss: 0.4127
Ep: 14/200, It: 51/234, loss: 0.5126
Ep: 14/200, It: 101/234, loss: 0.5066
Ep: 14/200, It: 151/234, loss: 0.5502
Ep: 14/200, It: 201/234, loss: 0.4908
Ep: 14/200, It: 234/234, loss: 0.4316
Test acc: 81.83%
Test Confusion Matrix:
[[809   1  14  9

Ep: 25/200, It: 1/234, loss: 0.3501
Ep: 25/200, It: 51/234, loss: 0.3717
Ep: 25/200, It: 101/234, loss: 0.3278
Ep: 25/200, It: 151/234, loss: 0.3469
Ep: 25/200, It: 201/234, loss: 0.4421
Ep: 25/200, It: 234/234, loss: 0.3230
Train acc: 86.76%
Train Confusion Matrix:
[[5005    9   60  243   31    4  578    1   58    1]
 [   8 5856    2  103    9    1    4    0    4    0]
 [  73    6 4613   77  677    4  488    0   55    0]
 [ 179   79   23 5408  174    1  113    0    3    5]
 [  15   13  558  298 4707    1  378    0   23    0]
 [   0    0    0    8    0 5649    1  229   11   90]
 [ 882   13  478  226  569    6 3738    0   80    0]
 [   0    0    0    0    0  186    0 5566    5  230]
 [  26    3   21   36   21   47   46    9 5776   11]
 [   3    2    1   13    2   64    4  244    7 5653]]
Test acc: 86.44%
Test Confusion Matrix:
[[825   1  10  40   4   3 107   0  10   0]
 [  3 966   0  24   3   0   3   0   1   0]
 [ 12   1 790  18  79   0  91   0   9   0]
 [ 27   3   7 903  23   0  36   0

Ep: 36/200, It: 1/234, loss: 0.3044
Ep: 36/200, It: 51/234, loss: 0.2946
Ep: 36/200, It: 101/234, loss: 0.4052
Ep: 36/200, It: 151/234, loss: 0.3205
Ep: 36/200, It: 201/234, loss: 0.3399
Ep: 36/200, It: 234/234, loss: 0.3228
Test acc: 88.15%
Test Confusion Matrix:
[[855   0   7  38   3   1  88   0   8   0]
 [  1 967   0  20   4   0   7   0   1   0]
 [ 21   1 800  17  77   0  79   0   5   0]
 [ 26   4   3 901  31   0  34   0   1   0]
 [  1   0  75  29 829   0  65   0   1   0]
 [  0   0   0   0   0 920   0  46   0  34]
 [147   1  51  24 116   0 648   0  13   0]
 [  0   0   0   0   0  11   0 959   0  30]
 [  1   1   0   4   5   0   4   4 977   4]
 [  0   0   0   0   0   3   0  38   0 959]]
Best test acc: 88.15%

Ep: 37/200, It: 1/234, loss: 0.2574
Ep: 37/200, It: 51/234, loss: 0.2748
Ep: 37/200, It: 101/234, loss: 0.4074
Ep: 37/200, It: 151/234, loss: 0.3392
Ep: 37/200, It: 201/234, loss: 0.3127
Ep: 37/200, It: 234/234, loss: 0.3453
Test acc: 87.54%
Test Confusion Matrix:
[[900   0   8  5

Ep: 48/200, It: 1/234, loss: 0.2615
Ep: 48/200, It: 51/234, loss: 0.2564
Ep: 48/200, It: 101/234, loss: 0.1993
Ep: 48/200, It: 151/234, loss: 0.3023
Ep: 48/200, It: 201/234, loss: 0.2852
Ep: 48/200, It: 234/234, loss: 0.2456
Test acc: 88.81%
Test Confusion Matrix:
[[833   1  14  41   5   2 101   0   3   0]
 [  0 965   3  26   2   0   3   0   1   0]
 [ 10   1 866   9  42   0  71   0   1   0]
 [ 17   4   8 905  29   0  37   0   0   0]
 [  1   1  98  32 804   0  63   0   1   0]
 [  0   0   0   0   0 948   0  40   0  12]
 [113   1  74  27 101   0 681   0   3   0]
 [  0   0   0   0   0  17   0 964   0  19]
 [  4   0   3   3   4   5   5   4 969   3]
 [  0   0   0   1   0   6   0  47   0 946]]
Best test acc: 88.81%

Ep: 49/200, It: 1/234, loss: 0.2311
Ep: 49/200, It: 51/234, loss: 0.2687
Ep: 49/200, It: 101/234, loss: 0.3163
Ep: 49/200, It: 151/234, loss: 0.2870
Ep: 49/200, It: 201/234, loss: 0.2656
Ep: 49/200, It: 234/234, loss: 0.3519
Test acc: 89.14%
Test Confusion Matrix:
[[872   0  18  4

Ep: 59/200, It: 1/234, loss: 0.2137
Ep: 59/200, It: 51/234, loss: 0.2973
Ep: 59/200, It: 101/234, loss: 0.2312
Ep: 59/200, It: 151/234, loss: 0.3056
Ep: 59/200, It: 201/234, loss: 0.3084
Ep: 59/200, It: 234/234, loss: 0.2838
Test acc: 89.77%
Test Confusion Matrix:
[[825   1  18  45   4   2  98   0   7   0]
 [  1 976   4  15   1   0   2   0   1   0]
 [ 15   1 857  11  46   0  69   0   1   0]
 [ 14   7  12 924  18   0  23   0   2   0]
 [  1   0  58  42 834   0  63   0   2   0]
 [  0   0   0   0   0 975   0  18   1   6]
 [ 98   0  64  35  73   1 721   0   8   0]
 [  0   0   0   0   0  34   0 947   0  19]
 [  2   2   5   2   2   3   1   2 978   3]
 [  0   0   0   0   0  12   0  48   0 940]]
Best test acc: 89.90%

Ep: 60/200, It: 1/234, loss: 0.3194
Ep: 60/200, It: 51/234, loss: 0.3305
Ep: 60/200, It: 101/234, loss: 0.2465
Ep: 60/200, It: 151/234, loss: 0.2979
Ep: 60/200, It: 201/234, loss: 0.2458
Ep: 60/200, It: 234/234, loss: 0.2932
Test acc: 89.36%
Test Confusion Matrix:
[[793   1  13  5

Ep: 71/200, It: 1/234, loss: 0.2870
Ep: 71/200, It: 51/234, loss: 0.1773
Ep: 71/200, It: 101/234, loss: 0.2345
Ep: 71/200, It: 151/234, loss: 0.2532
Ep: 71/200, It: 201/234, loss: 0.2076
Ep: 71/200, It: 234/234, loss: 0.2301
Test acc: 90.37%
Test Confusion Matrix:
[[854   1  22  22   3   1  87   0  10   0]
 [  1 979   2  12   2   0   2   0   2   0]
 [ 15   2 861   8  46   0  67   0   1   0]
 [ 17  10  15 898  27   0  31   0   2   0]
 [  1   1  51  26 845   0  75   0   1   0]
 [  0   0   0   0   0 955   0  27   0  18]
 [112   1  50  18  73   0 738   0   8   0]
 [  0   0   0   0   0   6   0 957   0  37]
 [  4   1   2   2   1   0   0   2 985   3]
 [  1   0   0   1   0   3   0  30   0 965]]
Best test acc: 90.37%

Ep: 72/200, It: 1/234, loss: 0.2143
Ep: 72/200, It: 51/234, loss: 0.2709
Ep: 72/200, It: 101/234, loss: 0.2246
Ep: 72/200, It: 151/234, loss: 0.2594
Ep: 72/200, It: 201/234, loss: 0.2071
Ep: 72/200, It: 234/234, loss: 0.2139
Test acc: 90.00%
Test Confusion Matrix:
[[896   0  12  4

Ep: 82/200, It: 1/234, loss: 0.2190
Ep: 82/200, It: 51/234, loss: 0.1851
Ep: 82/200, It: 101/234, loss: 0.2557
Ep: 82/200, It: 151/234, loss: 0.2813
Ep: 82/200, It: 201/234, loss: 0.2540
Ep: 82/200, It: 234/234, loss: 0.2484
Test acc: 90.55%
Test Confusion Matrix:
[[823   1  25  20   6   1 119   0   5   0]
 [  0 983   3   7   2   0   3   0   2   0]
 [ 11   2 871   9  37   0  69   0   1   0]
 [ 15  13  10 900  24   0  38   0   0   0]
 [  1   0  54  38 830   0  77   0   0   0]
 [  0   0   0   0   0 968   0  22   0  10]
 [ 79   2  48  17  76   0 774   0   4   0]
 [  0   0   0   0   0   7   0 968   0  25]
 [  6   1   2   3   1   0   3   2 979   3]
 [  1   0   0   0   0   5   0  35   0 959]]
Best test acc: 90.94%

Ep: 83/200, It: 1/234, loss: 0.2604
Ep: 83/200, It: 51/234, loss: 0.2058
Ep: 83/200, It: 101/234, loss: 0.2386
Ep: 83/200, It: 151/234, loss: 0.2349
Ep: 83/200, It: 201/234, loss: 0.2514
Ep: 83/200, It: 234/234, loss: 0.2077
Test acc: 90.53%
Test Confusion Matrix:
[[822   1  17  3

Ep: 94/200, It: 1/234, loss: 0.1965
Ep: 94/200, It: 51/234, loss: 0.1780
Ep: 94/200, It: 101/234, loss: 0.2669
Ep: 94/200, It: 151/234, loss: 0.1921
Ep: 94/200, It: 201/234, loss: 0.2154
Ep: 94/200, It: 234/234, loss: 0.2116
Test acc: 91.01%
Test Confusion Matrix:
[[877   1  14  23   6   1  71   0   7   0]
 [  1 984   1   9   2   0   1   0   2   0]
 [ 19   2 886  10  45   0  35   0   3   0]
 [ 19  12  10 902  27   0  29   0   1   0]
 [  1   2  46  18 882   0  48   0   3   0]
 [  0   0   0   0   0 964   0  28   0   8]
 [129   1  57  20  89   0 699   0   5   0]
 [  0   0   0   0   0  14   0 968   0  18]
 [  2   1   1   2   0   0   0   3 990   1]
 [  1   0   0   0   0   6   0  44   0 949]]
Best test acc: 91.12%

Ep: 95/200, It: 1/234, loss: 0.2059
Ep: 95/200, It: 51/234, loss: 0.1921
Ep: 95/200, It: 101/234, loss: 0.1779
Ep: 95/200, It: 151/234, loss: 0.2213
Ep: 95/200, It: 201/234, loss: 0.2115
Ep: 95/200, It: 234/234, loss: 0.1709
Test acc: 91.05%
Test Confusion Matrix:
[[864   1  13  3

Ep: 105/200, It: 1/234, loss: 0.1435
Ep: 105/200, It: 51/234, loss: 0.2294
Ep: 105/200, It: 101/234, loss: 0.1835
Ep: 105/200, It: 151/234, loss: 0.2287
Ep: 105/200, It: 201/234, loss: 0.2360
Ep: 105/200, It: 234/234, loss: 0.1898
Test acc: 91.13%
Test Confusion Matrix:
[[863   1  19  18   2   0  85   0  12   0]
 [  3 976   2  15   0   0   2   0   2   0]
 [ 18   0 867  12  46   0  55   0   2   0]
 [ 20   3  10 930  11   0  26   0   0   0]
 [  1   0  31  49 841   1  76   0   1   0]
 [  0   0   0   0   0 959   0  22   3  16]
 [100   1  42  27  57   0 762   0  11   0]
 [  0   0   0   0   0  13   0 956   1  30]
 [  4   0   0   2   0   1   0   1 991   1]
 [  1   0   0   0   0   4   0  27   0 968]]
Best test acc: 91.29%

Ep: 106/200, It: 1/234, loss: 0.1754
Ep: 106/200, It: 51/234, loss: 0.1941
Ep: 106/200, It: 101/234, loss: 0.1434
Ep: 106/200, It: 151/234, loss: 0.2006
Ep: 106/200, It: 201/234, loss: 0.2130
Ep: 106/200, It: 234/234, loss: 0.1883
Test acc: 91.26%
Test Confusion Matrix:
[[85

Test acc: 91.18%
Test Confusion Matrix:
[[858   0  20  37   2   0  74   0   9   0]
 [  3 971   1  19   0   0   3   0   3   0]
 [ 15   0 894  14  38   0  36   0   3   0]
 [ 14   3   5 952  11   0  15   0   0   0]
 [  3   0  55  46 856   0  36   0   4   0]
 [  0   0   0   0   0 976   0  15   1   8]
 [ 96   1  60  31  92   0 708   0  12   0]
 [  0   0   0   0   0  18   0 965   0  17]
 [  2   0   1   2   0   0   0   2 992   1]
 [  1   0   0   0   0  10   0  43   0 946]]
Best test acc: 91.58%

Ep: 117/200, It: 1/234, loss: 0.1647
Ep: 117/200, It: 51/234, loss: 0.2250
Ep: 117/200, It: 101/234, loss: 0.1427
Ep: 117/200, It: 151/234, loss: 0.1834
Ep: 117/200, It: 201/234, loss: 0.2315
Ep: 117/200, It: 234/234, loss: 0.2174
Test acc: 91.60%
Test Confusion Matrix:
[[876   1  24  20   5   0  65   0   9   0]
 [  2 983   2   9   1   0   0   0   3   0]
 [ 18   0 896   8  47   0  31   0   0   0]
 [ 22   9  16 904  30   0  19   0   0   0]
 [  1   0  35  14 907   0  40   0   3   0]
 [  0   0   0   0   

Ep: 127/200, It: 1/234, loss: 0.2288
Ep: 127/200, It: 51/234, loss: 0.2040
Ep: 127/200, It: 101/234, loss: 0.1461
Ep: 127/200, It: 151/234, loss: 0.1762
Ep: 127/200, It: 201/234, loss: 0.1700
Ep: 127/200, It: 234/234, loss: 0.2080
Test acc: 91.46%
Test Confusion Matrix:
[[855   1  13  25   5   0  95   0   5   1]
 [  2 979   3   9   3   0   2   0   2   0]
 [ 12   0 847   9  65   0  66   0   1   0]
 [ 12   7   9 917  33   0  22   0   0   0]
 [  3   1  28  15 891   0  61   0   1   0]
 [  0   0   0   0   0 974   0  20   0   6]
 [ 93   1  36  19  74   0 771   0   6   0]
 [  0   0   0   0   0  14   0 961   0  25]
 [  4   2   1   1   1   1   1   2 986   1]
 [  0   0   0   0   0   5   1  29   0 965]]
Best test acc: 91.96%

Ep: 128/200, It: 1/234, loss: 0.1792
Ep: 128/200, It: 51/234, loss: 0.1589
Ep: 128/200, It: 101/234, loss: 0.1705
Ep: 128/200, It: 151/234, loss: 0.1837
Ep: 128/200, It: 201/234, loss: 0.1342
Ep: 128/200, It: 234/234, loss: 0.1557
Test acc: 91.68%
Test Confusion Matrix:
[[88

Test acc: 91.75%
Test Confusion Matrix:
[[860   1  17  22   3   0  89   0   8   0]
 [  2 979   2  11   1   0   2   0   3   0]
 [ 17   0 886  14  41   0  40   0   2   0]
 [ 11   7  11 931  21   0  19   0   0   0]
 [  2   1  34  32 885   0  46   0   0   0]
 [  0   0   0   0   0 972   0  17   0  11]
 [ 87   1  46  23  87   0 748   0   8   0]
 [  0   0   0   0   0  15   0 961   0  24]
 [  3   0   1   2   0   0   0   1 992   1]
 [  0   0   0   0   0   5   1  33   0 961]]
Best test acc: 91.96%

Ep: 139/200, It: 1/234, loss: 0.0952
Ep: 139/200, It: 51/234, loss: 0.1023
Ep: 139/200, It: 101/234, loss: 0.1604
Ep: 139/200, It: 151/234, loss: 0.1422
Ep: 139/200, It: 201/234, loss: 0.1669
Ep: 139/200, It: 234/234, loss: 0.2051
Test acc: 91.74%
Test Confusion Matrix:
[[856   1  14  25   2   0  95   1   6   0]
 [  2 981   1   9   1   0   3   0   3   0]
 [ 16   0 886  12  43   0  42   0   1   0]
 [ 15   8   9 922  21   0  25   0   0   0]
 [  3   0  34  29 876   0  58   0   0   0]
 [  0   0   0   0   

Ep: 150/200, It: 1/234, loss: 0.1559
Ep: 150/200, It: 51/234, loss: 0.2036
Ep: 150/200, It: 101/234, loss: 0.2226
Ep: 150/200, It: 151/234, loss: 0.1747
Ep: 150/200, It: 201/234, loss: 0.2097
Ep: 150/200, It: 234/234, loss: 0.1861
Train acc: 94.11%
Train Confusion Matrix:
[[5437    1   58  105    2    0  384    0    4    0]
 [   0 5957    0   30    3    0    4    0    0    0]
 [  59    1 5477   33  240    0  179    0    5    0]
 [  50   23   22 5715  107    1   66    0    4    0]
 [   5    2  221  108 5468    0  174    0    5    0]
 [   0    0    0    1    0 5878    0   79    8   24]
 [ 433    5  246  107  329    0 4864    0   11    0]
 [   0    0    0    0    0   44    0 5817    2  125]
 [  11    1    3    3    4    3   13    1 5950    0]
 [   0    0    0    0    0   26    0  155    0 5811]]
Test acc: 91.80%
Test Confusion Matrix:
[[864   1  14  25   1   0  90   0   5   0]
 [  3 981   2   8   1   0   2   0   3   0]
 [ 16   0 894  12  38   0  39   0   1   0]
 [ 14   8  11 916  26   0  

Ep: 161/200, It: 1/234, loss: 0.1378
Ep: 161/200, It: 51/234, loss: 0.1914
Ep: 161/200, It: 101/234, loss: 0.1702
Ep: 161/200, It: 151/234, loss: 0.1488
Ep: 161/200, It: 201/234, loss: 0.1996
Ep: 161/200, It: 234/234, loss: 0.1818
Test acc: 91.93%
Test Confusion Matrix:
[[857   1  15  24   3   0  95   0   5   0]
 [  2 983   1   8   0   0   3   0   3   0]
 [ 13   0 890  11  42   0  43   0   1   0]
 [ 14   7  10 923  29   0  17   0   0   0]
 [  3   0  37  18 897   0  44   0   1   0]
 [  0   0   0   0   0 975   0  18   0   7]
 [ 91   1  43  21  85   0 755   0   4   0]
 [  0   0   0   0   0  14   0 967   0  19]
 [  4   1   1   1   0   0   0   2 990   1]
 [  0   0   0   0   0   6   1  37   0 956]]
Best test acc: 92.01%

Ep: 162/200, It: 1/234, loss: 0.1258
Ep: 162/200, It: 51/234, loss: 0.1660
Ep: 162/200, It: 101/234, loss: 0.1658
Ep: 162/200, It: 151/234, loss: 0.1344
Ep: 162/200, It: 201/234, loss: 0.1535
Ep: 162/200, It: 234/234, loss: 0.2325
Test acc: 91.90%
Test Confusion Matrix:
[[86

Test acc: 91.74%
Test Confusion Matrix:
[[848   1  18  26   3   0 100   0   4   0]
 [  2 982   2   8   0   0   3   0   3   0]
 [ 15   0 882  12  45   0  45   0   1   0]
 [ 12   7   9 918  32   0  22   0   0   0]
 [  2   1  28  19 905   0  45   0   0   0]
 [  0   0   0   0   0 977   0  16   0   7]
 [ 93   1  44  21  87   0 749   0   5   0]
 [  0   0   0   0   0  14   0 964   0  22]
 [  3   0   1   1   0   0   1   2 991   1]
 [  0   0   0   0   0   5   1  36   0 958]]
Best test acc: 92.01%

Ep: 173/200, It: 1/234, loss: 0.1358
Ep: 173/200, It: 51/234, loss: 0.1570
Ep: 173/200, It: 101/234, loss: 0.2087
Ep: 173/200, It: 151/234, loss: 0.1721
Ep: 173/200, It: 201/234, loss: 0.1305
Ep: 173/200, It: 234/234, loss: 0.1521
Test acc: 91.96%
Test Confusion Matrix:
[[873   1  16  22   1   0  82   0   5   0]
 [  2 982   1   9   0   0   3   0   3   0]
 [ 16   0 896  13  35   0  39   0   1   0]
 [ 17   7  10 922  25   0  19   0   0   0]
 [  3   1  46  30 872   0  48   0   0   0]
 [  0   0   0   0   

Ep: 183/200, It: 1/234, loss: 0.1876
Ep: 183/200, It: 51/234, loss: 0.1088
Ep: 183/200, It: 101/234, loss: 0.0975
Ep: 183/200, It: 151/234, loss: 0.1742
Ep: 183/200, It: 201/234, loss: 0.1047
Ep: 183/200, It: 234/234, loss: 0.1238
Test acc: 92.07%
Test Confusion Matrix:
[[859   0  15  25   3   0  95   0   3   0]
 [  2 982   2   8   0   0   3   0   3   0]
 [ 14   0 893  12  41   0  39   0   1   0]
 [ 11   6  10 928  24   0  21   0   0   0]
 [  3   0  31  29 885   0  52   0   0   0]
 [  0   0   0   0   0 979   0  13   0   8]
 [ 87   1  47  22  66   0 771   0   6   0]
 [  0   0   0   0   0  15   0 961   0  24]
 [  3   0   1   1   0   0   1   2 991   1]
 [  0   0   0   0   0   6   1  35   0 958]]
Best test acc: 92.11%

Ep: 184/200, It: 1/234, loss: 0.1544
Ep: 184/200, It: 51/234, loss: 0.1401
Ep: 184/200, It: 101/234, loss: 0.1652
Ep: 184/200, It: 151/234, loss: 0.1193
Ep: 184/200, It: 201/234, loss: 0.1535
Ep: 184/200, It: 234/234, loss: 0.1501
Test acc: 91.95%
Test Confusion Matrix:
[[86

Test acc: 91.98%
Test Confusion Matrix:
[[865   0  16  22   2   0  91   0   4   0]
 [  2 983   2   6   0   0   4   0   3   0]
 [ 14   0 879  13  43   0  50   0   1   0]
 [ 17   7   8 923  25   0  20   0   0   0]
 [  3   1  32  20 893   0  51   0   0   0]
 [  0   0   0   0   0 977   0  15   0   8]
 [ 95   1  42  20  75   0 761   0   6   0]
 [  0   0   0   0   0  14   0 966   0  20]
 [  3   0   1   1   0   0   0   2 992   1]
 [  0   0   0   0   0   5   1  35   0 959]]
Best test acc: 92.17%

Ep: 195/200, It: 1/234, loss: 0.1546
Ep: 195/200, It: 51/234, loss: 0.1409
Ep: 195/200, It: 101/234, loss: 0.1306
Ep: 195/200, It: 151/234, loss: 0.1462
Ep: 195/200, It: 201/234, loss: 0.1066
Ep: 195/200, It: 234/234, loss: 0.1219
Test acc: 92.05%
Test Confusion Matrix:
[[872   0  16  22   2   0  84   0   4   0]
 [  2 982   1   7   0   0   5   0   3   0]
 [ 14   0 890  13  40   0  42   0   1   0]
 [ 15   8  10 924  25   0  18   0   0   0]
 [  3   1  31  21 898   0  46   0   0   0]
 [  0   0   0   0   

[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m Comet.ml Experiment Summary
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m   Data:
[1;38;5;39mCOMET INFO:[0m     display_summary_level : 1
[1;38;5;39mCOMET INFO:[0m     name                  : ViT FMNIST
[1;38;5;39mCOMET INFO:[0m     url                   : https://www.comet.com/snrdrg/vit-fmnist/c345d414bf7f4c06b88828cc428d930e
[1;38;5;39mCOMET INFO:[0m   Metrics [count] (min, max):
[1;38;5;39mCOMET INFO:[0m     loss [4680] : (0.06436394155025482, 2.334679365158081)
[1;38;5;39mCOMET INFO:[0m   Others:
[1;38;5;39mCOMET INFO:[0m     Name : ViT FMNIST
[1;38;5;39mCOMET INFO:[0m   Uploads:
[1;38;5;39mCOMET INFO:[0m     environment details : 1
[1;38;5;39mCOMET INFO:[0m     filename            : 1
[1;38;5;39mCOMET INFO:[0m     instal

Test acc: 92.03%
Test Confusion Matrix:
[[863   0  16  24   2   0  91   0   4   0]
 [  2 982   1   7   0   0   5   0   3   0]
 [ 14   0 885  13  38   0  49   0   1   0]
 [ 13   7   9 925  27   0  19   0   0   0]
 [  2   1  33  20 895   0  49   0   0   0]
 [  0   0   0   0   0 974   0  18   0   8]
 [ 92   1  45  21  71   0 765   0   5   0]
 [  0   0   0   0   0  13   0 966   0  21]
 [  3   1   1   1   0   0   1   2 990   1]
 [  0   0   0   0   0   4   1  37   0 958]]
Ended at 2024-12-18 12:14:11
Duration: 1:06:22.435844
