In [2]:
# Print the NVIDIA driver / GPU status table (per-GPU memory use and utilisation) for this host.
!nvidia-smi

Sat Nov 28 10:37:15 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.33.01    Driver Version: 440.33.01    CUDA Version: 10.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  TITAN RTX           Off  | 00000000:04:00.0 Off |                  N/A |
| 42%   65C    P2   279W / 280W |  22094MiB / 24220MiB |     98%      Default |
+-------------------------------+----------------------+----------------------+
|   1  TITAN RTX           Off  | 00000000:05:00.0 Off |                  N/A |
| 91%   88C    P2   264W / 280W |  17046MiB / 24220MiB |     89%      Default |
+-------------------------------+----------------------+----------------------+
|   2  TITAN RTX           Off  | 00000000:08:00.0 Off |                  N/A |
|100%   

In [2]:
######### IMPORTING NECESSARY MODULES #########
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset, TensorDataset
import sys
import torch.nn as nn
import torch.nn.functional as F
from torch.utils import data
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter

# Extend the module search path with the project's two code directories.
# The `dataloader`, `trainer` and `Load_model` imports below are presumably
# resolved from these locations — TODO confirm which directory holds which module.
# NOTE(review): both paths are absolute and machine-specific.
sys.path.append('/home/ironman/abhishek/AliProducts/architectures/')
sys.path.append('/home/ironman/abhishek/AliProducts/Helper/')

from dataloader import mydataset, create_prime_dict 
from trainer import train, test_classify, eval_classify
from Load_model import load



from vit_pytorch import ViT

In [3]:
# Use GPU index 4 when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:4')
else:
    device = torch.device('cpu')
print(device)

cuda:4


**Dataloading Scheme**

In [4]:
# Text files listing the training and validation samples, in that order.
trainlist, validlist = (
    '/home/ironman/abhishek/AliProducts/Newlist/NEW_train_list1.txt',
    '/home/ironman/abhishek/AliProducts/Newlist/NEW_valid_list1.txt',
)

In [5]:
# Build `prime_dict` from the training list; the helper reports the class count
# as it runs (50030 in the recorded output).
# NOTE(review): presumably a class-label lookup — confirm against the helper's
# definition in the project's `dataloader` module.
prime_dict = create_prime_dict(trainlist)

Number of classes =  50030


In [6]:
# --- Training split: reshuffled each epoch, 1024 samples per batch ---
train_dataset = mydataset(trainlist, prime_dict, name='train')
train_dataloader = data.DataLoader(
    train_dataset,
    batch_size=1024,
    shuffle=True,
    num_workers=16,
    pin_memory=True,
)

# --- Validation split: fixed order, 128 samples per batch ---
validation_dataset = mydataset(validlist, prime_dict, name='valid')
validation_dataloader = data.DataLoader(
    validation_dataset,
    batch_size=128,
    shuffle=False,
    num_workers=16,
    pin_memory=True,
)

**Model Definition**

In [7]:
# Vision Transformer: 256-pixel inputs cut into 32-pixel patches, six
# transformer layers of width 1024 with 8 heads, and a 50030-way output.
model = ViT(
    image_size=256,
    patch_size=32,
    num_classes=50030,
    dim=1024,
    depth=6,
    heads=8,
    mlp_dim=2048,
    dropout=0.1,
    emb_dropout=0.1,
)

# Replicate across GPUs 4-7, then place the wrapped module on `device`.
model = nn.DataParallel(model, device_ids=[4, 5, 6, 7])
model = model.to(device)

# Trailing expression: display the module tree as the cell output.
model


DataParallel(
  (module): ViT(
    (patch_to_embedding): Linear(in_features=3072, out_features=1024, bias=True)
    (dropout): Dropout(p=0.1, inplace=False)
    (transformer): Transformer(
      (layers): ModuleList(
        (0): ModuleList(
          (0): Residual(
            (fn): PreNorm(
              (norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
              (fn): Attention(
                (to_qkv): Linear(in_features=1024, out_features=3072, bias=False)
                (to_out): Sequential(
                  (0): Linear(in_features=1024, out_features=1024, bias=True)
                  (1): Dropout(p=0.1, inplace=False)
                )
              )
            )
          )
          (1): Residual(
            (fn): PreNorm(
              (norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
              (fn): FeedForward(
                (net): Sequential(
                  (0): Linear(in_features=1024, out_features=2048, bias=True)
            

**Hyperparameters**

In [8]:
# Objective: multi-class cross-entropy.
criterion = nn.CrossEntropyLoss()

# Adam over every model parameter, starting at a learning rate of 0.001.
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Length of the run, in epochs.
num_Epochs = 120

# Step decay: multiply the learning rate by 0.1 every 30 scheduler steps.
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)

# CutMix settings (currently disabled)
# beta = 1
# cutmix_prob = 1

In [9]:
# Directory that holds this project's checkpoints; the run name is appended to it.
checkpoint_root = '/home/ironman/abhishek/saved_model_checkpoints/AliProducts/'

# Run identifier, also used as the TensorBoard log directory name.
modelname = 'ViT_adam'
modelpath = checkpoint_root + modelname

In [None]:
# TensorBoard writer; its log directory is named after the run (`modelname`).
writer = SummaryWriter(modelname)

try:
    # Project training loop from the `trainer` module. It receives both
    # loaders, the loss, optimizer, scheduler, the writer and the device.
    # NOTE(review): presumably checkpoints are written under `modelpath` —
    # confirm in the `trainer` module.
    train(model, train_dataloader, validation_dataloader, criterion, optimizer, lr_scheduler, modelpath, writer, device, epochs = num_Epochs)
finally:
    # Runs even when training raises or the kernel is interrupted, so pending
    # events are written out and the event-file handle is released instead
    # of being left open for the lifetime of the kernel.
    writer.flush()
    writer.close()

loss 10.827429780960083
loss 21.624833335876463
loss 32.40389916419983
loss 43.15905951499939
loss 53.89399312973023
loss 64.61359502792358
loss 75.31556436538696
loss 86.01247138977051
loss 96.66827885627747
loss 107.27454155921936
loss 117.84358242988587
Epoch:  1
training loss =  10.71115497033373
Validation Loss: 10.6433	Top 1 Validation Accuracy: 0.0001	 Top 5 Validation Accuracy: 0.0005
loss 10.442194557189941
loss 20.838954887390138
loss 31.198494682312013
loss 41.524763383865356
loss 51.835889406204224
loss 62.17466708183289
loss 72.44980776786804
loss 82.74271814346314
loss 92.95467862129212
loss 103.20975544929505
loss 113.40744176864624
Epoch:  2
training loss =  10.308512283743715
Validation Loss: 10.3504	Top 1 Validation Accuracy: 0.0003	 Top 5 Validation Accuracy: 0.0014
loss 10.061445512771606
loss 20.112768449783324
loss 30.153789119720457
loss 40.20015948295593
loss 50.24918898582459
loss 60.31516764640808
loss 70.36110998153687
loss 80.3927788734436
loss 90.4165878582

loss 8.638602533340453
loss 17.197217836380005
loss 25.771636514663697
loss 34.386673097610476
loss 43.09919241905212
loss 51.74867656707764
loss 60.371345596313475
loss 68.98691084861755
loss 77.69519494056702
loss 86.40499710083007
loss 95.09026183128357
Epoch:  22
training loss =  8.644797807974781
Validation Loss: 9.7223	Top 1 Validation Accuracy: 0.0044	 Top 5 Validation Accuracy: 0.0145
loss 8.511324834823608
loss 17.03416223526001
loss 25.539327058792114
loss 34.01605225563049
loss 42.524116086959836
loss 51.057267837524414
loss 59.77351448059082
loss 68.57384805679321
loss 77.32869032859803
loss 86.03371976852416
loss 94.72514276504516
Epoch:  23
training loss =  8.611895626397441
Validation Loss: 9.7750	Top 1 Validation Accuracy: 0.0040	 Top 5 Validation Accuracy: 0.0130
loss 8.482422122955322
loss 16.951132469177246
loss 25.42479681968689
loss 33.928729124069214
loss 42.43674903869629
loss 50.937898273468015
loss 59.44325297355652
loss 67.98518974304199
loss 76.56659707069397

In [None]:
# Load saved model from checkpoint.
# `load` is handed the live model, optimizer and scheduler; its return values
# rebind those three names and add the values unpacked below.
(
    model,
    optimizer,
    lr_scheduler,
    train_loss,
    v_loss,
    v_acc,
    epoch,
) = load(modelpath, model, optimizer, lr_scheduler)


In [12]:
# Score the current `model` on the validation loader.
v_loss, top1_acc, accuracy_dict = eval_classify(
    model, validation_dataloader, criterion, device
)

# Headline loss and top-1 accuracy first, then the full accuracy dictionary as returned.
print(
    'Validation Loss: {:.4f}\tTop 1 Validation Accuracy: {:.4f}\nAccuracy:{}\t'.format(
        v_loss, top1_acc, accuracy_dict
    )
)

Validation Loss: 0.6577	Top 1 Validation Accuracy: 0.8654
Accuracy:defaultdict(<class 'int'>, {'Top 1 Accuracy': 86.54392178672833, 'Top 5 Accuracy': 96.05128355238122, 'Top 10 Accuracy': 97.26604806369464, 'Top 20 Accuracy': 98.00954248748647, 'Top 30 Accuracy': 98.32860111816878, 'Top 50 Accuracy': 98.66814975265639, 'Top 100 Accuracy': 99.03404267775078})	
