In [1]:
# Snapshot of per-GPU load and memory use, taken before any device is selected.
!nvidia-smi

Sat Nov 28 11:19:19 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.33.01    Driver Version: 440.33.01    CUDA Version: 10.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  TITAN RTX           Off  | 00000000:04:00.0 Off |                  N/A |
| 43%   64C    P2   264W / 280W |  18542MiB / 24220MiB |     98%      Default |
+-------------------------------+----------------------+----------------------+
|   1  TITAN RTX           Off  | 00000000:05:00.0 Off |                  N/A |
| 91%   87C    P2   190W / 280W |  17046MiB / 24220MiB |     90%      Default |
+-------------------------------+----------------------+----------------------+
|   2  TITAN RTX           Off  | 00000000:08:00.0 Off |                  N/A |
|100%   

In [2]:
######### IMPORTING NECESSARY MODULES #########
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset, TensorDataset
import sys
import torch.nn as nn
import torch.nn.functional as F
from torch.utils import data
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter

sys.path.append('/home/ironman/abhishek/AliProducts/architectures/pytorch-image-models/timm/models/')
sys.path.append('/home/ironman/abhishek/AliProducts/architectures/pytorch-image-models/')
sys.path.append('/home/ironman/abhishek/AliProducts/Helper/')

from dataloader import mydataset, create_prime_dict 
from trainer_ViT import train, test_classify, eval_classify
from Load_model import load



from vision_transformer import vit_base_patch32_384

In [3]:
# Primary device: GPU index 4 when CUDA is usable, otherwise the CPU.
device_name = 'cuda:4' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
print(device)

cuda:4


**Dataloading Scheme**

In [4]:
# Directory holding the train/validation list files.
list_dir = '/home/ironman/abhishek/AliProducts/Newlist/'

# List files consumed by create_prime_dict() and mydataset() below.
trainlist = list_dir + 'NEW_train_list1.txt'
validlist = list_dir + 'NEW_valid_list1.txt'

In [5]:
# Build `prime_dict` from the training list; it is passed to both datasets below.
# The helper also reports the class count (50030 in the saved output).
# NOTE(review): presumably a class-label lookup — confirm in Helper/dataloader.py.
prime_dict = create_prime_dict(trainlist)

Number of classes =  50030


In [6]:
# Worker/pinning settings shared by both loaders.
loader_kwargs = dict(num_workers=16, pin_memory=True)

# Training split: shuffled, 512 samples per batch.
train_dataset = mydataset(trainlist, prime_dict, name='train')
train_dataloader = data.DataLoader(train_dataset, batch_size=512, shuffle=True, **loader_kwargs)

# Validation split: fixed order, 256 samples per batch.
validation_dataset = mydataset(validlist, prime_dict, name='valid')
validation_dataloader = data.DataLoader(validation_dataset, batch_size=256, shuffle=False, **loader_kwargs)

**Model Definition**

In [7]:
# Instantiate the ViT without loading pretrained weights.
vit_backbone = vit_base_patch32_384(pretrained=False)

# Wrap for multi-GPU data parallelism on GPUs 4-7, then move the wrapper to
# the primary device selected above.
model = nn.DataParallel(vit_backbone, device_ids=[4, 5, 6, 7])
model = model.to(device)

In [8]:
# Display the wrapped module tree (DataParallel around the VisionTransformer).
model

DataParallel(
  (module): VisionTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 768, kernel_size=(32, 32), stride=(32, 32))
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (blocks): ModuleList(
      (0): Block(
        (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=768, out_features=2304, bias=True)
          (attn_drop): Dropout(p=0.0, inplace=False)
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (drop_path): Identity()
        (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (act): GELU()
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (drop): Dropout(p=0.0, inplace=False)
        )
      )
      (1): Block(
        (norm1): LayerNorm((768

In [9]:
# Transfer Learning
#
# Swap the classifier head for one with 50030 outputs (the class count reported
# by create_prime_dict above). Freezing the backbone is left disabled, so all
# parameters stay trainable:
#     for param in model.module.parameters():
#         param.requires_grad = False

NUM_CLASSES = 50030

# Width of the features feeding the existing head.
fc_inputs = model.module.head.in_features

# The single Linear layer stays wrapped in nn.Sequential: the wrapper is part of
# the head's parameter names (head.0.weight / head.0.bias) in the state_dict.
# Earlier experiments with extra layers (BatchNorm1d, GELU, a second Linear)
# are not active.
new_head = nn.Sequential(nn.Linear(fc_inputs, NUM_CLASSES))
model.module.head = new_head

# Move the freshly created head to the same device as the rest of the model.
model.to(device)

DataParallel(
  (module): VisionTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 768, kernel_size=(32, 32), stride=(32, 32))
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (blocks): ModuleList(
      (0): Block(
        (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=768, out_features=2304, bias=True)
          (attn_drop): Dropout(p=0.0, inplace=False)
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (drop_path): Identity()
        (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (act): GELU()
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (drop): Dropout(p=0.0, inplace=False)
        )
      )
      (1): Block(
        (norm1): LayerNorm((768

**Hyperparameters**

In [10]:
# Loss: cross-entropy classification loss.
criterion = nn.CrossEntropyLoss()

# Optimizer: SGD with momentum and weight decay over all model parameters.
optimizer = optim.SGD(model.parameters(), lr=0.05, momentum=0.9, weight_decay=1e-4)

# Number of training epochs.
num_Epochs = 30

# Schedule: scale the learning rate by 0.1 after every 10 scheduler steps.
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

# CutMix settings (currently disabled)
# beta = 1
# cutmix_prob = 1

In [11]:
# Run identifier; also used as the TensorBoard log directory name.
modelname = 'ViT_scratch_vit_base_patch32_384'

# Checkpoint path handed to train() and load().
checkpoint_root = '/home/ironman/abhishek/saved_model_checkpoints/AliProducts/'
modelpath = '{}{}'.format(checkpoint_root, modelname)

In [None]:
# TensorBoard event writer; events are written to a directory named after the run.
writer = SummaryWriter(modelname)

try:
    # Long-running: executes the full training/validation loop for num_Epochs epochs.
    train(model, train_dataloader, validation_dataloader, criterion, optimizer, lr_scheduler, modelpath, writer, device, epochs = num_Epochs)
finally:
    # Executed on normal completion, on an exception, and on a kernel interrupt,
    # so pending events are written out and the event file is always closed.
    writer.flush()
    writer.close()

loss 10.933627815246583
loss 21.733247299194336
loss 32.41564968109131
loss 42.99922583580017
loss 53.48661211013794
loss 63.87816764831543
loss 74.17049338340759
loss 84.36832669258118
loss 94.47614013671875
loss 104.49102215766906
loss 114.40977081298828
loss 124.25798411369324
loss 134.0140498828888
loss 143.67665919303894
loss 153.2670813179016
loss 162.77277848243713
loss 172.1930432987213
loss 181.54050104141234
loss 190.81232724189758
loss 199.99715372085572
loss 209.10913783073426
loss 218.13279258728028
Epoch:  1
training loss =  9.905279330641246
Validation Loss: 9.6937	Top 1 Validation Accuracy: 0.0013	 Top 5 Validation Accuracy: 0.0052
loss 8.758600730895996
loss 17.453941917419435
loss 26.115173177719115
loss 34.698236322402956
loss 43.20842604637146
loss 51.626526527404785
loss 59.97377611160278
loss 68.23331408500671
loss 76.41389558315277
loss 84.51837792396546
loss 92.53543013095856
loss 100.46762336730957
loss 108.3040635061264
loss 116.05524034023284
loss 123.7450595

loss 4.928913845717907
loss 5.206485079675913
loss 5.475172499716282
loss 5.751386700719595
Epoch:  13
training loss =  0.26162714040134666
Validation Loss: 6.0928	Top 1 Validation Accuracy: 0.1634	 Top 5 Validation Accuracy: 0.3084
loss 0.2124767306447029
loss 0.4326511204242706
loss 0.6513983422517776
loss 0.8676120422780513
loss 1.0855594128370285
loss 1.3076799124479295
loss 1.5243361127376556
loss 1.7515611051023006
loss 1.975921687334776
loss 2.2048850375413895
loss 2.4306453359127045
loss 2.6623615458607675
loss 2.897471466511488
loss 3.137016724795103
loss 3.372972179353237
loss 3.6107492043077944
loss 3.8530283312499525
loss 4.095401875823736
loss 4.335737729966641
loss 4.583084492981434
loss 4.827397936582566
loss 5.071153498440981
Epoch:  14
training loss =  0.2306111086186745
Validation Loss: 6.1014	Top 1 Validation Accuracy: 0.1617	 Top 5 Validation Accuracy: 0.3061
loss 0.1945379178225994
loss 0.3877435454726219
loss 0.5817832763493062
loss 0.7837712214887143
loss 0.98198

loss 0.3580237223953009
loss 0.4770677004754543
loss 0.5960210333019496
loss 0.7171022240817547
loss 0.8383257056027651
loss 0.9566338270902633
loss 1.0759795651584865
loss 1.196406601294875
loss 1.316924124136567
loss 1.4386785269528628
loss 1.5590517237782477
loss 1.6794287169724702
loss 1.801115638539195
loss 1.9226213017851115
loss 2.043155759498477
loss 2.1640663213282822
loss 2.2861316736787556
loss 2.4081365071982144
loss 2.5285788635909556
loss 2.650822877138853
Epoch:  26
training loss =  0.12051895491296438
Validation Loss: 6.0310	Top 1 Validation Accuracy: 0.1662	 Top 5 Validation Accuracy: 0.3128
loss 0.11836088322103024
loss 0.2368499570339918
loss 0.35492916136980057
loss 0.47366785421967506
loss 0.5922138479351997
loss 0.7110052414238452
loss 0.8302869275957346
loss 0.950721924751997
loss 1.071143632233143
loss 1.1908825252205133
loss 1.3113579070568084
loss 1.4316626039892435
loss 1.5511043374985456
loss 1.6723508287221194
loss 1.7930876483768226
loss 1.913799621462822


In [None]:
# Load saved model from checkpoint.
#
# load() receives the checkpoint path plus the live model, optimizer and
# scheduler, and hands back those three together with the stored training loss,
# validation loss, validation accuracy and epoch.
# NOTE(review): the saved notebook shows no execution count for this cell, yet
# the evaluation cell that follows reports metrics far from the last logged
# training epoch — confirm which checkpoint was actually loaded before trusting
# those numbers.
restored = load(modelpath, model, optimizer, lr_scheduler)
model, optimizer, lr_scheduler, train_loss, v_loss, v_acc, epoch = restored


In [12]:
# Evaluate the current model on the validation loader and print the metrics.
eval_results = eval_classify(model, validation_dataloader, criterion, device)
v_loss, top1_acc, accuracy_dict = eval_results

report_template = 'Validation Loss: {:.4f}\tTop 1 Validation Accuracy: {:.4f}\nAccuracy:{}\t'
print(report_template.format(v_loss, top1_acc, accuracy_dict))

Validation Loss: 0.6577	Top 1 Validation Accuracy: 0.8654
Accuracy:defaultdict(<class 'int'>, {'Top 1 Accuracy': 86.54392178672833, 'Top 5 Accuracy': 96.05128355238122, 'Top 10 Accuracy': 97.26604806369464, 'Top 20 Accuracy': 98.00954248748647, 'Top 30 Accuracy': 98.32860111816878, 'Top 50 Accuracy': 98.66814975265639, 'Top 100 Accuracy': 99.03404267775078})	
