In [1]:
# Shell escape: print the NVIDIA driver's per-GPU status table (memory use, utilisation).
!nvidia-smi

Sun Nov 29 18:30:32 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.33.01    Driver Version: 440.33.01    CUDA Version: 10.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  TITAN RTX           Off  | 00000000:04:00.0 Off |                  N/A |
| 41%   35C    P2    68W / 280W |   5158MiB / 24220MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   1  TITAN RTX           Off  | 00000000:05:00.0 Off |                  N/A |
| 40%   34C    P8    22W / 280W |   8601MiB / 24220MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   2  TITAN RTX           Off  | 00000000:08:00.0 Off |                  N/A |
| 41%   

In [2]:
######### IMPORTING NECESSARY MODULES #########
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset, TensorDataset
import sys
import torch.nn as nn
import torch.nn.functional as F
from torch.utils import data
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter

sys.path.append('/home/ironman/abhishek/AliProducts/architectures/pytorch-image-models/timm/models/')
sys.path.append('/home/ironman/abhishek/AliProducts/architectures/pytorch-image-models/')
sys.path.append('/home/ironman/abhishek/AliProducts/Helper/')

from dataloader import mydataset, create_prime_dict 
from trainer_ViT import train, test_classify, eval_classify
from Load_model import load



from vision_transformer import vit_base_patch32_384

In [3]:
# Use GPU index 2 when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:2')
else:
    device = torch.device('cpu')
print(device)

cuda:2


**Dataloading Scheme**

In [4]:
# Text files listing the training and validation samples.
trainlist, validlist = (
    '/home/ironman/abhishek/AliProducts/Newlist/NEW_train_list1.txt',
    '/home/ironman/abhishek/AliProducts/Newlist/NEW_valid_list1.txt',
)

In [5]:
# Build the lookup object from the training list; the same object is later handed
# to both the train and validation datasets.
# NOTE(review): judging by the cell output, the helper also prints the number of
# classes (50030) — confirm against Helper/dataloader.py.
prime_dict = create_prime_dict(trainlist)

Number of classes =  50030


In [6]:
# Worker / memory-pinning options shared by both loaders.
loader_common = dict(num_workers=16, pin_memory=True)

#### Train Dataloader ####
# Training batches are shuffled and large (768 samples).
train_dataset = mydataset(trainlist, prime_dict, name='train')
train_dataloader = data.DataLoader(
    train_dataset,
    batch_size=768,
    shuffle=True,
    **loader_common,
)

#### Validation Dataloader ####
# Validation keeps the file order and uses smaller batches (128 samples).
validation_dataset = mydataset(validlist, prime_dict, name='valid')
validation_dataloader = data.DataLoader(
    validation_dataset,
    batch_size=128,
    shuffle=False,
    **loader_common,
)

**Model Definition**

In [7]:
# Vision Transformer with pretrained weights and a dropout rate of 0.4.
backbone = vit_base_patch32_384(pretrained=True, drop_rate=0.4)

# Replicate across GPUs 2-7; the first listed id (2) matches the index used for `device`.
model = nn.DataParallel(backbone, device_ids=[2, 3, 4, 5, 6, 7])
model = model.to(device)

In [8]:
# Display the wrapped model's module tree as the cell output.
model

DataParallel(
  (module): VisionTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 768, kernel_size=(32, 32), stride=(32, 32))
    )
    (pos_drop): Dropout(p=0.4, inplace=False)
    (blocks): ModuleList(
      (0): Block(
        (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=768, out_features=2304, bias=True)
          (attn_drop): Dropout(p=0.0, inplace=False)
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.4, inplace=False)
        )
        (drop_path): Identity()
        (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (act): GELU()
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (drop): Dropout(p=0.4, inplace=False)
        )
      )
      (1): Block(
        (norm1): LayerNorm((768

In [9]:
# Transfer learning: replace the classification head with one sized for this dataset.

# Optional: freeze the existing parameters first so that only the new head
# (created below, trainable by default) receives gradient updates.
# for param in model.module.parameters():
#     param.requires_grad = False

# Matches the "Number of classes" value printed when prime_dict was built.
num_classes = 50030

# Read the feature width feeding the head. On the first run the head is a single
# Linear layer; after this cell has run once it is the Sequential built below,
# which has no `in_features` of its own, so look at its first layer instead.
# This keeps the cell re-runnable instead of failing with AttributeError.
current_head = model.module.head
if isinstance(current_head, nn.Sequential):
    fc_inputs = current_head[0].in_features
else:
    fc_inputs = current_head.in_features

model.module.head = nn.Sequential(
    nn.Linear(fc_inputs, num_classes),
)

# Move the freshly created head to the target device; as the last expression
# this also displays the updated model.
model.to(device)

DataParallel(
  (module): VisionTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 768, kernel_size=(32, 32), stride=(32, 32))
    )
    (pos_drop): Dropout(p=0.4, inplace=False)
    (blocks): ModuleList(
      (0): Block(
        (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=768, out_features=2304, bias=True)
          (attn_drop): Dropout(p=0.0, inplace=False)
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.4, inplace=False)
        )
        (drop_path): Identity()
        (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (act): GELU()
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (drop): Dropout(p=0.4, inplace=False)
        )
      )
      (1): Block(
        (norm1): LayerNorm((768

**Hyperparameters**

In [18]:
## Loss Function
criterion = nn.CrossEntropyLoss()

# Optimizer: SGD with momentum and L2 weight decay over every model parameter.
optimizer = optim.SGD(
    model.parameters(),
    lr=0.01,
    momentum=0.9,
    weight_decay=1e-4,
)

# Epochs
num_Epochs = 12

# Scheduler: scale the learning rate by 0.1 after every 6 scheduler steps.
lr_scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=6,
    gamma=0.1,
)

In [16]:
# Run name; also used as the TensorBoard log directory.
modelname = 'ViT_pretrained_vit_base_patch32_384_dropout'

# Checkpoint path = checkpoint directory + run name.
checkpoint_dir = '/home/ironman/abhishek/saved_model_checkpoints/AliProducts/'
modelpath = checkpoint_dir + modelname

In [None]:
# TensorBoard writer; event files go to a directory named after the run.
writer = SummaryWriter(modelname)

# Training can run for hours and may be interrupted or fail part-way through.
# The finally block guarantees pending events are flushed and the writer is
# closed in that case too, instead of only after a clean finish.
try:
    train(model, train_dataloader, validation_dataloader, criterion, optimizer, lr_scheduler, modelpath, writer, device, epochs = num_Epochs)
finally:
    writer.flush()
    writer.close()

loss 0.41240726828575136
loss 0.8361456999182701
loss 1.2628599473834037
loss 1.6992623600363732
loss 2.145672153830528
loss 2.6079919913411143
loss 3.0636757165193558
loss 3.5268797382712362
loss 3.991809473335743
loss 4.461266878247261
loss 4.933750176429749
loss 5.417405013442039
loss 5.904553549587726
loss 6.393261215090751
Epoch:  1
training loss =  0.45918348684796434
Validation Loss: 1.4678	Top 1 Validation Accuracy: 0.6980	 Top 5 Validation Accuracy: 0.8836
loss 0.4025497192144394
loss 0.8128239786624909
loss 1.231570246219635
loss 1.6487155023217201
loss 2.0776850816607477
loss 2.5110568287968635
loss 2.946517981886864
loss 3.389786552786827
loss 3.8366497665643693
loss 4.290610424280167
loss 4.7492774114012715
loss 5.211021146178245
loss 5.678881097435951
loss 6.147733005583286
Epoch:  2
training loss =  0.4410587545190401
Validation Loss: 1.4590	Top 1 Validation Accuracy: 0.6990	 Top 5 Validation Accuracy: 0.8845
loss 0.7809216514229774
loss 1.1740266174077987
loss 1.5802714

In [None]:
# Load saved model from checkpoint.
# The helper receives the current model, optimizer and scheduler and returns
# seven values, which replace / define the names unpacked below.
restored = load(modelpath, model, optimizer, lr_scheduler)
model, optimizer, lr_scheduler, train_loss, v_loss, v_acc, epoch = restored


In [12]:
# Run the evaluation helper on the validation loader, then print the loss,
# the top-1 accuracy and the accuracy dictionary it returns.
eval_results = eval_classify(model, validation_dataloader, criterion, device)
v_loss, top1_acc, accuracy_dict = eval_results

report = 'Validation Loss: {:.4f}\tTop 1 Validation Accuracy: {:.4f}\nAccuracy:{}\t'.format(v_loss, top1_acc, accuracy_dict)
print(report)

Validation Loss: 0.6577	Top 1 Validation Accuracy: 0.8654
Accuracy:defaultdict(<class 'int'>, {'Top 1 Accuracy': 86.54392178672833, 'Top 5 Accuracy': 96.05128355238122, 'Top 10 Accuracy': 97.26604806369464, 'Top 20 Accuracy': 98.00954248748647, 'Top 30 Accuracy': 98.32860111816878, 'Top 50 Accuracy': 98.66814975265639, 'Top 100 Accuracy': 99.03404267775078})	
