In [1]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
import os 
import numpy as np
import cv2
from tqdm import tqdm

In [3]:
params = {'input_channels':3, 'dim':768, 'hidden_dim':3072, 'patch_size':16, 'img_size':224, 'num_layers':12, 
          'dropout':0.0, 'attention_dropout':0.0, 'num_heads':12, 'fine_tune':10, 'num_classes':21843}

In [4]:
from vit.pls import *

In [5]:
model = VisionTransformer(**params)

In [6]:
import torchvision
from torchvision import transforms

In [7]:
transform_train = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor()
])

In [8]:
data = torchvision.datasets.CIFAR10('D:/Data/random/', transform=transform_train, train=True, download=False)

In [9]:
train, val = torch.utils.data.random_split(data, [40000, 10000])
trainloader = torch.utils.data.DataLoader(train, shuffle=True, batch_size=16, num_workers=4)
valloader = torch.utils.data.DataLoader(val, shuffle=True, batch_size=16, num_workers=4)

In [10]:
device = torch.device('cuda:0')

In [11]:
model.load_state_dict(torch.load('lasthope.pth', map_location=device), strict=False)

_IncompatibleKeys(missing_keys=['patch_embed.weight', 'patch_embed.bias', 'ft.weight', 'ft.bias'], unexpected_keys=['norm.weight', 'norm.bias', 'patch_embed.proj.weight', 'patch_embed.proj.bias'])

In [12]:
model = model.to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
loss = nn.CrossEntropyLoss()

In [13]:
torch.cuda.device(0)

<torch.cuda.device at 0x1f3e50ff6a0>

In [14]:
model.train()

VisionTransformer(
  (patch_embed): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
  (blocks): Sequential(
    (0): EncoderBlock(
      (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (attn): MultiheadAttention2(
        (qkv): Linear(in_features=768, out_features=2304, bias=True)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=768, out_features=768, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (dropout): Dropout(p=0.0, inplace=False)
      (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (mlp): MLP(
        (fc1): Linear(in_features=768, out_features=3072, bias=True)
        (act): GELU()
        (dropout_1): Dropout(p=0.0, inplace=False)
        (fc2): Linear(in_features=3072, out_features=768, bias=True)
        (dropout_2): Dropout(p=0.0, inplace=False)
      )
    )
    (1): EncoderBlock(
      (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (at

In [15]:
training_losses = []
val_losses = []
avg_training_losses = []
avg_val_losses = []

In [16]:
def train(model, optimizer, loss, epochs, trainloader, valloader, device):
    avg_tloss, avg_vloss = [], []
    for e in tqdm(range(epochs)):
        t_loss, v_loss = [], []
        for x, y in trainloader:
            optimizer.zero_grad()
            x, y = x.to(device), y.to(device)
            y_pred = model(x.float())
            l = loss(y_pred, y)
            l.backward()
            optimizer.step()
            
            npl = l.detach().cpu().numpy()
            t_loss.append(npl)
            
        with torch.no_grad():
            for x, y in valloader:
                x, y = x.to(device), y.to(device)
                y_pred = model(x.float())
                l = loss(y_pred, y)
                
                npl = l.detach().cpu().numpy()
                v_loss.append(npl)
    
        avg_tloss.append(np.mean(t_loss))
        avg_vloss.append(np.mean(v_loss))

        print(f'Epoch: {e}, Training Loss: {np.mean(t_loss)}, Validation Loss: {np.mean(v_loss)}')
    
    return avg_tloss, avg_vloss, model, optimizer

In [None]:
tloss, vloss, model, optimizer = train(model, optimizer, loss, 100, trainloader, valloader, device)

  1%|▍                                              | 1/100 [13:40<22:34:13, 820.74s/it]

Epoch: 0, Training Loss: 302.44830322265625, Validation Loss: 2.5833210945129395


  2%|▉                                              | 2/100 [27:20<22:19:29, 820.10s/it]

Epoch: 1, Training Loss: 1965.947998046875, Validation Loss: 19.663541793823242
