In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import matplotlib.pyplot as plt 

!pip3 install vit-pytorch linformer

Collecting linformer
  Downloading linformer-0.2.1-py3-none-any.whl (6.1 kB)
Collecting vit-pytorch
  Downloading vit_pytorch-0.6.5-py3-none-any.whl (6.1 kB)
Collecting einops>=0.3
  Downloading einops-0.3.0-py2.py3-none-any.whl (25 kB)
Installing collected packages: einops, vit-pytorch, linformer
Successfully installed einops-0.3.0 linformer-0.2.1 vit-pytorch-0.6.5
You should consider upgrading via the '/opt/conda/bin/python3.7 -m pip install --upgrade pip' command.[0m


In [2]:
from linformer import Linformer
import glob
from PIL import Image
from itertools import chain
from vit_pytorch.efficient import ViT
from tqdm.notebook import tqdm 
from sklearn.model_selection import train_test_split  

import torch 
import torch.nn as nn
import torch.optim as optim 
import torch.nn.functional as F
from torchvision import datasets, transforms 
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import DataLoader,Dataset

In [3]:
seed = 2021
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True

In [4]:
train_dir = "/kaggle/input/cat-and-dog/training_set/training_set/"

test_dir = "/kaggle/input/cat-and-dog/test_set/test_set/"

training_samples = glob.glob(train_dir+"/**/*.jpg")
testing_samples  = glob.glob(test_dir+"/**/*.jpg")

labels = [path.split('/')[-1].split('.')[0] for path in training_samples]


print("total training samples: ",len(training_samples))
print("total testing  samples: ",len(testing_samples))


total training samples:  8005
total testing  samples:  2023


In [5]:
train, val = train_test_split(training_samples, test_size = 0.25, stratify=labels,
                                          random_state=seed)

In [6]:
print(f"Training samples: {len(train)}")
print(f"Validation samples: {len(val)}")

Training samples: 6003
Validation samples: 2002


In [7]:
# image augmentation 

train_aug = transforms.Compose(
        [
            transforms.Resize((224,224)),
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            
        ]
        )


val_aug = transforms.Compose(
        [
            transforms.Resize((224,224)),
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            
        ]
        )
test_aug = transforms.Compose(
        [
            transforms.Resize((224,224)),
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            
        ]
        )

In [8]:
class DatasetLoader(Dataset):
    
    def __init__(self,files, transform=None):
        self.files = files
        self.transform = transform
        
    def __len__(self):
        self.file_len = len(self.files)
        return self.file_len
    
    def __getitem__(self,ids):
        img_path = self.files[ids]
        img = Image.open(img_path)
        augmented_img = self.transform(img)
        label = img_path.split("/")[-1].split(".")[0]
        label = 1 if label == "dog" else 0
        return augmented_img, label
    
    

    
train_ = DatasetLoader(train, train_aug)
val_   = DatasetLoader(val, val_aug)
test_  = DatasetLoader(testing_samples, test_aug)


batch_size = 256
train_loader = DataLoader(dataset = train_, batch_size=batch_size, shuffle=True )
valid_loader = DataLoader(dataset = val_, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset = test_, batch_size=batch_size, shuffle=True)

In [9]:
linear_transformer = Linformer(
    dim=128,
    seq_len=49+1,  # 7x7 patches + 1 cls-token
    depth=12,
    heads=8,
    k=64
)

In [10]:

epochs = 20
lr = 3e-5
gamma = 0.7

model = ViT(
    dim=128,
    image_size=224,
    patch_size=32,
    num_classes=2,
    transformer=linear_transformer,
    channels=3,
).to('cuda')

In [11]:
# loss
loss_func = nn.CrossEntropyLoss()
# optimizer 
optimizer = optim.Adam(model.parameters(),lr = lr)
# scheduler
scheduler = StepLR(optimizer, step_size=1, gamma=gamma)

In [12]:
# training the visual transformer

for epoch in range(epochs):
    
    epoch_loss = 0
    epoch_accuracy = 0
    
    for data, label in (train_loader):
        
        data = data.to('cuda')
        label = label.to('cuda')
        
        output = model(data)
        loss   = loss_func(output,label)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        acc = (output.argmax(dim=1)==label).float().mean()
        
        epoch_accuracy += acc/len(train_loader)
        epoch_loss += loss/len(train_loader)
        
    with torch.no_grad():
        epoch_val_accuracy = 0
        epoch_val_loss = 0
        
        for data, label in valid_loader:
            data = data.to('cuda')
            label = label.to('cuda')
            
            val_output = model(data)
            val_loss   = loss_func(val_output,label)
            
            acc = (val_output.argmax(dim=1) == label).float().mean()
            epoch_val_accuracy += acc / len(valid_loader)
            epoch_val_loss += val_loss / len(valid_loader)
    
    print(f"Epoch : {epoch+1} - loss : {epoch_loss:.4f} - acc: {epoch_accuracy:.4f} - val_loss : {epoch_val_loss:.4f} - val_acc: {epoch_val_accuracy:.4f}\n")

Epoch : 1 - loss : 0.7335 - acc: 0.4909 - val_loss : 0.6986 - val_acc: 0.5001

Epoch : 2 - loss : 0.6946 - acc: 0.4877 - val_loss : 0.6929 - val_acc: 0.5336

Epoch : 3 - loss : 0.6935 - acc: 0.5058 - val_loss : 0.6967 - val_acc: 0.4998

Epoch : 4 - loss : 0.6948 - acc: 0.4974 - val_loss : 0.6925 - val_acc: 0.5318

Epoch : 5 - loss : 0.6951 - acc: 0.5106 - val_loss : 0.6925 - val_acc: 0.5247

Epoch : 6 - loss : 0.6943 - acc: 0.5006 - val_loss : 0.6918 - val_acc: 0.5356

Epoch : 7 - loss : 0.6935 - acc: 0.5060 - val_loss : 0.6919 - val_acc: 0.5440

Epoch : 8 - loss : 0.6925 - acc: 0.5150 - val_loss : 0.6925 - val_acc: 0.4886

Epoch : 9 - loss : 0.6921 - acc: 0.5173 - val_loss : 0.6921 - val_acc: 0.5259

Epoch : 10 - loss : 0.6915 - acc: 0.5235 - val_loss : 0.6927 - val_acc: 0.4971

Epoch : 11 - loss : 0.6919 - acc: 0.5194 - val_loss : 0.6912 - val_acc: 0.5347

Epoch : 12 - loss : 0.6917 - acc: 0.5239 - val_loss : 0.6914 - val_acc: 0.4994

Epoch : 13 - loss : 0.6911 - acc: 0.5154 - val_lo