In [1]:
import torch
import torchvision
import os
import random

import gradio as gr
import matplotlib.pyplot as plt

from PIL import Image
from pathlib import Path
from datetime import datetime
from time import time
from tqdm.auto import tqdm
from torch.utils.tensorboard.writer import SummaryWriter

In [2]:
# Root folder that holds one sub-directory per class, each containing .jpg images.
data_path = Path('data') / 'images' / 'Images'
# glob() yields entries in an OS-dependent order; sorting makes the class order
# (and therefore every class index derived from it) stable across runs and machines.
# Only directories are kept so stray files in the root are not treated as classes.
classes_path = sorted(path for path in data_path.glob('*') if path.is_dir())
# Sorted for the same reason: a deterministic sample order is a precondition for
# any reproducible train/test split built on top of this list.
images_path = sorted(data_path.glob('*/*.jpg'))

In [3]:
# Class names are the names of the entries listed in `classes_path`.
CLASS_NAMES = [class_dir.name for class_dir in classes_path]
NUM_CLASSES = len(CLASS_NAMES)
# Class name -> integer label, numbered in the order of CLASS_NAMES.
CLASS_TO_IDX = dict(zip(CLASS_NAMES, range(NUM_CLASSES)))
BATCH_SIZE = 32
LEARNING_RATE = 3e-4
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
DEVICE_NAME = 'cpu' if not torch.cuda.is_available() else 'cuda'

In [4]:
class CustomDataset(torch.utils.data.Dataset):
    """Dataset of image files labelled by the name of their parent directory.

    Attributes:
        paths: indexable collection of image paths (objects exposing `.parent.name`).
        idx: mapping from parent-directory name to label.
        transformer: callable applied to the RGB-converted PIL image.
    """

    def __init__(self, paths, idx, transformer):
        super().__init__()
        self.paths, self.idx, self.transformer = paths, idx, transformer

    def __len__(self):
        """Return the number of image paths."""
        return len(self.paths)

    def __getitem__(self, index):
        """Return `(transformed_image, label)` for the path at `index`."""
        image_path = self.paths[index]
        rgb_image = Image.open(image_path).convert('RGB')
        sample = self.transformer(rgb_image)
        # The label is looked up from the name of the folder containing the image.
        target = self.idx[image_path.parent.name]
        return sample, target
        
        

In [5]:
# Preprocessing pipeline: resize to 224x224, convert to a tensor, then normalize
# each of the three channels with the given per-channel mean and std.
transformer = torchvision.transforms.Compose([
    torchvision.transforms.Resize(size=[224, 224]),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

In [6]:
# Full dataset: every discovered image, labelled through CLASS_TO_IDX.
custom_dataset = CustomDataset(
    paths=images_path,
    idx=CLASS_TO_IDX,
    transformer=transformer,
)

In [7]:
# Fixed seed so the same samples land in the train/test subsets on every run;
# without it, re-running the notebook reshuffles the split and a previously
# saved model may be evaluated on images it was trained on.
SPLIT_SEED = 42

custom_dataset_size = len(custom_dataset)
train_size = round(custom_dataset_size * 0.8)
# Take the remainder instead of rounding a second time so the two lengths
# always add up to the dataset size, which random_split requires.
test_size = custom_dataset_size - train_size
train_dataset, test_dataset = torch.utils.data.random_split(
    custom_dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

In [8]:
# Training batches are reshuffled every epoch; test batches keep a fixed order.
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
test_dataloader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

In [9]:
def create_mobnet_model(num_classes, device_name):
    """Build a MobileNetV2 feature extractor with a fresh classification head.

    Loads MobileNetV2 with its default weights, disables gradients on every
    parameter of the loaded network, then replaces `classifier` with a new
    Dropout(0.2) + Linear(1280 -> num_classes) head (whose parameters are
    trainable), and moves the model to `device_name`.

    Args:
        num_classes: number of output features of the new linear layer.
        device_name: device passed to `model.to(...)`.

    Returns:
        The modified model on `device_name`.
    """
    backbone = torchvision.models.mobilenet_v2(
        weights=torchvision.models.MobileNet_V2_Weights.DEFAULT
    )

    # Freeze everything that came with the loaded network.
    for frozen in backbone.parameters():
        frozen.requires_grad = False

    head_layers = (
        torch.nn.Dropout(p=0.2, inplace=False),
        torch.nn.Linear(in_features=1280, out_features=num_classes),
    )
    backbone.classifier = torch.nn.Sequential(*head_layers)

    return backbone.to(device_name)

In [10]:
def create_effnet_model(num_classes, device_name):
    """Build an EfficientNet-B0 feature extractor with a fresh classification head.

    Loads EfficientNet-B0 with its default weights, disables gradients on every
    parameter of the loaded network, replaces `classifier` with a new in-place
    Dropout(0.2) + Linear(1280 -> num_classes) head, and moves the model to
    `device_name`.

    Args:
        num_classes: number of output features of the new linear layer.
        device_name: device passed to `model.to(...)`.

    Returns:
        The modified model on `device_name`.
    """
    pretrained = torchvision.models.EfficientNet_B0_Weights.DEFAULT
    effnet = torchvision.models.efficientnet_b0(weights=pretrained)

    # Only the head created below keeps requires_grad=True.
    for weight in effnet.parameters():
        weight.requires_grad = False

    dropout = torch.nn.Dropout(p=0.2, inplace=True)
    projection = torch.nn.Linear(in_features=1280, out_features=num_classes)
    effnet.classifier = torch.nn.Sequential(dropout, projection)

    effnet = effnet.to(device_name)
    return effnet

In [11]:
def create_swin_model(num_classes, device_name):
    """Build a Swin-T feature extractor with a fresh linear head.

    Loads Swin-T with its default weights, disables gradients on every
    parameter of the loaded network, replaces `head` with a new
    Linear(768 -> num_classes) layer, and moves the model to `device_name`.

    Args:
        num_classes: number of output features of the new linear layer.
        device_name: device passed to `model.to(...)`.

    Returns:
        The modified model on `device_name`.
    """
    swin = torchvision.models.swin_t(weights=torchvision.models.Swin_T_Weights.DEFAULT)

    # Freeze the loaded network; the replacement head below stays trainable.
    for tensor in swin.parameters():
        tensor.requires_grad = False

    new_head = torch.nn.Linear(768, num_classes)
    swin.head = new_head

    return swin.to(device_name)

In [12]:
def create_vit_model(num_classes, device_name):
    """Build a ViT-B/16 feature extractor with a fresh linear head.

    Loads ViT-B/16 with its default weights, disables gradients on every
    parameter of the loaded network, replaces `heads` with a new
    Sequential(Linear(768 -> num_classes)), and moves the model to
    `device_name`.

    Args:
        num_classes: number of output features of the new linear layer.
        device_name: device passed to `model.to(...)`.

    Returns:
        The modified model on `device_name`.
    """
    weights = torchvision.models.ViT_B_16_Weights.DEFAULT
    model = torchvision.models.vit_b_16(weights=weights)

    # Freeze the loaded network; the replacement head below stays trainable.
    for param in model.parameters():
        param.requires_grad = False

    # Size the head from the `num_classes` argument (not the notebook-level
    # NUM_CLASSES global) so the function honours its own parameter.
    model.heads = torch.nn.Sequential(
        torch.nn.Linear(in_features=768, out_features=num_classes)
    )

    return model.to(device_name)

In [13]:
def train_loop(model, dataloader, optimizer, criterion, device_name):
    """Run one training pass over `dataloader` and return `(accuracy, loss)`.

    Both metrics are averaged per sample rather than per batch, so a smaller
    final batch does not get the same weight as a full one.

    Args:
        model: module called on each batch of images; put into train mode.
        dataloader: iterable of `(images, labels)` batches.
        optimizer: optimizer stepped once per batch.
        criterion: loss callable taking `(logits, labels)`. The per-sample loss
            average assumes it returns the batch mean (the default reduction
            of torch loss modules) -- TODO confirm if a custom criterion is used.
        device_name: device the batches are moved to.

    Returns:
        Tuple `(accuracy, loss)` as floats; `(0.0, 0.0)` if the dataloader
        yields no samples.
    """
    model.train()

    correct_count = 0
    loss_sum = 0.0
    sample_count = 0

    for images, labels in dataloader:
        images, labels = images.to(device_name), labels.to(device_name)

        logits = model(images)
        loss = criterion(logits, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        # argmax over logits equals argmax over softmax(logits); softmax is
        # monotonic, so it is skipped here.
        predictions = logits.detach().argmax(dim=1)
        correct_count += (predictions == labels).sum().item()
        loss_sum += loss.item() * batch_size
        sample_count += batch_size

    if sample_count == 0:
        # Nothing was processed; avoid dividing by zero.
        return 0.0, 0.0

    return correct_count / sample_count, loss_sum / sample_count

In [14]:
def test_loop(model, dataloader, optimizer, criterion, device_name):
    model.eval()
    
    average_acc = 0
    average_loss = 0
    
    with torch.inference_mode():
        for images, labels in dataloader:
            images, labels = images.to(device_name), labels.to(device_name)
            
            logits = model(images)
            preds = torch.softmax(logits, dim=1).argmax(dim=1)
            loss = criterion(logits, labels)
            acc = (preds == labels).sum().item() / len(preds)
            
            average_acc += acc
            average_loss += loss.item()
            
    average_acc /= len(dataloader)
    average_loss /= len(dataloader)
    
    return average_acc, average_loss

In [15]:
def train_and_test(model, epochs, optimizer, criterion, train_data, test_data, device_name, summary_writer):
    """Train and evaluate `model` for `epochs` epochs, logging metrics per epoch.

    Each epoch runs `train_loop` on `train_data` and `test_loop` on
    `test_data`, writes accuracy and loss scalars to `summary_writer`, and
    prints a one-line summary. The writer is flushed after every epoch and
    closed exactly once when the function exits (also on error), instead of
    being closed and implicitly reopened on every epoch.

    Args:
        model: model to train.
        epochs: number of epochs to run.
        optimizer: optimizer forwarded to the train/test loops.
        criterion: loss callable forwarded to the train/test loops.
        train_data: batches used for training.
        test_data: batches used for evaluation.
        device_name: device forwarded to the train/test loops.
        summary_writer: writer receiving the `accuracy` and `loss` scalars;
            it is closed before this function returns.

    Returns:
        Dict with keys `train_acc`, `train_loss`, `test_acc`, `test_loss`,
        each a list holding one value per completed epoch.
    """
    history = {'train_acc': [], 'train_loss': [], 'test_acc': [], 'test_loss': []}

    try:
        for epoch in tqdm(range(epochs)):
            train_acc, train_loss = train_loop(model, train_data, optimizer, criterion, device_name)
            test_acc, test_loss = test_loop(model, test_data, optimizer, criterion, device_name)

            history['train_acc'].append(train_acc)
            history['train_loss'].append(train_loss)
            history['test_acc'].append(test_acc)
            history['test_loss'].append(test_loss)

            summary_writer.add_scalars(
                main_tag='accuracy',
                tag_scalar_dict={'train_acc': train_acc, 'test_acc': test_acc},
                global_step=epoch
            )
            summary_writer.add_scalars(
                main_tag='loss',
                tag_scalar_dict={'train_loss': train_loss, 'test_loss': test_loss},
                global_step=epoch
            )
            # Push this epoch's events to disk so they are visible while training continues.
            summary_writer.flush()

            print(f'Train Acc: {train_acc:.4f} | Train Loss: {train_loss:.4f} | Test Acc: {test_acc:.4f} | Test Loss: {test_loss:.4f}')
    finally:
        summary_writer.close()

    return history

In [16]:
def save_model(model, name):
    """Serialize `model` with `torch.save` to `models/<name>`.

    The whole model object is pickled (not just its state dict), which is
    what `load_model` expects to read back.

    Args:
        model: object to serialize.
        name: file name (relative to the `models` directory).
    """
    path = Path('models') / name
    # torch.save does not create missing directories; make sure the target
    # folder exists so the first save on a fresh checkout does not fail.
    path.parent.mkdir(parents=True, exist_ok=True)
    torch.save(model, path)

In [17]:
def load_model(name, map_location=None):
    """Load a model object previously written to `models/<name>`.

    Args:
        name: file name (relative to the `models` directory).
        map_location: forwarded to `torch.load`; pass e.g. `'cpu'` to load a
            checkpoint that was saved from a GPU onto a machine without one.
            The default (`None`) keeps torch's own device placement.

    Returns:
        The deserialized object.
    """
    path = Path('models') / name
    # SECURITY: the file holds a fully pickled model, so loading it executes
    # arbitrary pickle code -- only load checkpoints you created or trust.
    # `weights_only=False` is passed explicitly because newer PyTorch releases
    # default to `weights_only=True`, which refuses to unpickle whole modules.
    return torch.load(path, map_location=map_location, weights_only=False)

In [18]:
def save_classes(class_names):
    """Write `class_names` to `classes.txt`, one name per line.

    Names are joined with a newline; no trailing newline is written.
    """
    with open('classes.txt', 'w') as handle:
        serialized = '\n'.join(class_names)
        handle.write(serialized)

In [19]:
def load_classes():
    """Read class names from `classes.txt`, dropping everything up to the first '-'.

    Each non-blank line is stripped; if it contains a '-', only the text after
    the first '-' is kept (so `n02085620-Chihuahua` becomes `Chihuahua` and
    later dashes are preserved). Lines without a '-' are returned unchanged
    and blank lines are skipped, instead of raising `IndexError`.

    Returns:
        List of class-name strings in file order.
    """
    class_names = []
    with open('classes.txt', 'r') as file:
        for line in file:
            entry = line.strip()
            if not entry:
                continue
            _, separator, remainder = entry.partition('-')
            class_names.append(remainder if separator else entry)
    return class_names

In [20]:
# Write the class names to classes.txt, then read them back so the cell
# displays the names as load_classes() parses them.
save_classes(CLASS_NAMES)
load_classes()

['Chihuahua',
 'Japanese_spaniel',
 'Maltese_dog',
 'Pekinese',
 'Shih-Tzu',
 'Blenheim_spaniel',
 'papillon',
 'toy_terrier',
 'Rhodesian_ridgeback',
 'Afghan_hound',
 'basset',
 'beagle',
 'bloodhound',
 'bluetick',
 'black-and-tan_coonhound',
 'Walker_hound',
 'English_foxhound',
 'redbone',
 'borzoi',
 'Irish_wolfhound',
 'Italian_greyhound',
 'whippet',
 'Ibizan_hound',
 'Norwegian_elkhound',
 'otterhound',
 'Saluki',
 'Scottish_deerhound',
 'Weimaraner',
 'Staffordshire_bullterrier',
 'American_Staffordshire_terrier',
 'Bedlington_terrier',
 'Border_terrier',
 'Kerry_blue_terrier',
 'Irish_terrier',
 'Norfolk_terrier',
 'Norwich_terrier',
 'Yorkshire_terrier',
 'wire-haired_fox_terrier',
 'Lakeland_terrier',
 'Sealyham_terrier',
 'Airedale',
 'cairn',
 'Australian_terrier',
 'Dandie_Dinmont',
 'Boston_bull',
 'miniature_schnauzer',
 'giant_schnauzer',
 'standard_schnauzer',
 'Scotch_terrier',
 'Tibetan_terrier',
 'silky_terrier',
 'soft-coated_wheaten_terrier',
 'West_Highland_wh

In [56]:
epochs_list = [5, 10]
models_list = ['mobnet', 'effnet', 'swin', 'vit']

# Experiment name -> factory that builds the corresponding model.
model_builders = {
    'mobnet': create_mobnet_model,
    'effnet': create_effnet_model,
    'swin': create_swin_model,
    'vit': create_vit_model,
}

# One experiment per (epoch count, model) pair: build, train, log, save.
for epoch_count in epochs_list:
    for model_name in models_list:
        print('-' * 50)
        print('Running next experiment...')
        print(f'Epochs count: {epoch_count}')
        print(f'Model name: {model_name}')

        build_model = model_builders.get(model_name)
        if build_model is None:
            raise Exception("No model with this name has found!")
        model = build_model(NUM_CLASSES, DEVICE_NAME)

        optimizer = torch.optim.Adam(model.parameters(), LEARNING_RATE)
        criterion = torch.nn.CrossEntropyLoss()

        # Logs go to runs/<date>/<model>-model_<epochs>-epochs.
        date = datetime.now().strftime('%Y-%m-%d')
        name = f'{model_name}-model_{epoch_count}-epochs'
        summary_writer = SummaryWriter(Path('runs') / date / name)

        train_and_test(
            model,
            epoch_count,
            optimizer,
            criterion,
            train_dataloader,
            test_dataloader,
            DEVICE_NAME,
            summary_writer,
        )

        save_model(model, f'{model_name}-model_{epoch_count}-epochs.pth')
        print(f'Current Experiment with {model_name} model and {epoch_count} epochs has completed...')

--------------------------------------------------
Running next experiment...
Epochs count: 5
Model name: mobnet


NameError: name 'train_and_test' is not defined

In [73]:
# Load the TensorBoard notebook extension and open TensorBoard on the
# `runs` log directory, served on port 8282.
%load_ext tensorboard
%tensorboard --logdir=runs --port=8282

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 8282 (pid 8760), started 1:06:43 ago. (Use '!kill 8760' to kill it.)

In [21]:
# Load the saved Swin checkpoint for inference on the CPU.
# eval() is called explicitly so dropout / stochastic-depth layers are
# disabled regardless of the mode the model was in when it was pickled.
model = load_model('swin-model_10-epochs.pth').cpu().eval()
model

SwinTransformer(
  (features): Sequential(
    (0): Sequential(
      (0): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
      (1): Permute()
      (2): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
    )
    (1): Sequential(
      (0): SwinTransformerBlock(
        (norm1): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
        (attn): ShiftedWindowAttention(
          (qkv): Linear(in_features=96, out_features=288, bias=True)
          (proj): Linear(in_features=96, out_features=96, bias=True)
        )
        (stochastic_depth): StochasticDepth(p=0.0, mode=row)
        (norm2): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
        (mlp): MLP(
          (0): Linear(in_features=96, out_features=384, bias=True)
          (1): GELU(approximate='none')
          (2): Dropout(p=0.0, inplace=False)
          (3): Linear(in_features=384, out_features=96, bias=True)
          (4): Dropout(p=0.0, inplace=False)
        )
      )
      (1): SwinTransformerBlock(
       

In [28]:
def prob_and_pred(img):
    """Classify one image with the notebook-level `model`.

    The image is converted to RGB, passed through the notebook-level
    `transformer`, batched, and fed to `model`; the logits are turned into
    probabilities with softmax. The forward pass runs under
    `torch.inference_mode()` so no autograd graph is recorded for a
    prediction-only call.

    Args:
        img: image object exposing `.convert('RGB')` (the gradio input is
            configured with `type='pil'`).

    Returns:
        Tuple `(prediction_dict, prediction_time)` where `prediction_dict`
        maps each entry of `CLASS_NAMES` to its probability and
        `prediction_time` is the elapsed wall-clock time in seconds, rounded
        to 4 decimals (covers preprocessing, forward pass and softmax).
    """
    start_time = time()
    with torch.inference_mode():
        transformed_img = transformer(img.convert('RGB')).unsqueeze(0)
        logits = model(transformed_img)
        probs = torch.softmax(logits, dim=1)
    end_time = time()

    # Convert once to plain Python floats instead of one .item() call per class.
    probabilities = probs[0].tolist()
    prediction_dict = {CLASS_NAMES[index]: probabilities[index] for index in range(NUM_CLASSES)}
    prediction_time = round(end_time - start_time, 4)

    return prediction_dict, prediction_time

In [29]:
# Input: a single image handed to the callback as a PIL image.
image_input = gr.Image(type='pil')
# Outputs: the three most probable classes and the measured prediction time.
prediction_outputs = [
    gr.Label(num_top_classes=3, label='Prediction Probs'),
    gr.Number(label='Prediction Time'),
]
demo = gr.Interface(fn=prob_and_pred, inputs=image_input, outputs=prediction_outputs)

demo.launch(debug=False, share=False)

IMPORTANT: You are using gradio version 3.12.0, however version 3.14.0 is available, please upgrade.
--------
Running on local URL:  http://127.0.0.1:7863

To create a public link, set `share=True` in `launch()`.


