In [1]:
# Import modules
import os
from pathlib import Path

import torch
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

import cv2
import matplotlib.pyplot as plt

from tqdm.auto import tqdm

from torchinfo import summary

c:\Users\CSR\anaconda3\envs\ml\lib\site-packages\numpy\.libs\libopenblas64__v0.3.21-gcc_10_3_0.dll
c:\Users\CSR\anaconda3\envs\ml\lib\site-packages\numpy\.libs\libopenblas64__v0.3.23-gcc_10_3_0.dll


In [3]:
# Pick the compute device: use the GPU when CUDA is usable, otherwise the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

In [4]:
# Locate the dataset root and its train/test sub-folders
data_path = Path("./data")
train_path = data_path.joinpath("traindata")  # folder holding the training images
test_path = data_path.joinpath("testdata")  # folder holding the test images

# Show both paths as the cell output
(train_path, test_path)

(WindowsPath('data/traindata'), WindowsPath('data/testdata'))

# Effnetb0 Model

In [5]:
# Load EfficientNet-B0 with its default pretrained weights, together with the
# preprocessing transform that ships with those weights
effnetb0_weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT
effnetb0_transform = effnetb0_weights.transforms()
effnetb0 = torchvision.models.efficientnet_b0(weights=effnetb0_weights)

# Freeze every pretrained parameter so that only the new head below is trained
for parameter in effnetb0.parameters():
    parameter.requires_grad = False

# Replace the classifier with a fresh 2-class linear head
# (newly created layers have requires_grad=True by default)
effnetb0.classifier = torch.nn.Sequential(
    torch.nn.Linear(in_features=1280, out_features=2)
)

# Move the model to the device only AFTER swapping the head. A layer created
# after an earlier .to(device) call is allocated on the CPU, and would stay
# there unless some later call happened to move the whole model.
effnetb0 = effnetb0.to(device)

In [6]:
# Build the train/test datasets from the image folders, preprocessing every
# image with the transform that belongs to the EfficientNet-B0 weights
train_data = datasets.ImageFolder(train_path, transform=effnetb0_transform)
test_data = datasets.ImageFolder(test_path, transform=effnetb0_transform)

# Wrap the datasets in batched loaders.
# Training batches are reshuffled every epoch; the test loader keeps a fixed
# order so the per-batch-averaged evaluation metrics are reproducible.
train_dataloader = DataLoader(train_data, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=32, shuffle=False)

In [7]:
# Print a layer-by-layer overview of the model: shapes, parameter counts and
# which layers are trainable
# NOTE(review): the images fed during training come from effnetb0_transform;
# confirm that 256x256 matches the size that transform actually produces.
summary(
    effnetb0,
    # the keyword is "input_size" (not "input_shape"):
    # (batch_size, color_channels, height, width)
    input_size=(32, 3, 256, 256),
    col_names=["input_size", "output_size", "num_params", "trainable"],
    col_width=20,
    row_settings=["var_names"],
    verbose=0,
)

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
EfficientNet (EfficientNet)                                  [32, 3, 256, 256]    [32, 2]              --                   Partial
├─Sequential (features)                                      [32, 3, 256, 256]    [32, 1280, 8, 8]     --                   False
│    └─Conv2dNormActivation (0)                              [32, 3, 256, 256]    [32, 32, 128, 128]   --                   False
│    │    └─Conv2d (0)                                       [32, 3, 256, 256]    [32, 32, 128, 128]   (864)                False
│    │    └─BatchNorm2d (1)                                  [32, 32, 128, 128]   [32, 32, 128, 128]   (64)                 False
│    │    └─SiLU (2)                                         [32, 32, 128, 128]   [32, 32, 128, 128]   --                   --
│    └─Sequential (1)                                        [32, 32, 128, 128]   [32, 

In [9]:
# Loss function for the 2-class classification head
loss_fn = torch.nn.CrossEntropyLoss()

# Optimizer (frozen parameters never receive gradients, so only the head is updated)
optimizer = torch.optim.Adam(effnetb0.parameters(), lr=0.001)

# Number of passes over the training set
epochs = 5

# Per-epoch history of loss and accuracy (plain Python floats) for train and test
train_loss_lst = []
train_acc_lst = []
test_loss_lst = []
test_acc_lst = []

for epoch in tqdm(range(epochs)):
    # ---- Training phase ----
    # NOTE(review): train() also switches the frozen backbone's BatchNorm and
    # Dropout layers to training mode — confirm that this is intended.
    effnetb0.train()

    # Running sums of the per-batch loss and accuracy
    train_loss, train_acc = 0, 0

    for X, y in train_dataloader:
        # Send the batch to the same device as the model
        X, y = X.to(device), y.to(device)

        # Forward pass (raw logits)
        y_pred = effnetb0(X)

        # Batch loss; .item() keeps only the Python float, not the graph
        loss = loss_fn(y_pred, y)
        train_loss += loss.item()

        # Standard update: clear old gradients, backpropagate, step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Batch accuracy. argmax over the logits gives the predicted class
        # directly (softmax does not change the ordering), matching the test loop.
        y_pred_class = y_pred.argmax(dim=1)
        train_acc += (y_pred_class == y).sum().item()/len(y_pred_class)

    # Average the per-batch values over the number of batches
    train_loss /= len(train_dataloader)
    train_acc /= len(train_dataloader)

    train_loss_lst.append(train_loss)
    train_acc_lst.append(train_acc)

    # ---- Evaluation phase ----
    effnetb0.eval()

    # Running sums of the per-batch loss and accuracy
    test_loss, test_acc = 0, 0

    # No gradient tracking is needed while evaluating
    with torch.inference_mode():
        for X, y in test_dataloader:
            # Send the batch to the same device as the model
            X, y = X.to(device), y.to(device)

            # Forward pass (raw logits)
            test_pred = effnetb0(X)

            # Accumulate the loss as a Python float. Adding the tensor itself
            # would leave device tensors in test_loss_lst, inconsistent with
            # the floats stored in train_loss_lst.
            loss = loss_fn(test_pred, y)
            test_loss += loss.item()

            # Batch accuracy
            test_pred_class = test_pred.argmax(dim=1)
            test_acc += ((test_pred_class==y).sum().item()/len(test_pred_class))

    # Average the per-batch values over the number of batches
    test_loss /= len(test_dataloader)
    test_acc /= len(test_dataloader)

    test_loss_lst.append(test_loss)
    test_acc_lst.append(test_acc)

    print(f"Epoch {epoch+1}\ntrain loss: {train_loss:.2f}, train_acc: {train_acc:.2f} | test loss: {test_loss:.2f}, test_acc: {test_acc:.2f}\n")


  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1
train loss: 0.33, train_acc: 0.86 | test loss: 0.29, test_acc: 0.88

Epoch 2
train loss: 0.27, train_acc: 0.89 | test loss: 0.25, test_acc: 0.90

Epoch 3
train loss: 0.25, train_acc: 0.90 | test loss: 0.31, test_acc: 0.90

Epoch 4
train loss: 0.24, train_acc: 0.91 | test loss: 0.25, test_acc: 0.90

Epoch 5
train loss: 0.24, train_acc: 0.90 | test loss: 0.23, test_acc: 0.90



In [10]:
# Ensure the output folder for saved weights exists (no error if it already does)
saving_path = Path("./model")
saving_path.mkdir(exist_ok=True, parents=True)

# Build the destination file path for this model's weights
model_name = "effnetb0.pth"
model_saving_path = saving_path.joinpath(model_name)

# Write the model's state_dict to that file
torch.save(effnetb0.state_dict(), model_saving_path)

# ViT Model

In [None]:
# Load ViT-B/16 with its default pretrained weights, together with the
# preprocessing transform that ships with those weights
vit_weights = torchvision.models.ViT_B_16_Weights.DEFAULT
vit_transform = vit_weights.transforms()
vit = torchvision.models.vit_b_16(weights=vit_weights)

# Freeze every pretrained parameter so that only the new head below is trained
for parameter in vit.parameters():
    parameter.requires_grad = False

# Replace the classification head with a fresh 2-class linear layer
# (newly created layers have requires_grad=True by default)
vit.heads = torch.nn.Sequential(
    torch.nn.Linear(in_features=768, out_features=2, bias=True)
)

# Move the model to the device only AFTER swapping the head. A layer created
# after an earlier .to(device) call is allocated on the CPU, and would stay
# there unless some later call happened to move the whole model.
vit = vit.to(device)

In [12]:
# Print a layer-by-layer overview of the model: shapes, parameter counts and
# which layers are trainable
summary(
    vit,
    # the keyword is "input_size" (not "input_shape"):
    # (batch_size, color_channels, height, width)
    input_size=(32, 3, 224, 224),
    col_names=["input_size", "output_size", "num_params", "trainable"],
    col_width=20,
    row_settings=["var_names"],
    verbose=0,
)

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
VisionTransformer (VisionTransformer)                        [32, 3, 224, 224]    [32, 2]              768                  Partial
├─Conv2d (conv_proj)                                         [32, 3, 224, 224]    [32, 768, 14, 14]    (590,592)            False
├─Encoder (encoder)                                          [32, 197, 768]       [32, 197, 768]       151,296              False
│    └─Dropout (dropout)                                     [32, 197, 768]       [32, 197, 768]       --                   --
│    └─Sequential (layers)                                   [32, 197, 768]       [32, 197, 768]       --                   False
│    │    └─EncoderBlock (encoder_layer_0)                   [32, 197, 768]       [32, 197, 768]       (7,087,872)          False
│    │    └─EncoderBlock (encoder_layer_1)                   [32, 197, 768]       [32, 

In [13]:
# Build the train/test datasets from the image folders, preprocessing every
# image with the transform that belongs to the ViT weights
vit_train_data = datasets.ImageFolder(train_path, transform=vit_transform)
vit_test_data = datasets.ImageFolder(test_path, transform=vit_transform)

# Wrap the ViT datasets in batched loaders. These must use vit_train_data and
# vit_test_data: train_data/test_data were built with the EfficientNet
# transform, so loading from them would bypass vit_transform entirely.
# Training batches are reshuffled every epoch; the test loader keeps a fixed
# order so the per-batch-averaged evaluation metrics are reproducible.
vit_train_dataloader = DataLoader(vit_train_data, batch_size=32, shuffle=True)
vit_test_dataloader = DataLoader(vit_test_data, batch_size=32, shuffle=False)

In [14]:
# Loss function for the 2-class classification head
loss_fn = torch.nn.CrossEntropyLoss()

# Optimizer (frozen parameters never receive gradients, so only the head is updated)
optimizer = torch.optim.Adam(vit.parameters(), lr=0.001)

# Number of passes over the training set
epochs = 5

# Per-epoch history of loss and accuracy (plain Python floats) for train and test
train_loss_lst = []
train_acc_lst = []
test_loss_lst = []
test_acc_lst = []

for epoch in tqdm(range(epochs)):
    # ---- Training phase ----
    vit.train()

    # Running sums of the per-batch loss and accuracy
    train_loss, train_acc = 0, 0

    for X, y in vit_train_dataloader:
        # Send the batch to the same device as the model
        X, y = X.to(device), y.to(device)

        # Forward pass (raw logits)
        y_pred = vit(X)

        # Batch loss; .item() keeps only the Python float, not the graph
        loss = loss_fn(y_pred, y)
        train_loss += loss.item()

        # Standard update: clear old gradients, backpropagate, step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Batch accuracy. argmax over the logits gives the predicted class
        # directly (softmax does not change the ordering), matching the test loop.
        y_pred_class = y_pred.argmax(dim=1)
        train_acc += (y_pred_class == y).sum().item()/len(y_pred_class)

    # Average the per-batch values over the number of batches
    train_loss /= len(vit_train_dataloader)
    train_acc /= len(vit_train_dataloader)

    train_loss_lst.append(train_loss)
    train_acc_lst.append(train_acc)

    # ---- Evaluation phase ----
    vit.eval()

    # Running sums of the per-batch loss and accuracy
    test_loss, test_acc = 0, 0

    # No gradient tracking is needed while evaluating
    with torch.inference_mode():
        for X, y in vit_test_dataloader:
            # Send the batch to the same device as the model
            X, y = X.to(device), y.to(device)

            # Forward pass (raw logits)
            test_pred = vit(X)

            # Accumulate the loss as a Python float. Adding the tensor itself
            # would leave device tensors in test_loss_lst, inconsistent with
            # the floats stored in train_loss_lst.
            loss = loss_fn(test_pred, y)
            test_loss += loss.item()

            # Batch accuracy
            test_pred_class = test_pred.argmax(dim=1)
            test_acc += ((test_pred_class==y).sum().item()/len(test_pred_class))

    # Average the per-batch values over the number of batches
    test_loss /= len(vit_test_dataloader)
    test_acc /= len(vit_test_dataloader)

    test_loss_lst.append(test_loss)
    test_acc_lst.append(test_acc)

    print(f"Epoch {epoch+1}\ntrain loss: {train_loss:.2f}, train_acc: {train_acc:.2f} | test loss: {test_loss:.2f}, test_acc: {test_acc:.2f}\n")


  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1
train loss: 0.26, train_acc: 0.89 | test loss: 0.21, test_acc: 0.91

Epoch 2
train loss: 0.17, train_acc: 0.93 | test loss: 0.18, test_acc: 0.93

Epoch 3
train loss: 0.15, train_acc: 0.94 | test loss: 0.18, test_acc: 0.93

Epoch 4
train loss: 0.14, train_acc: 0.95 | test loss: 0.18, test_acc: 0.93

Epoch 5
train loss: 0.13, train_acc: 0.95 | test loss: 0.17, test_acc: 0.94



In [15]:
# Ensure the output folder for saved weights exists (no error if it already does)
saving_path = Path("./model")
saving_path.mkdir(exist_ok=True, parents=True)

# Build the destination file path for this model's weights
model_name = "vit.pth"
model_saving_path = saving_path.joinpath(model_name)

# Write the model's state_dict to that file
torch.save(vit.state_dict(), model_saving_path)