### 0. Importing Necessary Libraries & Dependencies 

In [2]:
##-- Import libraries --##
import torch
import torchvision
from torch import nn
import matplotlib.pyplot as plt
from torchvision import transforms
from pytorch_modules import data_setup, engine, utils

##-- Setup device agnostic code --##
device = "cuda" if torch.cuda.is_available() else "cpu"

print(f"PyTorch Version: {torch.__version__}")
print(f"Device: {device}")

BATCH_SIZE = 32
RANDOM_STATE = 42

PyTorch Version: 2.5.1
Device: cpu


### 1. Get Data

In [3]:
##-- Setup directory paths --##
from pathlib import Path
image_data_path = Path("./chest_xray")
train_dir = image_data_path / "train"
val_dir = image_data_path / "val"

train_dir, val_dir

(PosixPath('chest_xray/train'), PosixPath('chest_xray/val'))

### 2. Create Datasets & DataLoaders

#### 2.2 Creating a transforms for `torchvision.models` (auto creation)

- As of `torchvision` v0.13_ there is now support for automatic data transform creation based on the pretrained model weights you're using.

In [4]:
##-- Get a set of pretrained model weights --##
vit_b16_weights =torchvision.models.ViT_B_16_Weights.DEFAULT # 'DEFAULT' = best availabel weight
vit_b16_weights

ViT_B_16_Weights.IMAGENET1K_V1

In [5]:
##-- Get the transforms used to create the pretrained weights
auto_transforms = vit_b16_weights.transforms()
auto_transforms

ImageClassification(
    crop_size=[224]
    resize_size=[256]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BILINEAR
)

In [6]:
##-- Create DataLoaders using auto_transforms --##
train_dataloader, val_dataloader, class_names = data_setup.create_dataloaders(train_dir=train_dir,
                                                                              val_dir=val_dir,
                                                                              transform=auto_transforms,
                                                                              batch_size=BATCH_SIZE)
train_dataloader, val_dataloader, class_names


(<torch.utils.data.dataloader.DataLoader at 0x1192b09b0>,
 <torch.utils.data.dataloader.DataLoader at 0x1294babd0>,
 ['Covid', 'Normal', 'Viral Pneumonia'])

### 3. Getting Pretrained Models

#### 3.1 Setting Up Pretrained Model

In [8]:
import torchvision
vit_b16_weights =torchvision.models.ViT_B_16_Weights.DEFAULT # "DEFAULT" = get the best available weights
model_b16 = torchvision.models.vit_b_16(weights=vit_b16_weights).to(device)

In [9]:
# print(f"{model_b16.avgpool}\n")
# print(f"{model_b16.classifier}")

#### 3.4 Getting Model Summary Using `torchinfo`

In [10]:
from torchinfo import summary

summary(model=model_b16,
        input_size=(1, 3, 224, 224), # [batch_size, color_channels, height, width],
        col_names=["input_size", "output_size", "num_params", "trainable"],
        col_width=20,
        row_settings=["var_names"])

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
VisionTransformer (VisionTransformer)                        [1, 3, 224, 224]     [1, 1000]            768                  True
├─Conv2d (conv_proj)                                         [1, 3, 224, 224]     [1, 768, 14, 14]     590,592              True
├─Encoder (encoder)                                          [1, 197, 768]        [1, 197, 768]        151,296              True
│    └─Dropout (dropout)                                     [1, 197, 768]        [1, 197, 768]        --                   --
│    └─Sequential (layers)                                   [1, 197, 768]        [1, 197, 768]        --                   True
│    │    └─EncoderBlock (encoder_layer_0)                   [1, 197, 768]        [1, 197, 768]        7,087,872            True
│    │    └─EncoderBlock (encoder_layer_1)                   [1, 197, 768]        [1, 197, 768

#### 3.5 Freezing Base Model & Changing Output Layer Based On Our Needs

In [11]:
##-- Freeze all the base layers in EffNetB7 --##
for params in model_b16.parameters():
    params.requires_grad = False # won't updated the weights

In [12]:
##-- Update the classifier head of our model to suite our problem --##
from torch import nn

torch.manual_seed(RANDOM_STATE)
torch.cuda.manual_seed(RANDOM_STATE)

model_b16.classifier = nn.Sequential(
    nn.Dropout(p=0.2, inplace=True),
    nn.Linear(in_features=768, # features vector coming in from the forzen layers
              out_features=len(class_names)).to(device))

In [15]:
summary(model=model_b16,
        input_size=(1, 3, 224, 224), # [batch_size, color_channels, height, width],
        col_names=["input_size", "output_size", "num_params", "trainable"],
        col_width=20,
        row_settings=["var_names"])

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
VisionTransformer (VisionTransformer)                        [1, 3, 224, 224]     [1, 1000]            3,075                Partial
├─Conv2d (conv_proj)                                         [1, 3, 224, 224]     [1, 768, 14, 14]     (590,592)            False
├─Encoder (encoder)                                          [1, 197, 768]        [1, 197, 768]        151,296              False
│    └─Dropout (dropout)                                     [1, 197, 768]        [1, 197, 768]        --                   --
│    └─Sequential (layers)                                   [1, 197, 768]        [1, 197, 768]        --                   False
│    │    └─EncoderBlock (encoder_layer_0)                   [1, 197, 768]        [1, 197, 768]        (7,087,872)          False
│    │    └─EncoderBlock (encoder_layer_1)                   [1, 197, 768]        [1, 1

### 4. Train Model

In [16]:
# torch.manual_seed(RANDOM_STATE)
# torch.cuda.manual_seed(RANDOM_STATE)

# ##-- Define hyperparamters --##
# EPOCHS = 20
# LEARNING_RATE = 0.001

# ##-- Define Loss & Optimizer --##
# loss_fn = nn.CrossEntropyLoss()
# optimizer = torch.optim.Adam(params=model_b16.parameters(), lr=LEARNING_RATE)

# ##-- Start timer --##
# from timeit import default_timer as timer
# start_time = timer()

# ##-- Setup training and save the results --##
# model_b7_results = engine.train(model=model_b16,
#                                train_dataloader=train_dataloader,
#                                val_dataloader=val_dataloader,
#                                optimizer=optimizer,
#                                loss_fn=loss_fn,
#                                epochs=EPOCHS,
#                                device=device)

# ##-- End timer --##
# end_time = timer()

In [17]:
# ##- Save the model --##
# utils.save_model(model=model_b16,
#                  target_dir="./saved_models",
#                  model_name=f"efficientnetb7_{LEARNING_RATE}_{EPOCHS}_{BATCH_SIZE}.pth",
#                  optimizer=optimizer,
#                  epoch=EPOCHS)

# print(f"[INFO]: Total training time: {(end_time-start_time)/60:.3f} minutes.")

### 5. Evaluate Model

In [18]:
# ##-- Plot loss curves --##
# epochs = [i for i in range(1, EPOCHS+1)]

# plt.figure(figsize=(12, 5))

# plt.subplot(1, 2, 1)
# plt.plot(epochs, model_b7_results["train_loss"], color="blue", label="train_loss")
# plt.plot(epochs, model_b7_results["val_loss"], color="red", label="val_loss")
# plt.xlabel("Epochs")
# plt.ylabel("Loss")
# plt.legend()

# plt.subplot(1, 2, 2)
# plt.plot(epochs, model_b7_results["train_acc"], color="blue", label="train_acc")
# plt.plot(epochs, model_b7_results["val_acc"], color="green", label="val_acc")
# plt.xlabel("Epochs")
# plt.ylabel("Accuracy")
# plt.legend()