In [1]:
import warnings
warnings.filterwarnings("ignore")

import torch
import numpy as np
import torchvision
import matplotlib.pyplot as plt
import multiprocessing
from transformers import *
from datasets import load_dataset
from PIL import Image
from torchinfo import summary
from torch import nn
from torch.utils.tensorboard import SummaryWriter
from tqdm.auto import tqdm
import time

torch.cuda.empty_cache()
device="cuda" if torch.cuda.is_available() else "cpu"

    PyTorch 2.1.0+cu121 with CUDA 1201 (you have 2.0.1+cu117)
    Python  3.9.18 (you have 3.9.17)
  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)
  Memory-efficient attention, SwiGLU, sparse and more won't be available.
  Set XFORMERS_MORE_DETAILS=1 for more details


### Importing Data

In [2]:
def transform(examples):
  inputs = image_processor([img.convert("RGB") for img in examples["image"]], return_tensors="pt")
  inputs["labels"] = examples["label"]
  return inputs

def collate_fn(batch):
  return {
      "pixel_values": torch.stack([x["pixel_values"] for x in batch]),
      "labels": torch.tensor([x["labels"] for x in batch]),
  }

In [3]:
model_name = "google/vit-base-patch16-384"
batch_size = 6
cpu_count=multiprocessing.cpu_count()

image_processor = ViTImageProcessor.from_pretrained(model_name)

train_ds= load_dataset('./chest_xray/data')
train_ds = train_ds["train"].train_test_split(test_size=0.25) 


labels = train_ds["train"].features["label"].names
dataset = train_ds.with_transform(transform)


train_dataset_loader = torch.utils.data.DataLoader(dataset["train"], collate_fn=collate_fn, batch_size=batch_size, shuffle=True)
valid_dataset_loader = torch.utils.data.DataLoader(dataset["test"], collate_fn=collate_fn, batch_size=batch_size, shuffle=True)

loading configuration file preprocessor_config.json from cache at /home/moose/.cache/huggingface/hub/models--google--vit-base-patch16-384/snapshots/2960116e809e2fca84146dbb240289aee7db4827/preprocessor_config.json
size should be a dictionary on of the following set of keys: ({'width', 'height'}, {'shortest_edge'}, {'longest_edge', 'shortest_edge'}, {'longest_edge'}), got 384. Converted to {'height': 384, 'width': 384}.
Image processor ViTImageProcessor {
  "do_normalize": true,
  "do_rescale": true,
  "do_resize": true,
  "image_mean": [
    0.5,
    0.5,
    0.5
  ],
  "image_processor_type": "ViTImageProcessor",
  "image_std": [
    0.5,
    0.5,
    0.5
  ],
  "resample": 2,
  "rescale_factor": 0.00392156862745098,
  "size": {
    "height": 384,
    "width": 384
  }
}



Resolving data files:   0%|          | 0/5840 [00:00<?, ?it/s]

### Loading Model

In [4]:
model = ViTForImageClassification.from_pretrained(
    model_name,
    num_labels=len(labels),
    id2label={str(i): c for i, c in enumerate(labels)},
    label2id={c: str(i) for i, c in enumerate(labels)},
    ignore_mismatched_sizes=True,
)

model = model.to(device)

loading configuration file config.json from cache at /home/moose/.cache/huggingface/hub/models--google--vit-base-patch16-384/snapshots/2960116e809e2fca84146dbb240289aee7db4827/config.json
Model config ViTConfig {
  "_name_or_path": "google/vit-base-patch16-384",
  "architectures": [
    "ViTForImageClassification"
  ],
  "attention_probs_dropout_prob": 0.0,
  "encoder_stride": 16,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "id2label": {
    "0": "NORMAL",
    "1": "PNEUMONIA"
  },
  "image_size": 384,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NORMAL": "0",
    "PNEUMONIA": "1"
  },
  "layer_norm_eps": 1e-12,
  "model_type": "vit",
  "num_attention_heads": 12,
  "num_channels": 3,
  "num_hidden_layers": 12,
  "patch_size": 16,
  "qkv_bias": true,
  "transformers_version": "4.33.3"
}

loading weights file model.safetensors from cache at /home/moose/.cache/huggingface/hub/models--google--vit-base-patch16-384/snapshots

### Optimizer and accuracy functions

In [5]:
#Define the optimizer
optimizer = AdamW(model.parameters(), lr=1e-5)

log_dir = "./ViT_base_384_tensorboard/"
writer = SummaryWriter(log_dir=log_dir)

def accuracy_fn(logits,true):
    return torch.eq(torch.argmax(torch.softmax(logits,dim=1),dim=1).squeeze(),true).sum().item()/len(logits)
    

In [6]:
epochs=10

train_accuracy=[]
test_accuracy=[]
train_loss=[]
test_loss=[]

for i in tqdm(range(epochs)):
    print("Training:")
    model.train()
    
    #Defining accuracy and loss for train and test data
    temp_train_accuracy=[]
    temp_test_accuracy=[]
    temp_train_loss=[]
    temp_test_loss=[]

    net_train_accuracy=0
    net_test_accuracy=0
    net_train_loss=0
    net_test_loss=0
    
    with tqdm(total=len(train_dataset_loader)) as pbar:
        for batch in train_dataset_loader:
                x=batch["pixel_values"].to(device)
                y=batch["labels"].to(device)
    
                #Calculating model output
                result=model(pixel_values=x,labels=y)
                logits=result.logits
    
                #Reseting any old gradient values
                optimizer.zero_grad()
                loss=result.loss

            
                #Track of metrics        
                accuracy_train=accuracy_fn(logits,y)
                temp_train_accuracy.append(accuracy_train)
                temp_train_loss.append(loss.item())
    
                #Back Propogation
                loss.backward()
            
                #Update Parameters
                optimizer.step()
            
                #Progress Bar Update
                pbar.update(1)
        pbar.close()
    #Tensorboard & Metrics for the dataset
    net_train_accuracy=sum(temp_train_accuracy)/len(temp_train_accuracy)
    net_train_loss=sum(temp_train_loss)/len(temp_train_loss)
    train_accuracy.append(net_train_accuracy)
    train_loss.append(net_train_loss)
    writer.add_scalar("Train Accuracy",net_train_accuracy,i)
    writer.add_scalar("Train Loss",net_train_loss,i)

    #Evaluation
    print("Testing:")
    model.eval()

    with tqdm(total=len(valid_dataset_loader)) as pbar2:
        for batch in valid_dataset_loader:
            x=batch["pixel_values"].to(device)
            y=batch["labels"].to(device)
            
            #Setting inference mode
            with torch.inference_mode():
                result=model(pixel_values=x,labels=y)
                logits=result.logits
                loss=result.loss

                #Track of metrics
                accuracy_test=accuracy_fn(logits,y)
                temp_test_accuracy.append(accuracy_test)
                temp_test_loss.append(loss.item())

                #Progress Bar Update
                pbar2.update(1)
        pbar2.close()

    #Tensorboard & Metrics for the dataset
    net_test_accuracy=sum(temp_test_accuracy)/len(temp_test_accuracy)
    net_test_loss=sum(temp_test_loss)/len(temp_test_loss)
    test_accuracy.append(net_test_accuracy)
    test_loss.append(net_test_loss)
    writer.add_scalar("Test Accuracy",net_test_accuracy,i)
    writer.add_scalar("Test Loss",net_test_loss,i)

    #Saving the model
    model.save_pretrained(f"./vit-base-384/checkpoint-{i+1}")
    image_processor.save_pretrained(f"./vit-base-384/checkpoint-{i+1}")

    print(f"Epoch {i+1}:\nTrain Accuracy: {net_train_accuracy}  Train Loss: {net_train_loss}  Test Accuracy: {net_test_accuracy}  Test Loss: {net_test_loss}")
    print("\n")

  0%|          | 0/10 [00:00<?, ?it/s]

Training:


  0%|          | 0/730 [00:00<?, ?it/s]

Testing:


  0%|          | 0/244 [00:00<?, ?it/s]

Configuration saved in ./vit-base-384/checkpoint-1/config.json
Model weights saved in ./vit-base-384/checkpoint-1/pytorch_model.bin
Image processor saved in ./vit-base-384/checkpoint-1/preprocessor_config.json


Epoch 1:
Train Accuracy: 0.9410958904109589  Train Loss: 0.14798766325550053  Test Accuracy: 0.9774590163934429  Test Loss: 0.0654667591233356


Training:


  0%|          | 0/730 [00:00<?, ?it/s]

Testing:


  0%|          | 0/244 [00:00<?, ?it/s]

Configuration saved in ./vit-base-384/checkpoint-2/config.json
Model weights saved in ./vit-base-384/checkpoint-2/pytorch_model.bin
Image processor saved in ./vit-base-384/checkpoint-2/preprocessor_config.json


Epoch 2:
Train Accuracy: 0.9780821917808227  Train Loss: 0.06396807534839441  Test Accuracy: 0.9760928961748637  Test Loss: 0.05894467266601869


Training:


  0%|          | 0/730 [00:00<?, ?it/s]

Testing:


  0%|          | 0/244 [00:00<?, ?it/s]

Configuration saved in ./vit-base-384/checkpoint-3/config.json
Model weights saved in ./vit-base-384/checkpoint-3/pytorch_model.bin
Image processor saved in ./vit-base-384/checkpoint-3/preprocessor_config.json


Epoch 3:
Train Accuracy: 0.9913242009132426  Train Loss: 0.0271337835496325  Test Accuracy: 0.9747267759562844  Test Loss: 0.07490119638395464


Training:


  0%|          | 0/730 [00:00<?, ?it/s]

Testing:


  0%|          | 0/244 [00:00<?, ?it/s]

Configuration saved in ./vit-base-384/checkpoint-4/config.json
Model weights saved in ./vit-base-384/checkpoint-4/pytorch_model.bin
Image processor saved in ./vit-base-384/checkpoint-4/preprocessor_config.json


Epoch 4:
Train Accuracy: 0.9968036529680367  Train Loss: 0.011432552172646703  Test Accuracy: 0.9760928961748638  Test Loss: 0.07711402319755059


Training:


  0%|          | 0/730 [00:00<?, ?it/s]

Testing:


  0%|          | 0/244 [00:00<?, ?it/s]

Configuration saved in ./vit-base-384/checkpoint-5/config.json
Model weights saved in ./vit-base-384/checkpoint-5/pytorch_model.bin
Image processor saved in ./vit-base-384/checkpoint-5/preprocessor_config.json


Epoch 5:
Train Accuracy: 0.9956621004566215  Train Loss: 0.012802572600268009  Test Accuracy: 0.9781420765027327  Test Loss: 0.07533510096330798


Training:


  0%|          | 0/730 [00:00<?, ?it/s]

Testing:


  0%|          | 0/244 [00:00<?, ?it/s]

Configuration saved in ./vit-base-384/checkpoint-6/config.json
Model weights saved in ./vit-base-384/checkpoint-6/pytorch_model.bin
Image processor saved in ./vit-base-384/checkpoint-6/preprocessor_config.json


Epoch 6:
Train Accuracy: 0.995205479452055  Train Loss: 0.013532756306172714  Test Accuracy: 0.9829234972677598  Test Loss: 0.06688504409403023


Training:


  0%|          | 0/730 [00:00<?, ?it/s]

Testing:


  0%|          | 0/244 [00:00<?, ?it/s]

Configuration saved in ./vit-base-384/checkpoint-7/config.json
Model weights saved in ./vit-base-384/checkpoint-7/pytorch_model.bin
Image processor saved in ./vit-base-384/checkpoint-7/preprocessor_config.json


Epoch 7:
Train Accuracy: 0.9990867579908678  Train Loss: 0.004718294461335599  Test Accuracy: 0.9856557377049184  Test Loss: 0.06850345887958939


Training:


  0%|          | 0/730 [00:00<?, ?it/s]

Testing:


  0%|          | 0/244 [00:00<?, ?it/s]

Configuration saved in ./vit-base-384/checkpoint-8/config.json
Model weights saved in ./vit-base-384/checkpoint-8/pytorch_model.bin
Image processor saved in ./vit-base-384/checkpoint-8/preprocessor_config.json


Epoch 8:
Train Accuracy: 0.9972602739726029  Train Loss: 0.007451145804167923  Test Accuracy: 0.9829234972677598  Test Loss: 0.07680878962734644


Training:


  0%|          | 0/730 [00:00<?, ?it/s]

Testing:


  0%|          | 0/244 [00:00<?, ?it/s]

Configuration saved in ./vit-base-384/checkpoint-9/config.json
Model weights saved in ./vit-base-384/checkpoint-9/pytorch_model.bin
Image processor saved in ./vit-base-384/checkpoint-9/preprocessor_config.json


Epoch 9:
Train Accuracy: 0.9972602739726031  Train Loss: 0.00807558549292717  Test Accuracy: 0.9808743169398911  Test Loss: 0.07718623701275198


Training:


  0%|          | 0/730 [00:00<?, ?it/s]

Testing:


  0%|          | 0/244 [00:00<?, ?it/s]

Configuration saved in ./vit-base-384/checkpoint-10/config.json
Model weights saved in ./vit-base-384/checkpoint-10/pytorch_model.bin
Image processor saved in ./vit-base-384/checkpoint-10/preprocessor_config.json


Epoch 10:
Train Accuracy: 0.9995433789954339  Train Loss: 0.0011769281213611527  Test Accuracy: 0.9795081967213121  Test Loss: 0.10447689523981599


