# VGG16 Pre trained to test

This jupyter notebook has the objective to, not only retrieve the accuracies of the VGGnet16 pretrained, but to obtain also <br>
the layer features before the last classification layer.

In [22]:
 #Import necessary modules
import os
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torchvision.models as models
from torch.utils.data import DataLoader
from torchvision import transforms
plt.rcParams['figure.figsize'] = [20, 12]

### Set the path to here

Make sure the setup the paths properly!

In [23]:
#Path to assign tests (copy path directly)
notebooks_path = os.getcwd() # OR MAYBE has to be set manually depending your computer

#Set the path to this working directory
os.chdir(notebooks_path)
print(os.getcwd())

import sys
#Append the path the src folder
sys.path.append(os.path.join(os.getcwd(), os.pardir, "src"))

/project/cs231/CS231N-Final-Proj/notebooks


### Import the necessary module for downloading

Note for this: EVERYTIME There is a change inside the download <br>
the changes inside the file would only be shown if the jupyter kernel is restarted. <br>


In [24]:
# Import the necessary modules
from utils import CXReader, DfReader

### Set the data path

In [25]:
# Create the data path
df_path = os.path.join(notebooks_path, os.pardir, "data")
data_path = os.path.join(df_path, "images", "images")

### Get the dataframes of the data
First, lets obtain the dataframes for the data and check that all metadata <br>
information has been set up properly. <br>

In [26]:
#Create a dataframe compiler
df_compiler = DfReader()

#set the path and retrieve the dataframes
df_compiler.set_folder_path(df_path)

#Get the dataframe holder and names
dfs_holder, dfs_names = df_compiler.get_dfs()

  0%|          | 0/5 [00:00<?, ?it/s]

The file: miccai2023_nih-cxr-lt_labels_val.csv has been retrieved
The file: miccai2023_nih-cxr-lt_labels_test.csv has been retrieved


100%|██████████| 5/5 [00:00<00:00, 22.12it/s]

The file: miccai2023_nih-cxr-lt_labels_train.csv has been retrieved





# Read the images and labels

Also, obtain DataLoaders for test, train, and validation datasets using <br>
the Dataloader class from pytorch.

In [27]:
print(torch.cuda.is_available())

True


In [28]:
# Get the device if cuda or not
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#Define a transformations for the VGGnet16 (requires a 224,224)
transform = transforms.Compose([
    transforms.Resize((256, 256)),  # Resize to 256x256
    transforms.CenterCrop((224, 224)),  # Center crop to 224x224
    transforms.ToTensor(),
])

#Create datasets and dataloaders
test_dataset = CXReader(data_path=data_path, dataframe=dfs_holder[0], transform=transform, device=device)
train_dataset = CXReader(data_path=data_path, dataframe=dfs_holder[1], transform=transform,device=device)
val_dataset = CXReader(data_path=data_path, dataframe=dfs_holder[2], transform=transform, device=device)

#Sampled images from train to see single shape
samp3_image, label3 = train_dataset[1]
print("Shape of a single image and its labels")
print(f"Image: {samp3_image.shape}, labels: {label3.shape}")

#With batch size of 32, and shuffle true, and num workers = 4
batch_size = 32

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False,)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False,)

#Iterate inside a batch
for idx, batch in enumerate(train_loader):
    print(f"batch number: {idx}")
    images, labels = batch
    print("Shape of batch of images and labels")
    print(f"Images: {images.shape}, labels: {labels.shape}")
    if idx == 5:
        print("It can iterate through all batches")
        break

Shape of a single image and its labels
Image: torch.Size([3, 224, 224]), labels: torch.Size([20])
batch number: 0
Shape of batch of images and labels
Images: torch.Size([32, 3, 224, 224]), labels: torch.Size([32, 20])
batch number: 1
Shape of batch of images and labels
Images: torch.Size([32, 3, 224, 224]), labels: torch.Size([32, 20])
batch number: 2
Shape of batch of images and labels
Images: torch.Size([32, 3, 224, 224]), labels: torch.Size([32, 20])
batch number: 3
Shape of batch of images and labels
Images: torch.Size([32, 3, 224, 224]), labels: torch.Size([32, 20])
batch number: 4
Shape of batch of images and labels
Images: torch.Size([32, 3, 224, 224]), labels: torch.Size([32, 20])
batch number: 5
Shape of batch of images and labels
Images: torch.Size([32, 3, 224, 224]), labels: torch.Size([32, 20])
It can iterate through all batches


### Load the vgg16 pretrained model

Check if you have GPU Envidia! Else, use the cpu

In [29]:
#Load the pretrained model
vgg16 = models.vgg16(pretrained = True)



# Modify the classifier layers
We know that VGGnet16 has a last linear layer with 1000 output units...<br>
However, this doesnt really resemble our problem per se...<br><br>
Lets do this! Lets replace the last layer with a linear layer that has the same <br> number of classes as our data!. (In our case, is 20).


### See the vgg16.features architecture and get the parameter shapes

In [30]:
print(vgg16.features)
print([x.shape for x in vgg16.features.parameters()])

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace=True)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace=True)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace=True)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace=True)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace=True)
  (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (17): Conv2d(256, 512, kernel_si

### See the vgg16.avgpool parameters

In [31]:
print(vgg16.avgpool)
print([x.shape for x in vgg16.avgpool.parameters()])

AdaptiveAvgPool2d(output_size=(7, 7))
[]


### See the vgg16 classifier parameters and weights

In [32]:
print(vgg16.classifier)
print([x.shape for x in vgg16.classifier.parameters()])

Sequential(
  (0): Linear(in_features=25088, out_features=4096, bias=True)
  (1): ReLU(inplace=True)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=4096, out_features=4096, bias=True)
  (4): ReLU(inplace=True)
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=4096, out_features=1000, bias=True)
)
[torch.Size([4096, 25088]), torch.Size([4096]), torch.Size([4096, 4096]), torch.Size([4096]), torch.Size([1000, 4096]), torch.Size([1000])]


In [33]:
# NEW CODE CELL to conduct fine-tuning on Vggnet16 only on the last (Linear) layer

# First, freeze all the parameters
for param in vgg16.parameters():
    param.requires_grad = False

In [34]:
# Modify the last layer for the last 20 classes
num_classes = 20  # Number of classes for your specific task
num_features = vgg16.classifier[0].in_features #Get all of the features after convolutional layers
print(num_features)

#Obtain the same classifier you got befor with lower number of classes, so we can pretrain it
vgg16.classifier = nn.Sequential(
    nn.Linear(num_features, 4096, bias=True),
    nn.ReLU(inplace= True),
    nn.Dropout(0.1, inplace=False),
    nn.Linear(4096, 4096, bias=True),
    nn.ReLU(inplace= True),
    nn.Dropout(0.1, inplace=False),
    nn.Linear(4096, num_classes, bias=True),
    nn.ELU(inplace=True)
)

print(vgg16.classifier)

25088


Sequential(
  (0): Linear(in_features=25088, out_features=4096, bias=True)
  (1): ReLU(inplace=True)
  (2): Dropout(p=0.1, inplace=False)
  (3): Linear(in_features=4096, out_features=4096, bias=True)
  (4): ReLU(inplace=True)
  (5): Dropout(p=0.1, inplace=False)
  (6): Linear(in_features=4096, out_features=20, bias=True)
  (7): ELU(alpha=1.0, inplace=True)
)


In [35]:
# NEW CODE CELL

# Create state_dict path
model_dict_path = os.path.join(notebooks_path, os.pardir, "models")

if os.path.exists(model_dict_path) == False:
    os.mkdir(model_dict_path)

In [36]:
# NEW CODE CELL to perform fine-tuning
#print([x.shape for x in vgg16.classifier[-6].parameters()])

import torch.optim as optim
vgg16 = vgg16.to(device)
vgg16.train()
params_to_update = [vgg16.classifier.parameters()]
print(params_to_update)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(*params_to_update, lr=0.01)

def finetune_model(model, data_loader, num_epochs, device:str):
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print('-------------')
            
        for idx, batch in enumerate(data_loader):
            images_inputs, images_labels = batch
            images_inputs, images_labels = images_inputs.to(device), images_labels.to(device)

            # Convert labels to float type (also need to move to CUDA again!)
            images_labels = images_labels.to(torch.float64)

            # initialize optimizer
            optimizer.zero_grad()            
            outputs = model(images_inputs)
            
            # compute loss
            loss = criterion(outputs, images_labels)
            
            # predict labels
            pred_labels = (outputs > 0.5).float()

            # Calculate TP, FP, TN, FN and accuracy
            TP = torch.sum((pred_labels == 1) & (images_labels == 1)).item()
            FP = torch.sum((pred_labels == 1) & (images_labels == 0)).item()
            TN = torch.sum((pred_labels == 0) & (images_labels == 0)).item()
            FN = torch.sum((pred_labels == 0) & (images_labels == 1)).item()
            accuracy = ((TP + TN) / (TP + FP + TN + FN)) * 100.0                

            loss.backward()
            print(f"iter {idx} ---  Loss: {loss}    Accuracy: {accuracy}")
            optimizer.step()
        
        # Save parameters for each epoch
        torch.save(model.state_dict(), os.path.join(model_dict_path, "vgg16_finetune_params.pth"))

[<generator object Module.parameters at 0x77a1c9ca8ac0>]


In [37]:
# Let's do fine-tuning
finetune_model(model=vgg16, data_loader=train_loader, num_epochs=5, device=device)

Epoch 1/5
-------------
iter 0 ---  Loss: 4.602937683463097    Accuracy: 92.34375
iter 1 ---  Loss: 4.0355129688978195    Accuracy: 93.125
iter 2 ---  Loss: 5.280447103083134    Accuracy: 90.9375
iter 3 ---  Loss: 4.325652673840523    Accuracy: 92.5
iter 4 ---  Loss: 3.2509279623627663    Accuracy: 93.125
iter 5 ---  Loss: 3.7702549137175083    Accuracy: 90.3125
iter 6 ---  Loss: 3.9184491150081158    Accuracy: 86.875
iter 7 ---  Loss: 3.3907826878130436    Accuracy: 86.25
iter 8 ---  Loss: 3.558126490563154    Accuracy: 85.9375
iter 9 ---  Loss: 3.810967568308115    Accuracy: 86.25
iter 10 ---  Loss: 3.9466537050902843    Accuracy: 84.375
iter 11 ---  Loss: 2.882425855845213    Accuracy: 84.84375
iter 12 ---  Loss: 3.843246625736356    Accuracy: 82.34375
iter 13 ---  Loss: 3.1304370388388634    Accuracy: 80.15625
iter 14 ---  Loss: 3.4035610891878605    Accuracy: 78.90625
iter 15 ---  Loss: 4.175964046269655    Accuracy: 80.0
iter 16 ---  Loss: 4.340290382504463    Accuracy: 76.40625


In [None]:
vgg16.load_state_dict(torch.load(os.path.join(model_dict_path, "vgg16_finetune_params.pth")))

### Create a function that would evaluate the model.

Make sure it outputs all of the accuracies of all 20 conditions. <br>

In [None]:
import torch.nn.functional as F

def evaluate_model(model, data_loader, limit:int, device:str):
    """
    Instance method that would evaluate with a given
    data loader, the accuracies obtained by the VGGNET16
    """
    model.eval()
    threshold = 0.5
    accuracies = []
    precisions = []
    recalls = []
    f1_scores = []

    #Use no grad to not perform backpropagation for inference time
    with torch.no_grad():
        #Iterate through each of the images and labels
        
        # Calculate the total numbers for metrics
        TP, FP, TN, FN = 0.0, 0.0, 0.0, 0.0
        for idx, batch in enumerate(data_loader):
    
            #See if it works
            images_inputs, images_labels = batch
            images_inputs, images_labels = images_inputs.to(device), images_labels.to(device)

            #Print the shape of each one of them
            print(f"Inputs shape: {images_inputs.shape}, Labels shape: {labels.shape}")

            #Send the outputs to model in device
            outputs = model(images_inputs)

            #Binarize the output with threshold
            pred_labels = (outputs > threshold).float()

            # Calculate batch-wise TP, FP, TN, FN
            b_TP = torch.sum((pred_labels == 1) & (images_labels == 1)).item()
            b_FP = torch.sum((pred_labels == 1) & (images_labels == 0)).item()
            b_TN = torch.sum((pred_labels == 0) & (images_labels == 0)).item()
            b_FN = torch.sum((pred_labels == 0) & (images_labels == 1)).item()
            TP += b_TP
            FP += b_FP
            TN += b_TN
            FN += b_FN

        #_, predicted = torch.max(outputs, 1)  # Get the index of the maximum log-probability
        accuracy = ((TP + TN) / (TP + FP + TN + FN)) * 100.0
        precision = (TP / (TP + FP)) * 100.0 if (TP + FP) > 0 else 0.0
        recall = (TP / (TP + FN)) * 100.0 if (TP + FN) > 0 else 0.0
        f1_score = (2 * precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0

        print("Accuracy: {:.2f}%".format(accuracy))
        print("Precision: {:.2f}%".format(precision))
        print("Recall: {:.2f}%".format(recall))
        print("F1 Score: {:.2f}%".format(f1_score))

            # accuracies.append(accuracy)
            # precisions.append(precision)
            # recalls.append(recall)
            # f1_scores.append(f1_score)

            # if idx == limit:
            #     print("Limit reached")
            #     break
    return accuracies, precisions, recalls, f1_scores

In [None]:
# Evaluate on the eval set
accuracies, precisions, recalls, f1_scores = evaluate_model(vgg16, test_loader, 5, device=device)