# VGGNet16 features extraction.

One of the key components to be able to create a GMVAE of the CXR14 dataset, is to be able to retrieve <br>
the VGGne16 features, just as how the paper. <br><br> "Deep Generative Classifiers for Thoracic Disease Diagnosis with Chest X-ray Images" has done. <br>
link: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6651749/

In [1]:
 #Import necessary modules
import os
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torchvision.models as models
from torch.utils.data import DataLoader
from torchvision import transforms
plt.rcParams['figure.figsize'] = [20, 12]

### Set the path to here

Make sure the setup the paths properly!

In [2]:
#Path to assign tests (copy path directly)
test_path = r"D:\Python_Projects\CS236-Final-Proj\test"

#Set the path to this working directory
os.chdir(test_path)
print(os.getcwd())

import sys
#Append the path the src folder
sys.path.append(r'D:\Python_Projects\CS236-Final-Proj\src')

D:\Python_Projects\CS236-Final-Proj\test


### Import the necessary module for downloading

Note for this: EVERYTIME There is a change inside the download <br>
the changes inside the file would only be shown if the jupyter kernel is restarted. <br>


In [3]:
# Import the necessary modules
from utils import CXReader, DfReader

### Set the data path

In [4]:
# Create the data path
data_path = os.path.join(test_path, os.pardir, "data")

### Get the dataframes of the data
First, lets obtain the dataframes for the data and check that all metadata <br>
information has been set up properly. <br>

In [5]:
#Create a dataframe compiler
df_compiler = DfReader()

#set the path and retrieve the dataframes
df_compiler.set_folder_path(data_path)

#Get the dataframe holder and names
dfs_holder, dfs_names = df_compiler.get_dfs()

  0%|          | 0/112124 [00:00<?, ?it/s]

100%|██████████| 112124/112124 [00:00<00:00, 524524.83it/s]

The file: miccai2023_nih-cxr-lt_labels_test.csv has been retrieved
The file: miccai2023_nih-cxr-lt_labels_train.csv has been retrieved
The file: miccai2023_nih-cxr-lt_labels_val.csv has been retrieved





In [6]:
# Get the device if cuda or not
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#Define a transformations for the VGGnet16 (requires a 224,224)
transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
])

#Create datasets and dataloaders
test_dataset = CXReader(data_path=data_path, dataframe=dfs_holder[0], transform=transform, device=device)
train_dataset = CXReader(data_path=data_path, dataframe=dfs_holder[1], transform=transform,device=device)
val_dataset = CXReader(data_path=data_path, dataframe=dfs_holder[2], transform=transform, device=device)

#Sampled images from train to see single shape
samp3_image, label3 = train_dataset[1]
print("Shape of a single image and its labels")
print(f"Image: {samp3_image.shape}, labels: {label3.shape}")

#With batch size of 16, and shuffle true, and num workers = 4
batch_size = 16

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False,  num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False,  num_workers=2)

#Iterate inside a batch
for idx, batch in enumerate(train_loader):
    print(f"batch number: {idx}")
    images, labels = batch
    print("Shape of batch of images and labels")
    print(f"Images: {images.shape}, labels: {labels.shape}")
    if idx == 5:
        print("It can iterate through all batches")
        break

Shape of a single image and its labels
Image: torch.Size([3, 224, 224]), labels: torch.Size([20])
batch number: 0
Shape of batch of images and labels
Images: torch.Size([16, 3, 224, 224]), labels: torch.Size([16, 20])
batch number: 1
Shape of batch of images and labels
Images: torch.Size([16, 3, 224, 224]), labels: torch.Size([16, 20])
batch number: 2
Shape of batch of images and labels
Images: torch.Size([16, 3, 224, 224]), labels: torch.Size([16, 20])
batch number: 3
Shape of batch of images and labels
Images: torch.Size([16, 3, 224, 224]), labels: torch.Size([16, 20])
batch number: 4
Shape of batch of images and labels
Images: torch.Size([16, 3, 224, 224]), labels: torch.Size([16, 20])
batch number: 5
Shape of batch of images and labels
Images: torch.Size([16, 3, 224, 224]), labels: torch.Size([16, 20])
It can iterate through all batches


### Print an image and see the output

In [7]:
print(samp3_image)
print(samp3_image.shape)

tensor([[[0.8000, 0.7961, 0.7843,  ..., 0.7686, 0.7725, 0.7569],
         [0.7961, 0.7882, 0.7725,  ..., 0.7490, 0.7608, 0.7373],
         [0.7922, 0.7804, 0.7686,  ..., 0.7333, 0.7451, 0.7216],
         ...,
         [0.2745, 0.1255, 0.0549,  ..., 0.0392, 0.0118, 0.0000],
         [0.2745, 0.1255, 0.0549,  ..., 0.0353, 0.0078, 0.0000],
         [0.2784, 0.1255, 0.0549,  ..., 0.0353, 0.0078, 0.0000]],

        [[0.8000, 0.7961, 0.7843,  ..., 0.7686, 0.7725, 0.7569],
         [0.7961, 0.7882, 0.7725,  ..., 0.7490, 0.7608, 0.7373],
         [0.7922, 0.7804, 0.7686,  ..., 0.7333, 0.7451, 0.7216],
         ...,
         [0.2745, 0.1255, 0.0549,  ..., 0.0392, 0.0118, 0.0000],
         [0.2745, 0.1255, 0.0549,  ..., 0.0353, 0.0078, 0.0000],
         [0.2784, 0.1255, 0.0549,  ..., 0.0353, 0.0078, 0.0000]],

        [[0.8000, 0.7961, 0.7843,  ..., 0.7686, 0.7725, 0.7569],
         [0.7961, 0.7882, 0.7725,  ..., 0.7490, 0.7608, 0.7373],
         [0.7922, 0.7804, 0.7686,  ..., 0.7333, 0.7451, 0.

### Obtain the vgg16_model features and set to eval

In [8]:
class VGGEncoder(torch.nn.Module):
    def __init__(self, pretrained=True):
        super(VGGEncoder, self).__init__()

        # Load pre-trained VGG16 model
        vgg16_model = models.vgg16(pretrained=pretrained)

        # Use only the features part and remove the classifier
        self.features = vgg16_model.features

        # Set to evaluation mode if not fine-tuning
        if not pretrained:
            self.features.eval()

    def forward(self, x):
        return self.features(x)

# Example usage
# For fine-tuning
#encoder_finetune = VGGEncoder(pretrained=False)
#encoder_finetune.train()

# For inference
encoder_inference = VGGEncoder(pretrained=True)
encoder_inference.eval()

#Print output and output features
output_features = encoder_inference(samp3_image)
print(output_features)
print(output_features.shape)



tensor([[[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         ...,
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.1371, 0.3649,  ..., 0.0929, 0.2825, 0.0000],
         [0.0000, 0.1291, 0.3101,  ..., 0.2486, 0.0000, 0.0000]],

        [[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         ...,
         [0.0000, 0.0000, 0.6302,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.1466,  ..., 0.0335, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],

        [[0.0000, 0.0788, 0.0000,  ..., 0.0000, 0.0000, 0.1012],
         [0.0000, 0.2479, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.

### Now that we have seen the VGG16 layer.
Lets implement the solution where we flat the feature vector to a 1x1xD vector. <br>
just as how the paper does it.

In [9]:
class TransitionLayer(nn.Module):
    def __init__(self, input_channels, output_channels):
        super(TransitionLayer, self).__init__()
        
        # Convolutional layer with kernel size 1x1
        self.conv1x1 = nn.Conv2d(input_channels, output_channels, kernel_size=1)
        
        # Batch normalization
        self.batch_norm = nn.BatchNorm2d(output_channels)
        
        # ReLU activation
        self.relu = nn.ReLU(inplace=True)
        
        # Max pooling with kernel size equal to the feature map size
        self.max_pool = nn.MaxPool2d(kernel_size=7)

    def forward(self, x):
        # Apply operations sequentially
        x = self.conv1x1(x)
        x = self.batch_norm(x)
        x = self.relu(x)
        x = self.max_pool(x)
        return x

# Example usage
input_channels = 512
output_channels = 300 # D is the desired number of channels
transition_layer = TransitionLayer(input_channels, output_channels)

# Assuming input_tensor is of size (batch_size, 512, 7, 7)
input_tensor = torch.randn(3, 512, 7, 7)

# Apply the transition layer
output = transition_layer(input_tensor)

# Print the shape of the final feature vector
print(output.shape)
print(output)

torch.Size([3, 300, 1, 1])
tensor([[[[3.0579]],

         [[1.7879]],

         [[2.1095]],

         [[2.1985]],

         [[2.0681]],

         [[2.0306]],

         [[2.5692]],

         [[2.2555]],

         [[2.1820]],

         [[1.9125]],

         [[3.0760]],

         [[1.9485]],

         [[2.3806]],

         [[3.1762]],

         [[2.9413]],

         [[2.4204]],

         [[1.7466]],

         [[2.1346]],

         [[2.9450]],

         [[2.5219]],

         [[1.7372]],

         [[1.9311]],

         [[2.2098]],

         [[2.1287]],

         [[2.2396]],

         [[2.4239]],

         [[2.0724]],

         [[2.4720]],

         [[2.3738]],

         [[2.0836]],

         [[3.1526]],

         [[1.8083]],

         [[2.0790]],

         [[2.5191]],

         [[1.9334]],

         [[1.7204]],

         [[2.5947]],

         [[2.1917]],

         [[2.1669]],

         [[2.8529]],

         [[2.0048]],

         [[2.4622]],

         [[2.4857]],

         [[1.6923]],

     

# BIIIIG. We know the feature space at the end would be 512, 7, 7. 
This means that, if we want to create an encoder that would take this; and convert it to a sampling gaussian representation <br>
we need to do the following:
1. Pass a VGGnet16 pretrained features at eval mode (option to pretrain it too) to 512, 7, 7.
2. Pass that VGGnet16 through a transition layer that would flat it to a num_channelsx 300 output (3 since RGB).
3. Use this to sample a mean and gaussian distribution uing homework codes to retrieve results.

### Create a function that would sample gaussian parameters.
Use functions from hw2 utils.py to do this.

In [10]:
from torch.nn import functional as F
def gaussian_parameters(h, dim=-1):
    """
    Converts generic real-valued representations into mean and variance
    parameters of a Gaussian distribution

    Args:
        h: tensor: (batch, ..., dim, ...): Arbitrary tensor
        dim: int: (): Dimension along which to split the tensor for mean and
            variance

    Returns:
        m: tensor: (batch, ..., dim / 2, ...): Mean
        v: tensor: (batch, ..., dim / 2, ...): Variance
    """
    print(f"h dimension passed through gaussian paremeters i {h.shape}")
    m, h = torch.split(h, h.size(dim) // 2, dim=dim)
    v = F.softplus(h) + 1e-8
    return m, v

### Create the Encoder
Now that we have replcated all of the transformations required to perform the encoder first time .<br>

In [12]:
class Encoder(torch.nn.Module):
    def __init__(self,  z_dim, y_dim=0, pretrained=True,):
        super(Encoder, self).__init__()
        self.z_dim = z_dim
        self.y_dim = y_dim
        # Load pre-trained VGG16 model
        vgg16_model = models.vgg16(weights=pretrained)

        # Use only the features part and remove the classifier
        self.features = vgg16_model.features

        # Set to evaluation mode if not fine-tuning
        if not pretrained:
            self.features.eval()
        
        # Convolutional layer with kernel size 1x1
        self.conv1x1 = nn.Conv2d(512, 300, kernel_size=1)
        
        # Batch normalization
        self.batch_norm = nn.BatchNorm2d(300)
        
        # ReLU activation
        self.relu = nn.ReLU(inplace=True)
        
        # Max pooling with kernel size equal to the feature map size
        self.max_pool = nn.MaxPool2d(kernel_size=7)

        #Obtain the net
        self.net = nn.Sequential(
            nn.Linear(300, 2 * z_dim),
        )

    def forward(self, x):
        # Create feature map from vgget16
        feat_map = self.features(x)

        # Apply operations to obtain transition layer from paper
        h = self.conv1x1(feat_map)
        h = self.batch_norm(h)
        h = self.relu(h)
        h = self.max_pool(h)

        # Convert output from 3, 300, 1, 1 to 3, 300
        h = h.view(h.shape[0], h.shape[1])

        #Now pass it through the net to obtain gaussian space
        g = self.net(h)

        #Pass the feature space and get gaussian parameters
        m, v = gaussian_parameters(g, dim=1)
        return m, v
    
#Sampled images from train to see single shape
samp3_image, label3 = train_dataset[1]
print("Shape of a single image and its labels")
print(f"Image: {samp3_image.shape}, labels: {label3.shape}")

#Create encoder compiler
encoder_compiler = Encoder(z_dim = 2)

#Iterate inside a batch
for idx, batch in enumerate(test_loader):
    print(f"batch number: {idx}")
    images, labels = batch
    print("Shape of batch of images and labels")
    print(f"Images: {images.shape}, labels: {labels.shape}")
    output = encoder_compiler(images)
    if idx == 5:
        print("It can iterate through all batches")
        break

print(output[0].shape)
print(output[1].shape)

Shape of a single image and its labels
Image: torch.Size([3, 224, 224]), labels: torch.Size([20])




batch number: 0
Shape of batch of images and labels
Images: torch.Size([16, 3, 224, 224]), labels: torch.Size([16, 20])
h dimension passed through gaussian paremeters i torch.Size([16, 4])
batch number: 1
Shape of batch of images and labels
Images: torch.Size([16, 3, 224, 224]), labels: torch.Size([16, 20])
h dimension passed through gaussian paremeters i torch.Size([16, 4])
batch number: 2
Shape of batch of images and labels
Images: torch.Size([16, 3, 224, 224]), labels: torch.Size([16, 20])
h dimension passed through gaussian paremeters i torch.Size([16, 4])
batch number: 3
Shape of batch of images and labels
Images: torch.Size([16, 3, 224, 224]), labels: torch.Size([16, 20])
h dimension passed through gaussian paremeters i torch.Size([16, 4])
batch number: 4
Shape of batch of images and labels
Images: torch.Size([16, 3, 224, 224]), labels: torch.Size([16, 20])
h dimension passed through gaussian paremeters i torch.Size([16, 4])
batch number: 5
Shape of batch of images and labels
Ima