# VGG-16

## Setup

In [None]:
# set up Google Drive
from google.colab import drive

# mount the Drive at the location every project path below is rooted in
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

In [None]:
# run required notebooks to import functions
# each %run executes a project notebook stored on the mounted Drive;
# presumably these supply BaseModel, BaseTrainer and CustomDataset, which are
# used below but not defined in this notebook -- confirm against the scripts
%run /content/drive/MyDrive/BINF_4008_Final_Project/Scripts/Metrics.ipynb
%run /content/drive/MyDrive/BINF_4008_Final_Project/Scripts/Models.ipynb
%run /content/drive/MyDrive/BINF_4008_Final_Project/Scripts/Trainer.ipynb
%run /content/drive/MyDrive/BINF_4008_Final_Project/Scripts/CustomDataset.ipynb

In [None]:
# import libraries
import pandas as pd
import numpy as np

import os

import pickle

import matplotlib.pyplot as plt
%matplotlib notebook

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as transforms
from torchvision import models
from torchvision.models import VGG16_BN_Weights
from torch.utils.data import Dataset, DataLoader

from PIL import Image

import matplotlib.pyplot as plt

from sklearn.metrics import r2_score, mean_squared_error

In [None]:
# set device: use the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')

In [None]:
# load metadata: one CSV table that the cells below filter by its 'split' column
metadata_csv_path = '/content/drive/MyDrive/BINF_4008_Final_Project/Data/metadata.csv'
metadata = pd.read_csv(metadata_csv_path)

In [None]:
# split into different sets
def _columns_for_split(frame, split_name):
  # keep only the rows tagged with this split, then hand back the three
  # columns used downstream: image ids (as strings), the 'male' flag and
  # the 'boneage' target
  rows = frame[frame['split'] == split_name]
  return rows['id'].astype(str), rows['male'], rows['boneage']

train_images, train_tab, train_labels = _columns_for_split(metadata, 'train')

val_images, val_tab, val_labels = _columns_for_split(metadata, 'val')

test_images, test_tab, test_labels = _columns_for_split(metadata, 'test')

In [None]:
# split test set into male and female cohorts
test_metadata = metadata.loc[metadata['split'] == 'test']

# the 'male' column is compared against 1 (male) and 0 (female)
test_male_metadata = test_metadata.loc[test_metadata['male'].eq(1)]
test_female_metadata = test_metadata.loc[test_metadata['male'].eq(0)]

# image ids (as strings), 'male' flag and 'boneage' target for the male cohort
test_male_images, test_male_tab, test_male_labels = (
  test_male_metadata['id'].astype(str),
  test_male_metadata['male'],
  test_male_metadata['boneage'],
)

# same three columns for the female cohort
test_female_images, test_female_tab, test_female_labels = (
  test_female_metadata['id'].astype(str),
  test_female_metadata['male'],
  test_female_metadata['boneage'],
)

In [None]:
# define batch size and epochs
BATCH_SIZE, EPOCHS = 64, 7

# define the normalization mean and standard deviation
# (single values; they are repeated for all 3 channels below because the
# grayscale image is expanded to 3 channels by transforms.Grayscale)
MEAN = [0.485]
STD = [0.229]

# define transformations
# transform_1: resize, expand to 3 channels, convert to tensor, normalize
transform_1 = transforms.Compose([
  transforms.Resize(size = (224, 224)),
  transforms.Grayscale(num_output_channels = 3),
  transforms.ToTensor(),
  transforms.Normalize(mean = [MEAN[0]] * 3, std = [STD[0]] * 3),
])

# transform_2: random augmentations (affine jitter and random flips)
transform_2 = transforms.Compose([
  transforms.RandomAffine((0, 90), translate = (0.1, 0.1), scale = None, shear = (0, 10)),
  transforms.RandomHorizontalFlip(p = 0.3),
  transforms.RandomVerticalFlip(p = 0.3),
])

In [None]:
# initialize datasets and dataloaders for the different splits
def _dataset_and_loader(images, labels, tab, transform_list, shuffle):
  # build one CustomDataset over the shared image folder and wrap it in a DataLoader
  dataset = CustomDataset('/content/drive/MyDrive/BINF_4008_Final_Project/Data/all_images/', images, labels, tabular_data = tab, transforms = transform_list)
  loader = DataLoader(dataset, batch_size = BATCH_SIZE, shuffle = shuffle, num_workers = 2)
  return dataset, loader

# training split: both transforms are passed and the loader shuffles
train_dataset, train_dataloader = _dataset_and_loader(train_images, train_labels, train_tab, [transform_1, transform_2], True)

# evaluation splits: only transform_1 is passed and the order is kept fixed
val_dataset, val_dataloader = _dataset_and_loader(val_images, val_labels, val_tab, [transform_1], False)

test_dataset, test_dataloader = _dataset_and_loader(test_images, test_labels, test_tab, [transform_1], False)

test_male_dataset, test_male_dataloader = _dataset_and_loader(test_male_images, test_male_labels, test_male_tab, [transform_1], False)

test_female_dataset, test_female_dataloader = _dataset_and_loader(test_female_images, test_female_labels, test_female_tab, [transform_1], False)

## Define Model

In [None]:
class VGG16Regression(BaseModel):
  """VGG-16 (batch-norm variant) with a regression head, optionally fused with one tabular feature.

  Without tabular data the final classifier layer is replaced by a linear
  layer with a single output, so the network maps an image batch directly to
  one value per image.

  With tabular data the final classifier layer keeps its input width, a small
  network embeds the single tabular feature, and the two representations are
  concatenated and passed through a three-layer output network that ends in a
  single value.

  BaseModel is not defined in this notebook (presumably it comes from one of
  the notebooks executed with %run -- confirm).
  """

  def __init__(self, weights, device, tabular_data = False, tabular_hidden = None, output_hidden = None):
    """Build the image backbone and, if requested, the tabular fusion layers.

    Args:
      weights: the string 'random' to start from Xavier-initialized weights,
        otherwise a value forwarded unchanged to models.vgg16_bn(weights = ...)
        (e.g. VGG16_BN_Weights.IMAGENET1K_V1).
      device: forwarded to BaseModel.__init__.
      tabular_data: when truthy, build the tabular and output networks.
      tabular_hidden: width of the tabular embedding; required with tabular_data.
      output_hidden: width of the hidden layers of the output network;
        required with tabular_data.

    Raises:
      ValueError: if tabular_data is truthy but tabular_hidden or
        output_hidden is missing.
    """

    super().__init__(device)

    # fail early with a readable message instead of an opaque error inside nn.Linear
    if tabular_data and (tabular_hidden is None or output_hidden is None):
      raise ValueError('tabular_hidden and output_hidden must be provided when tabular_data is True')

    # 'random' means: build the architecture without pretrained weights
    random_init = weights == 'random'
    self.vgg16 = models.vgg16_bn(weights = None if random_init else weights)

    # define tabular data variable
    self.tabular_data = tabular_data

    # replace the final layer exactly once:
    #   image only   -> single output node
    #   with tabular -> same number of dimensions, fused with the tabular features later
    head_in = self.vgg16.classifier[-1].in_features
    head_out = head_in if self.tabular_data else 1
    self.vgg16.classifier[-1] = nn.Linear(head_in, head_out)

    # Xavier pass runs after the head is in place so the replaced layer is covered too
    if random_init:
      self.vgg16.apply(self.xavier_init)

    # if using tabular data then define tabular hidden and output hidden variables
    if self.tabular_data:
      self.tabular_hidden = tabular_hidden
      self.output_hidden = output_hidden

      # create tabular network (one input feature -> tabular_hidden)
      self.tabular_net = nn.Sequential(
        nn.Linear(1, self.tabular_hidden),
        nn.ReLU()
      )

      # define dimension of the concatenated image output and tabular output
      self.fusion_dim = self.vgg16.classifier[-1].out_features + self.tabular_hidden

      # define last layers for output
      self.output_net = nn.Sequential(
        nn.Linear(self.fusion_dim, self.output_hidden),
        nn.ReLU(),
        nn.Linear(self.output_hidden, self.output_hidden),
        nn.ReLU(),
        nn.Linear(self.output_hidden, 1)
      )

      # initialize added networks with xavier weights
      self.tabular_net.apply(self.xavier_init)
      self.output_net.apply(self.xavier_init)

  def xavier_init(self, m):
    """Apply Xavier-uniform initialization to the weight of a Conv2d or Linear module.

    Other module types are left untouched, and biases are not modified.
    """

    if isinstance(m, (torch.nn.Conv2d, torch.nn.Linear)):
      nn.init.xavier_uniform_(m.weight)

  def forward(self, image_input, tabular_input = None):
    """Run the model on a batch.

    Args:
      image_input: image batch passed to the VGG-16 backbone.
      tabular_input: tabular feature batch; required when the model was built
        with tabular_data. A 1-D tensor is reshaped to one column so it fits
        the single-input tabular network.

    Returns:
      The backbone output for image-only models, otherwise the output of the
      fusion network.

    Raises:
      ValueError: if the model uses tabular data but tabular_input is None.
    """

    # if just image data, pass the image through the image model
    if not self.tabular_data:
      return self.vgg16(image_input)

    if tabular_input is None:
      raise ValueError('tabular_input is required when the model was built with tabular_data = True')

    # pass image through image model and flatten to (batch, features)
    image_features = self.vgg16(image_input)
    image_features = image_features.view(image_features.size(0), -1)

    # the tabular net expects one column per sample
    if tabular_input.dim() == 1:
      tabular_input = tabular_input.unsqueeze(1)

    # pass gender through the tabular net
    tabular_features = self.tabular_net(tabular_input)

    # combine the two outputs
    combined_features = torch.cat((image_features, tabular_features), dim = 1)

    # pass full representation through last network to get output
    return self.output_net(combined_features)

## ImageNet Weight Initialization No Gender Model

In [None]:
# initialize or load the model
# model = VGG16Regression(weights = VGG16_BN_Weights.IMAGENET1K_V1, device = device).to(device)
with open('/content/drive/MyDrive/BINF_4008_Final_Project/Pickles/vgg16_imagenet_no_gender_model.pkl', 'rb') as f:
 model = pickle.load(f)

In [None]:
# define criterion and optimizer
criterion = nn.L1Loss()
optimizer = optim.Adam(model.parameters())

In [None]:
# define trainer and evaluator to train and evaluate the model
trainer_and_evaluator = BaseTrainer(model, criterion, optimizer, train_dataloader, val_dataloader, test_dataloader, test_male_dataloader, test_female_dataloader, EPOCHS, 'vgg16_imagenet_no_gender', device)

In [None]:
# train and validate the model
training_results = trainer_and_evaluator.train_and_validate()

In [None]:
training_results

In [None]:
# evaluate the model on the test set using bootstrapped samples
test_results = trainer_and_evaluator.bootstrap_test_set()

In [None]:
test_results

## Xavier Weight Initialization No Gender Model

In [None]:
# define or initialize the model
# model = VGG16Regression(weights = 'random', device = device).to(device)
with open('/content/drive/MyDrive/BINF_4008_Final_Project/Pickles/vgg16_xavier_no_gender_model.pkl', 'rb') as f:
 model = pickle.load(f)

In [None]:
# define criterion and optimizer
criterion = nn.L1Loss()
optimizer = optim.Adam(model.parameters())

In [None]:
# define trainer and evaluator to train and evaluate the model
trainer_and_evaluator = BaseTrainer(model, criterion, optimizer, train_dataloader, val_dataloader, test_dataloader, test_male_dataloader, test_female_dataloader, EPOCHS, 'vgg16_xavier_no_gender', device)

In [None]:
# train and validate the model
train_results = trainer_and_evaluator.train_and_validate()

In [None]:
train_results

In [None]:
# evaluate the model on the test set using bootstrapped samples
test_results = trainer_and_evaluator.bootstrap_test_set()

In [None]:
test_results

## ImageNet Weight Initialization With Gender Model

In [None]:
# define or initialize the model
# model = VGG16Regression(weights = VGG16_BN_Weights.IMAGENET1K_V1, device = device, tabular_data = True, tabular_hidden = 50, output_hidden = 1000).to(device)
with open('/content/drive/MyDrive/BINF_4008_Final_Project/Pickles/vgg16_imagenet_w_gender_model.pkl', 'rb') as f:
  model = pickle.load(f)

In [None]:
# define criterion and optimizer
criterion = nn.L1Loss()
optimizer = optim.Adam(model.parameters())

In [None]:
# define trainer and evaluator to train and evaluate the model
trainer_and_evaluator = BaseTrainer(model, criterion, optimizer, train_dataloader, val_dataloader, test_dataloader, test_male_dataloader, test_female_dataloader, EPOCHS, 'vgg16_imagenet_w_gender', device)

In [None]:
# train and validate the model
train_results = trainer_and_evaluator.train_and_validate()

In [None]:
train_results

In [None]:
# evaluate the model on the test set using bootstrapped samples
test_results = trainer_and_evaluator.bootstrap_test_set()

In [None]:
test_results

## Xavier Weight Initialization With Gender Model

In [None]:
# initialize or load the model
# model = VGG16Regression(weights = 'random', device = device, tabular_data = True, tabular_hidden = 50, output_hidden = 1000).to(device)
with open( '/content/drive/MyDrive/BINF_4008_Final_Project/Pickles/vgg16_xavier_w_gender_model.pkl', 'rb') as f:
  model = pickle.load(f)

In [None]:
# define criterion and optimizer
criterion = nn.L1Loss()
optimizer = optim.Adam(model.parameters())

In [None]:
# define trainer and evaluator to train and evaluate the model
trainer_and_evaluator = BaseTrainer(model, criterion, optimizer, train_dataloader, val_dataloader, test_dataloader, test_male_dataloader, test_female_dataloader, EPOCHS, 'vgg16_xavier_w_gender', device = device)

In [None]:
# train and validate the model
train_results = trainer_and_evaluator.train_and_validate()

In [None]:
train_results

In [None]:
# evaluate the model on the test set using bootstrapped samples
test_results = trainer_and_evaluator.bootstrap_test_set()

In [None]:
test_results

## Xavier Weight Initialization No Gender Model (10 Epochs)

In [None]:
# define or load the model
# model = VGG16Regression(weights = 'random', device = device).to(device)
with open( '/content/drive/MyDrive/BINF_4008_Final_Project/Pickles/vgg16_xavier_no_gender_10_model.pkl', 'rb') as f:
  model = pickle.load(f)

In [None]:
# define criterion and optimizer
criterion = nn.L1Loss()
optimizer = optim.Adam(model.parameters())

In [None]:
# define trainer and evaluator to train and evaluate the model
trainer_and_evaluator = BaseTrainer(model, criterion, optimizer, train_dataloader, val_dataloader, test_dataloader, test_male_dataloader, test_female_dataloader, 10, 'vgg16_xavier_no_gender_10', device = device)

In [None]:
# train and validate the model
train_results = trainer_and_evaluator.train_and_validate()

In [None]:
train_results

In [None]:
# evaluate the model on the test set using bootstrapped samples
test_results = trainer_and_evaluator.bootstrap_test_set()

In [None]:
test_results

## Xavier Weight Initialization With Gender Model (10 Epochs)

In [None]:
# initialize or load the model
# model = VGG16Regression(weights = 'random', device = device, tabular_data = True, tabular_hidden = 50, output_hidden = 1000).to(device)
with open( '/content/drive/MyDrive/BINF_4008_Final_Project/Pickles/vgg16_xavier_w_gender_10_model.pkl', 'rb') as f:
  model = pickle.load(f)

In [None]:
# define criterion and optimizer
criterion = nn.L1Loss()
optimizer = optim.Adam(model.parameters())

In [None]:
# define trainer and evaluator to train and evaluate the model
trainer_and_evaluator = BaseTrainer(model, criterion, optimizer, train_dataloader, val_dataloader, test_dataloader, test_male_dataloader, test_female_dataloader, 10, 'vgg16_xavier_w_gender_10', device = device)

In [None]:
# train and validate the model
train_results = trainer_and_evaluator.train_and_validate()

In [None]:
train_results

In [None]:
# evaluate the model on the test set using bootstrapped samples
test_results = trainer_and_evaluator.bootstrap_test_set()

In [None]:
test_results