# VGG-16

## Setup

In [1]:
# Mount Google Drive at /content/drive (Colab-specific). Every script, data and
# pickle path used later in this notebook is located under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Execute the project's helper notebooks inside this kernel so that the names
# they define become available to the cells below.
# NOTE(review): BaseModel, BaseTrainer and CustomDataset are used later but are
# never defined in this notebook - presumably they come from these scripts; confirm.
%run /content/drive/MyDrive/BINF_4008_Final_Project/Scripts/Metrics.ipynb
%run /content/drive/MyDrive/BINF_4008_Final_Project/Scripts/Models.ipynb
%run /content/drive/MyDrive/BINF_4008_Final_Project/Scripts/Trainer.ipynb
%run /content/drive/MyDrive/BINF_4008_Final_Project/Scripts/CustomDataset.ipynb

In [3]:
# import libraries
import pandas as pd
import numpy as np

import os

import pickle

import matplotlib.pyplot as plt
%matplotlib notebook

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as transforms
from torchvision import models
from torchvision.models import VGG16_BN_Weights
from torch.utils.data import Dataset, DataLoader

from PIL import Image

import matplotlib.pyplot as plt

from sklearn.metrics import r2_score, mean_squared_error

In [4]:
# choose the compute device: the GPU when CUDA is usable, the CPU otherwise
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')

In [5]:
# load metadata
# The cells below read four columns from this table: 'id', 'male', 'boneage'
# and 'split' (the latter compared against 'train', 'val' and 'test').
metadata = pd.read_csv('/content/drive/MyDrive/BINF_4008_Final_Project/Data/metadata.csv')

In [6]:

# For each value of the 'split' column, pull out three aligned Series:
# the image ids (cast to str), the 'male' indicator and the 'boneage' target.
train_images = metadata.loc[metadata['split'] == 'train', 'id'].astype(str)
train_tab = metadata.loc[metadata['split'] == 'train', 'male']
train_labels = metadata.loc[metadata['split'] == 'train', 'boneage']

val_images = metadata.loc[metadata['split'] == 'val', 'id'].astype(str)
val_tab = metadata.loc[metadata['split'] == 'val', 'male']
val_labels = metadata.loc[metadata['split'] == 'val', 'boneage']

test_images = metadata.loc[metadata['split'] == 'test', 'id'].astype(str)
test_tab = metadata.loc[metadata['split'] == 'test', 'male']
test_labels = metadata.loc[metadata['split'] == 'test', 'boneage']

In [7]:
# Rows of the test split, then the same rows partitioned on the 'male' flag
# (1 -> male subset, 0 -> female subset).
test_metadata = metadata.loc[metadata['split'] == 'test']

test_male_metadata = test_metadata.loc[test_metadata['male'] == 1]
test_female_metadata = test_metadata.loc[test_metadata['male'] == 0]

# Same three Series as for the full splits: ids as str, 'male', 'boneage'.
test_male_images = test_male_metadata.loc[:, 'id'].astype(str)
test_male_tab = test_male_metadata.loc[:, 'male']
test_male_labels = test_male_metadata.loc[:, 'boneage']

test_female_images = test_female_metadata.loc[:, 'id'].astype(str)
test_female_tab = test_female_metadata.loc[:, 'male']
test_female_labels = test_female_metadata.loc[:, 'boneage']

In [8]:
# Run configuration shared by every data loader / trainer below.
BATCH_SIZE = 64
EPOCHS = 7

# One-channel normalisation statistics; they are repeated three times when
# handed to Normalize because Grayscale(num_output_channels = 3) yields
# three channels.
MEAN = [0.485]
STD = [0.229]

# Preprocessing used by every dataset in this notebook:
# resize -> 3-channel grayscale -> tensor -> normalise.
transform_1 = transforms.Compose([
  transforms.Resize(size = (224, 224)),
  transforms.Grayscale(num_output_channels = 3),
  transforms.ToTensor(),
  transforms.Normalize(mean = 3 * MEAN, std = 3 * STD),
])

# Random augmentation; only the training dataset receives it (see the
# dataset construction cell).
transform_2 = transforms.Compose([
  transforms.RandomAffine(degrees = (0, 90), translate = (0.1, 0.1), shear = (0, 10)),
  transforms.RandomHorizontalFlip(p = 0.3),
  transforms.RandomVerticalFlip(p = 0.3),
])

In [9]:
def _make_split(image_ids, labels, tabular, transform_list, shuffle):
  """Build the CustomDataset for one split and the DataLoader that iterates over it.

  All splits read from the same image folder and share BATCH_SIZE and the
  number of worker processes; only the inputs, transforms and shuffling differ.
  Returns the pair (dataset, dataloader).
  """
  dataset = CustomDataset('/content/drive/MyDrive/BINF_4008_Final_Project/Data/all_images/', image_ids, labels, tabular_data = tabular, transforms = transform_list)
  dataloader = DataLoader(dataset, batch_size = BATCH_SIZE, shuffle = shuffle, num_workers = 2)
  return dataset, dataloader

# Training data: preprocessing plus augmentation, shuffled.
train_dataset, train_dataloader = _make_split(train_images, train_labels, train_tab, [transform_1, transform_2], True)

# Evaluation data: preprocessing only, fixed order.
val_dataset, val_dataloader = _make_split(val_images, val_labels, val_tab, [transform_1], False)

test_dataset, test_dataloader = _make_split(test_images, test_labels, test_tab, [transform_1], False)

test_male_dataset, test_male_dataloader = _make_split(test_male_images, test_male_labels, test_male_tab, [transform_1], False)

test_female_dataset, test_female_dataloader = _make_split(test_female_images, test_female_labels, test_female_tab, [transform_1], False)

## Define Model

In [10]:
class VGG16Regression(BaseModel):
  """VGG-16 (batch-norm variant) regressor, optionally fused with one tabular feature.

  Two modes, selected by ``tabular_data``:

  * image only - the last classifier layer of the backbone is replaced by a
    single-output linear layer and ``forward`` returns the backbone output.
  * image + tabular - the last classifier layer is replaced by a linear layer
    whose output width equals its input width, a one-input tabular branch is
    evaluated next to it, and the concatenation of both goes through a
    three-layer MLP ending in a single output.

  Attribute and submodule names (``vgg16``, ``tabular_net``, ``output_net``,
  ``tabular_data`` ...) are kept unchanged so that instances pickled with an
  earlier version of this class can still be loaded and evaluated.
  """

  def __init__(self, weights, device, tabular_data = False, tabular_hidden = None, output_hidden = None):
    """Build the backbone and, when requested, the tabular and fusion networks.

    Args:
      weights: the string 'random' builds the backbone without pretrained
        weights and applies Xavier-uniform init to its Conv2d/Linear layers;
        any other value is forwarded to ``models.vgg16_bn(weights = ...)``.
      device: forwarded to ``BaseModel.__init__``; not used otherwise here.
      tabular_data: truthy to enable the image + tabular mode.
      tabular_hidden: width of the tabular branch (required in tabular mode).
      output_hidden: width of the fusion MLP hidden layers (required in
        tabular mode).

    Raises:
      ValueError: if ``tabular_data`` is truthy but ``tabular_hidden`` or
        ``output_hidden`` is missing.
    """

    super(VGG16Regression, self).__init__(device)

    # Fail early with a clear message instead of a TypeError deep inside nn.Linear.
    if tabular_data and (tabular_hidden is None or output_hidden is None):
      raise ValueError('tabular_hidden and output_hidden are required when tabular_data is True')

    use_random_init = weights == 'random'

    self.vgg16 = models.vgg16_bn(weights = None if use_random_init else weights)

    self.tabular_data = tabular_data

    # Replace the final classifier layer: one output for plain regression, or a
    # feature vector as wide as the layer's input when it feeds the fusion MLP.
    head_in = self.vgg16.classifier[-1].in_features
    head_out = head_in if self.tabular_data else 1
    self.vgg16.classifier[-1] = nn.Linear(head_in, head_out)

    # Xavier init runs only once the final head is in place. Previously the
    # tabular-mode head was created after this step and therefore kept
    # PyTorch's default initialisation in the 'random' configuration.
    if use_random_init:
      self.vgg16 = self.vgg16.apply(self.xavier_init)

    if self.tabular_data:
      self.tabular_hidden = tabular_hidden
      self.output_hidden = output_hidden

      # Single scalar tabular input -> hidden representation.
      self.tabular_net = nn.Sequential(
        nn.Linear(1, self.tabular_hidden),
        nn.ReLU()
      )

      self.fusion_dim = self.vgg16.classifier[-1].out_features + self.tabular_hidden

      self.output_net = nn.Sequential(
        nn.Linear(self.fusion_dim, self.output_hidden),
        nn.ReLU(),
        nn.Linear(self.output_hidden, self.output_hidden),
        nn.ReLU(),
        nn.Linear(self.output_hidden, 1)
      )

      # The tabular and fusion networks are Xavier-initialised regardless of
      # how the backbone weights were obtained.
      self.tabular_net = self.tabular_net.apply(self.xavier_init)
      self.output_net = self.output_net.apply(self.xavier_init)

  def xavier_init(self, m):
    """Xavier-uniform initialise the weight of ``m`` if it is a Conv2d or Linear layer.

    Intended for use with ``nn.Module.apply``; other module types and all
    biases are left untouched.
    """

    if isinstance(m, (torch.nn.Conv2d, torch.nn.Linear)):
      nn.init.xavier_uniform_(m.weight)

  def forward(self, image_input, tabular_input = None):
    """Return the model prediction for a batch.

    Args:
      image_input: batch of images passed to the VGG-16 backbone.
      tabular_input: batch of tabular values; required in tabular mode, where
        its last dimension must be 1 (the tabular branch starts with
        ``nn.Linear(1, ...)``) and it is concatenated with the image features
        along dim 1. Ignored in image-only mode.

    Raises:
      ValueError: if the model is in tabular mode and ``tabular_input`` is None.
    """

    if not self.tabular_data:
      return self.vgg16(image_input)

    if tabular_input is None:
      raise ValueError('tabular_input is required when the model was built with tabular_data = True')

    image_features = self.vgg16(image_input)
    # Flatten to (batch, features) so it can be concatenated along dim 1.
    image_features = image_features.view(image_features.size(0), -1)

    tabular_features = self.tabular_net(tabular_input)

    combined_features = torch.cat((image_features, tabular_features), dim = 1)

    return self.output_net(combined_features)

## ImageNet Weight Initialization No Gender Model

In [11]:
# Construction call kept for reference; the cell now restores a pickled model instead.
# model = VGG16Regression(weights = VGG16_BN_Weights.IMAGENET1K_V1, device = device).to(device)
# NOTE(review): pickle.load can execute arbitrary code - only load pickle files you trust.
# NOTE(review): the pickle presumably holds a VGG16Regression instance, in which case the
# class-definition cell must have been run before this one - confirm.
with open('/content/drive/MyDrive/BINF_4008_Final_Project/Pickles/vgg16_imagenet_no_gender_model.pkl', 'rb') as f:
 model = pickle.load(f)

In [12]:
# L1 loss and an Adam optimizer (library-default hyperparameters) over all model parameters.
# NOTE(review): the training call in this section is commented out, so the optimizer is
# presumably only needed to satisfy the BaseTrainer signature - confirm.
criterion = nn.L1Loss()
optimizer = optim.Adam(model.parameters())

In [13]:
# Arguments, in order: model, loss, optimizer, the train / val / test / test-male /
# test-female loaders, EPOCHS, a name string (same stem as the pickle file) and the device.
trainer_and_evaluator = BaseTrainer(model, criterion, optimizer, train_dataloader, val_dataloader, test_dataloader, test_male_dataloader, test_female_dataloader, EPOCHS, 'vgg16_imagenet_no_gender', device)

In [14]:
# training_results = trainer_and_evaluator.train_and_validate()

In [15]:
# training_results

In [16]:
# Evaluate the loaded model with BaseTrainer.bootstrap_test_set (defined outside this notebook).
test_results = trainer_and_evaluator.bootstrap_test_set()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [17]:
# test_results

## Xavier Weight Initialization No Gender Model

In [18]:
# Construction call kept for reference; the cell now restores a pickled model instead.
# model = VGG16Regression(weights = 'random', device = device).to(device)
# NOTE(review): pickle.load can execute arbitrary code - only load pickle files you trust.
# NOTE(review): the pickle presumably holds a VGG16Regression instance, in which case the
# class-definition cell must have been run before this one - confirm.
with open('/content/drive/MyDrive/BINF_4008_Final_Project/Pickles/vgg16_xavier_no_gender_model.pkl', 'rb') as f:
 model = pickle.load(f)

In [19]:
# L1 loss and an Adam optimizer (library-default hyperparameters) over all model parameters.
# NOTE(review): the training call in this section is commented out, so the optimizer is
# presumably only needed to satisfy the BaseTrainer signature - confirm.
criterion = nn.L1Loss()
optimizer = optim.Adam(model.parameters())

In [20]:
# Arguments, in order: model, loss, optimizer, the train / val / test / test-male /
# test-female loaders, EPOCHS, a name string (same stem as the pickle file) and the device.
trainer_and_evaluator = BaseTrainer(model, criterion, optimizer, train_dataloader, val_dataloader, test_dataloader, test_male_dataloader, test_female_dataloader, EPOCHS, 'vgg16_xavier_no_gender', device)

In [21]:
# train_results = trainer_and_evaluator.train_and_validate()

In [22]:
# train_results

In [23]:
# Evaluate the loaded model with BaseTrainer.bootstrap_test_set (defined outside this notebook);
# the returned dict is displayed by the next cell.
test_results = trainer_and_evaluator.bootstrap_test_set()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [24]:
# Display the full results dict. Its 'test' entry starts with r2 / pearson / mse / mae / mape
# three-value lists (presumably lower bound, point estimate, upper bound - confirm),
# followed by longer per-metric lists; the saved output is long and truncated.
test_results

{'test': [{'r2': [-0.06230090218947415,
    -0.011633610516958246,
    0.04389671045544071],
   'pearson': [0.4046004889194396, 0.45049822176032933, 0.4935091879874941],
   'mse': [1660.2372131347656, 1760.0819, 1866.4645233154297],
   'mae': [34.035156631469725, 35.165794, 36.295392990112305],
   'mape': [34.045974761247635, 39.217378824949265, 46.376570016145706]},
  {'r2': [-0.052633848445307496,
    -0.018178315028962055,
    -0.02774977582531113,
    -0.005511507228031176,
    -0.0073843702101601405,
    -0.001390579617945642,
    0.0028306388799197446,
    -0.04283835587477958,
    0.00967266003132472,
    -0.035169871086021987,
    -0.009915848338534206,
    -0.030479824677324796,
    0.005455634667274167,
    -0.05897969260225788,
    0.05263011471226853,
    -0.027842271529629814,
    -0.019600520288299395,
    -0.029396301941264635,
    -0.026701780449945423,
    -0.014749607133576204,
    0.003910240939369869,
    0.028461396403791972,
    -0.007309649746338964,
    -0.01784

## ImageNet Weight Initialization With Gender Model

In [25]:
# Construction call kept for reference; the cell now restores a pickled model instead.
# model = VGG16Regression(weights = VGG16_BN_Weights.IMAGENET1K_V1, device = device, tabular_data = True, tabular_hidden = 50, output_hidden = 1000).to(device)
# NOTE(review): pickle.load can execute arbitrary code - only load pickle files you trust.
# NOTE(review): the pickle presumably holds a VGG16Regression instance, in which case the
# class-definition cell must have been run before this one - confirm.
with open('/content/drive/MyDrive/BINF_4008_Final_Project/Pickles/vgg16_imagenet_w_gender_model.pkl', 'rb') as f:
  model = pickle.load(f)

In [26]:
# L1 loss and an Adam optimizer (library-default hyperparameters) over all model parameters.
# NOTE(review): the training call in this section is commented out, so the optimizer is
# presumably only needed to satisfy the BaseTrainer signature - confirm.
criterion = nn.L1Loss()
optimizer = optim.Adam(model.parameters())

In [27]:
# Arguments, in order: model, loss, optimizer, the train / val / test / test-male /
# test-female loaders, EPOCHS, a name string (same stem as the pickle file) and the device.
trainer_and_evaluator = BaseTrainer(model, criterion, optimizer, train_dataloader, val_dataloader, test_dataloader, test_male_dataloader, test_female_dataloader, EPOCHS, 'vgg16_imagenet_w_gender', device)

In [28]:
# train_results = trainer_and_evaluator.train_and_validate()

In [29]:
# train_results

In [30]:
# Evaluate the loaded model with BaseTrainer.bootstrap_test_set (defined outside this notebook);
# the returned dict is displayed by the next cell.
test_results = trainer_and_evaluator.bootstrap_test_set()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [31]:
# Display the full results dict. Its 'test' entry starts with r2 / pearson / mse / mae / mape
# three-value lists (presumably lower bound, point estimate, upper bound - confirm),
# followed by longer per-metric lists; the saved output is long and truncated.
test_results

{'test': [{'r2': [0.8164881136427856, 0.8358649632293179, 0.852534093621219],
   'pearson': [0.9168946450857821, 0.9277318255422593, 0.9375768795184812],
   'mse': [253.9059600830078, 285.609, 321.1267425537109],
   'mae': [12.443767619132995, 12.8863945, 13.50315854549408],
   'mape': [13.224757313728333, 18.011933125555515, 24.325235448777676]},
  {'r2': [0.8384493219034946,
    0.8304895655964171,
    0.8381819960504371,
    0.8394306597855585,
    0.8456668779933659,
    0.8440381860310443,
    0.8356062349142427,
    0.8303647063493539,
    0.8217326266493827,
    0.8350702629826142,
    0.833104292552147,
    0.8170241864610275,
    0.8147134645434049,
    0.8338847384623003,
    0.8210652623074273,
    0.8180463240016178,
    0.8248345269343784,
    0.8164591867334768,
    0.8292729615907839,
    0.8228530383943985,
    0.8279129453770221,
    0.8475623182355023,
    0.8275751372559794,
    0.8305085357605262,
    0.8562428936256334,
    0.841268453666536,
    0.8332150112599179

## Xavier Weight Initialization With Gender Model

In [32]:
# Construction call kept for reference; the cell now restores a pickled model instead.
# model = VGG16Regression(weights = 'random', device = device, tabular_data = True, tabular_hidden = 50, output_hidden = 1000).to(device)
# NOTE(review): pickle.load can execute arbitrary code - only load pickle files you trust.
# NOTE(review): the pickle presumably holds a VGG16Regression instance, in which case the
# class-definition cell must have been run before this one - confirm.
with open( '/content/drive/MyDrive/BINF_4008_Final_Project/Pickles/vgg16_xavier_w_gender_model.pkl', 'rb') as f:
  model = pickle.load(f)

In [33]:
# L1 loss and an Adam optimizer (library-default hyperparameters) over all model parameters.
# NOTE(review): the training call in this section is commented out, so the optimizer is
# presumably only needed to satisfy the BaseTrainer signature - confirm.
criterion = nn.L1Loss()
optimizer = optim.Adam(model.parameters())

In [34]:
# Arguments, in order: model, loss, optimizer, the train / val / test / test-male /
# test-female loaders, EPOCHS, a name string (same stem as the pickle file); the device
# is passed by keyword here.
trainer_and_evaluator = BaseTrainer(model, criterion, optimizer, train_dataloader, val_dataloader, test_dataloader, test_male_dataloader, test_female_dataloader, EPOCHS, 'vgg16_xavier_w_gender', device = device)

In [35]:
# train_results = trainer_and_evaluator.train_and_validate()

In [36]:
# train_results

In [37]:
# Evaluate the loaded model with BaseTrainer.bootstrap_test_set (defined outside this notebook);
# the returned dict is displayed by the next cell.
test_results = trainer_and_evaluator.bootstrap_test_set()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [38]:
# Display the full results dict. Its 'test' entry starts with r2 / pearson / mse / mae / mape
# three-value lists (presumably lower bound, point estimate, upper bound - confirm),
# followed by longer per-metric lists; the saved output is long and truncated.
test_results

{'test': [{'r2': [-1.5168571056477964,
    -1.3494908189958001,
    -1.185250179102817],
   'pearson': [0.48595515935979594, 0.5319709726124227, 0.572068816053204],
   'mse': [3908.356475830078, 4085.72, 4250.160571289062],
   'mae': [54.357827281951906, 55.828007, 57.23323030471802],
   'mape': [42.55460448563099, 45.004946649074554, 47.8914912045002]},
  {'r2': [-1.4499532542130074,
    -1.4214725324757373,
    -1.2974817248599506,
    -1.3481239957993605,
    -1.3803352192036016,
    -1.4382999977684876,
    -1.3472643382492908,
    -1.5484806278863816,
    -1.3482868360484637,
    -1.3740781532699926,
    -1.3281587823168417,
    -1.3908051631285967,
    -1.2720331330346366,
    -1.3711106626692624,
    -1.1668351062659168,
    -1.3474913258842247,
    -1.4423349288206748,
    -1.394190108542242,
    -1.3883194252546351,
    -1.2983714260841612,
    -1.2958728784537903,
    -1.2268802049990302,
    -1.3266095149023474,
    -1.4062938564863154,
    -1.352660087269606,
    -1.3394544

## Xavier Weight Initialization No Gender Model (10 Epochs)

In [39]:
# Construction call kept for reference; the cell now restores a pickled model instead.
# model = VGG16Regression(weights = 'random', device = device).to(device)
# NOTE(review): pickle.load can execute arbitrary code - only load pickle files you trust.
# NOTE(review): the pickle presumably holds a VGG16Regression instance, in which case the
# class-definition cell must have been run before this one - confirm.
with open( '/content/drive/MyDrive/BINF_4008_Final_Project/Pickles/vgg16_xavier_no_gender_10_model.pkl', 'rb') as f:
  model = pickle.load(f)

In [40]:
# L1 loss and an Adam optimizer (library-default hyperparameters) over all model parameters.
# NOTE(review): the training call in this section is commented out, so the optimizer is
# presumably only needed to satisfy the BaseTrainer signature - confirm.
criterion = nn.L1Loss()
optimizer = optim.Adam(model.parameters())

In [41]:
# Same argument layout as the earlier trainer cells, except that the literal 10 is passed
# in the position where the other sections pass EPOCHS (7), matching this section's
# "(10 Epochs)" title. The device is passed by keyword.
trainer_and_evaluator = BaseTrainer(model, criterion, optimizer, train_dataloader, val_dataloader, test_dataloader, test_male_dataloader, test_female_dataloader, 10, 'vgg16_xavier_no_gender_10', device = device)

In [42]:
# train_results = trainer_and_evaluator.train_and_validate()

In [43]:
# train_results

In [44]:
# Evaluate the loaded model with BaseTrainer.bootstrap_test_set (defined outside this notebook);
# the returned dict is displayed by the next cell.
test_results = trainer_and_evaluator.bootstrap_test_set()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [45]:
# Display the full results dict. Its 'test' entry starts with r2 / pearson / mse / mae / mape
# three-value lists (presumably lower bound, point estimate, upper bound - confirm),
# followed by longer per-metric lists; the saved output is long and truncated.
test_results

{'test': [{'r2': [0.10862146496352303,
    0.1412955609758091,
    0.17878332128235574],
   'pearson': [0.40018712692801367, 0.43950675230891817, 0.48522932359896076],
   'mse': [1386.9376159667968, 1494.5366, 1603.2805969238282],
   'mae': [28.72027430534363, 29.8157, 30.975564432144164],
   'mape': [37.388912066817284, 43.8528051674366, 50.844711810350425]},
  {'r2': [0.16447638708777146,
    0.1408454571767378,
    0.10862961292678686,
    0.15451305343777666,
    0.13119935337091893,
    0.1586788293000143,
    0.1454406131444813,
    0.14553551319829672,
    0.17238459054909439,
    0.13542265789841335,
    0.13452265522076612,
    0.14876479203905912,
    0.11931939681441606,
    0.13382176466010887,
    0.10287674621496767,
    0.1258333665954361,
    0.12776440888791007,
    0.1300230505917459,
    0.13741586236885683,
    0.14297902862045497,
    0.12394523943485947,
    0.13517654464531237,
    0.14815099358092187,
    0.1613780293404764,
    0.16168706172032077,
    0.115818

## Xavier Weight Initialization With Gender Model (10 Epochs)

In [46]:
# Construction call kept for reference; the cell now restores a pickled model instead.
# model = VGG16Regression(weights = 'random', device = device, tabular_data = True, tabular_hidden = 50, output_hidden = 1000).to(device)
# NOTE(review): pickle.load can execute arbitrary code - only load pickle files you trust.
# NOTE(review): the pickle presumably holds a VGG16Regression instance, in which case the
# class-definition cell must have been run before this one - confirm.
with open( '/content/drive/MyDrive/BINF_4008_Final_Project/Pickles/vgg16_xavier_w_gender_10_model.pkl', 'rb') as f:
  model = pickle.load(f)

In [47]:
# L1 loss and an Adam optimizer (library-default hyperparameters) over all model parameters.
# NOTE(review): the training call in this section is commented out, so the optimizer is
# presumably only needed to satisfy the BaseTrainer signature - confirm.
criterion = nn.L1Loss()
optimizer = optim.Adam(model.parameters())

In [48]:
# Same argument layout as the earlier trainer cells, except that the literal 10 is passed
# in the position where the other sections pass EPOCHS (7), matching this section's
# "(10 Epochs)" title. The device is passed by keyword.
trainer_and_evaluator = BaseTrainer(model, criterion, optimizer, train_dataloader, val_dataloader, test_dataloader, test_male_dataloader, test_female_dataloader, 10, 'vgg16_xavier_w_gender_10', device = device)

In [49]:
# train_results = trainer_and_evaluator.train_and_validate()

In [50]:
# train_results

In [51]:
# Evaluate the loaded model with BaseTrainer.bootstrap_test_set (defined outside this notebook);
# the returned dict is displayed by the next cell.
test_results = trainer_and_evaluator.bootstrap_test_set()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [52]:
# Display the full results dict. Its 'test' entry starts with r2 / pearson / mse / mae / mape
# three-value lists (presumably lower bound, point estimate, upper bound - confirm),
# followed by longer per-metric lists; the saved output is long and truncated.
test_results

{'test': [{'r2': [-2.741844248143593,
    -2.4894144906185303,
    -2.2527537403789557],
   'pearson': [0.4620956343766194, 0.509035141329047, 0.5489599817861734],
   'mse': [5829.52783203125, 6067.554, 6276.500341796876],
   'mae': [68.34405574798583, 69.87857, 71.52804450988769],
   'mape': [51.95266351103783, 53.71273022890091, 55.57961389422417]},
  {'r2': [-2.624965011959862,
    -2.6289618274038418,
    -2.434788761339738,
    -2.504088132734752,
    -2.5251809643032197,
    -2.6027105916271474,
    -2.5231402343174287,
    -2.8408015785253586,
    -2.4788538958486046,
    -2.479681822995624,
    -2.4802983904499283,
    -2.53820479607982,
    -2.416814813815088,
    -2.524122975281886,
    -2.246609609356536,
    -2.50185596461101,
    -2.638297673487373,
    -2.55643300273974,
    -2.5451301824400416,
    -2.390910131438114,
    -2.4254696154024478,
    -2.2871193546620674,
    -2.429198857009841,
    -2.593024563310316,
    -2.4648926259478876,
    -2.4923206390318344,
    -2.