<a href="https://colab.research.google.com/github/rvignav/SimCLR/blob/main/Train_SimCLR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -r requirements.txt

In [None]:
import numpy as np
import pickle
import pandas as pd

from sklearn.model_selection import train_test_split
from tensorflow.keras.applications.vgg16 import VGG16 

from evaluate_features import get_features, linear_classifier, tSNE_vis

# Load DataFrame

In [None]:
import csv

# Class names, indexed by the integer `level` column of the labels CSV.
class_labels = ["none", "mild", "moderate", "severe", "proliferative"]

fname = []    # image path for every CSV row
label = []    # class name for every CSV row
one_hot = []  # one-hot list (length len(class_labels)) for every CSV row

# The context manager closes the file as soon as all rows are read; the handle
# was previously left open. newline='' is what the csv module recommends.
with open('data/trainLabels.csv', mode='r', newline='') as csv_file:
    for row in csv.DictReader(csv_file):
        level = int(row['level'])
        fname.append('data/train/' + row['image'] + '.jpeg')
        label.append(class_labels[level])

        # One-hot encoding sized from class_labels instead of a hard-coded 5.
        encoded = [0] * len(class_labels)
        encoded[level] = 1
        one_hot.append(encoded)

# One row per image: file path, class name and one-hot encoding.
df = pd.DataFrame({"filename": fname, "class_label": label, "class_one_hot": one_hot})

df.head()

In [None]:
# Number of classes = length of the one-hot vector in the first row
# (positional lookup, so it does not depend on the index labels).
num_classes = len(df['class_one_hot'].iloc[0])

print("# of training instances:", len(df.index), "\n")
# The loop variable is `class_name` rather than `label`, so it no longer
# overwrites the `label` list that was filled while reading the CSV.
for class_name in class_labels:
    print(f"# of '{class_name}' training instances: {(df.class_label == class_name).sum()}")

# of training instances: 35126 

# of 'none' training instances: 25810
# of 'mild' training instances: 2443
# of 'moderate' training instances: 5292
# of 'severe' training instances: 873
# of 'proliferative' training instances: 708


In [None]:
# Hold out 30 % of the rows, then cut that hold-out in half: validation and test.
df_train, df_val_test = train_test_split(df, test_size=0.30, random_state=42, shuffle=True)
df_val, df_test = train_test_split(df_val_test, test_size=0.50, random_state=42, shuffle=True)

# Split name -> DataFrame lookup.
dfs = {
    "train": df_train,
    "val": df_val,
    "test": df_test
}


def _print_class_counts(split_name, frame):
    """Print the row count of `frame` and its per-class counts, labelled with `split_name`."""
    print(f"# of {split_name} instances:", len(frame.index), "\n")
    for class_name in class_labels:
        print(f"# of '{class_name}' {split_name} instances: {(frame.class_label == class_name).sum()}")


# Each split is reported under its own name; the validation and test per-class
# lines used to be printed as "training instances".
_split_report = [("training", df_train), ("validation", df_val), ("test", df_test)]
for _position, (_split_name, _frame) in enumerate(_split_report):
    if _position:
        print()  # blank line between consecutive splits
    _print_class_counts(_split_name, _frame)

# of training instances: 24588 

# of 'none' training instances: 18045
# of 'mild' training instances: 1725
# of 'moderate' training instances: 3707
# of 'severe' training instances: 621
# of 'proliferative' training instances: 490

# of validation instances: 5269 

# of 'none' training instances: 3877
# of 'mild' training instances: 358
# of 'moderate' training instances: 781
# of 'severe' training instances: 134
# of 'proliferative' training instances: 119

# of test instances: 5269 

# of 'none' training instances: 3888
# of 'mild' training instances: 360
# of 'moderate' training instances: 804
# of 'severe' training instances: 118
# of 'proliferative' training instances: 99


In [None]:
# Side length used for both image dimensions
size = 128
height_img = width_img = size

# Shape tuple laid out as (height, width, 3)
input_shape = (height_img, width_img, 3)

# Load pretrained VGG16 & Feature evaluation

In [None]:
# Keyword arguments handed to the VGG16 constructor:
# ImageNet weights, no top, no pooling, and the input shape defined earlier.
params_vgg16 = dict(
    weights="imagenet",
    include_top=False,
    input_shape=input_shape,
    pooling=None,
)

# Instantiate the backbone and print its layer summary
base_model = VGG16(**params_vgg16)
base_model.summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 128, 128, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 128, 128, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 128, 128, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 64, 64, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 64, 64, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 64, 64, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 32, 32, 128)       0     

In [None]:
# Feature dimension passed to SimCLR below (evaluates to 2048).
# NOTE(review): presumably the flattened size of the VGG16 output. 2 x 2 x 512
# looks sized for a smaller input than 128 x 128 - TODO confirm against
# base_model.output_shape and how SimCLR uses feat_dim.
feat_dim = 512 * 2 * 2

# Build SimCLR-Model

In [None]:
from DataGeneratorSimCLR import DataGeneratorSimCLR as DataGenerator
from SimCLR import SimCLR

Using TensorFlow backend.


### Properties

In [None]:
# Batch size shared by the SimCLR model and the data generators
batch_size = 16

# Projection head settings
# (presumably one entry of feat_dims_ph per projection-head layer - TODO confirm in SimCLR)
feat_dims_ph = [2048, 128]
num_layers_ph = len(feat_dims_ph)

# Layer count passed to SimCLR as num_of_unfrozen_layers
num_of_unfrozen_layers = 4

# Path passed to SimCLR as save_path
save_path = 'models/dr'

In [None]:
# Import the class under an alias so this cell can be re-run. The assignment
# below rebinds the name `SimCLR` to the instance, so calling `SimCLR(...)`
# a second time would call the instance, not the class.
from SimCLR import SimCLR as SimCLRModel

# The instance stays bound to `SimCLR` because the training cell calls
# `SimCLR.unfreeze_and_train(...)` on that name.
SimCLR = SimCLRModel(
        base_model = base_model,
        input_shape = input_shape,
        batch_size = batch_size,
        feat_dim = feat_dim,
        feat_dims_ph = feat_dims_ph,
        num_of_unfrozen_layers = num_of_unfrozen_layers,
        save_path = save_path
    )

In [None]:
# Keyword arguments common to the three generators
params_generator = dict(
    batch_size=batch_size,
    shuffle=True,
    width=width_img,
    height=height_img,
    VGG=True,
)

# One generator per split; every frame gets a fresh 0..n-1 index first.
data_train = DataGenerator(
    df_train.reset_index(drop=True),
    **params_generator,
)
# subset="val": per the original note, keeps the unity values on the same
# random places (~42) - TODO confirm in DataGeneratorSimCLR
data_val = DataGenerator(
    df_val.reset_index(drop=True),
    subset="val",
    **params_generator,
)
# subset="test": per the original note, keeps the unity values on the
# diagonal - TODO confirm in DataGeneratorSimCLR
data_test = DataGenerator(
    df_test.reset_index(drop=True),
    subset="test",
    **params_generator,
)

## Training SimCLR

In [None]:
# Train on the training generator, validating on the validation generator.
# The unfrozen-layer count reuses the constant from the properties cell rather
# than a second hard-coded 4, so the two values cannot drift apart.
# NOTE(review): the meaning of `r` is defined in SimCLR.unfreeze_and_train - confirm there.
SimCLR.unfreeze_and_train(data_train,
                          data_val,
                          num_of_unfrozen_layers = num_of_unfrozen_layers,
                          r = 4,
                          lr = 1e-6,
                          epochs = 5)

trainable parameters: 24.12 M.
non-trainable parameters: 7.64 M.
Train for 1537 steps, validate for 330 steps
Epoch 1/5
Epoch 00001: val_loss improved from inf to 934.88759, saving model to /scratch/users/rvignav/models/dr/SimCLR/SimCLR_07_14_01h_21.h5
Epoch 2/5
Epoch 00002: val_loss improved from 934.88759 to 823.52380, saving model to /scratch/users/rvignav/models/dr/SimCLR/SimCLR_07_14_01h_21.h5
Epoch 3/5
Epoch 00003: val_loss improved from 823.52380 to 719.20680, saving model to /scratch/users/rvignav/models/dr/SimCLR/SimCLR_07_14_01h_21.h5
Epoch 4/5
Epoch 00004: val_loss improved from 719.20680 to 622.37751, saving model to /scratch/users/rvignav/models/dr/SimCLR/SimCLR_07_14_01h_21.h5
Epoch 5/5
Epoch 00005: val_loss improved from 622.37751 to 533.14481, saving model to /scratch/users/rvignav/models/dr/SimCLR/SimCLR_07_14_01h_21.h5
trainable parameters: 24.12 M.
non-trainable parameters: 7.64 M.
