<a href="https://colab.research.google.com/github/rvignav/SimCLR/blob/main/Eval_SimCLR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Switch the kernel's working directory to the project checkout.
# NOTE(review): presumably needed so the project-local modules imported in the
# next cells can be found; the absolute path is specific to one cluster account.
%cd /scratch/users/rvignav/SimCLR
# !pip install -r requirements.txt

/scratch/users/rvignav/SimCLR


In [None]:
import numpy as np
import pickle
import pandas as pd

from sklearn.model_selection import train_test_split
from tensorflow.keras.applications.vgg16 import VGG16 
from tensorflow.keras.models import load_model 
from swish import Swish
from SoftmaxCosineSim import SoftmaxCosineSim

from evaluate_features import get_features, linear_classifier, tSNE_vis

# Load DataFrame

In [None]:
import csv

# Class names ordered so that the integer `level` column of the CSV can be used
# directly as an index into this list.
class_labels = ["none", "mild", "moderate", "severe", "proliferative"]

fname = []
label = []
one_hot = []

# The context manager guarantees the file handle is closed even if a row fails
# to parse; newline='' is what the csv module asks for on files it reads.
with open('/scratch/users/rvignav/SimCLR/data/trainLabels.csv', mode='r', newline='') as csv_file:
    d = csv.DictReader(csv_file)
    for row in d:
        # Image files live next to the CSV, named `<image>.jpeg`.
        fname.append('/scratch/users/rvignav/SimCLR/data/train/' + row['image'] + '.jpeg')

        level = int(row['level'])
        label.append(class_labels[level])

        # One-hot vector with one slot per class, hot at the class index.
        arr = [0] * len(class_labels)
        arr[level] = 1
        one_hot.append(arr)

# One row per image: file path, class name and one-hot encoded class.
df = pd.DataFrame({"filename": fname, "class_label": label, "class_one_hot": one_hot})

df.head()

Unnamed: 0,class_label,class_one_hot,filename
0,none,"[1, 0, 0, 0, 0]",/scratch/users/rvignav/SimCLR/data/train/10_le...
1,none,"[1, 0, 0, 0, 0]",/scratch/users/rvignav/SimCLR/data/train/10_ri...
2,none,"[1, 0, 0, 0, 0]",/scratch/users/rvignav/SimCLR/data/train/13_le...
3,none,"[1, 0, 0, 0, 0]",/scratch/users/rvignav/SimCLR/data/train/13_ri...
4,mild,"[0, 1, 0, 0, 0]",/scratch/users/rvignav/SimCLR/data/train/15_le...


In [None]:
# The number of classes equals the width of a one-hot vector. `.iloc[0]` reads
# the first row by position, so it keeps working if the index is not 0..n-1.
num_classes = len(df['class_one_hot'].iloc[0])

print("# of training instances:", len(df.index), "\n")
# The loop variable is deliberately not called `label`: that name is the list of
# per-row class names built when the CSV was loaded, and reusing it here would
# silently replace the list with a single string.
for class_name in class_labels:
    print(f"# of '{class_name}' training instances: {(df.class_label == class_name).sum()}")

# of training instances: 35126 

# of 'none' training instances: 25810
# of 'mild' training instances: 2443
# of 'moderate' training instances: 5292
# of 'severe' training instances: 873
# of 'proliferative' training instances: 708


In [None]:
# 70 % train, then the remaining 30 % is halved into validation and test.
# Both splits use a fixed random_state so the partition is reproducible.
# NOTE(review): the split is not stratified although the classes are heavily
# imbalanced - confirm this is intended before comparing per-class metrics.
df_train, df_val_test = train_test_split(df, test_size=0.30, random_state=42, shuffle=True)
df_val, df_test = train_test_split(df_val_test, test_size=0.50, random_state=42, shuffle=True)

# Lookup of the three splits by name, consumed by the classifier training below.
dfs = {
    "train": df_train,
    "val": df_val,
    "test": df_test
}

# Print the size and class distribution of every split. Each per-class line
# names the split it belongs to (the validation and test lines used to be
# reported as "training instances").
for position, (split_name, split_df) in enumerate(
    [("training", df_train), ("validation", df_val), ("test", df_test)]
):
    if position:
        print()  # blank line between consecutive splits
    print(f"# of {split_name} instances:", len(split_df.index), "\n")
    for class_name in class_labels:
        print(f"# of '{class_name}' {split_name} instances: {(split_df.class_label == class_name).sum()}")

# of training instances: 24588 

# of 'none' training instances: 18045
# of 'mild' training instances: 1725
# of 'moderate' training instances: 3707
# of 'severe' training instances: 621
# of 'proliferative' training instances: 490

# of validation instances: 5269 

# of 'none' training instances: 3877
# of 'mild' training instances: 358
# of 'moderate' training instances: 781
# of 'severe' training instances: 134
# of 'proliferative' training instances: 119

# of test instances: 5269 

# of 'none' training instances: 3888
# of 'mild' training instances: 360
# of 'moderate' training instances: 804
# of 'severe' training instances: 118
# of 'proliferative' training instances: 99


In [None]:
# Images are square: one edge length drives both height and width.
size = 128
height_img = width_img = size

# (height, width, channels) with 3 colour channels.
input_shape = (height_img, width_img, 3)

# Load pretrained VGG16 & Feature evaluation

In [None]:
# Location of the saved base model (HDF5 file) that is evaluated in this notebook.
BASE_MODEL_PATH = '/scratch/users/rvignav/models/dr/base_model/base_model_round_4.h5'
# Deserialize the model from disk and print its layer table.
base_model = load_model(BASE_MODEL_PATH)
base_model.summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 128, 128, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 128, 128, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 128, 128, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 64, 64, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 64, 64, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 64, 64, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 32, 32, 128)       0     

In [None]:
# Feature dimension written as 2 * 2 * 512 (= 2048).
# NOTE(review): presumably the flattened size of the base model's final feature
# map - confirm against base_model.summary(); an unmodified VGG16 fed 128x128
# inputs would normally end at 4x4x512. Not referenced by any cell visible here.
feat_dim = 2 * 2 * 512

# Build SimCLR-Model

In [None]:
from DataGeneratorSimCLR import DataGeneratorSimCLR as DataGenerator
from SimCLR import SimCLR

Using TensorFlow backend.


### Properties

In [None]:
# Batch size handed to the SimCLR data generators.
batch_size = 16

# Projection head: presumably one width per layer, so the layer count is
# derived from the list instead of being kept in sync by hand.
feat_dims_ph = [2048, 128]
num_layers_ph = len(feat_dims_ph)

num_of_unfrozen_layers = 4

# Root directory of the saved models.
save_path = '/scratch/users/rvignav/models/dr'

In [None]:
# Location of the saved SimCLR model (HDF5 file).
SIMCLR_MODEL_PATH = '/scratch/users/rvignav/models/dr/SimCLR/SimCLR_07_14_01h_21.h5'
# Project-defined classes that load_model must be told about in order to
# deserialize the saved model.
customObjects = {
    'Swish': Swish,
    'SoftmaxCosineSim': SoftmaxCosineSim,
}
# NOTE(review): this rebinds the name `SimCLR`, which an earlier cell imported as
# a class (`from SimCLR import SimCLR`). After this line `SimCLR` is the loaded
# model instance and the class is no longer reachable under that name.
SimCLR = load_model(SIMCLR_MODEL_PATH, custom_objects=customObjects)

In [None]:
# Keyword arguments shared by the train, validation and test generators.
params_generator = dict(
    batch_size=batch_size,
    shuffle=True,
    width=width_img,
    height=height_img,
    VGG=True,
)

# Generators. Every split gets a fresh 0..n-1 index before it is handed over.
train_frame = df_train.reset_index(drop=True)
val_frame = df_val.reset_index(drop=True)
test_frame = df_test.reset_index(drop=True)

data_train = DataGenerator(train_frame, **params_generator)
# Original author's note: "val" keeps the unity values on the same random places ~42
data_val = DataGenerator(val_frame, subset="val", **params_generator)
# Original author's note: "test" keeps the unity values on the diagonal
data_test = DataGenerator(test_frame, subset="test", **params_generator)

## Feature Evaluation

### Fine-tuned model

In [None]:
batch_size_classifier = 32

# Settings forwarded to the classifier's data generators.
params_generator_classifier = dict(
    max_width=width_img,
    max_height=height_img,
    num_classes=num_classes,
    VGG=True,
)

# Training schedule per fraction of the training data. Keys are the string form
# of the fraction (str(1.0) == '1.0', ...) because that is how they are looked up.
# Each schedule lists, position by position, how many layers are unfrozen, the
# learning rate, and the epoch count.
params_training_classifier = {
    '1.0': dict(
        reg_dense=0.005,
        reg_out=0.005,
        nums_of_unfrozen_layers=[5, 5, 6, 7],
        lrs=[1e-3, 1e-4, 5e-5, 5e-5],
        epochs=[5, 5, 15, 10],
    ),
    '0.2': dict(
        reg_dense=0.075,
        reg_out=0.01,
        nums_of_unfrozen_layers=[5, 5, 6, 7],
        lrs=[1e-3, 1e-4, 5e-5, 5e-5],
        epochs=[5, 5, 20, 15],
    ),
    '0.05': dict(
        reg_dense=0.01,
        reg_out=0.02,
        nums_of_unfrozen_layers=[5, 5, 6, 7],
        lrs=[1e-3, 1e-4, 5e-5, 1e-5],
        epochs=[5, 5, 20, 15],
    ),
}

In [None]:
from Classifier import Classifier

def train_NL_and_evaluate(
    base_model,
    dfs,
    batch_size,
    params_generator,
    fraction,
    class_labels,
    reg_dense=0.005,
    reg_out=0.005,
    nums_of_unfrozen_layers=[5, 5, 6, 7],
    lrs=[1e-3, 1e-4, 5e-5, 1e-5],
    epochs=[5, 5, 20, 25],
    verbose_epoch=0,
    verbose_cycle=1,
    num_attempts=5,
):
    """Train a nonlinear classifier on top of `base_model` several times and
    report the attempt with the highest test accuracy.

    Every attempt builds a fresh `Classifier`, asks it for its data generators,
    trains it with the given schedule and evaluates it on `dfs["test"]`.

    Args:
        base_model: model passed to `Classifier` as its `base_model`.
        dfs: mapping of split names to dataframes; `dfs["test"]` is read here
            and the whole mapping is passed to `Classifier.get_generators`.
        batch_size: batch size passed to `Classifier.get_generators`.
        params_generator: generator settings; must contain "num_classes".
        fraction: fraction of the training data, forwarded to the classifier.
        class_labels: class names passed to `Classifier.evaluate_on_test`.
        reg_dense, reg_out: regularization values passed to `Classifier`.
        nums_of_unfrozen_layers, lrs, epochs: training schedule forwarded to
            `Classifier.train`. The list defaults are never mutated here and
            only copies are handed on.
        verbose_epoch: verbosity flag forwarded to `Classifier.train`.
        verbose_cycle: forwarded to `Classifier.train`; when truthy, the attempt
            number is also printed.
        num_attempts: number of independent training runs (default 5).

    Returns:
        dict with keys "acc", "report" and "attempt" (1-based) for the best
        attempt. With equal accuracies the earliest attempt is kept.

    Raises:
        ValueError: if `num_attempts` is smaller than 1.

    NOTE(review): the best attempt is picked by *test* accuracy, so the printed
    report is a best-of-`num_attempts` figure and not an unbiased estimate.
    """
    if num_attempts < 1:
        raise ValueError("num_attempts must be at least 1")

    # Starts empty so the first attempt is always recorded, even when its
    # accuracy is 0 (seeding with {"acc": 0} left "attempt"/"report" unset then).
    results = {}
    for i in range(num_attempts):
        if verbose_cycle:
            print(f"Learning attempt {i+1}")

        classifier = Classifier(
            base_model=base_model,
            num_classes=params_generator["num_classes"],
            reg_dense=reg_dense,
            reg_out=reg_out,
        )

        data_train, data_val, data_test = classifier.get_generators(
            dfs, fraction, batch_size, params_generator
        )

        # Copies keep the caller's lists (and the shared defaults) unchanged
        # across attempts.
        classifier.train(
            data_train,
            data_val,
            fraction,
            list(nums_of_unfrozen_layers),
            list(lrs),
            list(epochs),
            verbose_epoch,
            verbose_cycle,
        )
        acc, report = classifier.evaluate_on_test(
            dfs["test"], data_test, class_labels
        )

        if "acc" not in results or results["acc"] < acc:
            results["acc"] = acc
            results["report"] = report
            results["attempt"] = i + 1

    print("Best result from attempt", str(results["attempt"]))
    print(results["report"])
    return results

In [None]:
# Fractions of the training set to fine-tune on; also reused by the
# logistic-regression cell further down.
fractions = [1.0, 0.2, 0.05]

for fraction in fractions:
    print(f"    ==== {fraction * 100}% of the training data used ==== \n")

    # The schedules are keyed by the string form of the fraction; their keys
    # (reg_dense, reg_out, nums_of_unfrozen_layers, lrs, epochs) match the
    # keyword names of train_NL_and_evaluate, so they are unpacked directly.
    hyperparams = params_training_classifier[str(fraction)]

    train_NL_and_evaluate(
        base_model=base_model,
        dfs=dfs,
        batch_size=batch_size_classifier,
        params_generator=params_generator_classifier,
        fraction=fraction,
        class_labels=class_labels,
        verbose_epoch=0,
        verbose_cycle=0,
        **hyperparams,
    )

    ==== 100.0% of the training data used ==== 



### Logistic Regression

In [None]:
# Run the base model over the train and test splits via the project helper
# get_features, keeping the returned features and targets for each split.
# The third return value lands in `feats` both times, so only the one from the
# test call survives; no cell visible here reads it.
features_train, y_train, feats = get_features(base_model, df_train, class_labels)
features_test, y_test, feats = get_features(base_model, df_test, class_labels)
# Cell output: number of non-zero entries in the first training feature entry.
np.count_nonzero(features_train[0])

In [None]:
# Training logistic regression classifier on 3 fractions of the data
# Optimal regularization is determined from a 5-fold cross-validation
# `fractions` is the list defined in the fine-tuning cell above, so that cell
# has to run before this one.
for fraction in fractions:
    print(f"    ==== {fraction * 100}% of the training data used ==== \n")
    linear_classifier(features_train, y_train, features_test, y_test, class_labels, fraction = fraction)

In [None]:
# Visualize the extracted training features with the project helper tSNE_vis.
# NOTE(review): presumably a t-SNE projection coloured by class - confirm in evaluate_features.
tSNE_vis(df_train, features_train, class_labels)