<a href="https://colab.research.google.com/github/rvignav/SimCLR/blob/main/Parallelized_Train_SimCLR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# List the NVIDIA GPUs visible to this session (driver version, memory use, utilisation).
!nvidia-smi

Tue Aug 10 22:32:04 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.57.02    Driver Version: 470.57.02    CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  On   | 00000000:03:00.0 Off |                    0 |
| N/A   32C    P0    30W / 250W |      0MiB / 16280MiB |      0%   E. Process |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  Tesla P100-PCIE...  On   | 00000000:04:00.0 Off |                    0 |
| N/A   31C    P0    25W / 250W |      0MiB / 16280MiB |      0%   E. Process |
|       

In [None]:
# Move into the project checkout, then install the packages listed in its requirements.txt.
# NOTE(review): `%pip install` would target the kernel's own environment; confirm the
# installed IPython supports it before switching.
%cd /scratch/users/rvignav/SimCLR
!pip install -r requirements.txt

/scratch/users/rvignav/SimCLR
Defaulting to user installation because normal site-packages is not writeable
Collecting jinja2>=2.10
  Using cached Jinja2-3.0.1-py3-none-any.whl (133 kB)
Collecting MarkupSafe>=2.0
  Using cached MarkupSafe-2.0.1-cp36-cp36m-manylinux2010_x86_64.whl (30 kB)
Collecting jsonschema>=3.0.1
  Using cached jsonschema-3.2.0-py2.py3-none-any.whl (56 kB)




Installing collected packages: MarkupSafe, jsonschema, jinja2
Successfully installed MarkupSafe-2.0.1 jinja2-3.0.1 jsonschema-3.2.0
You should consider upgrading via the '/share/software/user/open/python/3.6.1/bin/python3.6 -m pip install --upgrade pip' command.[0m


In [None]:
import numpy as np
import pickle
import pandas as pd

from sklearn.model_selection import train_test_split
from tensorflow.keras.applications.vgg16 import VGG16 

from evaluate_features import get_features, linear_classifier, tSNE_vis

# Load DataFrame

In [None]:
import csv

# Ordered so that the integer `level` column of the CSV indexes directly into this list.
class_labels = ["none", "mild", "moderate", "severe", "proliferative"]

# Single place to change when the dataset lives somewhere else.
DATA_DIR = '/scratch/users/rvignav/SimCLR/data'

fname = []
label = []
one_hot = []

# The context manager closes the CSV handle as soon as parsing is finished
# (it used to stay open for the lifetime of the kernel). `newline=''` is the
# mode the csv module asks for when reading.
with open(DATA_DIR + '/trainLabels.csv', mode='r', newline='') as csv_file:
    for row in csv.DictReader(csv_file):
        level = int(row['level'])
        fname.append(DATA_DIR + '/train/' + row['image'] + '.jpeg')
        label.append(class_labels[level])
        # One-hot vector: all zeros except a 1 at the class index.
        encoded = [0] * len(class_labels)
        encoded[level] = 1
        one_hot.append(encoded)

# One row per image: absolute file path, textual class and one-hot class.
df = pd.DataFrame({"filename": fname, "class_label": label, "class_one_hot": one_hot})

df.head()

Unnamed: 0,class_label,class_one_hot,filename
0,none,"[1, 0, 0, 0, 0]",/scratch/users/rvignav/SimCLR/data/train/10_le...
1,none,"[1, 0, 0, 0, 0]",/scratch/users/rvignav/SimCLR/data/train/10_ri...
2,none,"[1, 0, 0, 0, 0]",/scratch/users/rvignav/SimCLR/data/train/13_le...
3,none,"[1, 0, 0, 0, 0]",/scratch/users/rvignav/SimCLR/data/train/13_ri...
4,mild,"[0, 1, 0, 0, 0]",/scratch/users/rvignav/SimCLR/data/train/15_le...


In [None]:
# Number of classes, read off the length of the first row's one-hot vector.
num_classes = len(df['class_one_hot'][0])

print("# of training instances:", len(df.index), "\n")
# The loop variable is deliberately not called `label`: that name holds the
# list of per-row labels built while loading the CSV and must not be overwritten.
for class_name in class_labels:
    print(f"# of '{class_name}' training instances: {(df.class_label == class_name).sum()}")

# of training instances: 35126 

# of 'none' training instances: 25810
# of 'mild' training instances: 2443
# of 'moderate' training instances: 5292
# of 'severe' training instances: 873
# of 'proliferative' training instances: 708


In [None]:
def _print_class_counts(split_name, split_df):
    """Print the size of `split_df` followed by its per-class instance counts.

    Args:
        split_name: Human-readable name of the split, used verbatim in the messages.
        split_df: DataFrame with a `class_label` column.
    """
    print(f"# of {split_name} instances:", len(split_df.index), "\n")
    for class_name in class_labels:
        class_count = (split_df.class_label == class_name).sum()
        print(f"# of '{class_name}' {split_name} instances: {class_count}")


# 70 % of the rows go to training; the remaining 30 % is halved into validation and test.
df_train, df_val_test = train_test_split(df, test_size=0.30, random_state=42, shuffle=True)
df_val, df_test = train_test_split(df_val_test, test_size=0.50, random_state=42, shuffle=True)

dfs = {
    "train": df_train,
    "val": df_val,
    "test": df_test
}

# Each split is reported under its own name (the per-class lines for the
# validation and test splits used to be printed as "training instances").
for position, (split_name, split_df) in enumerate(
    [("training", df_train), ("validation", df_val), ("test", df_test)]
):
    if position:
        print()  # blank line between two splits
    _print_class_counts(split_name, split_df)

# of training instances: 24588 

# of 'none' training instances: 18045
# of 'mild' training instances: 1725
# of 'moderate' training instances: 3707
# of 'severe' training instances: 621
# of 'proliferative' training instances: 490

# of validation instances: 5269 

# of 'none' training instances: 3877
# of 'mild' training instances: 358
# of 'moderate' training instances: 781
# of 'severe' training instances: 134
# of 'proliferative' training instances: 119

# of test instances: 5269 

# of 'none' training instances: 3888
# of 'mild' training instances: 360
# of 'moderate' training instances: 804
# of 'severe' training instances: 118
# of 'proliferative' training instances: 99


In [None]:
# Images are square: `size` pixels on each side.
size = 128
height_img = width_img = size

# (height, width, channels) with 3 colour channels.
input_shape = (height_img, width_img, 3)

# Load pretrained VGG16 & Feature evaluation

In [None]:
import tensorflow as tf

# Distribution strategy shared by every model-building cell of this notebook.
strategy = tf.distribute.MirroredStrategy()
print(f'Number of devices: {strategy.num_replicas_in_sync}')

# ImageNet weights, no classifier head, no global pooling on the output.
params_vgg16 = dict(
    weights="imagenet",
    include_top=False,
    input_shape=input_shape,
    pooling=None,
)

with strategy.scope():
    # Design model: the backbone is created inside the strategy scope.
    base_model = VGG16(**params_vgg16)

base_model.summary()

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2')
Number of devices: 3
Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 128, 128, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 128, 128, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 128, 128, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 64, 64, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 64, 64, 128)       73856     
_________________________________________

In [None]:
# Length of the flattened backbone output, read from the model itself.
# The previous literal 2 * 2 * 512 (= 2048) did not match the 128x128 input
# configured above: VGG16 downsamples by a factor of 32, which yields a
# 4 x 4 x 512 map (= 8192 features) at that resolution.
feat_dim = int(np.prod(base_model.output_shape[1:]))

# Build SimCLR-Model

In [None]:
from DataGeneratorSimCLR import DataGeneratorSimCLR as DataGenerator
from SimCLR import SimCLR

Using TensorFlow backend.


### Properties

In [None]:
# Where the SimCLR wrapper is told to save its artefacts.
save_path = '/scratch/users/rvignav/models/dr'

# Batch size shared by the SimCLR wrapper and the data generators.
batch_size = 16

# How many layers of the backbone are left unfrozen.
num_of_unfrozen_layers = 4

# Projection head settings.
num_layers_ph = 2
feat_dims_ph = [2048, 128]

In [None]:
# Import the class under its own alias. This cell binds the name `SimCLR` to
# the *instance* (later cells use it that way), so without the alias a second
# run of this cell would try to call the instance instead of the class.
from SimCLR import SimCLR as SimCLRModel

with strategy.scope():
    # Wrapper around the backbone, created inside the strategy scope.
    SimCLR = SimCLRModel(
        base_model = base_model,
        input_shape = input_shape,
        batch_size = batch_size,
        feat_dim = feat_dim,
        feat_dims_ph = feat_dims_ph,
        num_of_unfrozen_layers = num_of_unfrozen_layers,
        save_path = save_path
    )

In [None]:
# Keyword arguments shared by the train, validation and test generators.
params_generator = dict(
    batch_size=batch_size,
    shuffle=True,
    width=width_img,
    height=height_img,
    VGG=True,
)

# One generator per split; every frame gets a fresh 0..n-1 index first.
data_train = DataGenerator(
    df_train.reset_index(drop=True),
    **params_generator
)
# Original author's note: "val" keeps the unity values on the same random places (~42).
data_val = DataGenerator(
    df_val.reset_index(drop=True),
    subset="val",
    **params_generator
)
# Original author's note: "test" keeps the unity values on the diagonal.
data_test = DataGenerator(
    df_test.reset_index(drop=True),
    subset="test",
    **params_generator
)

## Training SimCLR

In [None]:
# Configure the SimCLR wrapper, rebuild its Keras model inside the strategy
# scope, train it with the wrapper's callbacks, then save the base model.
# NOTE(review): the run recorded below this cell aborts in the first epoch with
# "Incompatible shapes: [6,64] vs. [16,64]" on a replica. Presumably the batch
# of 16 is split unevenly across the 3 mirrored devices; confirm how SimCLR
# and the data generator expect batches to be sized under MirroredStrategy.
with strategy.scope():
    # Settings applied to the wrapper before the model is (re)built.
    SimCLR.num_of_unfrozen_layers = 4
    SimCLR.r = 4  # NOTE(review): meaning of `r` is not visible in this notebook; see SimCLR.py
    # Only touch the learning rate when it differs from the target value.
    if SimCLR.lr != 1e-6:
        SimCLR.change_lr(1e-6)

    # Rebuild the model and store it back on the wrapper.
    SimCLR.SimCLR_model = SimCLR.build_model()
    SimCLR.print_weights()
    
    # The three callbacks come from the wrapper and are passed unchanged to fit().
    checkpoint, earlyStopping, reduce_lr = SimCLR.get_callbacks()

    SimCLR_model = SimCLR.SimCLR_model
    
# Train for up to 25 epochs, validating on the validation generator.
SimCLR_model.fit(
    data_train,
    epochs=25,
    verbose=1,
    validation_data=data_val,
    callbacks=[checkpoint, earlyStopping, reduce_lr],
)

# Report the weights again after training and save the base model.
SimCLR.print_weights()
SimCLR.save_base_model()

trainable parameters: 24.12 M.
non-trainable parameters: 7.64 M.
Train for 1537 steps, validate for 330 steps
Epoch 1/25
INFO:tensorflow:batch_all_reduce: 10 all-reduces with algorithm = nccl, num_packs = 1, agg_small_grads_max_bytes = 0 and agg_small_grads_max_group = 10
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:batch_all_reduce: 10 all-reduces with algorithm = nccl, num_packs = 1, agg_small_grads_max_bytes = 0 and agg_small_grads_max_group = 10
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


InvalidArgumentError: 2 root error(s) found.
  (0) Invalid argument:  Incompatible shapes: [6,64] vs. [16,64]
	 [[node replica_1/Mul_13 (defined at /python/3.6.1/lib/python3.6/threading.py:916) ]]
	 [[Identity_2/_130]]
  (1) Invalid argument:  Incompatible shapes: [6,64] vs. [16,64]
	 [[node replica_1/Mul_13 (defined at /python/3.6.1/lib/python3.6/threading.py:916) ]]
0 successful operations.
2 derived errors ignored. [Op:__inference_distributed_function_67762]

Function call stack:
distributed_function -> distributed_function
