In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import os
import nibabel as nib
from tqdm import tqdm
import time
import sys
from tfrecord_utils import *
import tensorflow as tf
from tensorflow.python.ops import array_ops, math_ops, init_ops
from tensorflow.python.framework import dtypes
# Switch TensorFlow to eager execution: the training and evaluation cells below
# iterate over the dataset with plain Python loops and call .numpy() on tensors.
tf.enable_eager_execution()

In [None]:
TF_RECORD_FILENAME = "/home-local/remedis/data/subdural_lesion/dataset.tfrecords"

# model: three Conv2D + MaxPooling2D stages, a global max-pool over the spatial
# axes, and a single sigmoid unit, so the model emits one value in [0, 1] per
# input image.
# Disabled variants left over from earlier experiments: extra Conv2D(256) /
# Conv2D(512) stages and Dense(32) / Dense(16) layers ahead of the output unit.
inputs = tf.keras.layers.Input(shape=(None, None, 1))
x = inputs
for n_filters in (32, 64, 128):
    x = tf.keras.layers.Conv2D(n_filters, 3, activation='relu', padding='same')(x)
    x = tf.keras.layers.MaxPooling2D(2, 2)(x)
x = tf.keras.layers.GlobalMaxPooling2D()(x)
outputs = tf.keras.layers.Dense(1, activation='sigmoid')(x)
model = tf.keras.Model(inputs=inputs, outputs=outputs)

# Number of records expected in the TFRecord file; also used as the shuffle
# buffer size so the whole dataset is reshuffled on every pass.
num_elements = 62
instance_size = (128, 128)

# parse_bag comes from tfrecord_utils; the training cells unpack each parsed
# record as an (x, y) pair.
dataset = (
    tf.data.TFRecordDataset(TF_RECORD_FILENAME)
    .map(lambda record: parse_bag(record, instance_size, 1))
    .shuffle(buffer_size=num_elements)
)
    

def running_average(old_average, cur_val, n):
    """Fold one more value into a running mean.

    Args:
        old_average: mean of the first ``n - 1`` values.
        cur_val: the ``n``-th value.
        n: 1-based count of values including ``cur_val``.

    Returns:
        The mean of all ``n`` values. Works for anything that supports
        ``*``, ``/`` and ``+`` (Python numbers, numpy arrays, tensors).
    """
    carried_over = old_average * (n - 1) / n
    newest_share = cur_val / n
    return carried_over + newest_share

# training configuration
N_EPOCHS = 500          # full passes over the dataset
progbar_length = 60     # character width of the text progress bar
LEARNING_RATE = 1e-3
opt = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE)

In [None]:
# Bag-level (multiple-instance) training with manual gradient accumulation.
#
# Each dataset element is one bag `x` of instances plus a label `y`. The other
# cells of this notebook index the label as y.numpy()[0]; this cell assumes the
# same single-label layout.
#   * all-zero label -> every instance should score 0: mean loss over the bag
#   * otherwise      -> only the highest-scoring instance is pushed towards 1
batch_size = 1
grads = [tf.zeros_like(v) for v in model.trainable_variables]
n_accumulated = 0  # bags averaged into `grads` since the last optimizer step

for cur_epoch in range(N_EPOCHS):
    epoch_loss = 0.0
    epoch_acc = 0.0
    correct = 0
    print("\nEpoch {}/{}\n".format(cur_epoch + 1, N_EPOCHS))

    for i, (x, y) in enumerate(dataset):
        # Compare via numpy, as the other training cell does: `tensor == 0` on a
        # TF 1.x tensor is an identity check and would never select this branch.
        is_negative_bag = tf.reduce_sum(y).numpy() == 0

        with tf.GradientTape() as tape:
            # The model ends in a sigmoid, so these are probabilities, not logits.
            probs = model(x, training=True)

            if is_negative_bag:
                # in all-zero class situation, take mean of entire bag
                selected = probs
            else:
                # otherwise, take the top polluted (highest scoring) instance
                top_polluted_idx = int(tf.argmax(probs, axis=0).numpy()[0])
                selected = probs[top_polluted_idx]

            # Broadcast the bag label over the selected instances and use a
            # cross-entropy that expects probabilities, so the sigmoid is not
            # applied a second time.
            targets = tf.zeros_like(selected) + tf.cast(y, selected.dtype)
            loss = tf.reduce_mean(
                tf.keras.backend.binary_crossentropy(targets, selected)
            )

        # Differentiate outside the tape context so the backward pass itself is
        # not recorded.
        sample_grads = tape.gradient(loss, model.trainable_variables)

        # aggregate current element in batch; the running mean is indexed by the
        # position inside the current batch, not by the position in the epoch
        n_accumulated += 1
        grads = [
            running_average(avg, g, n_accumulated)
            for avg, g in zip(grads, sample_grads)
        ]

        # Acc is based off max predictions
        if np.round(tf.reduce_max(probs).numpy()) == y.numpy()[0]:
            correct += 1
        epoch_acc = correct / (i + 1)
        epoch_loss = running_average(epoch_loss, float(loss.numpy()), i + 1)

        # Step once `batch_size` bags are accumulated, or on the last bag of the
        # epoch so no partial batch is dropped.
        if (i + 1) % batch_size == 0 or i == num_elements - 1:
            opt.apply_gradients(zip(grads, model.trainable_variables))
            sys.stdout.write("\r[{:{}<{}}] Loss: {:.4f} Acc: {:.2%} Correct: {}".format(
                    "=" * min(progbar_length, progbar_length * (i + 1) // num_elements),
                    "-",
                    progbar_length,
                    epoch_loss,
                    epoch_acc,
                    correct
                ))
            sys.stdout.flush()
            # wipe accumulated gradients for the next batch
            grads = [tf.zeros_like(v) for v in model.trainable_variables]
            n_accumulated = 0
            

In [None]:
# Bag-level training with per-bag logging and gradient accumulation over
# `batch_size` bags.
#
# Assumes `y` holds a single bag-level label (it is read as y.numpy()[0], as in
# the prediction cell of this notebook).
#   * label 0 -> mean loss over every instance in the bag
#   * label 1 -> loss of the single instance that fits the label best
batch_size = 32
grads = [tf.zeros_like(v) for v in model.trainable_variables]
n_accumulated = 0  # bags averaged into `grads` since the last optimizer step

for cur_epoch in range(N_EPOCHS):
    print("\nEpoch {}/{}".format(cur_epoch + 1, N_EPOCHS))

    for i, (x, y) in enumerate(dataset):
        bag_label = y.numpy()[0]

        with tf.GradientTape() as tape:
            # The model ends in a sigmoid, so these are probabilities.
            probs = model(x, training=True)

            # Repeat the bag label for every instance and use a cross-entropy
            # that expects probabilities (no second sigmoid).
            targets = tf.zeros_like(probs) + tf.cast(y, probs.dtype)
            losses = tf.keras.backend.binary_crossentropy(targets, probs)

            if bag_label == 0:
                loss = tf.reduce_mean(losses)
                # prediction is the max instance score for the negative case
                shown_pred = tf.reduce_max(probs).numpy()
            else:
                # Search along the instance axis (0). Reducing over axis 1 of an
                # (N, 1) tensor always yields index 0, i.e. the first instance.
                top_polluted_idx = int(tf.argmin(losses, axis=0).numpy()[0])
                loss = tf.reduce_mean(losses[top_polluted_idx])
                shown_pred = probs[top_polluted_idx].numpy()[0]

        print("True: {:.0f} \tPred: {:.2f}\tLoss: {:.4f}".format(
            bag_label,
            shown_pred,
            float(loss.numpy()),
        ))

        # Differentiate outside the tape context so the backward pass itself is
        # not recorded.
        sample_grads = tape.gradient(loss, model.trainable_variables)

        # Running mean indexed by position inside the current batch; indexing by
        # the epoch position would shrink every batch after the first.
        n_accumulated += 1
        grads = [
            running_average(avg, g, n_accumulated)
            for avg, g in zip(grads, sample_grads)
        ]

        # Step once `batch_size` bags are accumulated, or on the last bag of the
        # epoch so no partial batch is dropped.
        if (i + 1) % batch_size == 0 or i == num_elements - 1:
            opt.apply_gradients(zip(grads, model.trainable_variables))
            # wipe accumulated gradients for the next batch
            grads = [tf.zeros_like(v) for v in model.trainable_variables]
            n_accumulated = 0
            

In [None]:

# Disabled experiment, kept as a bare string literal so it never executes (as the
# last expression of the cell it is only echoed as output).
# NOTE(review): possible reasons it "doesn't work", to be confirmed:
#   * positive bags (y == 1) are reduced with tf.reduce_min over the instance
#     outputs, whereas the accuracy/prediction cells score a bag by its maximum;
#   * the model output already passed through a sigmoid layer but is handed to
#     sigmoid_cross_entropy as if it were a logit;
#   * running_average is indexed with i + 1 although grads is reset every batch.
'''
# This doesn't work for some reason even though I think it should.
batch_size = 16
grads = [tf.zeros_like(l) for l in model.trainable_variables]
for cur_epoch in range(N_EPOCHS):
    print("\nEpoch {}/{}".format(cur_epoch + 1, N_EPOCHS))
    #progbar.update(0)
    
    for i, (x, y) in enumerate(dataset):
        with tf.GradientTape() as tape:
            
            logits = model(x, training=True)

            if y.numpy() == 1:
                target_pred = tf.reduce_min(logits, axis=0)
            else:
                target_pred = tf.reduce_mean(logits, axis=0)

            loss = tf.losses.sigmoid_cross_entropy(y, target_pred, reduction=tf.losses.Reduction.NONE)
            t = tape.gradient(loss, model.trainable_variables)
            
            if tf.reduce_sum(t[0]).numpy() == 0 and tf.reduce_sum(t[1]).numpy() == 0:
                print("no grad")

            for j in range(len(t)):
                grads[j] = running_average(grads[j], t[j], i + 1)

            print("True: {:.0f} Pred: {:.0f} Loss: {:.4f}".format(
                    y[0], 
                    np.round(target_pred[0]), 
                    loss.numpy()[0],
                 ))

        if i > 0 and i % batch_size == 0 or i == num_elements - 1:
            #print("applied gradient")
            opt.apply_gradients(zip(grads, model.trainable_variables))
            # wipe records
            grads = [tf.zeros_like(l) for l in model.trainable_variables]
            #del tape

        #progbar.add(1, values=[("loss", loss)])'''

In [None]:
# prediction: a bag is scored by its highest instance output, rounded to 0/1,
# and compared with the first entry of the bag label.
bag_hits = []
for x, y in dataset:
    bag_score = tf.reduce_max(model(x))
    bag_hits.append(np.round(bag_score.numpy()) == y.numpy()[0])

total_count = len(bag_hits)
correct_count = sum(1 for hit in bag_hits if hit)
print("{:.2%}".format(correct_count/total_count))

In [None]:
import numpy as np

In [None]:
# Scratch array for checking argmax along axis 0: a zero-filled (100, 1, 5) block
# (presumably 100 instances x 5 classes - toy data, unrelated to the model).
logits = np.zeros(shape=(100, 1, 5))

In [None]:
# Display the dimensions of the scratch array.
np.shape(logits)

In [None]:
# Entry 0: a single 1 in column 0.
logits[0] = [[1, 0, 0, 0, 0]]

In [None]:
# Entry 1: 1s in columns 1 and 2.
logits[1] = [[0, 1, 1, 0, 0]]

In [None]:
# Entry 30: a 1 in column 1 (ties with entry 1) and a 2 in column 4.
logits[30] = [[0, 1, 0, 0, 2]]

In [None]:
# For every column, the index along axis 0 holding the largest value; ties
# resolve to the first occurrence and all-zero columns give 0.
logits.argmax(axis=0)