# Instructions

If you just want to play with this notebook, I keep it in the baseline directory. Please add `-ignore` to the end of the file name, e.g. **explainability_inference-v1-kms-ignore**. The `-ignore` suffix stops git from tracking the file, so you can play with it as much as you want.

If you want to build on it and push a new version, please rename it, e.g. **explainability_inference-v{ next version number }-{ your initials }-ignore**. This way we can keep each iteration. These notebooks will have their own directory for work going forward. This is only for the baseline model.

You will need to hardcode some paths in here; I made a note where you should do that. To use this notebook you must also have a model.h5 file (~100-200 MB). Don't worry, the .gitignore will not let you commit or track a model file, but you will need it to work with this notebook. Model files can be found in the Google Drive under either the baseline_model or model directories.

## Setting Up & Sample Data Intake

In [None]:
import sys
import os
import csv
import numpy as np
import re
from random import randint
from configparser import ConfigParser
import matplotlib.pyplot as plt
%matplotlib inline
import h5py
import tensorflow as tf
# tf.enable_eager_execution()
# tfe = tf.contrib.eager
from tensorflow.keras.preprocessing import image
from tensorflow.keras import models
%load_ext memory_profiler

In [None]:
# Add your own path to your model here...
model = models.load_model('DenseNet169_baseline_model.h5') # Load model, weights and meta data

In [None]:
def print_img(img_path):
    """Load an image from disk as a batched array scaled for plotting.

    Despite the name, nothing is printed. The file at ``img_path`` is loaded
    with the Keras image helpers at size (IMG_RESIZE_X, IMG_RESIZE_Y),
    converted to an array, given a leading batch axis of size 1 (to match the
    training shape) and divided by 255.
    """
    target_size = (IMG_RESIZE_X, IMG_RESIZE_Y)
    pixels = image.img_to_array(image.load_img(img_path, target_size=target_size))
    batched = pixels[np.newaxis, ...]  # add batch dimension of 1
    return batched / 255.

def prepare_img(filename):
    """Prepare an image with the same preprocessing steps used during training (no augmentation).

    Reads the file, decodes it with CHANNELS channels, converts it to float32,
    resizes it to (IMG_RESIZE_X, IMG_RESIZE_Y), prepends a batch axis, prints
    the resulting shape and returns the tensor.
    """
    raw_bytes = tf.read_file(filename)
    # Don't use tf.image.decode_image
    decoded = tf.image.decode_jpeg(raw_bytes, channels=CHANNELS)
    # convert to float values in [0, 1]
    as_float = tf.image.convert_image_dtype(decoded, tf.float32)
    resized = tf.image.resize_images(as_float, [IMG_RESIZE_X, IMG_RESIZE_Y])
    batched = resized[np.newaxis, ...]
    print(f"Image size pushed into the network: {batched.shape}")
    return batched

In [None]:
# For this test, we shall use the sample images in the repo, as this is a universal file path for all users
data_path = '../../images/'  # relative path to the directory holding the sample images
img_names = ['neg_sample_1', 'neg_sample_2', 'pos_sample_1', 'pos_sample_2']  # file stems, without extension
img_type = '.png'  # extension appended to each stem when building a path
IMG_RESIZE_X = 320  # resize dimensions passed to the image loading helpers
IMG_RESIZE_Y = 320
CHANNELS = 3  # channel count requested when decoding an image

## Predictions on Single Image

In [None]:
# Randomly select one of the sample images in the repo. The upper bound is
# derived from the list so that adding or removing samples cannot cause an
# out-of-range index or leave some samples unreachable.
img_path = data_path + img_names[randint(0, len(img_names) - 1)] + img_type
img = prepare_img(img_path)

# Plot the image we are predicting on.
image_to_plot = print_img(img_path)
plt.imshow(image_to_plot[0])
plt.show()

# Show just the file name; basename works for names of any length, unlike a
# fixed-width slice of the path.
print("Image being passed to network: " + os.path.basename(img_path))
pred_prob = model.predict(img, batch_size=None, steps=1, verbose=1)
print(pred_prob)
# Threshold the predicted probability at 0.5 and take the single scalar class.
pred_prob = np.where(pred_prob > 0.5, 1, 0)[0][0]
print(pred_prob)
pred = 'Negative' if pred_prob == 0 else 'Positive'
print("Predict class: " + pred)

## Predictions on a Study

In [None]:
full_data_path = '/Users/keil/datasets/mura/' # Add your own path here...
input_csv = 'MURA-v1.1/valid_image_paths.csv' # image-path CSV, appended to full_data_path when opened
output_csv = 'MURA-v1.1/predictions.csv' #predictions csv file saved to data dir

In [None]:
def id_generator(csv_line):
    """Split one line of the image-path CSV into its path and a study key.

    The line is split on '/'. The patient number is the fourth component with
    its first seven characters removed (presumably the ``patient`` prefix --
    confirm against the CSV), and the study number is the first run of digits
    in the fifth component.

    Args:
        csv_line: One raw line from the CSV, with or without a line ending.

    Returns:
        A ``(path, record)`` tuple: ``path`` is the line without its trailing
        line ending and ``record`` is ``"<patient>/<study>"``.

    Raises:
        ValueError: If the line has fewer than five '/'-separated components
            or the study component contains no digits.
    """
    # Strip '\r' as well as '\n' so CSVs with CRLF line endings do not leave a
    # stray carriage return at the end of the returned image path.
    path = csv_line.rstrip('\r\n')
    parts = path.split('/')  # tokenize line
    if len(parts) < 5:
        raise ValueError("Cannot parse patient/study from line: " + repr(csv_line))

    patient = parts[3][len('patient'):]  # get the patient number
    study_match = re.search(r'([0-9]+)', parts[4])  # get the study number
    if study_match is None:
        raise ValueError("No study number found in line: " + repr(csv_line))

    record = patient + '/' + study_match.group(0)  # unique patient study record
    return path, record

patient_dict = {} #our new data study based structure key = patient_num/study_num e.g. 11185/1, 11185/2
count = 0
# Group consecutive CSV lines that share a patient/study key into one list.
# NOTE(review): the key omits the third path component, so if the same
# patient/study pair appears in two separate runs of lines, the later run
# overwrites the earlier one -- confirm this cannot happen in the input CSV.
with open(full_data_path+input_csv,'r') as in_file:
    buffer = []
    previous_id = None
    for line in in_file:
        if not line.strip(): #skip blank lines (e.g. a trailing empty line)
            continue
        data, unique_id = id_generator(line) #sanitize data

        #write the buffer to the dict if a new patient and or study appears
        if previous_id is not None and previous_id != unique_id:
            patient_dict[previous_id] = buffer
            buffer = [] #flush buffers
        previous_id = unique_id

        buffer.append(data)

    # The loop only flushes when the key changes, so the last study is still
    # sitting in the buffer here; store it so it is not silently dropped.
    if buffer:
        patient_dict[previous_id] = buffer

In [None]:
# Peek at a single (key, value) entry of patient_dict to sanity-check its
# structure; prints nothing when the dict is empty.
first_entry = next(iter(patient_dict.items()), None)
if first_entry is not None:
    print(*first_entry)

### patient_dict is a dictionary of image file path values grouped under keys of the form patient_id + study_id, because a patient can have multiple studies, and each study (and the study's image(s)) must be predicted in isolation.

In [None]:
%memit
#collect memory usage for submission...

def strip_filename(path):
    """Return the directory part of ``path`` with a trailing '/' appended.

    The final path component (the file name) is discarded. A path with no
    directory part yields just '/'.
    """
    return os.path.dirname(path) + '/'

def prepare_img(filename):
    """Prepare an image with the same preprocessing steps used during training (no augmentation).

    Reads the file, decodes it with CHANNELS channels, converts it to float32,
    resizes it to (IMG_RESIZE_X, IMG_RESIZE_Y) and prepends a batch axis.
    Nothing is printed by this version.
    """
    raw_bytes = tf.read_file(filename)
    # Don't use tf.image.decode_image
    decoded = tf.image.decode_jpeg(raw_bytes, channels=CHANNELS)
    # convert to float values in [0, 1]
    as_float = tf.image.convert_image_dtype(decoded, tf.float32)
    resized = tf.image.resize_images(as_float, [IMG_RESIZE_X, IMG_RESIZE_Y])
    # add on that tricky batch axis
    return resized[np.newaxis, ...]
    
def inference(img_path, model, data_path=full_data_path):
    """Run the model on a single image and return its scalar prediction.

    Args:
        img_path: Image path relative to ``data_path``.
        model: Object exposing a Keras-style ``predict`` method.
        data_path: Prefix prepended to ``img_path``. Defaults to the value
            ``full_data_path`` had when this function was defined.

    Returns:
        The element at ``[0][0]`` of the array returned by ``model.predict``.
    """
    # Honour the data_path argument; previously it was accepted but ignored in
    # favour of the global full_data_path.
    img = prepare_img(data_path + img_path)
    pred_prob = model.predict(img, batch_size=None, steps=1, verbose=0)
    return pred_prob[0][0]

def avg_probabilities(prob_vector):
    """Average a study's per-image probabilities and threshold the mean.

    Returns the int 1 when the mean of ``prob_vector`` is strictly greater
    than 0.5, otherwise the int 0.
    """
    mean_prob = np.asarray(prob_vector).sum() / len(prob_vector)
    return 1 if mean_prob > 0.5 else 0


# Predict the first three studies: collect one probability per image, average
# them into a single class per study and record (study directory, class).
predictions = []
count = 0
for count, (patient_study_id, img_path_list) in enumerate(patient_dict.items(), start=1):
    dir_path = strip_filename(img_path_list[0])
    # i'm sure we can do this as a batch, memory constraints???
    prob_vector = [inference(img_path, model) for img_path in img_path_list]
    print(prob_vector)
    classification = avg_probabilities(prob_vector)
    predictions.append((dir_path, classification))
    if count == 3:
        break
print(predictions)

In [None]:
# Write out the list of (study directory, predicted class) tuples to a csv.
# newline='' hands line-ending control to the csv module, as its documentation
# requires; without it, platforms that translate '\n' on write end up with an
# extra blank line after every row.
with open(full_data_path+output_csv, 'w', newline='') as out_file:
    writer = csv.writer(out_file)
    writer.writerows(predictions)
        

## Viewing Activation Maps

In [None]:
def get_layer_ouputs(model):
    """Collect the ``output`` attribute of every entry in ``model.layers``, in order."""
    collected = []
    for layer in model.layers:
        collected.append(layer.output)
    return collected

In [None]:
layer_outputs = get_layer_ouputs(model)
# NOTE: layer_outputs[1:] is used deliberately because layer_outputs[0] belongs to the model's
# input layer. Per the original author's note, requesting it as an output means the same layer
# is both fed and fetched, which results in an error.
activation_model = models.Model(inputs=model.input, outputs=layer_outputs[1:]) 
activations = activation_model.predict(img, batch_size=None, steps=1, verbose=2)

In [None]:
def extract_activation_layer(layer_num, model=None, outputs=None):
    """Print info about one activation layer and return its activation map.

    Args:
        layer_num: Index into ``model`` and ``outputs``.
        model: Indexable collection of activation arrays. The parameter name
            is kept for backward compatibility; the elements are indexed and
            their ``.shape`` printed, so this is not a Keras model. Defaults
            to the notebook-level ``activations``, looked up at call time.
        outputs: Indexable collection of layer outputs aligned with ``model``.
            Defaults to ``layer_outputs[1:]``, looked up at call time.

    Returns:
        ``model[layer_num]``.
    """
    # Resolve the defaults lazily: binding them in the signature would freeze
    # whatever `activations` / `layer_outputs` held when this cell was run, so
    # re-running the prediction cell would leave this function returning
    # stale data.
    if model is None:
        model = activations
    if outputs is None:
        outputs = layer_outputs[1:]

    print(model[layer_num].shape)
    print("output layer: " + str(outputs[layer_num]))
    return model[layer_num]

print(len(activations)) # number of activation maps returned (595 reported by the original author)

In [None]:
activation_layer = extract_activation_layer(100)
# Index mapping noted by the original author, after the input layer was dropped:
#true layer 0: input layer ------ removed

#layer 0: padding layer
#layer 1: conv 2d layer
#layer 2: batch norm
#layer 3: relu activation


In [None]:
# Show index 220 along the last axis for the first batch item of the selected activation map.
# Assumes the map is 4-D and its last axis has more than 220 entries -- TODO confirm for other layers.
plt.matshow(activation_layer[0, :, :, 220], cmap='viridis')
plt.show()

In [None]:
# Run to get the layer information from the activation model:
activation_model.summary()

In [None]:
# Let's plot out many activation map thumbnails... RAISE TODO!!!
# For now this only reports which entries of layer_outputs have "conv"
# (preceded by a word character) in their string form. idx indexes the full
# layer_outputs list, including the input layer at position 0.
conv_pattern = re.compile(r'\w(conv)')
for idx, layers in enumerate(layer_outputs):
    if conv_pattern.search(str(layers)) is None:
        continue
    print("yes")
    print(layers)
    print(idx)

In [None]:
conv_maps = extract_activation_layer(1)  # activation map at index 1 (a conv 2d layer per the author's notes)
imgs_per_row = 16  # not used in the visible part of the notebook; presumably the thumbnail grid width -- TODO confirm
num_cols = conv_maps.shape[-1]  # size of the last axis of the activation map
print(num_cols)