In [None]:
import numpy as np
import argparse
import os
import time
import glob
import pandas as pd
import h5py
import matplotlib.pyplot as plt
from matplotlib import colors
import matplotlib as mpl
import io

import tensorflow as tf
from tensorflow.keras.models import load_model
from sklearn.model_selection import train_test_split
from sklearn import preprocessing

from utils.dataGenerator import DataGenerator, DataGenerator_metaData
from utils import utils
from PIL import Image

import pydot
from tensorflow import keras
from tensorflow.keras.models import Model

import skimage.measure


In [None]:
# Report the TensorFlow / Keras versions this kernel is running.
print("tf version: ", tf.__version__)
print("keras version: ", tf.keras.__version__)
print("")

# GPU availability
# The device name used to be computed and silently discarded (a bare expression
# in the middle of a cell is never displayed), so print it explicitly.
# Per the TensorFlow docs this is an empty string when no GPU is available.
print("GPU device name: ", tf.test.gpu_device_name())
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

# Full list of local devices visible to TensorFlow.
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

In [None]:
def load_patches(file, mags, jpeg=True):
    """Read the patch stacks, coordinates and mask stored in one HDF5 file.

    Parameters
    ----------
    file : path of the HDF5 file to open (read-only).
    mags : iterable of ints indexing into
        ``['patches_20x', 'patches_10x', 'patches_05x']``; one dataset is read
        per entry, in the given order.
    jpeg : when True every stored patch is passed through ``jpeg2patch``;
        when False the stored items are returned unchanged.

    Returns
    -------
    (patches, coords, mask) where ``patches`` is a list with one inner list per
    requested magnification, and ``coords`` / ``mask`` are the contents of the
    ``'coordinates'`` and ``'mask'`` datasets as Python lists.
    """
    dataset_names = ['patches_20x', 'patches_10x', 'patches_05x']

    # Materialise everything while the file is open; nothing below touches h5.
    with h5py.File(file, 'r') as h5:
        raw_per_mag = [list(h5[dataset_names[m]]) for m in mags]
        coords = list(h5['coordinates'])
        mask = list(h5['mask'])

    if jpeg:
        patches = [[jpeg2patch(raw) for raw in raw_list] for raw_list in raw_per_mag]
    else:
        patches = [list(raw_list) for raw_list in raw_per_mag]
    return patches, coords, mask

def jpeg2patch(patch):
    """Open the encoded image bytes held in ``patch`` as a PIL image.

    ``patch`` is converted to a NumPy array, wrapped in an in-memory byte
    stream and handed to ``Image.open``.
    NOTE(review): presumably ``patch`` is a 1-D uint8 buffer of JPEG bytes as
    stored in the HDF5 patch datasets - confirm against the file writer.
    """
    encoded = np.array(patch)
    stream = io.BytesIO(encoded)
    return Image.open(stream)


In [None]:
# data folders
patches_folder = r"D:\example_patches"
patch_ids = [file for file in os.listdir(patches_folder) if file.endswith('.h5')]
print("number of slide patches: ", len(patch_ids))

In [None]:
# load model
modeldir = r"D:\annotated_slides\models\hooknet"
modelfile = 'model_hooknet_exp_1.h5'
hooknet = load_model(os.path.join(modeldir, modelfile))

In [None]:
# hooknet.summary()
# Write a diagram of the loaded model (with tensor shapes) next to the model
# file; the call stays the last expression so the notebook still renders it.
architecture_png = os.path.join(modeldir, "context_target_unet.png")
keras.utils.plot_model(hooknet, to_file=architecture_png, show_shapes=True)

In [None]:
# Build a model that shares HookNet's inputs but stops at the "activation_31"
# layer, so predict() returns that layer's output instead of the final outputs.
feature_layer = hooknet.get_layer("activation_31")
encoder_model = Model(inputs=hooknet.input, outputs=feature_layer.output)
# Remove the `hooknet` name from the namespace; only encoder_model is used below.
del hooknet

In [None]:
tsz = 256  # edge length (pixels) the patches are reshaped to below
mags = 0, 1, 2  # indices into load_patches' dataset list: 20x, 10x, 05x
i = 0  # index of the patch to display and to feed to the model
for patch_name in patch_ids[0:1]:
    print("patch name: ", patch_name)
    patches, coords, mask = load_patches(os.path.join(patches_folder, patch_name), mags)
    # `patches` holds one list per magnification, so len(patches) is the number
    # of magnification levels, not the number of patches (the old label was wrong).
    print("number of mag patches: ", len(patches))
    print("number of patches: ", len(patches[0]))

    # Display the same patch index at each magnification, side by side.
    fig, axes = plt.subplots(1, 3, figsize=(12, 6), sharex=True, sharey=True)
    ax = axes.ravel()

    titles = ("patch @ 20x", "patch @ 10x", "patch @ 5x")
    for a, mag_patches, title in zip(ax, patches, titles):
        a.imshow(mag_patches[i])
        a.set_title(title)
        a.axis('off')

    fig.tight_layout()

    # NOTE(review): presumably "t" = target branch (20x) and "c" = context
    # branch (5x) of HookNet - confirm against the training configuration.
    patch_t = np.asarray(patches[0][i])
    patch_c = np.asarray(patches[2][i])

    # Scale 8-bit pixel values to [0, 1].
    patch_t = patch_t.astype('float') / 255
    patch_c = patch_c.astype('float') / 255

    # Add a leading batch axis: (1, tsz, tsz, channels).
    patch_c = np.reshape(patch_c, (1, tsz, tsz, -1))
    patch_t = np.reshape(patch_t, (1, tsz, tsz, -1))

            
#     pred_c, pred_t = hooknet.predict([patch_c, patch_t])
#     pred_c = np.reshape(pred_c, (1, tsz, tsz, -1))
#     pred_c = np.argmax(pred_c, axis=-1)
#     pred_c = np.reshape(pred_c, (tsz, tsz))
#     print("predicted segmentation stats (context):", np.unique(pred_c, return_counts=True))


In [None]:
# Run the truncated model on the single-patch batches prepared in the previous cell.
# NOTE(review): the list order [patch_c, patch_t] is assumed to match the order
# of the model's inputs - confirm against hooknet.input.
encoder_output = encoder_model.predict([patch_c, patch_t])


In [None]:
# Display the shape of the array returned by encoder_model.predict.
encoder_output.shape

In [None]:
# Max-reduce over non-overlapping 4x4 blocks along axes 1 and 2; axes 0 and 3
# use a block size of 1 and are therefore left untouched.
# NOTE(review): assumes encoder_output is laid out (batch, height, width, channels) - confirm.
pool_block = (1, 4, 4, 1)
encoder_output_pool = skimage.measure.block_reduce(
    encoder_output, block_size=pool_block, func=np.max
)
print(encoder_output_pool.shape)

In [None]:
# Scratch arithmetic: true division, evaluates to 8.0.
# NOTE(review): the notebook does not say what this ratio is for - confirm or remove.
1024/128

In [None]:
batch_size = 2
dim = (256, 256, 3)  # per-patch array shape the batch arrays are allocated with
mags = 0, 2  # dataset indices passed to load_patches: 20x and 05x
for patch_name in patch_ids[0:1]:
    print("patch name: ", patch_name)
    patches, coords, mask = load_patches(os.path.join(patches_folder, patch_name), mags)
    print("number of mag patches: ", len(patches))

    num_patches = len(patches[0])

    # Number of batches to iterate over
    steps = int(np.ceil(num_patches / batch_size))
    print('steps: ', steps)

    # NOTE(review): `features` is initialised but nothing in this cell fills it
    # yet (no predict call) - the extraction step still has to be added.
    features = []

    # Print the shape of one sample patch per magnification. The original always
    # used index 2, which raises IndexError for files with fewer than 3 patches.
    if num_patches:
        probe = min(2, num_patches - 1)
        patch_t = np.asarray(patches[0][probe])
        patch_c = np.asarray(patches[1][probe])
        print(patch_t.shape)
        print(patch_c.shape)

    print("number of patches: ", num_patches)

    for index in range(steps):
        start = index * batch_size
        batchlist = [mag_patches[start:start + batch_size] for mag_patches in patches]
        print("len(batchlist): ", len(batchlist))

        # The last batch is shorter than batch_size whenever num_patches is not
        # a multiple of it. Size the arrays from the actual slice so we neither
        # index past its end nor leave uninitialised np.empty rows behind.
        n_in_batch = len(batchlist[0])
        X_c = np.empty((n_in_batch, *dim))
        X_t = np.empty((n_in_batch, *dim))

        # Transform image in a batch to tensors
        for j in range(n_in_batch):
            # batchlist[0] comes from mags[0] (20x), batchlist[1] from mags[1] (05x).
            patch_t = np.array(batchlist[0][j]).astype('float') / 255
            patch_c = np.array(batchlist[1][j]).astype('float') / 255

            X_c[j,] = patch_c
            X_t[j,] = patch_t


### Inspect extracted features

In [None]:
import numpy as np
import os
import glob

import torch
import torch.nn as nn
import torch.nn.functional as F

import matplotlib.pyplot as plt

In [None]:
# Folders holding the two feature sets that are compared below.
features_folder = r"E:\WST_Share\Features"
hooknet_features_folder = r"D:\1500_cases\Features_hooknet\Features"
# os.listdir returns entries in arbitrary order; sort them so that
# feature_files[0:1] inspects the same file on every run.
feature_files = sorted(file for file in os.listdir(hooknet_features_folder) if file.endswith(".pt"))
print("num feature files: ", len(feature_files))

In [None]:
for file in feature_files[0:1]:
    # map_location="cpu": tensors that were saved from a GPU would otherwise be
    # restored onto that device, and .numpy() raises for non-CPU tensors.
    hn_features = torch.load(os.path.join(hooknet_features_folder, file), map_location="cpu")
    # The matching file in features_folder is named "<stem>_0.pt".
    features = torch.load(
        os.path.join(features_folder, file.split(".")[0] + "_0" + ".pt"),
        map_location="cpu",
    )

    features_arr = features.numpy()
    hn_features_arr = hn_features.numpy()

    # Value ranges of both feature sets.
    # NOTE(review): "IN" presumably stands for ImageNet-pretrained features - confirm.
    print("hn features, min and max: ", np.min(hn_features_arr), np.max(hn_features_arr))
    print("IN features :", np.min(features_arr), np.max(features_arr))
    hn_max_value = 1 # np.max(hn_features_arr)

    # L2 norm along axis 1 of each array; printed by the next cell.
    hn_features_arr_norm = np.linalg.norm(hn_features_arr, axis=1)
    features_arr_norm = np.linalg.norm(features_arr, axis=1)
    i = 2  # row of the feature array to histogram
    plt.hist(hn_features_arr[i]/hn_max_value)
#     plt.hist(features_arr[i])
#     diff = features_arr - hn_features_arr
#     dist = np.linalg.norm((features_arr - hn_features_arr), axis=1)
#     print(np.min(dist), np.max(dist))

In [None]:
# Shape of the HookNet norm array, then for each feature set (HookNet first)
# its first ten norms followed by the minimum and maximum norm.
print(hn_features_arr_norm.shape)
for norms in (hn_features_arr_norm, features_arr_norm):
    print(norms[0:10])
    print(np.min(norms), np.max(norms))

In [None]:
### Normalize the HookNet features by each file's maximum value (this maps them to the [0, 1] range only if the features are non-negative)

In [None]:
# Destination for the normalised copies; file names are kept unchanged.
norm_features_folder = "D:\\1500_cases\\Norm_Features_hooknet\\Features"
# torch.save does not create missing directories, so make sure the target exists.
os.makedirs(norm_features_folder, exist_ok=True)

for file in feature_files:
    # map_location="cpu" so GPU-saved tensors can be converted with .numpy().
    hn_features = torch.load(os.path.join(hooknet_features_folder, file), map_location="cpu")

    hn_features_arr = hn_features.numpy()
    hn_max_value = np.max(hn_features_arr)

    # Scale by this file's maximum. A maximum of 0 would turn the division into
    # NaN/inf, so such arrays are written out unscaled.
    # NOTE(review): the result lies in [0, 1] only if the features are
    # non-negative - confirm for the chosen layer.
    if hn_max_value != 0:
        hn_features_arr = hn_features_arr / hn_max_value

    features = torch.from_numpy(hn_features_arr)
    torch.save(features, os.path.join(norm_features_folder, file))

### Create CSV file

In [None]:
import pandas as pd
import os


In [None]:
# Training-set metadata. The path was a raw string with doubled backslashes,
# which puts literal "\\" pairs in the path; use single separators like the
# other paths in this notebook.
df = pd.read_csv(r"D:\20kDatasetMetafiles\patientinfo_train_smallDataset_1500.csv")
hooknet_features_folder = r"D:\1500_cases\Features_hooknet\Features"
feature_files = [file for file in os.listdir(hooknet_features_folder) if file.endswith(".pt")]
print("num feature files: ", len(feature_files))

# Strip everything from the last "_" onwards in slide_id to get the ID that is
# matched against the feature file names in the next cell.
df['new_slide_id'] = df['slide_id'].apply(lambda x: x.rsplit("_", 1)[0])
print(df.head())

In [None]:
# Feature file names with everything from the first "." onwards removed.
file_names = [name.split(".")[0] for name in feature_files]
print(file_names[0:4])

# Keep only the metadata rows whose shortened slide ID has a feature file.
has_features = df['new_slide_id'].isin(file_names)
df = df.loc[has_features]
print(df.head())
df.shape

In [None]:
# Build the export frame under its own name instead of overwriting `df`.
# Previously a second run of this cell dropped the freshly renamed "slide_id"
# column (rename silently ignores the now-missing "new_slide_id") and wrote a
# CSV without any slide ID. With `df` left untouched the cell can be re-run safely.
df_train = (
    df
    .drop(['slide_id'], axis=1)
    .rename(columns={"new_slide_id": "slide_id"})
)
df_train.to_csv(r"D:\1500_cases\Features_hooknet\df_train.csv", index=False)
df_train.head()


In [None]:
# test set
df = pd.read_csv(r"D:\\20kDatasetMetafiles\\patientinfo_test_smallDataset.csv")
hooknet_features_folder = r"D:\1500_cases\Features_hooknet\Features"
feature_files = [file for file in os.listdir(hooknet_features_folder) if file.endswith(".pt")]
print("num feature files: ", len(feature_files))

df['new_slide_id'] = df['slide_id'].apply(lambda x: x.rsplit("_", 1)[0])
print(df.head())

In [None]:
# Feature file names with everything from the first "." onwards removed.
file_names = [name.split(".")[0] for name in feature_files]
print(file_names[0:4])

# Keep only the test rows whose shortened slide ID has a feature file.
row_has_features = df['new_slide_id'].isin(file_names)
df = df.loc[row_has_features]
print(df.head())
df.shape

In [None]:
# Build the export frame under its own name instead of overwriting `df`.
# Previously a second run of this cell dropped the freshly renamed "slide_id"
# column (rename silently ignores the now-missing "new_slide_id") and wrote a
# CSV without any slide ID. With `df` left untouched the cell can be re-run safely.
df_test = (
    df
    .drop(['slide_id'], axis=1)
    .rename(columns={"new_slide_id": "slide_id"})
)
df_test.to_csv(r"D:\1500_cases\Features_hooknet\df_test.csv", index=False)
df_test.head()
