# Libraries

In [None]:
import numpy as np
import pandas as pd
import tflearn, h5py, cv2
import matplotlib.pyplot as plt

import sys
sys.path.append('..')
from src.models.cnn_model import CNNModel

# Build components

In [None]:
# Dataset
# Open the training HDF5 file read-only and bind its 'X' and 'Y' entries.
# The handle `h5f` stays open because later cells index these datasets.

h5f = h5py.File('../src/data/train.h5', mode='r')
X_train_images, Y_train_labels = h5f['X'], h5f['Y']

print("X_train_images", X_train_images.shape)
print("Y_train_labels", Y_train_labels.shape)

In [None]:
# Model
# Build the network definition, wrap it in a tflearn DNN and load the saved
# model from the checkpoint directory.

convnet = CNNModel()
# NOTE(review): define_network is handed the training images; presumably it
# only needs them to size the input layer — confirm in src/models/cnn_model.
network = convnet.define_network(X_train_images)
model = tflearn.DNN(
    network,
    tensorboard_verbose=0,
    checkpoint_path="../ckpt/nodule3-classifier.tfl.ckpt",
)
model.load("../ckpt/nodule3-classifier.tfl")

# Get clusters

In [None]:
# Inference
# Run the loaded model over the whole training set. The full four-axis slice
# of the dataset is what gets passed to predict(), not the dataset object.

preds = model.predict(X_train_images[:, :, :, :])
print("preds", preds.shape)

In [None]:
# Get clusters
# Split the training predictions by label and average each group to get one
# centroid per class. Label column 1 marks positives, column 0 negatives.

pos_indicator = Y_train_labels[:, 1] == 1
neg_indicator = Y_train_labels[:, 0] == 1

# Boolean masks select whole rows of `preds`.
pos_embeddings, neg_embeddings = preds[pos_indicator], preds[neg_indicator]
print(pos_embeddings.shape, neg_embeddings.shape)

# Per-class mean over the sample axis.
avg_pos_embedding = np.mean(pos_embeddings, axis=0)
avg_neg_embedding = np.mean(neg_embeddings, axis=0)
print("avg_pos_embedding", avg_pos_embedding)
print("avg_neg_embedding", avg_neg_embedding)

# Testing

In [None]:
# Dataset
# Open the test HDF5 file read-only and bind its 'X' and 'Y' entries.
# The handle `h5f2` stays open because the next cell indexes X_test_images.

h5f2 = h5py.File('../src/data/test.h5', mode='r')
X_test_images, Y_test_labels = h5f2['X'], h5f2['Y']

print("X_test_images", X_test_images.shape)
print("Y_test_labels", Y_test_labels.shape)

In [None]:
# Inference
# Run the loaded model over the whole test set; the outputs are the vectors
# that get compared against the class centroids.

embeddings = model.predict(X_test_images[:, :, :, :])
print("embeddings", embeddings.shape)

In [None]:
# Cluster
# Assign each test embedding to the nearer of the two class centroids using
# squared Euclidean distance. Column 0 of `dists` holds the distance to the
# negative centroid and column 1 the positive one, so argmin gives
# 0 = negative, 1 = positive.

pos_dists = np.square(embeddings - avg_pos_embedding).sum(axis=1, keepdims=True)
neg_dists = np.square(embeddings - avg_neg_embedding).sum(axis=1, keepdims=True)
dists = np.concatenate([neg_dists, pos_dists], axis=1)
pred_indicies = dists.argmin(axis=1)

# Fraction of test samples assigned to each centroid.
print("neg_ratio:", np.sum(pred_indicies == 0) / pred_indicies.size)
print("pos_ratio:", np.sum(pred_indicies == 1) / pred_indicies.size)