### Import Libraries

In [162]:
import numpy as np
import os
from random import shuffle
import random
import matplotlib.pyplot as plt
from tqdm import tqdm
import time
import pickle
from statistics import mode

# OpenCV
import cv2

# scikit-learn
from sklearn.utils import resample
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.metrics.pairwise import manhattan_distances, cosine_distances
from sklearn.metrics import silhouette_score
from sklearn.metrics import roc_curve
from sklearn.metrics import auc
from sklearn.metrics import davies_bouldin_score

# Pandas
import pandas as pd

### Select Model

In [163]:
selected_model = 2 #int(input("Enter the number for: \n 1) VGGNET16 \n 2) Resnet101  \n 3) Densenet169 "))

### For traning speed, define DATASET_SHRINK_FACTOR

In [164]:
DATASET_SHRINK_FACTOR = 0.01

### Open extracted feature in pickle file

In [165]:
filepath = "../../pickle_files/al/ct_scan/"
if selected_model == 1:
  filename = "ct_scan_vggnet16.pickle"
elif selected_model == 2:
  filename = "ct_scan_resnet101.pickle"
elif selected_model == 3:
  filename = "ct_scan_densenet169.pickle"

file = filepath + filename
with open(file, 'rb') as handle:
  all_ft_dataset = pickle.load(handle)

### Sample the data and shuffle

In [166]:
# suffle the data
random.seed(42)
shuffle(all_ft_dataset)

print("Total Dataset: {}".format(len(all_ft_dataset)))
print("Sample feature dataset Covid: {}".format(all_ft_dataset[0]))
print("Sample feature dataset Non-Covid: {}".format(all_ft_dataset[5001]))

# shrink for minimize training time
original_data_size = len(all_ft_dataset)
new_data_size = int(original_data_size * DATASET_SHRINK_FACTOR)
ft_dataset = all_ft_dataset[:new_data_size]

print("Selected Dataset: {}".format(len(ft_dataset)))

Total Dataset: 10000
Sample feature dataset Covid: {'id': 3771, 'filepath': './ct_scan_dataset/3A_images/NCP_509_2175_0024.png', 'image': array([0., 0., 0., ..., 0., 0., 0.], dtype=float32), 'label': 1}
Sample feature dataset Non-Covid: {'id': 9791, 'filepath': './ct_scan_dataset/3A_images/radiopaedia-50806-56291-0-0034.png', 'image': array([0., 0., 0., ..., 0., 0., 0.], dtype=float32), 'label': 0}
Selected Dataset: 100


### Define a function to correct mispredictions

In [167]:
def correct_mispredictions(query, feat_label, train_label, train_id, index_data, decision, data_frame_mistake, count):
  if mode(decision) != query["label"]:
    count += 1
    data_frame_mistake["Image name"].append(query["filepath"].split("/")[-1])
    data_frame_mistake["Mistake ID"].append(query['id'])
    data_frame_mistake["Original label"].append(query['label'])
    data_frame_mistake["Predicted label"].append(mode(decision))
    data_frame_mistake["Mistake index"].append(index_data)
    feat_label[query['label']].append(query["image"])
    train_label[query['label']].append(query["label"])
    train_id[query['label']].append(query['id'])

  else:
    feat_label[query['label']].append(query["image"])
    train_label[query['label']].append(query["label"])
    train_id[query['label']].append(query['id'])
  return count, data_frame_mistake, feat_label, train_label, train_id

### Define a function to calcuate distance

In [168]:
# query: raw dictionary (from pickle file)
# feat_label: dictionary of {0: [], 1:[]}
# distance_type: 1. Eucliddean, 2. Manhattan, 3. Cosine
# id_pred: predicted id
# label_pred: predicted label
# n_neighbours: no. of neighbour
# count:
# train_label: training label
# train_id: trainin id
# index_data: index of data to track the mistaken data
# data_frame_mistake: to save data_frame in CSV
# supervised_data: if mentored data or not
def distance(query, feat_label, distance_type, id_pred, label_pred, n_neighbours, count, train_label, train_id, index_data, data_frame_mistake, supervised_data):
  expnd_query = np.expand_dims(query['image'], axis=0)
  pos_tup, neg_tup = [], []
  pos_dist, neg_dist = [], []

  # Calculating the distance using numpy (axis=1) to calculate all at ones
  if distance_type == 1: # Euclidean distance
    if len(feat_label[0]) > 1:
      neg_dist = np.linalg.norm(query['image'] - feat_label[0], axis=1)
    elif len(feat_label[0]) == 1:
      neg_dist = [np.linalg.norm(query['image'] - feat_label[0], axis=1)]
    if len(feat_label[1]) > 1:
      pos_dist = np.linalg.norm(query['image'] - feat_label[1], axis=1)
    elif len(feat_label[1]) == 1:
      pos_dist = [np.linalg.norm(query['image'] - feat_label[1], axis=1)]

  elif distance_type == 2: # Manhattan distance
    if len(feat_label[0]) > 1:
      neg_dist = np.squeeze(manhattan_distances(feat_label[0], expnd_query))
    elif len(feat_label[0]) == 1:
      neg_dist = [np.squeeze(manhattan_distances(feat_label[0], expnd_query))]
    if len(feat_label[1]) > 1:
      pos_dist = np.squeeze(manhattan_distances(feat_label[1], expnd_query))
    elif len(feat_label[1]) == 1:
      pos_dist = [np.squeeze(manhattan_distances(feat_label[1], expnd_query))]

  elif distance_type == 3: # Cosine distance
    if len(feat_label[0]) > 1:
      neg_dist = np.squeeze(cosine_distances(expnd_query, feat_label[0]))
    elif len(feat_label[0]) == 1:
      neg_dist = [np.squeeze(cosine_distances(expnd_query, feat_label[0]))]
    if len(feat_label[1]) > 1:
      pos_dist = np.squeeze(cosine_distances(expnd_query, feat_label[1]))
    elif len(feat_label[1]) == 1:
      pos_dist = [np.squeeze(cosine_distances(expnd_query, feat_label[1]))]

  for dist_single in pos_dist:
    pos_tup.append((dist_single, 1))

  for dist_single in neg_dist:
    neg_tup.append((dist_single, 0))

  pos_tup.extend(neg_tup)
  tup_dist = sorted(pos_tup)[:n_neighbours]

  decision = [y for (x,y) in tup_dist]
  if supervised_data:
    (count,
     data_frame_mistake,
     feat_label,
     train_label,
     train_id) = correct_mispredictions(query,
                                        feat_label,
                                        train_label,
                                        train_id,
                                        index_data,
                                        decision,
                                        data_frame_mistake,
                                        count)

  else:
    if len(decision) > 0 and mode(decision) == 0:
      feat_label[0].append(query["image"])
      id_pred[0].append(query["id"])
      label_pred[0].append((query['id'], decision.count(1)/n_neighbours))
    else:
      feat_label[1].append(query["image"])
      id_pred[1].append(query["id"])
      label_pred[1].append((query['id'],decision.count(1)/n_neighbours))

  return feat_label, id_pred, label_pred, data_frame_mistake, count, train_label, train_id

### Define a function to calculate the classification metrices

In [182]:
def classification_metrices(id_gt, id_pred):
  TP, FP, FN, TN = 0, 0, 0, 0

  # TP --> when correctly classified covid
  for tp in id_pred[1]:
    if tp in id_gt[1]:
      TP += 1

  # TN --> when correctly classified healthy (non-covid)
  for tn in id_pred[0]:
    if tn in id_gt[0]:
      TN += 1

  # FP --> when incorrectly classified healthy (Classified healthy as covid)
  for fp in id_pred[1]:
    if fp in id_gt[0]:
      FP += 1

  # FN --> when missed covid classification (Covid cases missed)
  for fn in id_pred[0]:
    if fn in id_gt[1]:
      FN += 1

  accuracy = round((TP + TN) / (TP + TN + FP + FN), 3)
  if (TN + FP) > 0:
    specificity = round(TN / (TN + FP), 3)
  else:
    specificity = 0 # Infinity

  if (TP + FN) > 0:
    sensitivity = round((TP) / (TP + FN), 3)
  else:
    sensitivity = 0 # Infinity

  # f1_score = (2*precision*recall)/(precision + recall)

  print("TP: {}  FP: {}".format(TP, FP))
  print("FN: {}  TN: {}".format(FN, TN))

  return accuracy, specificity, sensitivity, TP, TN, FP, FN

### Define a function to calculate ROC AUC Curve

In [184]:
def roc_auc_curve(label_gt, label_pred):
  # contains (id, labels) tuple of binary class
  gt_labels = sorted(label_gt[0] + label_gt[1])

  # contains (id, labels) tuple of binary class --> sorted to match each element in gt_labels and pred_labels
  pred_labels = sorted(label_pred[0] + label_pred[1])
  y_test = [y for (x,y) in gt_labels] # Get only the labels
  y_scores = [y for (x,y) in pred_labels]
  fpr, tpr, threshold = roc_curve(y_test, y_scores)
  roc_auc = round(auc(fpr, tpr), 3)
  return roc_auc

### Define a function to calculate Cluster metrices

In [190]:
def cluster_metrices(feat_label, train_label, id_pred):
  print("Calculating Dunn's index...")
  dunn_index, davies_bouldin_index, silhouette_index = "NA", "NA", "NA"
  if len(feat_label[0]) > 0 and len(feat_label[1]) > 0:
    intra_dist1 = euclidean_distances(feat_label[0]).max()
    intra_dist2 = euclidean_distances(feat_label[1]).max()
    inter_dist = euclidean_distances(feat_label[0], feat_label[1]).min()

    if intra_dist1 > intra_dist2:
      max_intra_dist= intra_dist1
    else:
      max_intra_dist = intra_dist2

    dunn_index = round(inter_dist / max_intra_dist, 3)

  print("dunn_index: ", dunn_index)

  # Davies Bouldin and Silhouette score from sklearn library.
  class_0 = np.concatenate((np.zeros(shape=(len(train_label[0])), dtype=int), np.zeros(shape=(len(id_pred[0])), dtype=int)))
  class_1 = np.concatenate((np.ones(shape=(len(train_label[1])), dtype=int), np.ones(shape=(len(id_pred[1])), dtype=int)))
  class_all = np.concatenate((class_0, class_1))
  if len(feat_label[0]) > 0 and len(feat_label[1]) > 0:
    feature_all = np.concatenate((feat_label[0], feat_label[1]))
    print("Calculating Davies Bouldin index...")
    davies_bouldin_index = round(davies_bouldin_score(feature_all, class_all), 3)
    print("davies_bouldin_index: ", davies_bouldin_index)

    print("Calculating Silhouette index...")
    silhouette_index = round(silhouette_score(feature_all, class_all), 3)
    print("silhouette_index: ", silhouette_index)


  return dunn_index, davies_bouldin_index, silhouette_index

### Define a function to load dataset into three different segment (k-way n-shot)

In [172]:
# Method to return three sets (n, 3200, 6400)  of labeled dataset for experiment
def data_loader(dataset, n):
  labeled_data, unlabeled_data = [], []

  l_data = dataset[:n]                          # First case (0-40) // labeled + mentored
  ul_data = dataset[n:]                         # First case (40-1000) // unlabeled
  labeled_data.append(l_data)                   # labeled_data[0] => dataset[0-40]
  unlabeled_data.append(ul_data)                # unlabeled_data[0] => dataset[40-1000]

  size_second_set = int(3200 * DATASET_SHRINK_FACTOR) # 3200 * 0.1 = 320
  sss = size_second_set
  l_data = dataset[sss: n + sss]                # Second case (320-360) // labeled + mentored
  ul_data = dataset[:sss] + dataset[n + sss:]   # Second case (0-320) + (360-1000) // unlabeled
  labeled_data.append(l_data)                   # labeled_data[1] => dataset[320-360]
  unlabeled_data.append(ul_data)                # unlabeled_data[1] => dataset[0-320] + dataset[360-1000]

  size_second_set = int(6400 * DATASET_SHRINK_FACTOR) # 6400 * 0.1 = 640
  sss = size_second_set
  l_data = dataset[sss: n + sss]                # Third case (640-680) // labeled + mentored
  ul_data = dataset[:sss] + dataset[n + sss:]   # Third case (0-640) + (680-1000) // unlabeled
  labeled_data.append(l_data)                   # labeled_data[2] => dataset[640-680]
  unlabeled_data.append(ul_data)                # unlabeled_data[2] => dataset[0-640] + dataset[680-1000]
  return labeled_data, unlabeled_data

### Define a function to separate data into positive and negative samples

In [173]:
# Function that selects number of data samples and removes the selected data from the dataset. (required to select balanced positive and negative samples)
def data_separation(dataset, taken_data_idx, label=None, data_sample=100):
  add_data = []
  for i, data in enumerate(dataset):
    if i in taken_data_idx:
      continue
    if label > -1 and dataset[i]["label"] == label:
      add_data.append(data)
      taken_data_idx.append(i)
    else:
      add_data.append(data)
      taken_data_idx.append(i)
    if len(add_data) == data_sample:
      break
  return add_data, taken_data_idx

### Distance type (Euclidean Manhattan or Consine)

In [174]:
distance_type = 3 # int(input("Enter the number for: \n 1) Euclidean  \n 2) Manhattan \n 3) Cosine"))

### Model and Distance Name

In [175]:
if selected_model == 1:
  s_model = 'vggnet16'
elif selected_model == 2:
  s_model = 'resnet101'
elif selected_model == 3:
  s_model = 'densenet169'

if distance_type == 1:
  s_distance = 'euclidean'
elif distance_type == 2:
  s_distance = 'manhattan'
elif distance_type == 3:
  s_distance = 'cosine'

### Active Learning Process

In [188]:
n_neighbours = 5 # 31
labeled_size = [200, 400, 800, 1550]
labeled_size = [int(size * DATASET_SHRINK_FACTOR) for size in labeled_size]
data_frame_metrix = {
  "Labeled data": [],
  "Dataset": [],
  "Accuracy": [],
  "Specificity": [],
  "Sensitivity": [],
  "AUC":[],
  "Dunn index": [],
  "Davies Bouldin": [],
  "Silhouette index":[],
  "TP":[],
  "TN":[],
  "FP":[],
  "FN":[],
  "pos_labeled_img":[],
  "neg_labeled_img":[],
  "corrected_count":[]
}

global_count = 0

for size in labeled_size:
  labeled_data_sets, unlabeled_data_sets = data_loader(ft_dataset, size)

  # labeled_data_sets ==> three sets: [d1, d2, d3] ==> eg: [0-40, 320-360, 640-680]
  for dataset_type, labeled_data in enumerate(labeled_data_sets):
    global_count += 1
    print(f"============================== {global_count}/{len(labeled_size) * len(labeled_data_sets)} ==============================")
    data_frame_mistake = {
      "Image name": [],
      "Mistake index": [],
      "Mistake ID": [],
      "Original label": [],
      "Predicted label": []
    }

    pos_img, neg_img = 0, 0

    # collect the ground truth (label) of all the predicting images =>> key: 0 & 1 (class), value: tuple (data['id'], data['label']), required to calulate TP, FP, FN, TN
    label_gt = {0: [], 1: []}
    # collect the ground truth (id) of all the predicting images =>> key: 0 & 1 (class), value: ground truth id
    id_gt = {0: [], 1: []}

    # collect the predicted label for all the images =>> key: 0 & 1 (class), value: tuple(query['id'], decision.count(1)/n_neighbours) --> decision.count(1)/n_neighbours: Percentage of predicted positive class, required to calculate AUC/ROC value
    label_pred = {0: [], 1: []}
    # collect the predicted id for all the images =>> key: 0 & 1 (class), value: predicted ids, required to calulate TP, FP, FN, TN
    id_pred = {0: [],  1: []}

    # feature label =>> key: 0 & 1 (class), value: deep feature of image
    feat_label = {0: [], 1: []}

    # train id =>> key: 0 & 1 (class), value: id of images --> Not required for, but scared to delete. 
    train_id = {0: [], 1: []}
    # train lable =>> key: 0 & 1 (class), value: ground truth labels
    train_label = {0: [], 1: []}

    # select balanced labeled data (50% from positive and 50% from negative) and initialize training data from a few sample and rest data as supervised data
    sample_data_size = int(200 * DATASET_SHRINK_FACTOR) # 200
    training_data_pos, taken_data_idx = data_separation(labeled_data, [], 0, int(sample_data_size/2))
    training_data_neg, taken_data_idx = data_separation(labeled_data, taken_data_idx, 1, int(sample_data_size/2))
    training_data = training_data_pos + training_data_neg

    sample_data_size_supervised = len(labeled_data) - sample_data_size
    supervised_data, taken_data_idx = data_separation(labeled_data, taken_data_idx, -1, sample_data_size_supervised)

    print(f"training data: {len(training_data)}")

    for data in training_data:
      if data["label"] == 1:
        feat_label[1].append(data['image'])
        train_id[1].append(data['id'])
        train_label[1].append((data['id'],data['label']))
        pos_img += 1
      else:
        feat_label[0].append(data['image'])
        train_id[0].append(data['id'])
        train_label[0].append((data['id'],data['label']))
        neg_img += 1

    print(f"feature neg: {neg_img}")
    print(f"feature pos: {pos_img}")

    count, index_data = 0, 200

    print(f"supervised data: {len(supervised_data)}")

    # loop is for the mentored data --> Notice mentored_data=True in argument of the function call distance.
    for data in supervised_data:
      (feat_label,
       id_pred,
       label_pred,
       data_frame_mistake,
       count, train_label,
       train_id) = distance(data,
                           feat_label,
                           distance_type,
                           id_pred,
                           label_pred,
                           n_neighbours,
                           count,
                           train_label,
                           train_id,
                           index_data,
                           data_frame_mistake,
                           supervised_data=True)
      index_data += 1

    data_f_mistake = pd.DataFrame.from_dict(data_frame_mistake)
    data_f_mistake.to_csv(f"../test/ct_scan/{s_model}_{s_distance}_mistake_{size}_{dataset_type}.csv", index=False)

    print(f"unlabeled data: {len(unlabeled_data_sets[dataset_type])}")

    # loop is for the test data --> Notice mentored_data=False in argument of the function call distance.
    for data in tqdm(unlabeled_data_sets[dataset_type]):
      if data["label"] == 1:
        id_gt[1].append(data['id'])
        label_gt[1].append((data['id'], data['label']))
      else:
        id_gt[0].append(data['id'])
        label_gt[0].append((data['id'], data['label']))

      feat_label, id_pred, label_pred, _, _, _, _ = distance(data,
                                                             feat_label,
                                                             distance_type,
                                                             id_pred,
                                                             label_pred,
                                                             n_neighbours,
                                                             count=None,
                                                             train_label=None,
                                                             train_id=None,
                                                             index_data=None,
                                                             data_frame_mistake=None,
                                                             supervised_data=False)

    accuracy, specificity, sensitivity, TP, TN, FP, FN = classification_metrices(id_gt, id_pred)
    dunn_index, davies_bouldin_index, silhouette_index = cluster_metrices(feat_label, train_label, id_pred)
    cl_auc = roc_auc_curve(label_gt, label_pred)

    data_frame_metrix["Labeled data"].append(size)
    data_frame_metrix["Dataset"].append(f"d_{dataset_type}")
    data_frame_metrix["Accuracy"].append(accuracy)
    data_frame_metrix["Specificity"].append(specificity)
    data_frame_metrix["Sensitivity"].append(sensitivity)
    data_frame_metrix["AUC"].append(cl_auc)
    data_frame_metrix["Dunn index"].append(dunn_index)
    data_frame_metrix["Davies Bouldin"].append(davies_bouldin_index)
    data_frame_metrix["Silhouette index"].append(silhouette_index)
    data_frame_metrix["TP"].append(TP)
    data_frame_metrix["TN"].append(TN)
    data_frame_metrix["FP"].append(FP)
    data_frame_metrix["FN"].append(FN)
    data_frame_metrix["pos_labeled_img"].append(pos_img)
    data_frame_metrix["neg_labeled_img"].append(neg_img)
    data_frame_metrix["corrected_count"].append(count)

    print(f"Dataset: d_{dataset_type} \t\t Labeled image: {size} \t\t Corrected count: {count}")
    print(f"Accuracy: {accuracy} \t\t Specificity: {specificity} \t\t Sensitivity: {sensitivity}")
    print(f"Dunn index: {dunn_index} \t Davies Bouldin: {davies_bouldin_index} \t Silhouette index: {silhouette_index}")
    print(f"AUC: {cl_auc}")


training data: 2
feature neg: 1
feature pos: 1
supervised data: 0
unlabeled data: 98


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 98/98 [00:01<00:00, 65.54it/s]


TP: 0  FP: 0
FN: 54  TN: 44
Calculating Dunn's index...
dunn_index:  0.605
Calculating Davies Bouldin index...
davies_bouldin_index:  0.929
Calculating Silhouette index...
silhouette_index:  0.029
Dataset: d_0 		 Labeled image: 2 		 Corrected count: 0
Accuracy: 0.449 		 Specificity: 1.0 		 Sensitivity: 0.0
Dunn index: 0.6050000190734863 	 Davies Bouldin: 0.929 	 Silhouette index: 0.028999999165534973
AUC: 0.549
training data: 2
feature neg: 0
feature pos: 2
supervised data: 0
unlabeled data: 98


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 98/98 [00:01<00:00, 77.98it/s]


TP: 53  FP: 45
FN: 0  TN: 0
Calculating Dunn's index...
dunn_index:  NA
Dataset: d_1 		 Labeled image: 2 		 Corrected count: 0
Accuracy: 0.541 		 Specificity: 0.0 		 Sensitivity: 1.0
Dunn index: NA 	 Davies Bouldin: NA 	 Silhouette index: NA
AUC: 0.512
training data: 2
feature neg: 1
feature pos: 1
supervised data: 0
unlabeled data: 98


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 98/98 [00:01<00:00, 76.03it/s]


TP: 0  FP: 0
FN: 54  TN: 44
Calculating Dunn's index...
dunn_index:  0.515
Calculating Davies Bouldin index...
davies_bouldin_index:  1.245
Calculating Silhouette index...
silhouette_index:  -0.1
Dataset: d_2 		 Labeled image: 2 		 Corrected count: 0
Accuracy: 0.449 		 Specificity: 1.0 		 Sensitivity: 0.0
Dunn index: 0.5149999856948853 	 Davies Bouldin: 1.245 	 Silhouette index: -0.10000000149011612
AUC: 0.541
training data: 2
feature neg: 1
feature pos: 1
supervised data: 2
unlabeled data: 96


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 96/96 [00:01<00:00, 77.33it/s]


TP: 53  FP: 43
FN: 0  TN: 0
Calculating Dunn's index...
dunn_index:  0.614
Calculating Davies Bouldin index...
davies_bouldin_index:  1.953
Calculating Silhouette index...
silhouette_index:  0.05
Dataset: d_0 		 Labeled image: 4 		 Corrected count: 1
Accuracy: 0.552 		 Specificity: 0.0 		 Sensitivity: 1.0
Dunn index: 0.6140000224113464 	 Davies Bouldin: 1.953 	 Silhouette index: 0.05000000074505806
AUC: 0.475
training data: 2
feature neg: 0
feature pos: 2
supervised data: 2
unlabeled data: 96


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 96/96 [00:01<00:00, 72.69it/s]


TP: 52  FP: 44
FN: 0  TN: 0
Calculating Dunn's index...
dunn_index:  0.591
Calculating Davies Bouldin index...
davies_bouldin_index:  0.973
Calculating Silhouette index...
silhouette_index:  0.003
Dataset: d_1 		 Labeled image: 4 		 Corrected count: 1
Accuracy: 0.542 		 Specificity: 0.0 		 Sensitivity: 1.0
Dunn index: 0.5910000205039978 	 Davies Bouldin: 0.973 	 Silhouette index: 0.003000000026077032
AUC: 0.49
training data: 2
feature neg: 1
feature pos: 1
supervised data: 2
unlabeled data: 96


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 96/96 [00:01<00:00, 78.10it/s]


TP: 53  FP: 43
FN: 0  TN: 0
Calculating Dunn's index...
dunn_index:  0.575
Calculating Davies Bouldin index...
davies_bouldin_index:  2.599
Calculating Silhouette index...
silhouette_index:  -0.006
Dataset: d_2 		 Labeled image: 4 		 Corrected count: 2
Accuracy: 0.552 		 Specificity: 0.0 		 Sensitivity: 1.0
Dunn index: 0.574999988079071 	 Davies Bouldin: 2.599 	 Silhouette index: -0.006000000052154064
AUC: 0.573
training data: 2
feature neg: 1
feature pos: 1
supervised data: 6
unlabeled data: 92


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 92/92 [00:01<00:00, 72.25it/s]


TP: 50  FP: 41
FN: 0  TN: 1
Calculating Dunn's index...
dunn_index:  0.614
Calculating Davies Bouldin index...
davies_bouldin_index:  3.162
Calculating Silhouette index...
silhouette_index:  0.069
Dataset: d_0 		 Labeled image: 8 		 Corrected count: 3
Accuracy: 0.554 		 Specificity: 0.024 		 Sensitivity: 1.0
Dunn index: 0.6140000224113464 	 Davies Bouldin: 3.162 	 Silhouette index: 0.0689999982714653
AUC: 0.506
training data: 2
feature neg: 0
feature pos: 2
supervised data: 6
unlabeled data: 92


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 92/92 [00:01<00:00, 71.38it/s]


TP: 49  FP: 43
FN: 0  TN: 0
Calculating Dunn's index...
dunn_index:  0.591
Calculating Davies Bouldin index...
davies_bouldin_index:  2.172
Calculating Silhouette index...
silhouette_index:  0.007
Dataset: d_1 		 Labeled image: 8 		 Corrected count: 2
Accuracy: 0.533 		 Specificity: 0.0 		 Sensitivity: 1.0
Dunn index: 0.5910000205039978 	 Davies Bouldin: 2.172 	 Silhouette index: 0.007000000216066837
AUC: 0.49
training data: 2
feature neg: 1
feature pos: 1
supervised data: 6
unlabeled data: 92


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 92/92 [00:01<00:00, 74.05it/s]


TP: 50  FP: 36
FN: 1  TN: 5
Calculating Dunn's index...
dunn_index:  0.593
Calculating Davies Bouldin index...
davies_bouldin_index:  3.984
Calculating Silhouette index...
silhouette_index:  0.047
Dataset: d_2 		 Labeled image: 8 		 Corrected count: 4
Accuracy: 0.598 		 Specificity: 0.122 		 Sensitivity: 0.98
Dunn index: 0.5929999947547913 	 Davies Bouldin: 3.984 	 Silhouette index: 0.04699999839067459
AUC: 0.626
training data: 2
feature neg: 1
feature pos: 1
supervised data: 13
unlabeled data: 85


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 85/85 [00:01<00:00, 70.36it/s]


TP: 45  FP: 39
FN: 1  TN: 0
Calculating Dunn's index...
dunn_index:  0.494
Calculating Davies Bouldin index...
davies_bouldin_index:  4.325
Calculating Silhouette index...
silhouette_index:  0.034
Dataset: d_0 		 Labeled image: 15 		 Corrected count: 5
Accuracy: 0.529 		 Specificity: 0.0 		 Sensitivity: 0.978
Dunn index: 0.49399998784065247 	 Davies Bouldin: 4.325 	 Silhouette index: 0.03400000184774399
AUC: 0.572
training data: 2
feature neg: 0
feature pos: 2
supervised data: 13
unlabeled data: 85


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 85/85 [00:01<00:00, 67.86it/s]


TP: 44  FP: 40
FN: 0  TN: 1
Calculating Dunn's index...
dunn_index:  0.585
Calculating Davies Bouldin index...
davies_bouldin_index:  3.436
Calculating Silhouette index...
silhouette_index:  -0.005
Dataset: d_1 		 Labeled image: 15 		 Corrected count: 4
Accuracy: 0.529 		 Specificity: 0.024 		 Sensitivity: 1.0
Dunn index: 0.5849999785423279 	 Davies Bouldin: 3.436 	 Silhouette index: -0.004999999888241291
AUC: 0.523
training data: 2
feature neg: 1
feature pos: 1
supervised data: 13
unlabeled data: 85


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 85/85 [00:01<00:00, 72.57it/s]


TP: 44  FP: 27
FN: 4  TN: 10
Calculating Dunn's index...
dunn_index:  0.568
Calculating Davies Bouldin index...
davies_bouldin_index:  4.203
Calculating Silhouette index...
silhouette_index:  0.047
Dataset: d_2 		 Labeled image: 15 		 Corrected count: 6
Accuracy: 0.635 		 Specificity: 0.27 		 Sensitivity: 0.917
Dunn index: 0.5680000185966492 	 Davies Bouldin: 4.203 	 Silhouette index: 0.04699999839067459
AUC: 0.664


### Save the result

In [191]:
data_f_matrix = pd.DataFrame.from_dict(data_frame_metrix)
data_f_matrix.to_csv(f"../test/ct_scan/{s_model}_{s_distance}_dist.csv", index=False)