In [2]:
# Enable the autoreload extension; mode 2 re-imports modules before each cell
# runs so edits to project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the cell output.
%matplotlib inline  
from IPython.display import HTML

In [1]:
from sklearn.metrics import f1_score, roc_auc_score, average_precision_score, precision_score, recall_score
import pandas
import numpy as np
import papermill as pm
import json
import matplotlib.pyplot as plt
import os
import uuid
from config import config
from db import Result
import ast
import math
import pickle
from clinical_data_models import features_data

# Show floats with thousands separators and three decimals in pandas output.
pandas.set_option("display.float_format", "{:,.3f}".format)

In [2]:
from evaluate import plot_learning_curve, plot_accuracy_curve, load, get_results, get_labels, transform_binary_probabilities, transform_binary_predictions, calculate_accuracy_loss, plot_confusion_matrix, plot_precision_recall, plot_roc_curve, calculate_pr_auc, calculate_confusion_matrix_stats, calculate_confusion_matrix, plot_tsne, plot_multiple_grad_cam, plot_precision_recall

In [3]:
from data_gen import data
#from run import characterize_data

In [4]:
# Identifier of the stored run being evaluated; used below to locate both the
# Result row and the saved model file.
UUID = "8dab2c82-13a2-4936-a6f2-7bcb499b487a"

In [5]:
# Path of the saved feature-based model for this run, under config.OUTPUT.
MODEL = "{}/models/{}_features.sav".format(config.OUTPUT, UUID)

In [6]:
# Look up the stored metadata for this run; its split_seed, label_form and
# input_form attributes are needed to load the data splits.
result = Result.query.filter(Result.uuid == UUID).first()
# Assuming SQLAlchemy Query semantics, .first() yields None when no row
# matches. Stop here with a clear message rather than failing later on an
# attribute access against None.
if result is None:
    raise LookupError("No Result row found for uuid {}".format(UUID))

In [7]:
# Load the three data splits using the seed and data forms stored with the
# run, with shuffling and augmentation switched off for every split.
split_seed = uuid.UUID(result.split_seed)
train, validation, test = data(
    seed=split_seed,
    label_form=result.label_form,
    input_form=result.input_form,
    train_shuffle=False,
    test_shuffle=False,
    validation_shuffle=False,
    train_augment=False,
    validation_augment=False,
    test_augment=False,
)

100%|██████████| 596/596 [00:00<00:00, 4317.27it/s]
  0%|          | 0/165 [00:00<?, ?it/s]

Training points = 596
Benign: 348, Intermediate: 248, Malignant: 0

Validation points = 165
Benign: 97, Intermediate: 68, Malignant: 0

Testing points = 92
Benign: 53, Intermediate: 39, Malignant: 0
bone-penn-587
bone-china-153
bone-penn-396
bone-china-128
bone-china-010
bone-hup-287
bone-penn-095
bone-penn-094
bone-china-139
bone-penn-381
bone-penn-509
bone-penn-280
bone-china-115
bone-china-014
bone-china-109
bone-china-039
bone-china-151
bone-china-174
bone-china-126
bone-hup-139
bone-china-169
bone-penn-093
bone-hup-320
bone-china-006
bone-hup-292
bone-china-134
bone-penn-068
bone-penn-651
bone-hup-272
bone-penn-337
bone-china-131
bone-china-123
bone-china-121
bone-penn-619
bone-penn-497
bone-hup-147
bone-hup-291
bone-penn-091
bone-hup-51
bone-penn-036
bone-hup-128
bone-hup-327
bone-hup-330
bone-hup-124
bone-china-160
bone-penn-037
bone-penn-527
bone-penn-274
bone-penn-456
bone-hup-266
bone-hup-302
bone-penn-385
bone-china-149
bone-penn-462
bone-penn-152
bone-china-028
bone-china-0

100%|██████████| 165/165 [00:00<00:00, 4012.45it/s]
100%|██████████| 92/92 [00:00<00:00, 4285.84it/s]


bone-penn-252
bone-hup-73
bone-hup-213
bone-penn-173
bone-china-203
bone-hup-104
bone-hup-244
bone-penn-126
bone-penn-600
bone-penn-191
bone-penn-247
bone-hup-170
bone-china-179
bone-penn-176
bone-penn-192
bone-penn-224
bone-hup-277
bone-penn-052
bone-hup-70
bone-penn-633
bone-penn-464
bone-hup-76
bone-penn-307
bone-penn-020
bone-hup-62
bone-penn-314
bone-penn-623
bone-penn-186
bone-hup-132
bone-penn-026
bone-penn-211
bone-penn-467
bone-hup-95
bone-hup-108
bone-china-176
bone-china-217
bone-hup-141
bone-penn-311
bone-penn-429
bone-penn-463
bone-penn-222
bone-penn-219
bone-hup-133
bone-penn-238
bone-penn-607
bone-hup-53
bone-penn-143
bone-china-193
bone-penn-166
bone-china-194
bone-penn-161
bone-hup-80
bone-china-007
bone-china-012
bone-china-013
bone-china-018
bone-china-063
bone-china-064
bone-china-067
bone-china-082
bone-china-090
bone-china-094
bone-china-130
bone-china-137
bone-china-141
bone-china-142
bone-china-150
bone-china-156
bone-china-157
bone-china-181
bone-china-182
bon




In [8]:
# Report the number of samples in each split.
for caption, split in (
    ("training N:", train),
    ("validation N:", validation),
    ("test N:", test),
):
    print(caption, len(split))

training N: 596
validation N: 165
test N: 92


In [9]:
# Invert the label -> index mapping so index 1 can be reported by its label.
class_inv = dict((index, name) for name, index in train.class_indices.items())

# Summing the class indices of a split gives the count that is printed next to
# the label mapped to index 1.
for caption, split in (
    ("training {}:", train),
    ("validation {}:", validation),
    ("test {}:", test),
):
    print(caption.format(class_inv[1]), sum(split.classes))

training 1.0: 248
validation 1.0: 68
test 1.0: 39


In [10]:
# Load the fitted estimator saved for this run.
# NOTE: unpickling can execute arbitrary code, so MODEL must point to a file
# from a trusted source.
with open(MODEL, 'rb') as model_file:  # context manager closes the handle promptly
    model = pickle.load(model_file)

In [11]:
# Display the loaded estimator's repr so its hyperparameters can be checked.
model

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=3, max_features=None, max_leaf_nodes=5,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=0, splitter='best')

In [12]:
# Build the model inputs and targets for each split.
# NOTE(review): features_data is a project helper not shown here; presumably it
# returns one feature set and one label set per split, in this order - confirm
# against clinical_data_models.
train_set, train_labels, val_set, val_labels, test_set, test_labels = features_data(train, validation, test)

# Train

In [19]:
# Score the training split. Only the feature set is passed to the estimator:
# the second positional parameter of predict / predict_proba on a scikit-learn
# decision tree is `check_input`, not the labels.
probabilities = model.predict_proba(train_set).tolist()
predictions = model.predict(train_set).tolist()
labels = get_labels(train)

# NOTE(review): `pm.record` exists only in papermill < 1.0; later releases
# moved this functionality to the separate scrapbook package. Run this cell
# with a compatible papermill or migrate the calls.
pm.record("train_labels", list(labels))
pm.record("train_probabilities", probabilities)
pm.record("train_predictions", predictions)

AttributeError: module 'papermill' has no attribute 'record'

# Validation

In [101]:
# Score the validation split. Only the feature set is passed to the estimator:
# the second positional parameter of predict / predict_proba on a scikit-learn
# decision tree is `check_input`, not the labels.
probabilities = model.predict_proba(val_set).tolist()
predictions = model.predict(val_set).tolist()
labels = get_labels(validation)

# NOTE(review): `pm.record` exists only in papermill < 1.0; later releases
# moved this functionality to the separate scrapbook package. Run this cell
# with a compatible papermill or migrate the calls.
pm.record("validation_labels", list(labels))
pm.record("validation_probabilities", probabilities)
pm.record("validation_predictions", predictions)

# Test

In [20]:
# Score the test split. Only the feature set is passed to the estimator:
# the second positional parameter of predict / predict_proba on a scikit-learn
# decision tree is `check_input`, not the labels.
probabilities = model.predict_proba(test_set).tolist()
predictions = model.predict(test_set).tolist()
labels = get_labels(test)

# NOTE(review): `pm.record` exists only in papermill < 1.0; later releases
# moved this functionality to the separate scrapbook package. Run this cell
# with a compatible papermill or migrate the calls.
pm.record("test_labels", list(labels))
# .tolist() already produced plain lists, so no extra list() wrapping is needed.
pm.record("test_probabilities", probabilities)
pm.record("test_predictions", predictions)