In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline  
from IPython.display import HTML

In [2]:
from sklearn.metrics import f1_score, roc_auc_score, average_precision_score, precision_score, recall_score
import pandas
import numpy as np
import papermill as pm
import json
import matplotlib.pyplot as plt
import os
import uuid
from config import config
from db import Result
import ast
import math
import pickle
from clinical_data_models import features_data
import scrapbook as sb

# Show floats in pandas output with thousands separators and three decimals.
pandas.set_option("display.float_format", "{:,.3f}".format)

In [3]:
from evaluate import plot_learning_curve, plot_accuracy_curve, load, get_results, get_labels, transform_binary_probabilities, transform_binary_predictions, calculate_accuracy_loss, plot_confusion_matrix, plot_precision_recall, plot_roc_curve, calculate_pr_auc, calculate_confusion_matrix_stats, calculate_confusion_matrix, plot_precision_recall

In [9]:
from data_gen import data

In [15]:
# Identifier of the stored run to evaluate: it selects the Result row queried
# below and is embedded in the path of the pickled model file.
# NOTE(review): papermill is imported, so this is presumably meant to be
# overridden as a notebook parameter — confirm.
UUID = "82dc825c-9b8a-4e07-aa87-eca1a7ef2a59"

In [16]:
# Path of the pickled model for this run: <config.OUTPUT>/models/<UUID>_features.sav
model_path_template = "{}/models/{}_features.sav"
MODEL = model_path_template.format(config.OUTPUT, UUID)

In [7]:
# Fetch the stored Result row whose uuid matches the run being evaluated.
result = Result.query.filter(Result.uuid == UUID).first()

# .first() yields None when nothing matches; stop here with a clear message
# instead of failing later with an AttributeError on result.split_seed.
if result is None:
    raise LookupError("no Result row found for uuid {}".format(UUID))

In [10]:
# Build the train/validation/test/external splits from the split seed, label
# form and input form recorded on the Result row. Shuffling and augmentation
# are switched off for every split that exposes those flags.
split_seed = uuid.UUID(result.split_seed)
train, validation, test, external = data(
    seed=split_seed,
    label_form=result.label_form,
    input_form=result.input_form,
    train_shuffle=False,
    test_shuffle=False,
    validation_shuffle=False,
    train_augment=False,
    validation_augment=False,
    test_augment=False,
)

 49%|████▉     | 344/697 [00:00<00:00, 3437.66it/s]

                outcome_pos  outcome_neg  outcome_3    age  sex  \
patientID                                                         
bone-brown-1              0            0          0 52.000    1   
bone-brown-10             0            0          0 52.000    0   
bone-brown-100            1            1          2 22.000    1   
bone-brown-101            0            0          0 88.000    0   
bone-brown-102            0            0          0 56.000    1   
...                     ...          ...        ...    ...  ...   
bone-penn-650             1            1          2 20.000    1   
bone-penn-651             0            0          0 16.000    1   
bone-penn-652             1            1          2  8.000    0   
bone-penn-653             1            1          2 16.000    1   
bone-penn-654             1            1          2  3.000    1   

                                                         location        sort  
patientID                                       

100%|██████████| 697/697 [00:00<00:00, 3431.92it/s]
100%|██████████| 196/196 [00:00<00:00, 3427.36it/s]
100%|██████████| 93/93 [00:00<00:00, 3457.49it/s]
100%|██████████| 97/97 [00:00<00:00, 3485.43it/s]

bone-penn-339
bone-penn-340
bone-penn-340
bone-penn-341
bone-penn-341
bone-penn-342
bone-penn-342
bone-penn-343
bone-penn-343
bone-penn-345
bone-penn-345
bone-penn-346
bone-penn-347
bone-penn-348
bone-penn-349
bone-penn-356
bone-penn-362
bone-penn-363
bone-penn-364
bone-penn-367
bone-penn-368
bone-penn-369
bone-penn-370
bone-penn-372
bone-penn-374
bone-penn-375
bone-penn-379
bone-penn-380
bone-penn-382
bone-penn-384
bone-penn-385
bone-penn-386
bone-penn-387
bone-penn-388
bone-penn-389
bone-penn-391
bone-penn-392
bone-penn-396
bone-penn-397
bone-penn-405
bone-penn-413
bone-penn-425
bone-penn-429
bone-penn-446
bone-penn-453
bone-penn-454
bone-penn-457
bone-penn-458
bone-penn-459
bone-penn-460
bone-penn-461
bone-penn-463
bone-penn-465
bone-penn-467
bone-penn-471
bone-penn-473
bone-penn-476
bone-penn-478
bone-penn-480
bone-penn-482
bone-penn-484
bone-penn-487
bone-penn-489
bone-penn-490
bone-penn-494
bone-penn-495
bone-penn-496
bone-penn-497
bone-penn-503
bone-penn-505
bone-penn-506
bone-p




In [11]:
# Report how many samples each split contains, one line per split.
for size_label, split in (
    ("training N:", train),
    ("validation N:", validation),
    ("test N:", test),
    ("external N:", external),
):
    print(size_label, len(split))

training N: 697
validation N: 196
test N: 93
external N: 97


In [10]:
# Invert class_indices so a class index maps back to its original label.
class_inv = {index: name for name, index in train.class_indices.items()}

# Label of class index 1, used in each printed heading.
positive_name = class_inv[1]

# Summing .classes gives the per-split total printed next to that label.
# NOTE(review): this equals the positive count only if classes are 0/1 — confirm.
for heading, split in (
    ("training {}:", train),
    ("validation {}:", validation),
    ("test {}:", test),
    ("external {}:", external),
):
    print(heading.format(positive_name), sum(split.classes))

training 1.0: 250
validation 1.0: 65
test 1.0: 39


In [17]:
# Load the fitted estimator saved for this run. The context manager closes the
# file handle as soon as unpickling finishes instead of leaving it open.
# NOTE(review): pickle.load can execute arbitrary code while loading; only
# point MODEL at artifacts produced by a trusted source.
with open(MODEL, 'rb') as model_file:
    model = pickle.load(model_file)

In [12]:
# Display the unpickled estimator's repr to confirm which model was loaded.
model

LogisticRegression(random_state=0)

In [19]:
# Unpack the eight values features_data returns, in split order: one *_set and
# one *_labels value for each of train, validation, test and external.
(
    train_set, train_labels,
    val_set, val_labels,
    test_set, test_labels,
    external_set, external_labels,
) = features_data(train, validation, test, external)

# Train

In [21]:
# Score the training split: keep column 1 of predict_proba as each sample's
# probability, alongside the predicted classes and the labels from get_labels.
probabilities = model.predict_proba(train_set)[:, 1].tolist()
predictions = model.predict(train_set).tolist()
labels = get_labels(train)

# Record the three results as scraps, in the same order as before.
train_scraps = {
    "train_labels": list(labels),
    "train_probabilities": probabilities,
    "train_predictions": predictions,
}
for scrap_name, scrap_value in train_scraps.items():
    sb.glue(scrap_name, scrap_value)

# Validation

In [17]:
# Score the validation split: keep column 1 of predict_proba as each sample's
# probability, alongside the predicted classes and the labels from get_labels.
probabilities = model.predict_proba(val_set)[:, 1].tolist()
predictions = model.predict(val_set).tolist()
labels = get_labels(validation)

# Record the three results as scraps, in the same order as before.
validation_scraps = {
    "validation_labels": list(labels),
    "validation_probabilities": probabilities,
    "validation_predictions": predictions,
}
for scrap_name, scrap_value in validation_scraps.items():
    sb.glue(scrap_name, scrap_value)

# Test

In [24]:
# Score the test split: keep column 1 of predict_proba as each sample's
# probability, alongside the predicted classes and the labels from get_labels.
probabilities = model.predict_proba(test_set)[:, 1].tolist()
predictions = model.predict(test_set).tolist()
labels = get_labels(test)

# Record the three results as scraps, in the same order as before; each value
# is passed as a fresh list.
test_scraps = {
    "test_labels": list(labels),
    "test_probabilities": list(probabilities),
    "test_predictions": list(predictions),
}
for scrap_name, scrap_value in test_scraps.items():
    sb.glue(scrap_name, scrap_value)

[0.3847973806081672, 0.46907569270318367, 0.4097402374733062, 0.3397373025383994, 0.46573613771013805, 0.369511851516447, 0.39433425810294453, 0.322674007037611, 0.3388778818495049, 0.3298260866186854, 0.4060492977600653, 0.3776612818370922, 0.6283236449460957, 0.4315429418327052, 0.35435378810978246, 0.3121004528405338, 0.5784433171692547, 0.34523268018271464, 0.35249490909392756, 0.6071984993478196, 0.6421775771728092, 0.32579727298416905, 0.5099511855900396, 0.5015745554285314, 0.6078789738745284, 0.3544836482075422, 0.5259550019849982, 0.34156439367571395, 0.34879056283582754, 0.4308429745661629, 0.3802804865143658, 0.3506404856248613, 0.3116680986380226, 0.5517595705516575, 0.3068852836822443, 0.5398636687885, 0.2961824151665477, 0.5632973414478315, 0.5772506831963766, 0.39799006439879553, 0.3120349810686449, 0.6717051285014299, 0.33778800245959634, 0.3341584493676332, 0.3051577236219034, 0.30000797130223583, 0.37836530410243696, 0.3116680986380226, 0.4328388304264059, 0.579234530

# External

In [22]:
# Score the external split: keep column 1 of predict_proba as each sample's
# probability, alongside the predicted classes and the labels from get_labels.
probabilities = model.predict_proba(external_set)[:, 1].tolist()
predictions = model.predict(external_set).tolist()
labels = get_labels(external)

# Record the three results as scraps, in the same order as before; each value
# is passed as a fresh list.
external_scraps = {
    "external_labels": list(labels),
    "external_probabilities": list(probabilities),
    "external_predictions": list(predictions),
}
for scrap_name, scrap_value in external_scraps.items():
    sb.glue(scrap_name, scrap_value)