In [36]:
import json
import pickle
import numpy as np
from operator import itemgetter
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve
from sklearn.externals import joblib

In [37]:
# Load the pickled inputs from disk.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
def _load_pickle(path):
    """Return the object unpickled from the binary file at ``path``."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


visit_mappings = _load_pickle('clean_data_leap/visit_mappings.pkl')
sorted_drug_list = _load_pickle('clean_data_leap/sorted_drug_list.pkl')
matrix = _load_pickle('logistic_models_top538/matrix.pkl')
train = _load_pickle('logistic_models_top538/train.pkl')
test = _load_pickle('logistic_models_top538/test.pkl')

In [38]:
# One-hot encoding and build vector
def feature_vector(visit_mappings, label):
    """Build a 0/1 indicator vector marking which visits contain a drug.

    Args:
        visit_mappings: Container indexable by ``0 .. len(visit_mappings) - 1``
            whose entries each provide a ``'drug_list'`` collection.
        label: Indexable whose first element is the drug looked up in every
            visit's ``'drug_list'``.

    Returns:
        list: One int per visit; 1 where ``label[0]`` is in that visit's
        ``'drug_list'``, otherwise 0.
    """
    return [
        1 if label[0] in visit_mappings[visit_idx]['drug_list'] else 0
        for visit_idx in range(len(visit_mappings))
    ]

In [39]:
# Find the first position of the num that is larger than or equal to target
def binary_search(nums, target):
    """Locate the first element of an ascending sequence that is >= target.

    Args:
        nums: Non-empty indexable sequence, expected to be sorted ascending.
        target: Value compared against the elements with ``>=``.

    Returns:
        int: Index of the first element ``>= target``. When no element
        qualifies, the last index (``len(nums) - 1``) is returned.

    Raises:
        IndexError: If ``nums`` is empty.
    """
    lo = 0
    hi = len(nums) - 1

    # Narrow [lo, hi] until the bounds are adjacent (or identical), keeping
    # `hi` on an element known to satisfy the condition whenever one is found.
    while hi - lo > 1:
        probe = lo + (hi - lo) // 2
        lo, hi = (lo, probe) if nums[probe] >= target else (probe, hi)

    # Prefer the lower bound; otherwise `hi` is either the answer or the
    # last-index fallback.
    return lo if nums[lo] >= target else hi

In [56]:
# Load each saved model, compute its ROC curve on the test visits, and keep the
# threshold at the first ROC point whose TPR reaches TARGET_TPR. The full
# FPR / TPR / threshold arrays are also dumped to a text report.
NUM_MODELS = 50
TARGET_TPR = 0.60

selected_thresholds = []
X_test = np.matrix([matrix[visit_idx] for visit_idx in test])
with open('recommendation_leap/threshold_50_60.txt', 'w') as report:
    for model_idx in range(NUM_MODELS):
        lr = joblib.load("logistic_models_top538/model" + str(model_idx) + ".m")
        # NOTE(review): model `model_idx` is paired with drug `model_idx - 1`,
        # so model 0 uses sorted_drug_list[-1] (the last entry). Confirm this
        # matches the indexing used when the models were trained.
        drug_vector = feature_vector(visit_mappings, sorted_drug_list[model_idx - 1])
        y_test = [drug_vector[visit_idx] for visit_idx in test]
        probs = lr.predict_proba(X_test)
        # Column 1 of each probability row is the score handed to roc_curve.
        y_score = [row[1] for row in probs]
        fpr, tpr, thresholds = roc_curve(y_test, y_score, pos_label=1)
        # binary_search falls back to the last index if TPR never reaches the target.
        index = binary_search(list(tpr), TARGET_TPR)
        selected_thresholds.append(thresholds[index])
        for heading, values in (('\nFPR:', fpr), ('\nTPR:', tpr), ('\nTHRESHOLD:', thresholds)):
            report.write(heading)
            report.write(''.join('%.4f; ' % value for value in values))
        report.write('\n-------------------------------------------------------\n')



In [57]:
# Persist the selected per-model thresholds as a pickle.
with open('recommendation_leap/thresholds_50_60.pkl', 'wb') as out_file:
    pickle.dump(selected_thresholds, out_file)