In [1]:
%matplotlib inline
import sys
import outliers
import pandas as pd
import matplotlib.pyplot as plt
from outliers import ddc_outlier
from sklearn.preprocessing import normalize
from sklearn.metrics import f1_score
import numpy as np

import warnings
# Globally silence every warning category for the rest of the session so the
# long grid-search output stays readable.
# NOTE(review): this also hides deprecation and numerical warnings raised by
# any library called afterwards -- consider narrowing the filter by category.
warnings.filterwarnings('ignore')

In [2]:
# Fetch the collection of medication names to evaluate from the project-local
# `outliers` module.
# NOTE(review): presumably these are the medications that have labelled
# overdose cases -- confirm against outliers.getOverdoseMedications.
medications = outliers.getOverdoseMedications()
# Display the shape as a quick check on how many entries were returned.
medications.shape

(72,)

In [3]:

# Grid search over distance metrics and alpha values.
#
# For every medication, fit `ddc_outlier` once per (alpha, metric) pair, score
# its predictions against the overdose labels with F1, and keep the best F1
# reached by each metric. The cell ends by displaying the mean best-F1 per
# metric across all medications.
#
# To debug on a small subset, uncomment the override below.
#medications = [
#    'DROPERIDOL 2,5 mg/ml SOL INJ',
#    'ANLODIPINO 10 mg CP',
#]

metrics = ['cosine', 'euclidean', 'mahalanobis', 'hamming', 'jaccard', 
           'manhattan', 'braycurtis', 'canberra', 'correlation', 'sqeuclidean']

# Best F1 per metric (rows) and medication (columns); filled in column by
# column as each medication finishes.
results = pd.DataFrame(
    np.zeros((len(metrics), len(medications))),
    index=metrics,
    columns=medications
)

# Candidate values passed to ddc_outlier as `alpha`.
ep_range = np.arange(0.1, 1.5, 0.03)

for med in np.asarray(medications):

    X, Y = outliers.getPrescriptions(med)
    print(med + ', Size: ', len(X), ' Overdose: ', len(Y[Y==1]))

    # F1 for every (metric, alpha) pair of the current medication.
    # The columns are labelled with the float alpha values themselves.
    results_med = pd.DataFrame(
        np.zeros((len(metrics), len(ep_range))),
        index=metrics,
        columns=ep_range
    )

    for epsilon in ep_range:
        for metric in metrics:
            clf = ddc_outlier(alpha=epsilon, metric=metric)
            clf.fit(X)
            y_pred = clf.predict(X)

            # Re-encode predictions to match the labels in Y: 1 -> 0 first,
            # then -1 -> 1. The order matters, otherwise the freshly written
            # 1s would be zeroed again.
            # NOTE(review): assumes predict() marks outliers with -1 and
            # inliers with 1 -- confirm against ddc_outlier.
            y_pred[y_pred == 1] = 0
            y_pred[y_pred == -1] = 1

            # f1_score expects (y_true, y_pred).
            f = f1_score(Y, y_pred)

            # Index with the float label itself: a str key would not match the
            # pre-allocated columns and would append a new column instead.
            results_med.loc[metric, epsilon] = f

        # Progress indicator: one entry per completed alpha value.
        sys.stdout.write(str(epsilon) + ', ')

    print('')
    # Keep, for each metric, the best F1 over all alpha values.
    results[med] = results_med.max(axis=1).values
    print('Best Metric: ', results[med].idxmax(), 'F1 = ', results[med].max())
    print('Euclidean: ', results.loc['euclidean',med],
          ', Cosine: ', results.loc['cosine',med],
          ', Hamming: ', results.loc['hamming',med],
          ', Jaccard: ', results.loc['jaccard',med])
    print('')

# Mean best-F1 of each metric across all medications (displayed as cell output).
results.mean(axis=1)

ACETAZOLAMIDA 250 mg CP, Size:  190  Overdose:  45
0.1, 0.13, 0.16, 0.19, 0.22, 0.25, 0.28, 0.31, 0.34, 0.37, 0.4, 0.43, 0.46, 0.49, 0.52, 0.55, 0.58, 0.61, 0.64, 0.67, 0.7, 0.73, 0.76, 0.79, 0.82, 0.85, 0.88, 0.91, 0.94, 0.97, 1.0, 1.03, 1.06, 1.09, 1.12, 1.15, 1.18, 1.21, 1.24, 1.27, 1.3, 1.33, 1.36, 1.39, 1.42, 1.45, 1.48, 
Best Metric:  cosine F1 =  0.75
Euclidean:  0.386266094421 , Cosine:  0.75 , Hamming:  0.569620253165 , Jaccard:  0.285714285714

ACICLOVIR 200 mg CP, Size:  2359  Overdose:  52
0.1, 0.13, 0.16, 0.19, 0.22, 0.25, 0.28, 0.31, 0.34, 0.37, 0.4, 0.43, 0.46, 0.49, 0.52, 0.55, 0.58, 0.61, 0.64, 0.67, 0.7, 0.73, 0.76, 0.79, 0.82, 0.85, 0.88, 0.91, 0.94, 0.97, 1.0, 1.03, 1.06, 1.09, 1.12, 1.15, 1.18, 1.21, 1.24, 1.27, 1.3, 1.33, 1.36, 1.39, 1.42, 1.45, 1.48, 
Best Metric:  cosine F1 =  0.488262910798
Euclidean:  0.165605095541 , Cosine:  0.488262910798 , Hamming:  0.256790123457 , Jaccard:  0.310447761194

ALBENDAZOL 400 mg CP, Size:  2658  Overdose:  53
0.1, 0.13, 0.16,

0.1, 0.13, 0.16, 0.19, 0.22, 0.25, 0.28, 0.31, 0.34, 0.37, 0.4, 0.43, 0.46, 0.49, 0.52, 0.55, 0.58, 0.61, 0.64, 0.67, 0.7, 0.73, 0.76, 0.79, 0.82, 0.85, 0.88, 0.91, 0.94, 0.97, 1.0, 1.03, 1.06, 1.09, 1.12, 1.15, 1.18, 1.21, 1.24, 1.27, 1.3, 1.33, 1.36, 1.39, 1.42, 1.45, 1.48, 
Best Metric:  jaccard F1 =  0.65
Euclidean:  0.00922367409685 , Cosine:  0.0287610619469 , Hamming:  0.0467625899281 , Jaccard:  0.65

DIAZEPAM 10 mg CP, Size:  9020  Overdose:  52
0.1, 0.13, 0.16, 0.19, 0.22, 0.25, 0.28, 0.31, 0.34, 0.37, 0.4, 0.43, 0.46, 0.49, 0.52, 0.55, 0.58, 0.61, 0.64, 0.67, 0.7, 0.73, 0.76, 0.79, 0.82, 0.85, 0.88, 0.91, 0.94, 0.97, 1.0, 1.03, 1.06, 1.09, 1.12, 1.15, 1.18, 1.21, 1.24, 1.27, 1.3, 1.33, 1.36, 1.39, 1.42, 1.45, 1.48, 
Best Metric:  sqeuclidean F1 =  0.732673267327
Euclidean:  0.140468227425 , Cosine:  0.676923076923 , Hamming:  0.0941619585687 , Jaccard:  0.527918781726

DICLOFENACO 50 mg CP, Size:  5559  Overdose:  14
0.1, 0.13, 0.16, 0.19, 0.22, 0.25, 0.28, 0.31, 0.34, 0.37,

HIDROXIDO DE MAGNESIO 8 % SUSP ORAL, Size:  1777  Overdose:  299
0.1, 0.13, 0.16, 0.19, 0.22, 0.25, 0.28, 0.31, 0.34, 0.37, 0.4, 0.43, 0.46, 0.49, 0.52, 0.55, 0.58, 0.61, 0.64, 0.67, 0.7, 0.73, 0.76, 0.79, 0.82, 0.85, 0.88, 0.91, 0.94, 0.97, 1.0, 1.03, 1.06, 1.09, 1.12, 1.15, 1.18, 1.21, 1.24, 1.27, 1.3, 1.33, 1.36, 1.39, 1.42, 1.45, 1.48, 
Best Metric:  cosine F1 =  0.430215827338
Euclidean:  0.315666860804 , Cosine:  0.430215827338 , Hamming:  0.421126760563 , Jaccard:  0.227146814404

HIOSCINA (BUTILBROMETO DE ESCOPOLAMINA) 20 mg/ml SOL INJ, Size:  22064  Overdose:  747
0.1, 0.13, 0.16, 0.19, 0.22, 0.25, 0.28, 0.31, 0.34, 0.37, 0.4, 0.43, 0.46, 0.49, 0.52, 0.55, 0.58, 0.61, 0.64, 0.67, 0.7, 0.73, 0.76, 0.79, 0.82, 0.85, 0.88, 0.91, 0.94, 0.97, 1.0, 1.03, 1.06, 1.09, 1.12, 1.15, 1.18, 1.21, 1.24, 1.27, 1.3, 1.33, 1.36, 1.39, 1.42, 1.45, 1.48, 
Best Metric:  hamming F1 =  0.910975609756
Euclidean:  0.562448304384 , Cosine:  0.668316831683 , Hamming:  0.910975609756 , Jaccard:  0.90600

0.1, 0.13, 0.16, 0.19, 0.22, 0.25, 0.28, 0.31, 0.34, 0.37, 0.4, 0.43, 0.46, 0.49, 0.52, 0.55, 0.58, 0.61, 0.64, 0.67, 0.7, 0.73, 0.76, 0.79, 0.82, 0.85, 0.88, 0.91, 0.94, 0.97, 1.0, 1.03, 1.06, 1.09, 1.12, 1.15, 1.18, 1.21, 1.24, 1.27, 1.3, 1.33, 1.36, 1.39, 1.42, 1.45, 1.48, 
Best Metric:  braycurtis F1 =  0.935483870968
Euclidean:  0.0969494358546 , Cosine:  0.180124223602 , Hamming:  0.216822429907 , Jaccard:  0.229702970297

PETIDINA (MEPERIDINA) 100 mg/ 2 ml SOL INJ, Size:  224  Overdose:  79
0.1, 0.13, 0.16, 0.19, 0.22, 0.25, 0.28, 0.31, 0.34, 0.37, 0.4, 0.43, 0.46, 0.49, 0.52, 0.55, 0.58, 0.61, 0.64, 0.67, 0.7, 0.73, 0.76, 0.79, 0.82, 0.85, 0.88, 0.91, 0.94, 0.97, 1.0, 1.03, 1.06, 1.09, 1.12, 1.15, 1.18, 1.21, 1.24, 1.27, 1.3, 1.33, 1.36, 1.39, 1.42, 1.45, 1.48, 
Best Metric:  correlation F1 =  0.969325153374
Euclidean:  0.752380952381 , Cosine:  0.649484536082 , Hamming:  0.752380952381 , Jaccard:  0.628571428571

POLIMIXINA B 500.000 UI SOL INJ, Size:  7257  Overdose:  439
0.1

VANCOMICINA 500 mg/ 100 ml SOL INJ SISTEMA FECHADO, Size:  388  Overdose:  116
0.1, 0.13, 0.16, 0.19, 0.22, 0.25, 0.28, 0.31, 0.34, 0.37, 0.4, 0.43, 0.46, 0.49, 0.52, 0.55, 0.58, 0.61, 0.64, 0.67, 0.7, 0.73, 0.76, 0.79, 0.82, 0.85, 0.88, 0.91, 0.94, 0.97, 1.0, 1.03, 1.06, 1.09, 1.12, 1.15, 1.18, 1.21, 1.24, 1.27, 1.3, 1.33, 1.36, 1.39, 1.42, 1.45, 1.48, 
Best Metric:  correlation F1 =  0.666666666667
Euclidean:  0.130120481928 , Cosine:  0.473469387755 , Hamming:  0.659090909091 , Jaccard:  0.315789473684

VARFARINA 5 mg CP, Size:  6393  Overdose:  121
0.1, 0.13, 0.16, 0.19, 0.22, 0.25, 0.28, 0.31, 0.34, 0.37, 0.4, 0.43, 0.46, 0.49, 0.52, 0.55, 0.58, 0.61, 0.64, 0.67, 0.7, 0.73, 0.76, 0.79, 0.82, 0.85, 0.88, 0.91, 0.94, 0.97, 1.0, 1.03, 1.06, 1.09, 1.12, 1.15, 1.18, 1.21, 1.24, 1.27, 1.3, 1.33, 1.36, 1.39, 1.42, 1.45, 1.48, 
Best Metric:  hamming F1 =  0.9453125
Euclidean:  0.0785185185185 , Cosine:  0.371929824561 , Hamming:  0.9453125 , Jaccard:  0.930769230769

VITAMINAS DO COMPLEXO

cosine         0.512310
euclidean      0.335551
mahalanobis    0.359594
hamming        0.415251
jaccard        0.566440
manhattan      0.319861
braycurtis     0.384670
canberra       0.371802
correlation    0.377044
sqeuclidean    0.368376
dtype: float64