Concordance between resfinder and resistance/susceptible status

In [161]:
# Load the "autoreload" extension
%load_ext autoreload

# always reload modules marked with "%aimport"
%autoreload 1

import os
import sys
import dotenv

from sklearn.externals import joblib

# add the 'src' directory as one where we can import modules
project_dir = os.path.join(os.getcwd(), os.pardir)
src_dir = os.path.join(project_dir, 'src')
sys.path.append(src_dir)

# import my method from the source code
%aimport data.tools

# load env
%load_ext dotenv
dotenv_path = os.path.join(project_dir, '.env')
dotenv.load_dotenv(dotenv_path)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
The dotenv extension is already loaded. To reload it, use:
  %reload_ext dotenv


True

In [162]:
# Load master Xlsx sheet
import pandas as pd
amrfile = os.path.join(os.environ.get('PRDATA'),'raw','GenotypicAMR_Master.xlsx')
amrdf = pd.read_excel(amrfile)

amrdf = amrdf.replace(r'\s+', np.nan, regex=True)
amrdf = amrdf.replace(r'-', np.nan, regex=True)

In [176]:
# Compute accuracy of a single gene predicting resistant/susceptible
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import numpy as np

# Beta lactamases
bladf = amrdf.filter(regex=("bla*"))

y_true = amrdf['R_AMP'].as_matrix().astype(float)
y_true[y_true == 0.5] = 0 # Set intermediate to susceptible
missing = np.isnan(y_true)

# Compute accuracy for each individual beta-lactamase
for g in bladf:
    y_pred = bladf[g].as_matrix().astype(float)
    missing2 = np.isnan(y_pred)
    
    y_true2 = y_true[~missing & ~missing2]
    y_pred2 = y_pred[~missing & ~missing2]
    
    print()
    print("\n------------------------------\n"+g+":")
    print("accuracy: "+str(accuracy_score(y_true2,y_pred2)))
    print(classification_report(y_true2, y_pred2))
    tn, fp, fn, tp = confusion_matrix(y_true2, y_pred2).ravel()
    print("TN (susceptible): "+str(tn))
    print("FN (resistant predicted as susceptible): "+str(fn))
    print("FP (susceptible predicted as resistant): "+str(fp))
    print("TP (resistant): "+str(tp))
    




------------------------------
blaCARB-2|M69058:
accuracy: 0.6381811054488435
             precision    recall  f1-score   support

        0.0       0.63      1.00      0.78      1602
        1.0       0.96      0.03      0.06       949

avg / total       0.76      0.64      0.51      2551

TN (susceptible): 1601
FN (resistant predicted as susceptible): 922
FP (susceptible predicted as resistant): 1
TP (resistant): 27


------------------------------
blaCMY-17|AY513266:
accuracy: 0.6283810270482164
             precision    recall  f1-score   support

        0.0       0.63      1.00      0.77      1602
        1.0       1.00      0.00      0.00       949

avg / total       0.77      0.63      0.49      2551

TN (susceptible): 1602
FN (resistant predicted as susceptible): 948
FP (susceptible predicted as resistant): 0
TP (resistant): 1


------------------------------
blaCMY-2|X91840:
accuracy: 0.7840062720501764
             precision    recall  f1-score   support

        0.0     

TN (susceptible): 1602
FN (resistant predicted as susceptible): 917
FP (susceptible predicted as resistant): 0
TP (resistant): 32


------------------------------
blaTEM-1B|JF910132:
accuracy: 0.7745981967855743
             precision    recall  f1-score   support

        0.0       0.74      1.00      0.85      1602
        1.0       1.00      0.40      0.57       949

avg / total       0.83      0.77      0.74      2551

TN (susceptible): 1601
FN (resistant predicted as susceptible): 574
FP (susceptible predicted as resistant): 1
TP (resistant): 375


------------------------------
blaTEM-1C|FJ560503:
accuracy: 0.6342610740885927
             precision    recall  f1-score   support

        0.0       0.63      1.00      0.77      1602
        1.0       1.00      0.02      0.03       949

avg / total       0.77      0.63      0.50      2551

TN (susceptible): 1602
FN (resistant predicted as susceptible): 933
FP (susceptible predicted as resistant): 0
TP (resistant): 16


-------------

In [185]:
# Compute accuracy for any beta-lactamase
anybla = np.zeros(bladf.shape[0])
for g in bladf:
    y = bladf[g].as_matrix().astype(float)
    anybla = np.logical_or(anybla, y)

anybla = anybla.astype(float)
missing2 = np.isnan(anybla)



print('ALL beta-lactamase:')

for g in ['R_AMC', 'R_AMP', 'R_TIO', 'R_FOX', 'R_CRO']:
    y_true = amrdf[g].as_matrix().astype(float)
    y_true[y_true == 0.5] = 0 # Set intermediate to susceptible
    missing = np.isnan(y_true)
    y_true2 = y_true[~missing & ~missing2]
    y_pred = anybla[~missing & ~missing2]
    print()
    print("------------------------------\n"+g+":")
    print("accuracy: "+str(accuracy_score(y_true2,y_pred)))
    print(classification_report(y_true2, y_pred))
    tn, fp, fn, tp = confusion_matrix(y_true2, y_pred).ravel()
    print("TN (susceptible): "+str(tn))
    print("FN (resistant predicted as susceptible): "+str(fn))
    print("FP (susceptible predicted as resistant): "+str(fp))
    print("TP (resistant): "+str(tp))
    


ALL beta-lactamase:

------------------------------
R_AMC:
accuracy: 0.8229984301412873
             precision    recall  f1-score   support

        0.0       0.99      0.79      0.88      2071
        1.0       0.51      0.98      0.67       477

avg / total       0.90      0.82      0.84      2548

TN (susceptible): 1629
FN (resistant predicted as susceptible): 9
FP (susceptible predicted as resistant): 442
TP (resistant): 468

------------------------------
R_AMP:
accuracy: 0.9843198745589965
             precision    recall  f1-score   support

        0.0       0.98      1.00      0.99      1602
        1.0       1.00      0.96      0.98       949

avg / total       0.98      0.98      0.98      2551

TN (susceptible): 1600
FN (resistant predicted as susceptible): 38
FP (susceptible predicted as resistant): 2
TP (resistant): 911

------------------------------
R_TIO:
accuracy: 0.8559794256322332
             precision    recall  f1-score   support

        0.0       1.00      0.8

In [443]:
# Since I don't know, use a decision tree to tell me which genes 
# and in what combinations are important for classifying resistance
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.feature_selection import SelectFromModel
import graphviz

accuracies = {}

genes = ["ARR-6|JF922883","QnrA1|AY070235","QnrB19|HM146784","QnrB1|EF682133",
         "QnrB20|AB379831","QnrB4|DQ303921","QnrB5|DQ303919","QnrB6|EF523819",
         "QnrS1|AB187515","QnrS2|JF261185","aac(3)-IIa|X51534","aac(3)-IId|EU022314",
         "aac(3)-IVa|X01385","aac(3)-Id|AB114632","aac(3)-VIa|M88012",
         "aac(6)-IIc|AF162771","aac(6)-Ib|M21682","aac(6)Ib-cr|DQ303918",
         "aac(6)Ib-cr|EF636461","aacA4|KM278199","aadA12|AY665771","aadA13|AY713504",
         "aadA16|EU675686","aadA1|JQ414041","aadA1|JQ480156","aadA1|JX185132",
         "aadA2|JQ364967","aadA5|AF137361","aadA7|AF224733","ant(2)-Ia|M86913",
         "ant(3)-Ia|X02340","aph(3)-IIa|X57709","aph(3)-Ia|V00359","aph(3)-Ib|AF321551",
         "aph(4)-Ia|V01499","aph(6)-Ic|X01702","aph(6)-Id|M28829","armA|AY220558",
         "blaCARB-2|M69058","blaCMY-17|AY513266","blaCMY-2|X91840","blaCMY-42|HM146927",
         "blaCMY-4|AF420597","blaCMY-6|AJ011293","blaCTX-M-14b|DQ359215","blaCTX-M-14|AF252622",
         "blaCTX-M-15|DQ302097","blaCTX-M-1|DQ915955","blaCTX-M-27|EU916273",
         "blaCTX-M-55|GQ456159","blaCTX-M-65|GQ456158","blaCTX-M-8|AF189721",
         "blaDHA-1|Y16410","blaFOX-5|AY007369","blaKPC-2|AY034847","blaKPC-3|HM769262",
         "blaKPC-4|FJ473382","blaNDM-1|FN396876","blaNDM-5|JN104597","blaNDM-6|JN967644",
         "blaNDM-7|JX262694","blaOXA-181|HM992946","blaOXA-1|J02967","blaOXA-2|DQ310703",
         "blaOXA-9|JF703130","blaSHV-105|FJ194944","blaSHV-12|AF462395","blaSHV-2|GU064394",
         "blaTEM-199|JX050178","blaTEM-1A|HM749966","blaTEM-1B|JF910132","blaTEM-1C|FJ560503",
         "blaTEM-1D|AF188200","blaTEM-206|KC783461","blaTEM-33|GU371926","blaTEM-52B|AF027199",
         "catA1|V00622","catB3|AJ009818","cmlA1|M64556","dfrA12|AB571791","dfrA14|DQ388123",
         "dfrA15|HM449019","dfrA17|FJ460238","dfrA18|AJ310778","dfrA1|JQ690541","dfrA1|X00926",
         "dfrA25|DQ267940","dfrA29|AM237806","dfrA5|X12868","dfrA7|JF806498","dfrA8|U10186",
         "dfrB3|X72585","erm(B)|AF242872","erm(B)|JN899585","floR|AF118107","fosA7|LAPJ01000014",
         "mph(A)|D16251","mph(A)|U36578","mph(B)|D85892","mph(E)|EU294228","msr(E)|EU294228",
         "oqxA|EU370913","oqxB|EU370913","rmtB|AB103506","rmtC|AB194779","strA|M96392","sul1|AY963803",
         "sul1|CP002151","sul2|FN995456","sul2|GQ421466","sul2|HQ840942","sul3|AJ459418","tet(A)|AJ517790",
         "tet(A)|AY196695","tet(B)|AF326777","tet(B)|AP000342","tet(C)|AY046276","tet(C)|NC_003213",
         "tet(D)|AF467077","tet(G)|AF133140","tet(M)|U58985","tet(O)|M18896"]

genedf = amrdf[genes]
bla = genedf.filter(regex='bla*')

for drug, i_d in [['R_AMC', 0.001], ['R_AMP', 0.001], ['R_TIO', 0.001], ['R_FOX', 0.001], ['R_CRO', 0.001], ['phenotypic_aztreonam', 0.01], 
    ['phenotypic_cefazolin', 0.001], ['phenotypic_cefepime', 0.02], ['phenotypic_cefotaxime', 0.002], ['phenotypic_ceftazidime', 0.01]]: 
    y = amrdf[drug]
    missing = y.isnull()

    X = bla.values[~missing,:].astype(int)
    y = y[~missing].astype(int)

    clf = DecisionTreeClassifier(min_impurity_decrease=i_d)
    clf.fit(X, y)

    # dot_data = export_graphviz(clf, out_file=None, 
    #     feature_names=bla.columns,  
    #     class_names=['sus','res'],  
    #     filled=True, rounded=True,  
    #     special_characters=True)
    # graph = graphviz.Source(dot_data)
    # graph

    importantblagenes = bla.columns.values[clf.feature_importances_ > np.median(clf.feature_importances_)]
    
    impbla = np.zeros(bla.shape[0])
    for g in importantblagenes:
        c = bladf[g].as_matrix().astype(float)
        impbla = np.logical_or(impbla, c)
    y_pred = impbla.astype(int)
    y_pred = y_pred[~missing]

    print("\nDRUG: "+drug)
    print("\n------------------------------\nDecisionTree Set {}\n".format(','.join(importantblagenes)))
    accuracies[drug] = accuracy_score(y,y_pred)
    print("accuracy: "+str(accuracies[drug]))
    print(classification_report(y, y_pred))
    tn, fp, fn, tp = confusion_matrix(y, y_pred).ravel()
    print("TN (susceptible): "+str(tn))
    print("FN (resistant predicted as susceptible): "+str(fn))
    print("FP (susceptible predicted as resistant): "+str(fp))
    print("TP (resistant): "+str(tp))



DRUG: R_AMC

------------------------------
DecisionTree Set blaCMY-2|X91840,blaCMY-42|HM146927,blaKPC-2|AY034847,blaKPC-3|HM769262,blaNDM-5|JN104597,blaOXA-1|J02967

accuracy: 0.967032967032967
             precision    recall  f1-score   support

          0       0.98      0.98      0.98      2071
          1       0.90      0.93      0.91       477

avg / total       0.97      0.97      0.97      2548

TN (susceptible): 2022
FN (resistant predicted as susceptible): 35
FP (susceptible predicted as resistant): 49
TP (resistant): 442

DRUG: R_AMP

------------------------------
DecisionTree Set blaCARB-2|M69058,blaCMY-2|X91840,blaCTX-M-14|AF252622,blaCTX-M-15|DQ302097,blaCTX-M-1|DQ915955,blaCTX-M-27|EU916273,blaCTX-M-55|GQ456159,blaKPC-2|AY034847,blaOXA-1|J02967,blaOXA-2|DQ310703,blaTEM-1A|HM749966,blaTEM-1B|JF910132,blaTEM-1C|FJ560503,blaTEM-33|GU371926

accuracy: 0.9815758526068209
             precision    recall  f1-score   support

          0       0.97      1.00      0.99     

In [444]:
genedf = amrdf[genes]
for drug in ['R_TET', 'R_AZM', 'R_CIP']: 
    y = amrdf[drug]
    missing = y.isnull()

    X = genedf.values[~missing,:].astype(int)
    y = y[~missing].astype(int)

    clf = DecisionTreeClassifier(min_impurity_decrease=0.001)
    clf.fit(X, y)

    importantgenes = genedf.columns.values[clf.feature_importances_ > np.median(clf.feature_importances_)]
    
    imp = np.zeros(genedf.shape[0])
    for g in importantgenes:
        c = genedf[g].as_matrix().astype(float)
        imp = np.logical_or(imp, c)
    y_pred = imp.astype(int)
    y_pred = y_pred[~missing]

    print("\nDRUG: "+drug)
    print("\n------------------------------\nDecisionTree Set {}\n".format(','.join(importantgenes)))
    accuracies[drug] = accuracy_score(y,y_pred)
    print("accuracy: "+str(accuracies[drug]))
    print(classification_report(y, y_pred))
    tn, fp, fn, tp = confusion_matrix(y, y_pred).ravel()
    print("TN (susceptible): "+str(tn))
    print("FN (resistant predicted as susceptible): "+str(fn))
    print("FP (susceptible predicted as resistant): "+str(fp))
    print("TP (resistant): "+str(tp))


DRUG: R_TET

------------------------------
DecisionTree Set aph(3)-Ib|AF321551,tet(A)|AJ517790,tet(A)|AY196695,tet(B)|AF326777,tet(B)|AP000342,tet(C)|AY046276,tet(C)|NC_003213,tet(D)|AF467077,tet(G)|AF133140

accuracy: 0.9849026618990863
             precision    recall  f1-score   support

          0       0.99      0.97      0.98      1052
          1       0.98      0.99      0.99      1465

avg / total       0.98      0.98      0.98      2517

TN (susceptible): 1025
FN (resistant predicted as susceptible): 11
FP (susceptible predicted as resistant): 27
TP (resistant): 1454

DRUG: R_AZM

------------------------------
DecisionTree Set mph(A)|U36578

accuracy: 0.9991126885536823
             precision    recall  f1-score   support

          0       1.00      1.00      1.00      2246
          1       1.00      0.75      0.86         8

avg / total       1.00      1.00      1.00      2254

TN (susceptible): 2246
FN (resistant predicted as susceptible): 2
FP (susceptible predicted 

In [445]:
genedf = amrdf[genes]
for drug,i_d in [['R_FIS', 0.01],['R_SXT', 0.01]]: 
    y = amrdf[drug]
    missing = y.isnull()

    X = genedf.values[~missing,:].astype(int)
    y = y[~missing].astype(int)

    clf = DecisionTreeClassifier(min_impurity_decrease=i_d)
    clf.fit(X, y)

    importantgenes = genedf.columns.values[clf.feature_importances_ > np.median(clf.feature_importances_)]
    
    imp = np.zeros(genedf.shape[0])
    for g in importantgenes:
        c = genedf[g].as_matrix().astype(float)
        imp = np.logical_or(imp, c)
    y_pred = imp.astype(int)
    y_pred = y_pred[~missing]

    print("\nDRUG: "+drug)
    print("\n------------------------------\nDecisionTree Set {}\n".format(','.join(importantgenes)))
    accuracies[drug] = accuracy_score(y,y_pred)
    print("accuracy: "+str(accuracies[drug]))
    print(classification_report(y, y_pred))
    tn, fp, fn, tp = confusion_matrix(y, y_pred).ravel()
    print("TN (susceptible): "+str(tn))
    print("FN (resistant predicted as susceptible): "+str(fn))
    print("FP (susceptible predicted as resistant): "+str(fp))
    print("TP (resistant): "+str(tp))


DRUG: R_FIS

------------------------------
DecisionTree Set aadA2|JQ364967,dfrA17|FJ460238,mph(A)|U36578,sul1|CP002151,sul2|GQ421466,sul2|HQ840942

accuracy: 0.9275078369905956
             precision    recall  f1-score   support

          0       0.99      0.90      0.94      1721
          1       0.83      0.98      0.90       831

avg / total       0.94      0.93      0.93      2552

TN (susceptible): 1552
FN (resistant predicted as susceptible): 16
FP (susceptible predicted as resistant): 169
TP (resistant): 815

DRUG: R_SXT

------------------------------
DecisionTree Set dfrA12|AB571791

accuracy: 0.7351050150021432
             precision    recall  f1-score   support

          0       0.73      1.00      0.85      1691
          1       1.00      0.04      0.07       642

avg / total       0.81      0.74      0.63      2333

TN (susceptible): 1691
FN (resistant predicted as susceptible): 618
FP (susceptible predicted as resistant): 0
TP (resistant): 24


In [446]:
genedf = amrdf[genes]
# These are determined by chromosomal mutations in the "Quinolone resistance-determining regions"
for drug,i_d in [['R_CIP', 0.01],['R_NAL', 0.01], ['phenotypic_levofloxacin',0.015]]: 
    y = amrdf[drug]
    missing = y.isnull()

    X = genedf.values[~missing,:].astype(int)
    y = y[~missing].astype(int)

    clf = DecisionTreeClassifier(min_impurity_decrease=i_d)
    clf.fit(X, y)

    importantgenes = genedf.columns.values[clf.feature_importances_ > np.median(clf.feature_importances_)]
    
    imp = np.zeros(genedf.shape[0])
    for g in importantgenes:
        c = genedf[g].as_matrix().astype(float)
        imp = np.logical_or(imp, c)
    y_pred = imp.astype(int)
    y_pred = y_pred[~missing]

    print("\nDRUG: "+drug)
    print("\n------------------------------\nDecisionTree Set {}\n".format(','.join(importantgenes)))
    accuracies[drug] = accuracy_score(y,y_pred)
    print("accuracy: "+str(accuracies[drug]))
    print(classification_report(y, y_pred))
    tn, fp, fn, tp = confusion_matrix(y, y_pred).ravel()
    print("TN (susceptible): "+str(tn))
    print("FN (resistant predicted as susceptible): "+str(fn))
    print("FP (susceptible predicted as resistant): "+str(fp))
    print("TP (resistant): "+str(tp))


DRUG: R_CIP

------------------------------
DecisionTree Set blaCTX-M-15|DQ302097,dfrA17|FJ460238

accuracy: 0.9721786833855799
             precision    recall  f1-score   support

          0       0.98      0.99      0.99      2351
          1       0.92      0.71      0.80       201

avg / total       0.97      0.97      0.97      2552

TN (susceptible): 2338
FN (resistant predicted as susceptible): 58
FP (susceptible predicted as resistant): 13
TP (resistant): 143

DRUG: R_NAL

------------------------------
DecisionTree Set 

accuracy: 0.9802828975567939
             precision    recall  f1-score   support

          0       0.98      1.00      0.99      2287
          1       0.00      0.00      0.00        46

avg / total       0.96      0.98      0.97      2333

TN (susceptible): 2287
FN (resistant predicted as susceptible): 46
FP (susceptible predicted as resistant): 0
TP (resistant): 0

DRUG: phenotypic_levofloxacin

------------------------------
DecisionTree Set aac(6)Ib-

  'precision', 'predicted', average, warn_for)


In [428]:
genedf = amrdf[genes]
for drug,i_d in [['R_GEN', 0.01], ['phenotypic_streptomycin',0.008], ['phenotypic_kanamycin',0.008]]: 
    y = amrdf[drug]
    missing = y.isnull()

    X = genedf.values[~missing,:].astype(int)
    y = y[~missing].astype(int)

    clf = DecisionTreeClassifier(min_impurity_decrease=i_d)
    clf.fit(X, y)

    importantgenes = genedf.columns.values[clf.feature_importances_ > np.median(clf.feature_importances_)]
    
    imp = np.zeros(genedf.shape[0])
    for g in importantgenes:
        c = genedf[g].as_matrix().astype(float)
        imp = np.logical_or(imp, c)
    y_pred = imp.astype(int)
    y_pred = y_pred[~missing]

    print("\nDRUG: "+drug)
    print("\n------------------------------\nDecisionTree Set {}\n".format(','.join(importantgenes)))
    print("accuracy: "+str(accuracy_score(y,y_pred)))
    print(classification_report(y, y_pred))
    tn, fp, fn, tp = confusion_matrix(y, y_pred).ravel()
    print("TN (susceptible): "+str(tn))
    print("FN (resistant predicted as susceptible): "+str(fn))
    print("FP (susceptible predicted as resistant): "+str(fp))
    print("TP (resistant): "+str(tp))


DRUG: R_GEN

------------------------------
DecisionTree Set aac(3)-IIa|X51534,aac(3)-IId|EU022314,aac(3)-VIa|M88012,ant(2)-Ia|M86913,ant(3)-Ia|X02340

accuracy: 0.9760971786833855
             precision    recall  f1-score   support

          0       0.99      0.99      0.99      2210
          1       0.91      0.92      0.91       342

avg / total       0.98      0.98      0.98      2552

TN (susceptible): 2178
FN (resistant predicted as susceptible): 29
FP (susceptible predicted as resistant): 32
TP (resistant): 313

DRUG: phenotypic_streptomycin

------------------------------
DecisionTree Set aac(6)-Ib|M21682,aadA2|JQ364967,ant(3)-Ia|X02340,aph(6)-Id|M28829,strA|M96392

accuracy: 0.9480796586059744
             precision    recall  f1-score   support

          0       0.97      0.94      0.96       861
          1       0.92      0.95      0.93       545

avg / total       0.95      0.95      0.95      1406

TN (susceptible): 813
FN (resistant predicted as susceptible): 25
FP 

In [447]:
genedf = amrdf[genes]
for drug,i_d in [['phenotypic_ertapenem', 0.01], ['phenotypic_meropenem',0.008], ['phenotypic_doripenem',0.01]]: 
    y = amrdf[drug]
    missing = y.isnull()

    X = genedf.values[~missing,:].astype(int)
    y = y[~missing].astype(int)

    clf = DecisionTreeClassifier(min_impurity_decrease=i_d)
    clf.fit(X, y)

    importantgenes = genedf.columns.values[clf.feature_importances_ > np.median(clf.feature_importances_)]
    
    imp = np.zeros(genedf.shape[0])
    for g in importantgenes:
        c = genedf[g].as_matrix().astype(float)
        imp = np.logical_or(imp, c)
    y_pred = imp.astype(int)
    y_pred = y_pred[~missing]

    print("\nDRUG: "+drug)
    print("\n------------------------------\nDecisionTree Set {}\n".format(','.join(importantgenes)))
    accuracies[drug] = accuracy_score(y,y_pred)
    print("accuracy: "+str(accuracies[drug]))
    print(classification_report(y, y_pred))
    tn, fp, fn, tp = confusion_matrix(y, y_pred).ravel()
    print("TN (susceptible): "+str(tn))
    print("FN (resistant predicted as susceptible): "+str(fn))
    print("FP (susceptible predicted as resistant): "+str(fp))
    print("TP (resistant): "+str(tp))


DRUG: phenotypic_ertapenem

------------------------------
DecisionTree Set blaKPC-2|AY034847,blaKPC-3|HM769262,blaNDM-5|JN104597,strA|M96392

accuracy: 0.8935185185185185
             precision    recall  f1-score   support

          0       0.89      0.99      0.94       177
          1       0.90      0.46      0.61        39

avg / total       0.89      0.89      0.88       216

TN (susceptible): 175
FN (resistant predicted as susceptible): 21
FP (susceptible predicted as resistant): 2
TP (resistant): 18

DRUG: phenotypic_meropenem

------------------------------
DecisionTree Set blaNDM-5|JN104597,blaNDM-6|JN967644,blaNDM-7|JX262694,cmlA1|M64556,strA|M96392,tet(A)|AY196695

accuracy: 0.9315068493150684
             precision    recall  f1-score   support

          0       0.95      0.98      0.96       194
          1       0.78      0.56      0.65        25

avg / total       0.93      0.93      0.93       219

TN (susceptible): 190
FN (resistant predicted as susceptible): 11
F

In [385]:
pdf = amrdf.filter(regex="phenotypic_*")
pdf.sum(axis=0)


phenotypic_amikacin                            13.0
phenotypic_amoxicillin-clavulanic acid        364.0
phenotypic_ampicillin                         707.0
phenotypic_ampicillin-sulbactam                49.0
phenotypic_azithromycin                         1.0
phenotypic_aztreonam                           92.0
phenotypic_cefalexin                            1.0
phenotypic_cefazolin                          155.0
phenotypic_cefepime                            79.0
phenotypic_cefotaxime                          49.0
phenotypic_cefotaxime-clavulanic acid           1.0
phenotypic_cefotetan                            0.0
phenotypic_cefoxitin                          294.0
phenotypic_ceftazidime                        115.0
phenotypic_ceftazidime-clavulanic acid          0.0
phenotypic_ceftiofur                          276.0
phenotypic_ceftriaxone                        479.0
phenotypic_chloramphenicol                     94.0
phenotypic_ciprofloxacin                      180.0
phenotypic_c

In [472]:
for key, value in sorted(accuracies.items(), key=lambda item: (item[1],item[0])):
    key=key.replace('phenotypic_','')
    key=key.replace('R_','')
    print("{key: <12} {value:10.4f}".format(key=key,value=value))

levofloxacin     0.5982
cefepime         0.6326
doripenem        0.6667
ceftazidime      0.7143
SXT              0.7351
cefotaxime       0.8361
aztreonam        0.8584
ertapenem        0.8935
FIS              0.9275
cefazolin        0.9281
meropenem        0.9315
FOX              0.9500
AMC              0.9670
CIP              0.9722
NAL              0.9803
AMP              0.9816
TET              0.9849
CRO              0.9898
TIO              0.9923
AZM              0.9991
