# Load Libraries

In [1]:
from sklearn.metrics import matthews_corrcoef, confusion_matrix
from src.helper_functions import load_data, get_model_perfs, save_model_perfs

Using TensorFlow backend.





# Load Performance Metrics

In [2]:
feature_names = ['NR.AhR', 'NR.AR', 'NR.AR.LBD', 'NR.Aromatase', 'NR.ER', 'NR.ER.LBD',\
                 'NR.PPAR.gamma', 'SR.ARE', 'SR.ATAD5', 'SR.HSE', 'SR.MMP', 'SR.p53']

In [3]:
nr_ahr = get_model_perfs(feature_names[0])

# Define New mcc Function

In [4]:
def mcc(confusion_matrix):
    """Calculates the Matthews Correlation Coefficient from a binary classifier confusion matrix.
    -----
    input:
      confusion_matrix: array-like confusion matrix for a binary classifier: [[tp, fp],[fn, tn]]
    returns: scalar - the Matthews Correlation Coefficient value.
    """
    tp, fp = confusion_matrix[0]
    fn, tn = confusion_matrix[1]
    return (tp*tn-fp*fn)/((tp+fp)*(tp+fn)*(tn+fp)*(tn+fn))**0.5

# Confirm mcc Calculates Correctly

In [5]:
y_true=[1,1,1,-1]
y_pred=[1,-1,1,1]
assert mcc(confusion_matrix(y_true, y_pred))==matthews_corrcoef(y_true, y_pred)

# Calculate mcc For All Models in DataFrame
Working out procedure that will be used below.

In [6]:
nr_ahr['mcc'] = nr_ahr['confusion_matrix'].apply(mcc)
# Put mcc after auc_roc if it is at the end, otherwise leave it as is:
if nr_ahr.columns.tolist().index('mcc') == len(nr_ahr.columns)-1:
    iauc_roc = nr_ahr.columns.tolist().index('auc_roc') + 1
    cols=nr_ahr.columns.tolist()
    cols=cols[:iauc_roc]+cols[-1:]+cols[iauc_roc:-1]
else:
    cols=nr_ahr.columns.tolist()
nr_ahr = nr_ahr[cols]
nr_ahr

Unnamed: 0,model,threshold,accuracy,precision,recall,f1,auc_roc,mcc,avg_precision,confusion_matrix,model_filename
0,RF,0.5,0.911475,0.756757,0.383562,0.509091,0.905028,0.498772,0.602102,"[[528, 9], [45, 28]]",RF0.joblib
1,RF_modT,0.235,0.842623,0.424837,0.890411,0.575221,0.905028,0.544005,0.602102,"[[449, 88], [8, 65]]",RF0.joblib
2,DNN,0.5,0.814754,0.364865,0.739726,0.488688,0.864723,0.427562,0.524576,"[[443, 94], [19, 54]]",DNN0.h5
3,DNN_modT,0.773516,0.901639,0.603175,0.520548,0.558824,0.864723,0.505541,0.524576,"[[512, 25], [35, 38]]",DNN0.h5
4,DNN,0.5,0.895082,0.561644,0.561644,0.561644,0.860935,0.502054,0.495738,"[[505, 32], [32, 41]]",DNN2.h5
5,DNN_modT,0.488593,0.893443,0.551282,0.589041,0.569536,0.860935,0.509172,0.495738,"[[502, 35], [30, 43]]",DNN2.h5
6,DNN,0.5,0.780328,0.323699,0.767123,0.455285,0.86795,0.395505,0.573394,"[[420, 117], [17, 56]]",DNN3.h5
7,DNN_modT,0.600385,0.885246,0.516854,0.630137,0.567901,0.86795,0.505763,0.573394,"[[494, 43], [27, 46]]",DNN3.h5
8,DNN,0.5,0.872131,0.474747,0.643836,0.546512,0.871126,0.481514,0.553833,"[[485, 52], [26, 47]]",DNN4.h5
9,DNN_modT,0.496647,0.87541,0.485714,0.69863,0.573034,0.871126,0.514235,0.553833,"[[483, 54], [22, 51]]",DNN4.h5


# Add mcc To All Target Performance Tables

In [7]:
for target in feature_names:
    df = get_model_perfs(target)
    df['mcc'] = df['confusion_matrix'].apply(mcc)
    df = df[cols]
    # Confirm this code correctly modifies all target performance tables:
#     print(target)
#     display(df)
    save_model_perfs(target,df)

In [8]:
for target in feature_names:
    print(target)
    display(get_model_perfs(target))

NR.AhR


Unnamed: 0,model,threshold,accuracy,precision,recall,f1,auc_roc,mcc,avg_precision,confusion_matrix,model_filename
0,RF,0.5,0.911475,0.756757,0.383562,0.509091,0.905028,0.498772,0.602102,"[[528, 9], [45, 28]]",RF0.joblib
1,RF_modT,0.235,0.842623,0.424837,0.890411,0.575221,0.905028,0.544005,0.602102,"[[449, 88], [8, 65]]",RF0.joblib
2,DNN,0.5,0.814754,0.364865,0.739726,0.488688,0.864723,0.427562,0.524576,"[[443, 94], [19, 54]]",DNN0.h5
3,DNN_modT,0.773516,0.901639,0.603175,0.520548,0.558824,0.864723,0.505541,0.524576,"[[512, 25], [35, 38]]",DNN0.h5
4,DNN,0.5,0.895082,0.561644,0.561644,0.561644,0.860935,0.502054,0.495738,"[[505, 32], [32, 41]]",DNN2.h5
5,DNN_modT,0.488593,0.893443,0.551282,0.589041,0.569536,0.860935,0.509172,0.495738,"[[502, 35], [30, 43]]",DNN2.h5
6,DNN,0.5,0.780328,0.323699,0.767123,0.455285,0.86795,0.395505,0.573394,"[[420, 117], [17, 56]]",DNN3.h5
7,DNN_modT,0.600385,0.885246,0.516854,0.630137,0.567901,0.86795,0.505763,0.573394,"[[494, 43], [27, 46]]",DNN3.h5
8,DNN,0.5,0.872131,0.474747,0.643836,0.546512,0.871126,0.481514,0.553833,"[[485, 52], [26, 47]]",DNN4.h5
9,DNN_modT,0.496647,0.87541,0.485714,0.69863,0.573034,0.871126,0.514235,0.553833,"[[483, 54], [22, 51]]",DNN4.h5


NR.AR


Unnamed: 0,model,threshold,accuracy,precision,recall,f1,auc_roc,mcc,avg_precision,confusion_matrix,model_filename
0,RF,0.5,0.981229,0.666667,0.166667,0.266667,0.678934,0.327293,0.232723,"[[573, 1], [10, 2]]",RF0.joblib
1,RF_modT,0.73,0.982935,1.0,0.166667,0.285714,0.678934,0.404738,0.232723,"[[574, 0], [10, 2]]",RF0.joblib
2,DNN,0.5,0.721843,0.042424,0.583333,0.079096,0.741507,0.09701,0.168663,"[[416, 158], [5, 7]]",DNN0.h5
3,DNN_modT,0.792447,0.96587,0.25,0.333333,0.285714,0.741507,0.271518,0.168663,"[[562, 12], [8, 4]]",DNN0.h5


NR.AR.LBD


Unnamed: 0,model,threshold,accuracy,precision,recall,f1,auc_roc,mcc,avg_precision,confusion_matrix,model_filename
0,RF,0.5,0.982818,0.0,0.0,0.0,0.763284,-0.006933,0.060344,"[[572, 2], [8, 0]]",RF0.joblib
1,RF_modT,0.27,0.979381,0.166667,0.125,0.142857,0.763284,0.134046,0.060344,"[[569, 5], [7, 1]]",RF0.joblib
2,DNN,0.5,0.738832,0.02,0.375,0.037975,0.635126,0.031652,0.030153,"[[427, 147], [5, 3]]",DNN0.h5
3,DNN_modT,0.762549,0.929553,0.054054,0.25,0.088889,0.635126,0.090203,0.030153,"[[539, 35], [6, 2]]",DNN0.h5


NR.Aromatase


Unnamed: 0,model,threshold,accuracy,precision,recall,f1,auc_roc,mcc,avg_precision,confusion_matrix,model_filename
0,RF,0.5,0.92803,1.0,0.025641,0.05,0.771931,0.154247,0.311884,"[[489, 0], [38, 1]]",RF0.joblib
1,RF_modT,0.13,0.840909,0.235294,0.512821,0.322581,0.771931,0.270359,0.311884,"[[424, 65], [19, 20]]",RF0.joblib
2,DNN,0.5,0.772727,0.159664,0.487179,0.240506,0.718158,0.176949,0.143086,"[[389, 100], [20, 19]]",DNN0.h5
3,DNN_modT,0.475931,0.767045,0.166667,0.538462,0.254545,0.718158,0.198647,0.143086,"[[384, 105], [18, 21]]",DNN0.h5


NR.ER


Unnamed: 0,model,threshold,accuracy,precision,recall,f1,auc_roc,mcc,avg_precision,confusion_matrix,model_filename
0,RF,0.5,0.916667,0.785714,0.215686,0.338462,0.769007,0.384352,0.445565,"[[462, 3], [40, 11]]",RF0.joblib
1,RF_modT,0.4,0.912791,0.607143,0.333333,0.43038,0.769007,0.407974,0.445565,"[[454, 11], [34, 17]]",RF0.joblib
2,DNN,0.5,0.748062,0.203008,0.529412,0.293478,0.736369,0.205688,0.280064,"[[359, 106], [24, 27]]",DNN0.h5
3,DNN_modT,0.625295,0.815891,0.270833,0.509804,0.353741,0.736369,0.275529,0.280064,"[[395, 70], [25, 26]]",DNN0.h5


NR.ER.LBD


Unnamed: 0,model,threshold,accuracy,precision,recall,f1,auc_roc,mcc,avg_precision,confusion_matrix,model_filename
0,RF,0.5,0.971667,1.0,0.15,0.26087,0.750345,0.381744,0.268778,"[[580, 0], [17, 3]]",RF0.joblib
1,RF_modT,0.305,0.965,0.461538,0.3,0.363636,0.750345,0.354999,0.268778,"[[573, 7], [14, 6]]",RF0.joblib
2,DNN,0.5,0.725,0.049689,0.4,0.088398,0.678319,0.05518,0.069591,"[[427, 153], [12, 8]]",DNN0.h5
3,DNN_modT,0.74828,0.915,0.102564,0.2,0.135593,0.678319,0.101689,0.069591,"[[545, 35], [16, 4]]",DNN0.h5


NR.PPAR.gamma


Unnamed: 0,model,threshold,accuracy,precision,recall,f1,auc_roc,mcc,avg_precision,confusion_matrix,model_filename
0,RF,0.5,0.947107,0.0,0.0,0.0,0.709846,-0.009456,0.129989,"[[573, 1], [31, 0]]",RF0.joblib
1,RF_modT,0.13,0.890909,0.181818,0.322581,0.232558,0.709846,0.18728,0.129989,"[[529, 45], [21, 10]]",RF0.joblib
2,DNN,0.5,0.750413,0.115385,0.580645,0.192513,0.74039,0.171483,0.10955,"[[436, 138], [13, 18]]",DNN0.h5
3,DNN_modT,0.577705,0.798347,0.130081,0.516129,0.207792,0.74039,0.180636,0.10955,"[[467, 107], [15, 16]]",DNN0.h5


SR.ARE


Unnamed: 0,model,threshold,accuracy,precision,recall,f1,auc_roc,mcc,avg_precision,confusion_matrix,model_filename
0,RF,0.5,0.841441,0.592593,0.172043,0.266667,0.780838,0.257342,0.430711,"[[451, 11], [77, 16]]",RF0.joblib
1,RF_modT,0.34,0.823423,0.47619,0.537634,0.505051,0.780838,0.39916,0.430711,"[[407, 55], [43, 50]]",RF0.joblib
2,DNN,0.5,0.762162,0.355556,0.516129,0.421053,0.7055,0.285366,0.347718,"[[375, 87], [45, 48]]",DNN0.h5
3,DNN_modT,0.457539,0.753153,0.353333,0.569892,0.436214,0.7055,0.302701,0.347718,"[[365, 97], [40, 53]]",DNN0.h5


SR.ATAD5


Unnamed: 0,model,threshold,accuracy,precision,recall,f1,auc_roc,mcc,avg_precision,confusion_matrix,model_filename
0,RF,0.5,0.938907,0.0,0.0,0.0,0.795625,,0.427828,"[[584, 0], [38, 0]]",RF0.joblib
1,RF_modT,0.255,0.948553,0.636364,0.368421,0.466667,0.795625,0.459939,0.427828,"[[576, 8], [24, 14]]",RF0.joblib
2,DNN,0.5,0.736334,0.125,0.552632,0.203883,0.711608,0.162318,0.153415,"[[437, 147], [17, 21]]",DNN0.h5
3,DNN_modT,0.577705,0.789389,0.155556,0.552632,0.242775,0.711608,0.20766,0.153415,"[[470, 114], [17, 21]]",DNN0.h5


SR.HSE


Unnamed: 0,model,threshold,accuracy,precision,recall,f1,auc_roc,mcc,avg_precision,confusion_matrix,model_filename
0,RF,0.5,0.965574,0.6,0.136364,0.222222,0.759586,0.274958,0.239563,"[[586, 2], [19, 3]]",RF0.joblib
1,RF_modT,0.34,0.965574,0.545455,0.272727,0.363636,0.759586,0.370222,0.239563,"[[583, 5], [16, 6]]",RF0.joblib
2,DNN,0.5,0.72623,0.055215,0.409091,0.097297,0.626739,0.062018,0.054621,"[[434, 154], [13, 9]]",DNN0.h5
3,DNN_modT,0.450345,0.708197,0.066667,0.545455,0.118812,0.626739,0.106186,0.054621,"[[420, 168], [10, 12]]",DNN0.h5


SR.MMP


Unnamed: 0,model,threshold,accuracy,precision,recall,f1,auc_roc,mcc,avg_precision,confusion_matrix,model_filename
0,RF,0.5,0.906077,0.615385,0.4,0.484848,0.930331,0.447984,0.560184,"[[468, 15], [36, 24]]",RF0.joblib
1,RF_modT,0.3825,0.913444,0.6,0.65,0.624,0.930331,0.575769,0.560184,"[[457, 26], [21, 39]]",RF0.joblib
2,DNN,0.5,0.815838,0.330508,0.65,0.438202,0.803882,0.369779,0.296565,"[[404, 79], [21, 39]]",DNN0.h5
3,DNN_modT,0.505391,0.81768,0.333333,0.65,0.440678,0.803882,0.372498,0.296565,"[[405, 78], [21, 39]]",DNN0.h5


SR.p53


Unnamed: 0,model,threshold,accuracy,precision,recall,f1,auc_roc,mcc,avg_precision,confusion_matrix,model_filename
0,RF,0.5,0.933442,0.5,0.02439,0.046512,0.806702,0.099247,0.22199,"[[574, 1], [40, 1]]",RF0.joblib
1,RF_modT,0.22,0.886364,0.254237,0.365854,0.3,0.806702,0.245058,0.22199,"[[531, 44], [26, 15]]",RF0.joblib
2,DNN,0.5,0.753247,0.150943,0.585366,0.24,0.768696,0.199692,0.168234,"[[440, 135], [17, 24]]",DNN0.h5
3,DNN_modT,0.540362,0.780844,0.169014,0.585366,0.262295,0.768696,0.224981,0.168234,"[[457, 118], [17, 24]]",DNN0.h5
