In [1]:
import numpy as np
import pandas as pd
import pickle
from lbs.dl.metrics import calc_sov
import sklearn.metrics
# Render floats in displayed DataFrames with a thousands separator and three decimals.
pd.options.display.float_format = '{:,.3f}'.format
import re

Using TensorFlow backend.


In [2]:
def get_benchmarks(true_column, pred_column):
    """Compute residue-level and segment-level benchmark scores for one predictor.

    Parameters
    ----------
    true_column : pandas.Series
        Reference assignments, one string per sequence. The character '1'
        is treated as the positive class.
    pred_column : pandas.Series
        Predicted assignments, paired row by row with ``true_column``.
        Presumably each string has the same length as its reference; if not,
        the residue-level comparison stops at the shorter of the two.

    Returns
    -------
    tuple
        ``(precision, sensitivity, f1, mean_sov, detected_fraction)`` where
        precision, sensitivity and F1 are computed over all residues pooled
        across sequences, ``mean_sov`` is the mean ``calc_sov`` score over
        every run of '1' in the reference, and ``detected_fraction`` is the
        share of those runs whose ``calc_sov`` score is non-zero. The two
        segment-level values are NaN when the reference contains no run of
        '1' (previously this case raised ZeroDivisionError).
    """
    yt = []
    yp = []
    sovs = []
    for true, pred in zip(true_column.tolist(), pred_column.tolist()):
        # Score each maximal run of '1' in the reference against the same
        # span of the prediction.
        for match in re.finditer(r"1+", true):
            start, end = match.span()
            sovs.append(calc_sov(true[start:end], pred[start:end]))
        # Pool per-residue labels over all sequences for the sklearn metrics.
        for y_true, y_pred in zip(true, pred):
            yt.append(y_true)
            yp.append(y_pred)
    prec = sklearn.metrics.precision_score(yt, yp, pos_label='1')
    sens = sklearn.metrics.recall_score(yt, yp, pos_label='1')
    f1 = sklearn.metrics.f1_score(yt, yp, pos_label='1')
    sovs = np.asarray(sovs)
    if sovs.shape[0] == 0:
        # No reference segments: the segment-level scores are undefined.
        mean_sov = detected = float('nan')
    else:
        mean_sov = np.mean(sovs)
        detected = np.count_nonzero(sovs) / sovs.shape[0]
    return (prec, sens, f1, mean_sov, detected)

In [3]:
# Prediction column name -> predictor label used as the row name in the result tables.
mapping = dict(
    deepcoil_assignment='DeepCoil',
    cchmmprof_assignment='CCHMM_PROF',
    marcoil_10_assignment='Marcoil_10',
    marcoil_50_assignment='Marcoil_50',
    marcoil_90_assignment='Marcoil_90',
    pcoils_14_assignment='PCoils_14',
    pcoils_21_assignment='PCoils_21',
    pcoils_28_assignment='PCoils_28',
)

### Get results from cross-validation

In [4]:
# Load the pickled cross-validation results. The context manager closes the
# file handle as soon as the object has been read.
# NOTE: pickle.load can execute arbitrary code; only load files produced by
# this project's own pipeline.
with open('./../4_Train/out/seq/cv_results.p', 'rb') as cv_file:
    df_cv = pickle.load(cv_file)

#### Update CCHMM_PROF results

In [5]:
# Drop the CCHMM_PROF columns that were stored with the cross-validation results.
del df_cv['cchmmprof_assignment']
del df_cv['cchmmprof_cc']
# Load the replacement CCHMM_PROF predictions. The context manager closes the
# file handle after reading (pickle.load: trusted, project-generated files only).
with open('./../1_Data_Preparation/out/pickle/cchmmprof_all_74.p', 'rb') as cchmm_file:
    df_cchmm_new = pickle.load(cchmm_file)
# Attach the new columns side by side; pd.concat(axis=1) aligns rows on the index.
df_cv = pd.concat([df_cv, df_cchmm_new], axis=1)
# Keep only rows with a non-null `cc` value.
# NOTE(review): presumably this removes rows that exist only in df_cchmm_new — confirm.
df_cv = df_cv[df_cv['cc'].notnull()]

In [6]:
# Prediction columns benchmarked against the `socket_assignment` reference column.
variants = [
    'deepcoil_assignment',
    'cchmmprof_assignment',
    'marcoil_10_assignment',
    'marcoil_50_assignment',
    'marcoil_90_assignment',
    'pcoils_14_assignment',
    'pcoils_21_assignment',
    'pcoils_28_assignment',
]
# Keyed by the predictor label from `mapping`; each value is the score tuple
# returned by get_benchmarks for the cross-validation frame.
results = {
    mapping[variant]: get_benchmarks(df_cv['socket_assignment'], df_cv[variant])
    for variant in variants
}

In [7]:
# One row per predictor (the dict keys become the index), sorted by index label.
results_cv = pd.DataFrame.from_dict(results, orient='index').sort_index()
# Name the five scores in the order get_benchmarks returns them.
results_cv.columns = ['Precision', 'Sensitivity', 'F1 score', 'Mean SOV', 'Detected segments']

### Get results from test set

In [8]:
# Load the pickled test-set results. The context manager closes the file
# handle as soon as the object has been read.
# NOTE: pickle.load can execute arbitrary code; only load files produced by
# this project's own pipeline.
with open('./../4_Train/out/seq/test_results.p', 'rb') as test_file:
    df_test = pickle.load(test_file)

#### Update CCHMM_PROF results

In [9]:
# Drop the CCHMM_PROF columns that were stored with the test-set results.
del df_test['cchmmprof_assignment']
del df_test['cchmmprof_cc']
# Load the replacement CCHMM_PROF predictions. The context manager closes the
# file handle after reading (pickle.load: trusted, project-generated files only).
with open('./../1_Data_Preparation/out/pickle/cchmmprof_all_74.p', 'rb') as cchmm_file:
    df_cchmm_new = pickle.load(cchmm_file)
# Attach the new columns side by side; pd.concat(axis=1) aligns rows on the index.
df_test = pd.concat([df_test, df_cchmm_new], axis=1)
# Keep only rows with a non-null `cc` value.
# NOTE(review): presumably this removes rows that exist only in df_cchmm_new — confirm.
df_test = df_test[df_test['cc'].notnull()]

In [10]:
# Prediction columns benchmarked against the `socket_assignment` reference column.
variants = [
    'deepcoil_assignment',
    'cchmmprof_assignment',
    'marcoil_10_assignment',
    'marcoil_50_assignment',
    'marcoil_90_assignment',
    'pcoils_14_assignment',
    'pcoils_21_assignment',
    'pcoils_28_assignment',
]
# Keyed by the predictor label from `mapping`; each value is the score tuple
# returned by get_benchmarks for the test-set frame.
results = {
    mapping[variant]: get_benchmarks(df_test['socket_assignment'], df_test[variant])
    for variant in variants
}

In [11]:
# One row per predictor (the dict keys become the index), sorted by index label.
results_test = pd.DataFrame.from_dict(results, orient='index').sort_index()
# Name the five scores in the order get_benchmarks returns them.
results_test.columns = ['Precision', 'Sensitivity', 'F1 score', 'Mean SOV', 'Detected segments']

### Show results

#### Cross-validation

In [12]:
# Display the cross-validation benchmark table (one row per predictor).
results_cv

Unnamed: 0,Precision,Sensitivity,F1 score,Mean SOV,Detected segments
CCHMM_PROF,0.375,0.232,0.287,0.154,0.155
DeepCoil,0.384,0.499,0.434,0.447,0.585
Marcoil_10,0.303,0.377,0.336,0.28,0.303
Marcoil_50,0.415,0.267,0.325,0.181,0.198
Marcoil_90,0.499,0.179,0.263,0.113,0.125
PCoils_14,0.399,0.24,0.3,0.195,0.229
PCoils_21,0.397,0.351,0.373,0.27,0.29
PCoils_28,0.379,0.381,0.38,0.275,0.294


#### Test set

In [13]:
# Display the test-set benchmark table (one row per predictor).
results_test

Unnamed: 0,Precision,Sensitivity,F1 score,Mean SOV,Detected segments
CCHMM_PROF,0.29,0.301,0.295,0.216,0.217
DeepCoil,0.388,0.488,0.432,0.428,0.523
Marcoil_10,0.306,0.414,0.352,0.306,0.333
Marcoil_50,0.394,0.275,0.324,0.192,0.213
Marcoil_90,0.473,0.17,0.251,0.107,0.122
PCoils_14,0.394,0.256,0.31,0.208,0.244
PCoils_21,0.386,0.376,0.381,0.285,0.302
PCoils_28,0.383,0.454,0.416,0.312,0.339
