In [1]:
import numpy as np
import pandas as pd
import pickle
from lbs.dl.metrics import calc_sov
import sklearn.metrics
pd.options.display.float_format = '{:,.3f}'.format
import re

Using TensorFlow backend.


In [2]:
def get_benchmarks(true_column, pred_column):
    """Score one predictor's label strings against the reference labels.

    Parameters
    ----------
    true_column, pred_column
        Objects exposing ``.tolist()`` (the calls in this notebook pass
        DataFrame columns) that yield paired label sequences. Every reference
        entry must be a string because it is scanned with a regular
        expression; the character ``'1'`` marks the positive class.

    Returns
    -------
    tuple
        ``(precision, sensitivity, f1, mean_sov, detected_fraction)`` where

        * precision / sensitivity / f1 are computed by ``sklearn.metrics``
          over all positions of all pairs pooled together, with ``'1'`` as
          the positive label;
        * ``mean_sov`` is the mean ``calc_sov`` score over every maximal run
          of ``'1'`` in the reference sequences;
        * ``detected_fraction`` is the share of those runs whose ``calc_sov``
          score is non-zero.

        When the reference contains no ``'1'`` run at all, the last two values
        are ``nan`` (previously this case failed with a division by zero).
    """
    yt = []
    yp = []
    sovs = []
    for true, pred in zip(true_column.tolist(), pred_column.tolist()):
        # Each maximal run of '1' in the reference is scored on its own,
        # against the prediction restricted to the same positions.
        for match in re.finditer(r"1+", true):
            start, end = match.span()
            sovs.append(calc_sov(true[start:end], pred[start:end]))
        # Pool position-wise labels; pairs are cut to the shorter sequence,
        # which is what zipping the two sequences did before.
        paired_len = min(len(true), len(pred))
        yt.extend(true[:paired_len])
        yp.extend(pred[:paired_len])
    prec = sklearn.metrics.precision_score(yt, yp, pos_label='1')
    sens = sklearn.metrics.recall_score(yt, yp, pos_label='1')
    f1 = sklearn.metrics.f1_score(yt, yp, pos_label='1')
    sovs = np.asarray(sovs)
    if sovs.size == 0:
        # No reference segment: the segment-level values are undefined.
        mean_sov = detected_fraction = float('nan')
    else:
        mean_sov = np.mean(sovs)
        detected_fraction = np.count_nonzero(sovs) / sovs.shape[0]
    return (prec, sens, f1, mean_sov, detected_fraction)

In [3]:
# Result-column name -> label used for that predictor in the result tables.
mapping = dict([
    ('deepcoil_assignment', 'DeepCoil'),
    ('cchmmprof_assignment', 'CCHMM_PROF'),
    ('marcoil_10_assignment', 'Marcoil_10'),
    ('marcoil_50_assignment', 'Marcoil_50'),
    ('marcoil_90_assignment', 'Marcoil_90'),
    ('pcoils_14_assignment', 'PCoils_14'),
    ('pcoils_21_assignment', 'PCoils_21'),
    ('pcoils_28_assignment', 'PCoils_28'),
])

### Get results from cross-validation

In [4]:
# Load the pickled cross-validation results. The context manager closes the
# file handle as soon as loading finishes (a bare open() left it dangling).
# NOTE: pickle.load can execute arbitrary code - only load files you trust.
with open('./../4_Train/out/seq/cv_results.p', 'rb') as cv_file:
    df_cv = pickle.load(cv_file)

In [5]:
# Columns of df_cv holding each predictor's assignments; every one is
# benchmarked against the 'socket_assignment' column.
variants = [
    'deepcoil_assignment',
    'cchmmprof_assignment',
    'marcoil_10_assignment',
    'marcoil_50_assignment',
    'marcoil_90_assignment',
    'pcoils_14_assignment',
    'pcoils_21_assignment',
    'pcoils_28_assignment',
]
# Display label -> tuple of benchmark values returned by get_benchmarks.
results = {
    mapping[column]: get_benchmarks(df_cv['socket_assignment'], df_cv[column])
    for column in variants
}

In [6]:
# One row per predictor label (sorted alphabetically), one column per value
# of the tuples stored in `results`.
results_cv = pd.DataFrame.from_dict(results, orient='index').sort_index()
results_cv.columns = [
    'Precision',
    'Sensitivity',
    'F1 score',
    'Mean SOV',
    'Detected segments',
]

### Get results from test set

In [7]:
# Load the pickled test-set results. The context manager closes the file
# handle as soon as loading finishes (a bare open() left it dangling).
# NOTE: pickle.load can execute arbitrary code - only load files you trust.
with open('./../4_Train/out/seq/test_results.p', 'rb') as test_file:
    df_test = pickle.load(test_file)

In [8]:
# Columns of df_test holding each predictor's assignments; every one is
# benchmarked against the 'socket_assignment' column.
variants = [
    'deepcoil_assignment',
    'cchmmprof_assignment',
    'marcoil_10_assignment',
    'marcoil_50_assignment',
    'marcoil_90_assignment',
    'pcoils_14_assignment',
    'pcoils_21_assignment',
    'pcoils_28_assignment',
]
# Display label -> tuple of benchmark values returned by get_benchmarks.
results = {
    mapping[column]: get_benchmarks(df_test['socket_assignment'], df_test[column])
    for column in variants
}

In [9]:
# One row per predictor label (sorted alphabetically), one column per value
# of the tuples stored in `results`.
results_test = pd.DataFrame.from_dict(results, orient='index').sort_index()
results_test.columns = [
    'Precision',
    'Sensitivity',
    'F1 score',
    'Mean SOV',
    'Detected segments',
]

### Show results

#### Cross-validation

In [10]:
# Show the cross-validation benchmark table (one row per predictor label).
results_cv

Unnamed: 0,Precision,Sensitivity,F1 score,Mean SOV,Detected segments
CCHMM_PROF,0.094,0.807,0.168,0.773,0.815
DeepCoil,0.38,0.497,0.43,0.427,0.567
Marcoil_10,0.285,0.396,0.331,0.288,0.31
Marcoil_50,0.395,0.282,0.329,0.188,0.203
Marcoil_90,0.481,0.189,0.271,0.117,0.127
PCoils_14,0.377,0.251,0.301,0.201,0.235
PCoils_21,0.374,0.368,0.371,0.278,0.296
PCoils_28,0.359,0.402,0.38,0.284,0.302


#### Test set

In [11]:
# Show the test-set benchmark table (one row per predictor label).
results_test

Unnamed: 0,Precision,Sensitivity,F1 score,Mean SOV,Detected segments
CCHMM_PROF,0.078,0.828,0.142,0.803,0.836
DeepCoil,0.383,0.455,0.416,0.414,0.503
Marcoil_10,0.285,0.383,0.327,0.285,0.3
Marcoil_50,0.379,0.259,0.307,0.173,0.19
Marcoil_90,0.452,0.162,0.238,0.105,0.116
PCoils_14,0.351,0.237,0.283,0.189,0.23
PCoils_21,0.35,0.357,0.354,0.27,0.294
PCoils_28,0.343,0.404,0.371,0.281,0.306
