In [1]:
import pandas as pd
import numpy as np
from deepblast.trainer import LightningAligner
import matplotlib.pyplot as plt
import seaborn as sns
from deepblast.dataset.utils import states2edges
from deepblast.score import (alignment_score, score_local_identity, 
                             roc_edges_kernel_identity, alignment_score_kernel,
                             score_global_alignment, score_local_alignment
                            )
from scipy.stats import sem
%matplotlib inline

In [2]:
!ls -lhrt ../lightning_logs/version_174156/checkpoints

total 590M
-rw-r--r-- 1 juermieboop juermieboop 590M Aug 18 12:15 'epoch=6.ckpt'


TODO: Have the pretrained model be downloadable

TODO: Have the malisam and malidup datasets downloadable as well

In [3]:
#ckpt_path = '../../deep_blast_training/lightning_logs/version_174156/checkpoints/epoch=6.ckpt'
ckpt_path = '../../deep_blast_training/lightning_logs/version_174137/checkpoints/epoch=9.ckpt'

model = LightningAligner.load_from_checkpoint(ckpt_path).cuda()

In [4]:
from deepblast.dataset.parse_mali import read_mali, read_mali_mammoth

import seaborn as sns
from deepblast.dataset.utils import states2matrix
from deepblast.dataset.utils import state_f, tmstate_f, revstate_f
from deepblast.score import alignment_score_kernel

In [5]:
from Bio import pairwise2
from Bio.pairwise2 import format_alignment
from Bio.SubsMat import MatrixInfo as matlist
from deepblast.dataset.utils import state_f
matrix = matlist.blosum62
def nw_f(x, y):
    alignments = pairwise2.align.globaldx(x, y, matrix)
    states = list(map(state_f, zip(list(alignments[0].seqA), list(alignments[0].seqB))))
    states = ''.join(list(map(revstate_f, states)))
    return states

In [6]:
malidup_root = '../data/structure_benchmarks/malidup'
malisam_root = '../data/structure_benchmarks/malisam'

mammoth_dup_root = '../data/structure_benchmarks/mammoth/malidup'
mammoth_sam_root = '../data/structure_benchmarks/mammoth/malisam'

mali_root = malisam_root
mali_mammoth = mammoth_sam_root
benchmark = 'malisam'

# Malidup / Malisam benchmark

In [7]:
# read in manual and Mammoth
mammoth = read_mali_mammoth(mali_mammoth, report_ids=True)
manual = read_mali(mali_root, tool='manual', report_ids=True)
res = pd.merge(manual, mammoth, left_on='dir', right_on='pdb')
res = res[['0_x', '1_x', '2_x', '2_y']]
res = res.rename(columns={'0_x' : 0, '1_x' : 1, '2_x' : 'manual', '2_y': 'mammoth'})
res = res[~res.set_index([0, 1]).index.duplicated(keep='first')]
res = res.set_index([0, 1])

# read in TMalign, Fast and Dali 
fast   = read_mali(mali_root, tool='fast')
tm     = read_mali(mali_root, tool='tm')
dali   = read_mali(mali_root, tool='dali')
fast = fast[~fast.set_index([0, 1]).index.duplicated(keep='first')]
tm = tm[~tm.set_index([0, 1]).index.duplicated(keep='first')]
dali = dali[~dali.set_index([0, 1]).index.duplicated(keep='first')]

# build multi-indexes
tm_ = tm.set_index([0, 1]).rename(columns={2: 'tm'})
fast_ = fast.set_index([0, 1]).rename(columns={2: 'fast'})
dali_ = dali.set_index([0, 1]).rename(columns={2: 'dali'})

# merge together
res = pd.merge(res, fast_, left_index=True, right_index=True)
res = pd.merge(res, tm_, left_index=True, right_index=True)
res = pd.merge(res, dali_, left_index=True, right_index=True)
#res = rename ['fast', 'tm', 'dali']

# Needleman-wunsch and deepblast
nw     = res.reset_index().apply(lambda x: nw_f(x[0], x[1]), axis=1)
dp     = res.reset_index().apply(lambda x: model.align(x[1], x[0]), axis=1)
res['needleman-wunsch'] = nw.values
res['deepblast'] = dp.values

In [14]:
# Saving the alignment
manual = manual.set_index([0, 1])
pd.merge(manual[['pdb', 'dir']], res, left_index=True, right_index=True).to_csv('malisam_alignments.csv')

KeyError: 'None of [0, 1] are in the columns'

Parse BLAST and HMMER

In [9]:
# Blast and HMMER
from deepblast.dataset.parse_hmmer import get_hmmer_alignments
from deepblast.dataset.parse_blast import get_blast_alignments

hmmer_path = f'../results/hmmer/{benchmark}-hmm.out'
blast_path = f'../results/blast/{benchmark}_blast_alignments.xml'

hmmer_df = get_hmmer_alignments(hmmer_path, mali_root)
blast_df = get_blast_alignments(blast_path, mali_root)

ref = read_mali(mali_root, tool='manual', report_ids=True)
ref = ref.set_index(['query_id', 'hit_id'])

ref_hmmer = pd.merge(ref, hmmer_df.set_index(['query_id', 'hit_id']), 
                     left_index=True, right_index=True, how='left')
ref_hmmer = ref_hmmer.rename(columns={2: 'manual'})
ref_blast = pd.merge(ref, blast_df.set_index(['query_id', 'hit_id']), 
                     left_index=True, right_index=True, how='left')
ref_blast = ref_blast.rename(columns={2: 'manual'})

# clean up types

ref_blast = ref_blast.fillna(-1)
ref_blast['query_start'] = ref_blast['query_start'].astype(np.int64)
ref_blast['hit_start'] = ref_blast['hit_start'].astype(np.int64)

ref_hmmer = ref_hmmer.fillna(-1)
ref_hmmer['query_start'] = ref_hmmer['query_start'].astype(np.int64)
ref_hmmer['hit_start'] = ref_hmmer['hit_start'].astype(np.int64)

In [10]:
# There are some shit mammoth alignments, filter those out
idx = res['mammoth'].apply(len) > 0
res = res.loc[idx]

Define scoring functions with predefined kernels, and parallelize with Dask.

Obtain local and global alignment stats

In [11]:
import warnings
warnings.filterwarnings("ignore")
k = [1, 3, 5, 10]

# Obtain local alignment stats in parallel
blast_stats = score_local_alignment(ref_blast.reset_index(), k, n_cores=30)
hmmer_stats = score_local_alignment(ref_hmmer.reset_index(), k, n_cores=30)

# Global alignments
fast_stats = score_global_alignment(res.reset_index(), 'fast', k, n_cores=30)
tm_stats = score_global_alignment(res.reset_index(), 'tm', k, n_cores=30)
dali_stats = score_global_alignment(res.reset_index(), 'dali', k, n_cores=30)
deep_stats = score_global_alignment(res.reset_index(), 'deepblast', k, n_cores=30)
nw_stats = score_global_alignment(res.reset_index(), 'needleman-wunsch', k, n_cores=30)

In [None]:
mam_stats = score_global_alignment(res.reset_index(), 'mammoth', k, n_cores=30)

Perform some dataset massaging to plot.

In [None]:
fast_stats['tool'] = 'fast'
tm_stats['tool'] = 'tm'
dali_stats['tool'] = 'dali'
deep_stats['tool'] = 'deepblast'
nw_stats['tool'] = 'nw'
blast_stats['tool'] = 'blast'
hmmer_stats['tool'] = 'hmmer'
mam_stats['tool'] = 'mammoth'


# add additional metadata regarding the pdb files
manual = read_mali(mali_root, tool='manual', report_ids=True)
fast_stats['pdb'] = manual['pdb']
tm_stats['pdb'] = manual['pdb']
dali_stats['pdb'] = manual['pdb']
deep_stats['pdb'] = manual['pdb']
nw_stats['pdb'] = manual['pdb']
blast_stats['pdb'] = manual['pdb']
hmmer_stats['pdb'] = manual['pdb']
mam_stats['pdb'] = manual['pdb']


# combine stats
data = pd.concat((fast_stats, tm_stats, dali_stats, deep_stats, nw_stats, blast_stats, hmmer_stats, mam_stats))

# save file locally for later
data.to_csv(f'{benchmark}_perc_id.csv')

# melt dataframe to make it easier to plot
data = pd.melt(data, id_vars=['tool', 'pdb'], var_name='kernel_width')

Now, let's plot the benchmark results.

In [None]:
# Run if need to regenerate figures from existing data
benchmark = 'malidup'
data = pd.read_csv('malidup_perc_id.csv', index_col=0)
data = pd.melt(data, id_vars=['tool', 'pdb'], var_name='kernel_width')

In [None]:
lookup = {'fast' : 'Fast', 'tm' : 'TM-align', 'dali' : 'Dali', 
 'mammoth': 'Mammoth-local', 'deepblast' : 'DeepBLAST', 
 'nw' : 'Needleman-Wunsch', 'blast' : 'BLAST', 'hmmer' : 'HMMER'}

data['Method'] = data.apply(lambda x: lookup[x['tool']], axis=1)
def structure_f(x):
    return x in {'Fast', 'TM-align', 'Dali', 'Mammoth-local'}
        
data['Structural'] = data['Method'].apply(structure_f)
green = sns.light_palette("seagreen")
purple = sns.dark_palette("blue", reverse=True)
fig, ax = plt.subplots()
palette = {
    'Fast' : '#00838f',
    'TM-align' : '#4dd0e1',
    'Dali' : '#4db6ac',
    'Mammoth-local' : '#81c784',
    'DeepBLAST': 'r',
    'Needleman-Wunsch' : '#ad1457', 
    'BLAST' : '#6a1b9a',
    'HMMER' : '#283593'
}

markers = {True : '^', False : 'o'}

palette = sns.color_palette("Set3", 8)
sns.lineplot(data=data, x='kernel_width', y='value', hue='Method', style='Structural',
             ax=ax,  markers=markers, palette=palette, 
             hue_order=['Fast', 'TM-align', 'Dali', 'Mammoth-local', 
                        'DeepBLAST', 'Needleman-Wunsch', 'BLAST', 'HMMER'])

ax.set_ylabel('Percent Identity')

box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])

ax.set_ylabel('True Positive Rate', fontsize=18)
ax.set_xlabel('Kernel Width', fontsize=18)
ax.tick_params(labelsize=14)

# Put a legend to the right of the current axis
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), prop={'size': 16})

for legobj in ax.legend_.legendHandles:
    legobj.set_linewidth(5.0)
    
fig.savefig(f'{benchmark}.png', bbox_inches='tight')

Other statistics to plot

In [None]:
columns = [
        'val_tp', 'val_fp', 'val_fn', 'val_perc_id',
        'val_ppv', 'val_fnr', 'val_fdr'
]

fast_stats = pd.DataFrame(list(res.apply(lambda x: alignment_score(x['manual'], x['fast']), axis=1).values), 
                          columns=columns)
tm_stats   = pd.DataFrame(list(res.apply(lambda x: alignment_score(x['manual'], x['tm']), axis=1).values), 
                          columns=columns)
dali_stats = pd.DataFrame(list(res.apply(lambda x: alignment_score(x['manual'], x['dali']), axis=1).values), 
                          columns=columns)
mam_stats = pd.DataFrame(list(res.apply(lambda x: alignment_score(x['manual'], x['mammoth']), axis=1).values), 
                          columns=columns)
deep_stats = pd.DataFrame(list(res.apply(lambda x: alignment_score(x['manual'], x['deepblast']), axis=1).values), 
                          columns=columns)
nw_stats = pd.DataFrame(list(res.apply(lambda x: alignment_score(x['manual'], x['needleman-wunsch']), axis=1).values), 
                          columns=columns)

In [None]:
def local_alignment_score(x):
    if x['aln'] == -1:
        n_matches = np.sum(np.array(list(x['manual'])) == ':')
        return 0, np.nan, n_matches, 0, 0, 1, 0
    else:
        return alignment_score(x['manual'], x['aln'])

In [None]:
blast_stats = pd.DataFrame(list(ref_blast.apply(local_alignment_score, axis=1).values), 
                           columns=columns)
hmmer_stats = pd.DataFrame(list(ref_hmmer.apply(local_alignment_score, axis=1).values), 
                           columns=columns)

In [None]:
fast_stats['tool'] = 'Fast'
tm_stats['tool'] = 'TM-align'
dali_stats['tool'] = 'Dali'
mam_stats['tool'] = 'Mammoth-local'
deep_stats['tool'] = 'DeepBLAST'
nw_stats['tool'] = 'Needleman-Wunsch'
blast_stats['tool'] = 'BLAST'
hmmer_stats['tool'] = 'HMMER'

data = pd.concat((fast_stats, tm_stats, dali_stats, mam_stats, deep_stats, nw_stats, blast_stats, hmmer_stats))

data.to_csv(f'{benchmark}_allstats.csv')

In [None]:
# read in locally to save time
data = pd.read_csv(f'{benchmark}_allstats.csv', index_col=0)

In [None]:
data = data.dropna()
data['precision'] = data.apply(lambda x: x['val_tp'] / (x['val_tp'] + x['val_fp']), axis=1)
data['recall'] = data.apply(lambda x: x['val_tp'] / (x['val_tp'] + x['val_fn']), axis=1)
data['f1'] = data.apply(lambda x: 2 / ((1 / (x['precision'] + 1e-6)) + (1 / (x['recall'] + 1e-6))), axis=1)

In [None]:
data

In [None]:
palette = sns.color_palette("Set3", 8)
sns.boxplot(data=data, x='recall', y='tool', orient='h', palette=palette)
locs, labels = plt.xticks()
_ = plt.setp(labels, fontsize=16)

locs, labels = plt.yticks()
_ = plt.setp(labels, fontsize=16)

plt.xlabel('Recall', fontsize=18)
plt.ylabel('')
plt.tight_layout()
plt.savefig(f'{benchmark}_recall.png')

In [None]:
sns.boxplot(data=data, x='precision', y='tool', orient='h', palette=palette)
locs, labels = plt.xticks()
_ = plt.setp(labels, fontsize=16)

locs, labels = plt.yticks()
_ = plt.setp(labels, fontsize=16)

plt.xlabel('Precision', fontsize=18)
plt.ylabel('')
plt.tight_layout()
plt.savefig(f'{benchmark}_precision.png')

In [None]:
sns.boxplot(data=data, x='f1', y='tool', orient='h', palette=palette)
locs, labels = plt.xticks()
_ = plt.setp(labels, fontsize=16)

locs, labels = plt.yticks()
_ = plt.setp(labels, fontsize=16)

plt.xlabel('F1 score', fontsize=18)
plt.ylabel('')
plt.tight_layout()
plt.savefig(f'{benchmark}_f1_score.png')

In [None]:
green = sns.light_palette("seagreen")
purple = sns.dark_palette("blue", reverse=True)
fig, ax = plt.subplots()
palette = {
    'fast' : '#00838f',
    'tm' : '#4dd0e1',
    'dali' : '#4db6ac',
    'mammoth' : '#81c784',
    'deepblast': 'r',
    'nw' : '#ad1457', 
    'blast' : '#6a1b9a',
    'hmmer' : '#283593'
}
sns.lineplot(data=data, x='precision', y='recall', hue='tool', ax=ax,
             palette=palette, hue_order=['fast', 'tm', 'dali', 'mammoth', 'deepblast', 'nw'])

box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])

# Put a legend to the right of the current axis
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
fig.savefig(f'{benchmark}_pr_curve.png')

In [None]:
print(data.groupby(['tool']).mean()['f1'].to_latex())

In [None]:
print(data.groupby(['tool']).agg(sem)['f1'].to_latex())

In [None]:
print(data.groupby(['tool']).mean()['precision'].to_latex())

In [None]:
print(data.groupby(['tool']).agg(sem)['precision'].to_latex())

In [None]:
print(data.groupby(['tool']).mean()['recall'].to_latex())

In [None]:
print(data.groupby(['tool']).agg(sem)['recall'].to_latex())

# Other scratch work

In [None]:
columns = [
        'val_tp', 'val_fp', 'val_fn', 'val_perc_id',
        'val_ppv', 'val_fnr', 'val_fdr'
]

fast_stats = pd.DataFrame(list(res.apply(lambda x: alignment_score(x['manual'], x['fast']), axis=1).values), 
                          columns=columns)
tm_stats   = pd.DataFrame(list(res.apply(lambda x: alignment_score(x['manual'], x['tm']), axis=1).values), 
                          columns=columns)
dali_stats = pd.DataFrame(list(res.apply(lambda x: alignment_score(x['manual'], x['dali']), axis=1).values), 
                          columns=columns)
deep_stats = pd.DataFrame(list(res.apply(lambda x: alignment_score(x['manual'], x['deepblast']), axis=1).values), 
                          columns=columns)
nw_stats = pd.DataFrame(list(res.apply(lambda x: alignment_score(x['manual'], x['needleman-wunsch']), axis=1).values), 
                          columns=columns)

In [None]:

sns.distplot(fast_stats['val_perc_id'], label='fast')
sns.distplot(tm_stats['val_perc_id'], label='tm')
sns.distplot(dali_stats['val_perc_id'], label='dali')
sns.distplot(deep_stats['val_perc_id'], label='deepblast')  # meh
sns.distplot(nw_stats['val_perc_id'], label='needleman-wunsch')  # meh

plt.legend()

In [None]:
sns.distplot(deep_stats['val_perc_id'], label='deepblast')
sns.distplot(nw_stats['val_perc_id'], label='needleman-wunsch')
plt.legend()

In [None]:
box_df = pd.DataFrame({
    'fast': fast_stats['val_perc_id'],
    'tm': dali_stats['val_perc_id'],
    'dali': tm_stats['val_perc_id'],
    'deepblast': deep_stats['val_perc_id'],
    'nw': nw_stats['val_perc_id']

})
box_df = box_df.melt(var_name='tool', value_name='perc_id')
sns.boxplot(x='tool', y='perc_id', data=box_df)

Percent identity vs kernel

In [None]:
fast_stats['tool'] = 'fast'
tm_stats['tool'] = 'tm'
dali_stats['tool'] = 'dali'
deep_stats['tool'] = 'deepblast'
nw_stats['tool'] = 'nw'
data = pd.concat((fast_stats, tm_stats, dali_stats, deep_stats, nw_stats))

In [None]:
data = pd.melt(data, id_vars=['tool'], var_name='kernel_width')

In [None]:
sns.boxplot(data=data, x='kernel_width', y='value', hue='tool') 

In [None]:
data = pd.read_csv('malidup_perc_id.csv')

In [None]:
fig, ax = plt.subplots()
sns.lineplot(data=data, x='kernel_width', y='value', hue='tool', ax=ax)

ax.set_ylabel('Percent Identity')

# Shrink current axis by 20%
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])

# Put a legend to the right of the current axis
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

# Malisam benchmark

In [None]:
manual = read_mali(malisam_root, tool='manual')
fast   = read_mali(malisam_root, tool='fast')
tm     = read_mali(malisam_root, tool='tm')
dali   = read_mali(malisam_root, tool='dali')

manual = manual[~manual.set_index([0, 1]).index.duplicated(keep='first')]
fast = fast[~fast.set_index([0, 1]).index.duplicated(keep='first')]
tm = tm[~tm.set_index([0, 1]).index.duplicated(keep='first')]
dali = dali[~dali.set_index([0, 1]).index.duplicated(keep='first')]

# build multi-indexes
manual_ = manual.set_index([0, 1])
tm_ = tm.set_index([0, 1])
fast_ = fast.set_index([0, 1])
dali_ = dali.set_index([0, 1])

# merge together
res = pd.merge(manual_, fast_, left_index=True, right_index=True)
res = pd.merge(res, tm_, left_index=True, right_index=True)
res = pd.merge(res, dali_, left_index=True, right_index=True)
res.columns = ['manual', 'fast', 'tm', 'dali']

In [None]:
nw     = manual_.loc[res.index].reset_index().apply(lambda x: nw_f(x[1], x[0]), axis=1)
dp     = manual.apply(lambda x: model.align(x[1], x[0])[0], axis=1)

In [None]:
# build multi-indexes
manual = manual.set_index([0, 1])
tm = tm.set_index([0, 1])
fast = fast.set_index([0, 1])
dali = dali.set_index([0, 1])
tm = tm[~tm.index.duplicated(keep='first')]

# merge together
res = pd.merge(manual, fast, left_index=True, right_index=True)
res = pd.merge(res, tm, left_index=True, right_index=True)
res = pd.merge(res, dali, left_index=True, right_index=True)
res.columns = ['manual', 'fast', 'tm', 'dali']
res = res.dropna()
res['needleman-wunsch'] = nw.values
res['deepblast'] = dp.values

In [None]:
from deepblast.score import alignment_score
columns = [
        'val_tp', 'val_fp', 'val_fn', 'val_perc_id',
        'val_ppv', 'val_fnr', 'val_fdr'
]
    
fast_stats = pd.DataFrame(list(res.apply(lambda x: alignment_score(x['manual'], x['fast']), axis=1).values), 
                          columns=columns)
tm_stats   = pd.DataFrame(list(res.apply(lambda x: alignment_score(x['manual'], x['tm']), axis=1).values), 
                          columns=columns)
dali_stats = pd.DataFrame(list(res.apply(lambda x: alignment_score(x['manual'], x['dali']), axis=1).values), 
                          columns=columns)
deep_stats = pd.DataFrame(list(res.apply(lambda x: alignment_score(x['manual'], x['deepblast']), axis=1).values), 
                          columns=columns)
nw_stats = pd.DataFrame(list(res.apply(lambda x: alignment_score(x['manual'], x['needleman-wunsch']), axis=1).values), 
                          columns=columns)

In [None]:
sns.distplot(fast_stats['val_perc_id'], label='fast')
sns.distplot(tm_stats['val_perc_id'], label='tm')
sns.distplot(dali_stats['val_perc_id'], label='dali')
sns.distplot(deep_stats['val_perc_id'], label='deepblast')  # meh
sns.distplot(nw_stats['val_perc_id'], label='needleman-wunsch')  # meh

plt.legend()

In [None]:
sns.distplot(deep_stats['val_perc_id'], label='deepblast')
sns.distplot(nw_stats['val_perc_id'], label='needleman-wunsch')
plt.legend()

In [None]:
box_df = pd.DataFrame({
    'fast': fast_stats['val_perc_id'],
    'tm': dali_stats['val_perc_id'],
    'dali': tm_stats['val_perc_id'],
    'deepblast': deep_stats['val_perc_id'],
    'nw': nw_stats['val_perc_id']

})
box_df = box_df.melt(var_name='tool', value_name='perc_id')
sns.boxplot(x='tool', y='perc_id', data=box_df)


In [None]:
plt.scatter(dali_stats['val_fdr'], dali_stats['val_fnr'], label='dali')
plt.scatter(tm_stats['val_fdr'], tm_stats['val_fnr'], label='tm')
plt.scatter(fast_stats['val_fdr'], fast_stats['val_fnr'], label='fast')
plt.scatter(deep_stats['val_fdr'], deep_stats['val_fnr'], label='deepblast')

plt.legend()

plt.xlabel('FDR')
plt.ylabel('FNR')

# Debugging

In [None]:
i = 2
x = res.index[i]
pred, A = model.align(x[1], x[0])

fig, ax = plt.subplots(1, 3, figsize=(15, 5))

truth = res.iloc[i]['manual']

columns = [
        'val_tp', 'val_fp', 'val_fn', 'val_perc_id',
        'val_ppv', 'val_fnr', 'val_fdr'
]
print(pd.Series(alignment_score(truth, pred), index=columns))

sns.heatmap(states2matrix(list(map(tmstate_f, truth))), ax=ax[0])
sns.heatmap(states2matrix(list(map(tmstate_f, pred))), ax=ax[1])
sns.heatmap(A.cpu().detach().numpy().squeeze(), ax=ax[2], robust=True)

In [None]:
from deepblast.dataset.utils import states2alignment
a, b = states2alignment(np.array(list(map(tmstate_f, truth))), x[1], x[0])
print('Ground Truth')
print(a)
print(b)
a, b = states2alignment(np.array(list(map(tmstate_f, pred))), x[1], x[0])
print('Prediction')
print(a)
print(b)

In [None]:
columns = [
        'val_tp', 'val_fp', 'val_fn', 'val_perc_id',
        'val_ppv', 'val_fnr', 'val_fdr'
]

print(pd.Series(alignment_score(truth, pred), index=columns))

In [None]:
deep_stats

In [None]:
sns.distplot(deep_stats['val_perc_id'], label='deepblast')
sns.distplot(nw_stats['val_perc_id'], label='needleman-wunsch')
plt.legend()

In [None]:
sns.distplot(nw_stats['val_perc_id'])

In [None]:
x

In [None]:
res