## trfermikit is more sensitive than manta at discovering small DELs

In [17]:
import plotly.graph_objects as go
import json

def _ratio_or_none(numerator, denominator):
    """Return numerator/denominator as a float, or None when the denominator is zero."""
    try:
        return numerator/float(denominator)
    except ZeroDivisionError:
        return None


def get_precision_recall(experiment, caller, svtype=None):
    """Read a truvari summary for one experiment and return (precision, recall).

    Args:
        experiment: name of the experiment directory under ``data/``.
        caller: caller label used in the truvari directory name
            (e.g. 'manta' or 'trfermikit.unitigSupport.thinned').
        svtype: SV type label embedded in the truvari directory name
            (e.g. 'DEL', 'INS'). When omitted, the summary is looked up in
            ``truvari-pacbio-<caller>``, i.e. the directory naming used by the
            bedtools-based cells of this notebook.
            NOTE(review): confirm that layout still exists for the experiments
            being analysed.

    Returns:
        A ``(precision, recall)`` tuple computed from the 'TP-base', 'FP' and
        'FN' counts of the summary. Either value is None when its denominator
        is zero (no calls made, or no events in the truth set).
    """
    if svtype is None:
        summary_path = 'data/{}/truvari-pacbio-{}/summary.txt'.format(experiment, caller)
    else:
        summary_path = 'data/{}/truvari-{}-pacbio-{}/summary.txt'.format(experiment, svtype, caller)

    # The summary file is JSON despite its .txt extension.
    with open(summary_path) as json_file:
        d = json.load(json_file)

    true_positives = d['TP-base']
    precision = _ratio_or_none(true_positives, true_positives + d['FP'])
    # Guard recall the same way as precision: an empty truth set must not crash the sweep.
    recall = _ratio_or_none(true_positives, true_positives + d['FN'])
    return precision, recall

def get_config(experiment):
    """Load and return the parsed ``data/<experiment>/config.json`` of one experiment."""
    config_path = 'data/{}/config.json'.format(experiment)
    with open(config_path) as handle:
        parsed = json.load(handle)
    return parsed

def get_precisions_recalls_configs(caller, svtype):   
    experiments = ! ls data

    precisions = []
    recalls = []
    configs = []

    for experiment in experiments: 
        precision, recall = get_precision_recall(experiment, caller, svtype)
        precisions.append(precision) 
        recalls.append(recall) 
        config = get_config(experiment)
        configs.append(config)
        
    return precisions, recalls, configs

def format_config(config, outer_key_top=None, inner_key_top=None):
    """Render a nested config dict as an HTML snippet (``<br>``-separated) for hover text.

    When both ``outer_key_top`` and ``inner_key_top`` are given, that setting is
    printed first as ``outer.inner: value`` and left out of the per-section
    listing below it. The 'general' section is never listed.
    """
    pieces = []

    if outer_key_top and inner_key_top:
        highlighted_value = config[outer_key_top][inner_key_top]
        pieces.append('{}.{}: {}<br><br>'.format(outer_key_top, inner_key_top, highlighted_value))

    for section, settings in config.items():
        if section == 'general':
            continue
        pieces.append('{}: <br>'.format(section))
        for name, setting in settings.items():
            already_shown = section == outer_key_top and name == inner_key_top
            if not already_shown:
                pieces.append('  {}: {}<br>'.format(name, setting))

    return ''.join(pieces)

def initialize_colors(configs, outer_key, inner_key):
    """Map each distinct value of ``config[outer_key][inner_key]`` to a color name.

    Args:
        configs: iterable of nested config dicts.
        outer_key: section name to look up in each config.
        inner_key: setting name to look up inside that section.

    Returns:
        A dict from setting value to color name. Colors are assigned in
        first-seen order, so the same list of configs always produces the same
        mapping. With more distinct values than palette entries, colors are
        reused cyclically instead of raising IndexError.
    """
    palette = ['red', 'green', 'blue', 'yellow', 'magenta', 'cyan']
    # dict.fromkeys de-duplicates while keeping insertion order; a set would make
    # the value -> color assignment depend on hash ordering and vary between runs.
    unique_values = dict.fromkeys(config[outer_key][inner_key] for config in configs)
    return {
        value: palette[index % len(palette)]
        for index, value in enumerate(unique_values)
    }

def sort_colors(colors):
    """Return ``(int(value), color)`` pairs from ``colors``, sorted by the integer value."""
    by_number = {}
    for raw_value, color in colors.items():
        by_number[int(raw_value)] = color
    return sorted(by_number.items())

def precision_recall_graph(outer_key, inner_key, commentary, svtype, marker_size=10, line_width=1):
    """Plot precision against recall for trfermikit and manta across all experiments.

    Each experiment contributes one circle (trfermikit) and one square (manta),
    colored by the experiment's ``config[outer_key][inner_key]`` value. Extra
    point-less traces are added purely to populate the legend. The figure is
    saved via ``write_image`` under 'images/precision_recall' and then shown.

    Args:
        outer_key: config section of the setting used for coloring.
        inner_key: config setting (within ``outer_key``) used for coloring.
        commentary: text placed under ``svtype`` in the figure title.
        svtype: SV type label forwarded to the data loaders and the image name.
        marker_size: marker size for all traces.
        line_width: width of the black marker outline.
    """
    trfermikit_precisions, trfermikit_recalls, trfermikit_configs = get_precisions_recalls_configs(
        'trfermikit.unitigSupport.thinned', svtype
    )
    manta_precisions, manta_recalls, manta_configs = get_precisions_recalls_configs('manta', svtype)

    # The value -> color mapping is derived from the trfermikit configs and reused for manta.
    colors = initialize_colors(trfermikit_configs, outer_key, inner_key)

    def outlined_marker(symbol, color):
        # Marker with a black outline; `color` may be one color or a per-point list.
        return dict(
            symbol=symbol,
            size=marker_size,
            line=dict(width=line_width, color='black'),
            color=color
        )

    def caller_trace(name, symbol, recalls, precisions, configs):
        # One data trace per caller; hidden from the legend (legend entries are added below).
        return go.Scatter(
            x=recalls,
            y=precisions,
            mode='markers',
            name=name,
            showlegend=False,
            marker=outlined_marker(
                symbol,
                [colors[config[outer_key][inner_key]] for config in configs]
            ),
            hovertemplate ='%{text}',
            text=[format_config(config, outer_key, inner_key) for config in configs]
        )

    fig = go.Figure(data=caller_trace(
        'trfermikit', 'circle', trfermikit_recalls, trfermikit_precisions, trfermikit_configs
    ))
    fig.update_xaxes(title_text='recall')
    fig.update_yaxes(title_text='precision')

    fig.add_trace(caller_trace('manta', 'square', manta_recalls, manta_precisions, manta_configs))

    # Legend-only entries: which symbol belongs to which caller.
    for caller, symbol in (('trfermikit', 'circle'), ('manta', 'square')):
        fig.add_trace(go.Scatter(
            y=[None],
            mode='markers',
            marker=outlined_marker(symbol, 'white'),
            name=caller
        ))

    # Legend-only entries: which color belongs to which setting value.
    for value, color in sort_colors(colors):
        legend_marker = dict(symbol='circle', size=marker_size, line=None, color=color)
        fig.add_trace(go.Scatter(
            y=[None],
            mode='markers',
            marker=legend_marker,
            name='{}={}'.format(inner_key, value)
        ))

    fig.update_layout(title={
        'text': svtype + '<br>' + commentary,
        'y':0.9,
        'x':0.5,
    })

    write_image(
        fig,
        image_directory='images/precision_recall',
        inner_key=inner_key,
        svtype=svtype
    )

    fig.show()

def write_image(fig, image_directory, inner_key, svtype=None):
    """Save ``fig`` as an SVG inside ``image_directory``, creating the directory if needed.

    Args:
        fig: object exposing ``write_image(path)`` (a plotly figure in this file).
        image_directory: destination directory; missing parents are created.
        inner_key: config setting name used as the file-name stem.
        svtype: optional SV type label. When given, the file is named
            ``<inner_key>-<svtype>.svg``; when omitted, ``<inner_key>.svg``.
            It is optional because some callers in this file plot data that is
            not split by SV type and do not pass it.
    """
    import pathlib

    # exist_ok=True already covers the "directory exists" case, so no prior check is needed.
    pathlib.Path(image_directory).mkdir(parents=True, exist_ok=True)

    if svtype is None:
        file_name = "{}.svg".format(inner_key)
    else:
        file_name = "{}-{}.svg".format(inner_key, svtype)
    fig.write_image("{}/{}".format(image_directory, file_name))
    
def precision_recall_graphs_DEL():
    """Draw one deletion (DEL) precision/recall figure per highlighted filterCalls setting."""
    min_sv_size_commentary = (
        'minSVSize moves precision and recall in same direction;<br>'
        'trfermikit dominates manta only when small SVs are included:'
    )
    block_length_commentary = 'minUnitigBlockLength moves precision and recall in opposite directions:'

    panels = (
        ('filterCalls', 'minSVSize', min_sv_size_commentary),
        ('filterCalls', 'minUnitigBlockLength', block_length_commentary),
    )
    for outer_key, inner_key, commentary in panels:
        precision_recall_graph(outer_key, inner_key, commentary, svtype='DEL')

def precision_recall_graphs_INS():
    """Draw one insertion (INS) precision/recall figure per highlighted filterCalls setting.

    No commentary is attached to the insertion figures (empty string).
    """
    panels = (
        ('filterCalls', 'minSVSize', ''),
        ('filterCalls', 'minUnitigBlockLength', ''),
    )
    for outer_key, inner_key, commentary in panels:
        precision_recall_graph(outer_key, inner_key, commentary, svtype='INS')


# Render (and save) the precision/recall figures: deletions first, then insertions.
precision_recall_graphs_DEL()
precision_recall_graphs_INS()

## trfermikit is complementary to manta

In [None]:
def list_to_value(list_):
    """Convert a one-element sequence (e.g. captured ``wc -l`` output) to an int.

    Raises ValueError when ``list_`` does not hold exactly one element or the
    element is not an integer literal.
    """
    [sole_entry] = list_
    return int(sole_entry)

# Passed to bedtools as `-f` (together with `-r`) by the intersect/subtract helpers below.
overlap_fraction = 0.9
# Installation directory; the helpers below run `$root/bin/bedtools`.
# NOTE(review): absolute, cluster-specific path — must be adjusted on any other machine.
root = "/scratch/ucgd/lustre-work/quinlan/u6018199/chaisson_2019/analysis/locally_assemble_short_reads/trfermikit"

def tr_fermikit_TP_intersect_manta(manta_callset, output): 
    """Count lines reported by ``bedtools intersect`` of a manta callset against trfermikit TPs.

    Runs (through the IPython shell) ``bedtools intersect -u -wb -f <overlap_fraction> -r``
    with the manta callset VCF ``<output>/truvari-pacbio-manta/<manta_callset>.vcf`` as ``-a``
    and trfermikit's ``tp-base.vcf`` as ``-b``, pipes the result to ``wc -l`` and
    returns that line count as an int.

    Args:
        manta_callset: file stem of the manta truvari VCF (callers in this file pass 'tp-base' or 'fn').
        output: experiment directory containing the truvari output directories.
    """
    list_ = ! $root/bin/bedtools intersect -u -wb -f $overlap_fraction -r \
      -a $output/truvari-pacbio-manta/$manta_callset".vcf" \
      -b $output/truvari-pacbio-trfermikit.unitigSupport.thinned/tp-base.vcf \
      | wc -l 
    return list_to_value(list_)

def tr_fermikit_TP_less_manta(manta_callset, output):
    """Count lines left after ``bedtools subtract`` removes a manta callset from trfermikit TPs.

    Runs (through the IPython shell) ``bedtools subtract -A -f <overlap_fraction> -r``
    with trfermikit's ``tp-base.vcf`` as ``-a`` and the manta callset VCF
    ``<output>/truvari-pacbio-manta/<manta_callset>.vcf`` as ``-b``, pipes the result
    to ``wc -l`` and returns that line count as an int.

    Args:
        manta_callset: file stem of the manta truvari VCF.
        output: experiment directory containing the truvari output directories.
    """
    list_ = ! $root/bin/bedtools subtract -A -f $overlap_fraction -r \
      -a $output/truvari-pacbio-trfermikit.unitigSupport.thinned/tp-base.vcf \
      -b $output/truvari-pacbio-manta/$manta_callset".vcf" \
      | wc -l
    return list_to_value(list_)

def manta_less_tr_fermikit_TP(manta_callset, output):
    """Count lines left after ``bedtools subtract`` removes trfermikit TPs from a manta callset.

    Runs (through the IPython shell) ``bedtools subtract -A -f <overlap_fraction> -r``
    with the manta callset VCF ``<output>/truvari-pacbio-manta/<manta_callset>.vcf`` as ``-a``
    and trfermikit's ``tp-base.vcf`` as ``-b``, pipes the result to ``wc -l`` and
    returns that line count as an int.

    Args:
        manta_callset: file stem of the manta truvari VCF (callers in this file pass 'tp-base' or 'fn').
        output: experiment directory containing the truvari output directories.
    """
    list_ = ! $root/bin/bedtools subtract -A -f $overlap_fraction -r \
      -a $output/truvari-pacbio-manta/$manta_callset".vcf" \
      -b $output/truvari-pacbio-trfermikit.unitigSupport.thinned/tp-base.vcf \
    | wc -l 
    return list_to_value(list_)

def check(manta_callset):
    """Validate that ``manta_callset`` names a supported manta truvari callset.

    Args:
        manta_callset: expected to be 'fn' or 'tp-base'.

    Raises:
        ValueError: for any other value. ValueError is a subclass of Exception,
            so code that caught the previous bare Exception keeps working.
    """
    allowed_callsets = ('fn', 'tp-base')
    if manta_callset not in allowed_callsets:
        raise ValueError(
            "manta_callset must be one of {}, got {!r}".format(allowed_callsets, manta_callset)
        )
        
def get_manta_fraction(manta_callset, experiment):
    """Return the fraction of a manta callset that overlaps trfermikit true positives.

    The fraction is ``overlap / (overlap + manta_exclusive)``, where both counts
    come from the bedtools helpers run on ``data/<experiment>``.

    Args:
        manta_callset: 'tp-base' or 'fn' (validated by ``check``).
        experiment: name of the experiment directory under ``data/``.

    Returns:
        The fraction as a float, or None when both counts are zero (the manta
        callset is empty), mirroring how an undefined precision is reported
        elsewhere in this file instead of raising ZeroDivisionError.
    """
    check(manta_callset)
    output = 'data/{}'.format(experiment)
    overlap_count = tr_fermikit_TP_intersect_manta(manta_callset, output)
    manta_exclusive_count = manta_less_tr_fermikit_TP(manta_callset, output)

    callset_size = overlap_count + manta_exclusive_count
    if callset_size == 0:
        return None
    return overlap_count/float(callset_size)

def get_manta_fractions_configs():   
    experiments = ! ls data

    fractions_of_manta_TP = []
    fractions_of_manta_FN = []
    configs = []

    for experiment in experiments: 
        fractions_of_manta_TP.append(get_manta_fraction(manta_callset='tp-base', experiment=experiment))
        fractions_of_manta_FN.append(get_manta_fraction(manta_callset='fn', experiment=experiment))
        config = get_config(experiment)
        configs.append(config)
        
    return fractions_of_manta_TP, fractions_of_manta_FN, configs

def fractions_of_manta_graph(outer_key, inner_key, title=None, marker_size=10, line_width=1):
    """Scatter the manta-FN fraction against the manta-TP fraction, one point per experiment.

    Points are colored by the experiment's ``config[outer_key][inner_key]`` value;
    point-less traces are appended only to provide one legend entry per value.
    The figure is saved via ``write_image`` under 'images/fractions_of_manta'
    and then shown.
    """
    fractions_of_manta_TP, fractions_of_manta_FN, configs = get_manta_fractions_configs()

    colors = initialize_colors(configs, outer_key, inner_key)

    point_colors = [colors[config[outer_key][inner_key]] for config in configs]
    hover_texts = [format_config(config, outer_key, inner_key) for config in configs]

    experiment_points = go.Scatter(
        x=fractions_of_manta_TP,
        y=fractions_of_manta_FN,
        mode='markers',
        showlegend=False,
        marker=dict(
            symbol='circle',
            size=marker_size,
            line=dict(width=line_width, color='black'),
            color=point_colors
        ),
        hovertemplate='%{text}',
        text=hover_texts
    )
    fig = go.Figure(data=experiment_points)
    fig.update_xaxes(title_text='fraction of manta TPs recovered by trfermikit')
    fig.update_yaxes(title_text='fraction of manta FNs recovered by trfermikit')

    fig.update_layout(title=title)

    # Legend-only entries: which color belongs to which setting value.
    for value, color in sort_colors(colors):
        legend_marker = dict(symbol='circle', size=marker_size, color=color)
        fig.add_trace(go.Scatter(
            y=[None],
            mode='markers',
            marker=legend_marker,
            name='{}={}'.format(inner_key, value)
        ))

    # NOTE(review): no svtype is passed here; this call only works if
    # write_image's svtype parameter has a default — confirm.
    write_image(fig, image_directory='images/fractions_of_manta', inner_key=inner_key)

    fig.show()

def fractions_of_manta_graphs():
    """Draw the manta TP/FN recovery figure for each highlighted config setting.

    Only filterCalls.minSVSize is currently plotted; panels for
    makeRegions.minRepeatLength and filterCalls.minUnitigBlockLength (both
    without a title) are disabled.
    """
    panels = [
        (
            'filterCalls',
            'minSVSize',
            'trfermikit is most complementary to manta when small SVs are included',
        ),
    ]
    for outer_key, inner_key, commentary in panels:
        fractions_of_manta_graph(outer_key, inner_key, title=commentary)

# Render (and save) the manta TP/FN recovery figure(s).
fractions_of_manta_graphs()

## The trade-off between trfermikit precision and its sensitivity to events missed by manta

In [None]:
def get_fractionsOfMantaFN_trfermikitPrecisions_configs():   
    experiments = ! ls data

    fractionsOfMantaFN = []
    trfermikitPrecisions = []
    configs = []

    for experiment in experiments: 
        fractionsOfMantaFN.append(get_manta_fraction(
            manta_callset='fn', 
            experiment=experiment
        ))
        
        precision, recall = get_precision_recall(
            experiment=experiment, 
            caller='trfermikit.unitigSupport.thinned'
        )
        trfermikitPrecisions.append(precision)
        
        config = get_config(experiment)
        configs.append(config)        
        
    return fractionsOfMantaFN, trfermikitPrecisions, configs

def fractionsOfMantaFN_trfermikitPrecisions_graph(
    outer_key,
    inner_key,
    title=None,
    marker_size=10,
    line_width=1
):
    """Scatter trfermikit precision against the manta-FN fraction, one point per experiment.

    Points are colored by the experiment's ``config[outer_key][inner_key]`` value;
    point-less traces are appended only to provide one legend entry per value.
    The figure is saved via ``write_image`` under
    'images/fractionsOfMantaFN_trfermikitPrecisions' and then shown.
    """
    fractionsOfMantaFN, trfermikitPrecisions, configs = get_fractionsOfMantaFN_trfermikitPrecisions_configs()

    colors = initialize_colors(configs, outer_key, inner_key)

    point_colors = [colors[config[outer_key][inner_key]] for config in configs]
    hover_texts = [format_config(config, outer_key, inner_key) for config in configs]

    experiment_points = go.Scatter(
        x=fractionsOfMantaFN,
        y=trfermikitPrecisions,
        mode='markers',
        showlegend=False,
        marker=dict(
            symbol='circle',
            size=marker_size,
            line=dict(width=line_width, color='black'),
            color=point_colors
        ),
        hovertemplate='%{text}',
        text=hover_texts
    )
    fig = go.Figure(data=experiment_points)
    fig.update_xaxes(title_text='fraction of manta FNs recovered by trfermikit')
    fig.update_yaxes(title_text='trfermikit precision')

    fig.update_layout(title=title)

    # Legend-only entries: which color belongs to which setting value.
    for value, color in sort_colors(colors):
        legend_marker = dict(symbol='circle', size=marker_size, color=color)
        fig.add_trace(go.Scatter(
            y=[None],
            mode='markers',
            marker=legend_marker,
            name='{}={}'.format(inner_key, value)
        ))

    # NOTE(review): no svtype is passed here; this call only works if
    # write_image's svtype parameter has a default — confirm.
    write_image(fig, image_directory='images/fractionsOfMantaFN_trfermikitPrecisions', inner_key=inner_key)

    fig.show()

def fractionsOfMantaFN_trfermikitPrecisions_graphs():
    """Draw the precision-vs-manta-FN-fraction figure for each highlighted config setting.

    A panel for makeRegions.minRepeatLength (without a title) is disabled.
    """
    min_sv_size_commentary = (
        'minSVSize similarly influences trfermikit precision and<br>'
        'sensitivity to events missed by manta'
    )
    block_length_commentary = (
        'minUnitigBlockLength controls the trade-off between trfermikit precision<br>'
        'and its sensitivity to events missed by manta'
    )

    panels = [
        ('filterCalls', 'minSVSize', min_sv_size_commentary),
        ('filterCalls', 'minUnitigBlockLength', block_length_commentary),
    ]
    for outer_key, inner_key, commentary in panels:
        fractionsOfMantaFN_trfermikitPrecisions_graph(outer_key, inner_key, title=commentary)

# Render (and save) the trfermikit-precision vs. manta-FN-fraction figures.
fractionsOfMantaFN_trfermikitPrecisions_graphs()