In [12]:
import plotly.graph_objects as go
import json

def get_precision_recall(experiment, caller):
    """Compute precision and recall for one caller in one experiment.

    Reads the JSON document stored at
    ``data/<experiment>/truvari-pacbio-<caller>/summary.txt`` and derives both
    metrics from its ``TP-base``, ``FP`` and ``FN`` entries.

    Args:
        experiment: name of the experiment directory under ``data/``.
        caller: caller name used in the ``truvari-pacbio-<caller>`` directory.

    Returns:
        A ``(precision, recall)`` tuple of floats, where
        ``precision = TP-base / (TP-base + FP)`` and
        ``recall = TP-base / (TP-base + FN)``.

    Raises:
        ZeroDivisionError: if either denominator is zero.
    """
    summary_path = 'data/{}/truvari-pacbio-{}/summary.txt'.format(experiment, caller)
    with open(summary_path) as handle:
        summary = json.load(handle)

    true_positives = summary['TP-base']
    # float() keeps the division a true division under Python 2 as well.
    precision = true_positives / float(true_positives + summary['FP'])
    recall = true_positives / float(true_positives + summary['FN'])
    return precision, recall

def get_config(experiment):
    """Load the JSON configuration recorded for an experiment.

    Args:
        experiment: name of the experiment directory under ``data/``.

    Returns:
        The parsed contents of ``data/<experiment>/config.json``.
    """
    config_path = 'data/{}/config.json'.format(experiment)
    with open(config_path) as handle:
        config = json.load(handle)
    return config

def get_precisions_recalls_configs(caller):
    """Collect precision, recall and configuration for every experiment under ``data/``.

    Args:
        caller: caller name forwarded to ``get_precision_recall``.

    Returns:
        A ``(precisions, recalls, configs)`` tuple of three parallel lists with
        one entry per experiment, in sorted experiment-name order.

    Raises:
        FileNotFoundError: if the ``data`` directory does not exist.
    """
    # Local import so this cell does not depend on another cell having imported os.
    import os

    # Pure-Python replacement for the IPython-only `! ls data`: it works outside
    # IPython and fails loudly when `data` is missing instead of turning the
    # shell's error text into an "experiment" name. Like a plain `ls`, entries
    # starting with '.' are skipped and names are returned sorted.
    experiments = sorted(
        name for name in os.listdir('data') if not name.startswith('.')
    )

    precisions = []
    recalls = []
    configs = []

    for experiment in experiments:
        precision, recall = get_precision_recall(experiment, caller)
        precisions.append(precision)
        recalls.append(recall)
        configs.append(get_config(experiment))

    return precisions, recalls, configs

def format_config(config, outer_key_top=None, inner_key_top=None):
    """Render a two-level configuration dict as ``<br>``-separated hover text.

    When both ``outer_key_top`` and ``inner_key_top`` are given, the text
    starts with a ``<outer>.<inner>: <value>`` line for that parameter and the
    parameter is left out of the per-section listing that follows. The
    ``general`` section is never listed.

    Args:
        config: mapping of section name to a mapping of parameter name to value.
        outer_key_top: section of the parameter to show first (optional).
        inner_key_top: name of the parameter to show first (optional).

    Returns:
        The formatted string.
    """
    pieces = []

    if outer_key_top and inner_key_top:
        highlighted_value = config[outer_key_top][inner_key_top]
        pieces.append(
            '{}.{}: {}<br><br>'.format(outer_key_top, inner_key_top, highlighted_value)
        )

    for section, params in config.items():
        if section == 'general':
            continue
        pieces.append('{}: <br>'.format(section))
        for name, value in params.items():
            already_shown = (section == outer_key_top and name == inner_key_top)
            if not already_shown:
                pieces.append('  {}: {}<br>'.format(name, value))

    return ''.join(pieces)

def color(s):
    """Map a configuration value to the integer used as its marker colour.

    Values accepted by ``int()`` are converted directly. Otherwise the value is
    looked up in a fixed table of the strings '5,20', '6,26' and '10,35'.

    Raises:
        KeyError: if ``int(s)`` raises ValueError and ``s`` is not in the table.
    """
    named_levels = {
        '5,20': 0,
        '6,26': 1,
        '10,35': 2
    }
    try:
        level = int(s)
    except ValueError:
        return named_levels[s]
    return level
    

def precision_recall_graph(outer_key, inner_key):
    """Show a precision-vs-recall scatter plot with one marker per experiment and caller.

    Two traces are drawn: 'trfermikit' (caller
    'trfermikit.unitigSupport.thinned') and 'manta' (square markers). Each
    marker is coloured by ``color(config[outer_key][inner_key])`` and its hover
    text is produced by ``format_config``.

    Args:
        outer_key: configuration section holding the parameter used for colouring.
        inner_key: name of that parameter within the section.
    """
    trfermikit_precisions, trfermikit_recalls, trfermikit_configs = get_precisions_recalls_configs(
        'trfermikit.unitigSupport.thinned')
    manta_precisions, manta_recalls, manta_configs = get_precisions_recalls_configs('manta')

    fig = go.Figure(data=go.Scatter(
        x=trfermikit_recalls,
        y=trfermikit_precisions,
        mode='markers',
        name='trfermikit',
        marker_color=[color(config[outer_key][inner_key]) for config in trfermikit_configs],
        hovertemplate='%{text}',
        text=[format_config(config, outer_key, inner_key) for config in trfermikit_configs]))
    fig.update_xaxes(title_text='recall')
    fig.update_yaxes(title_text='precision')

    fig.add_trace(go.Scatter(
        x=manta_recalls,
        y=manta_precisions,
        mode='markers',
        name='manta',
        marker_symbol='square',
        marker_size=10,
        # Colours come from the manta trace's own configs, matching the x/y
        # and hover text of this trace (previously trfermikit_configs was used).
        marker_color=[color(config[outer_key][inner_key]) for config in manta_configs],
        hovertemplate='%{text}',
        text=[format_config(config, outer_key, inner_key) for config in manta_configs]
    ))
    fig.show()
    


In [19]:
def list_to_value(list_):
    """Return the sole element of ``list_`` converted to ``int``.

    Raises:
        ValueError: if ``list_`` does not hold exactly one element, or if that
            element cannot be parsed as an integer.
    """
    [only_entry] = list_  # unpacking enforces "exactly one element"
    return int(only_entry)

# Value passed to bedtools as `-f` (together with `-r`) by the intersect/subtract
# helpers in this cell. Per the bedtools documentation this is the minimum
# reciprocal overlap fraction — confirm against the installed bedtools version.
overlap_fraction = 0.9
# Directory under which the helpers in this cell expect `bin/bedtools`.
# NOTE(review): absolute, machine-specific path — the notebook will not run on
# another system without editing it.
root = "/scratch/ucgd/lustre-work/quinlan/u6018199/chaisson_2019/analysis/locally_assemble_short_reads/trfermikit"

def tr_fermikit_TP_intersect_manta(manta_callset, output): 
    """Count the lines printed by a `bedtools intersect` of a manta call set with trfermikit's tp-base.vcf.

    Runs, through the IPython shell, `bedtools intersect -u -wb -f overlap_fraction -r`
    with `<output>/truvari-pacbio-manta/<manta_callset>.vcf` as `-a` and
    `<output>/truvari-pacbio-trfermikit.unitigSupport.thinned/tp-base.vcf` as `-b`,
    and pipes the result to `wc -l`.

    Presumably this is the number of manta records that reciprocally overlap a
    trfermikit true positive — TODO confirm against the bedtools flag semantics.

    Args:
        manta_callset: base name (without ".vcf") of the manta VCF to use.
        output: experiment directory containing the truvari output directories.

    Returns:
        The line count as an int.

    Raises:
        ValueError: via list_to_value, if the shell output is not exactly one
            line or that line is not an integer.
    """
    list_ = ! $root/bin/bedtools intersect -u -wb -f $overlap_fraction -r \
      -a $output/truvari-pacbio-manta/$manta_callset".vcf" \
      -b $output/truvari-pacbio-trfermikit.unitigSupport.thinned/tp-base.vcf \
      | wc -l 
    return list_to_value(list_)

def tr_fermikit_TP_less_manta(manta_callset, output):
    """Count the lines printed by a `bedtools subtract` of a manta call set from trfermikit's tp-base.vcf.

    Runs, through the IPython shell, `bedtools subtract -A -f overlap_fraction -r`
    with `<output>/truvari-pacbio-trfermikit.unitigSupport.thinned/tp-base.vcf`
    as `-a` and `<output>/truvari-pacbio-manta/<manta_callset>.vcf` as `-b`,
    and pipes the result to `wc -l`.

    Presumably this is the number of trfermikit true positives with no
    reciprocally overlapping manta record — TODO confirm against the bedtools
    flag semantics.

    Args:
        manta_callset: base name (without ".vcf") of the manta VCF to use.
        output: experiment directory containing the truvari output directories.

    Returns:
        The line count as an int.

    Raises:
        ValueError: via list_to_value, if the shell output is not exactly one
            line or that line is not an integer.
    """
    list_ = ! $root/bin/bedtools subtract -A -f $overlap_fraction -r \
      -a $output/truvari-pacbio-trfermikit.unitigSupport.thinned/tp-base.vcf \
      -b $output/truvari-pacbio-manta/$manta_callset".vcf" \
      | wc -l
    return list_to_value(list_)

def manta_less_tr_fermikit_TP(manta_callset, output):
    """Count the lines printed by a `bedtools subtract` of trfermikit's tp-base.vcf from a manta call set.

    Runs, through the IPython shell, `bedtools subtract -A -f overlap_fraction -r`
    with `<output>/truvari-pacbio-manta/<manta_callset>.vcf` as `-a` and
    `<output>/truvari-pacbio-trfermikit.unitigSupport.thinned/tp-base.vcf` as
    `-b`, and pipes the result to `wc -l`.

    Presumably this is the number of manta records with no reciprocally
    overlapping trfermikit true positive — TODO confirm against the bedtools
    flag semantics.

    Args:
        manta_callset: base name (without ".vcf") of the manta VCF to use.
        output: experiment directory containing the truvari output directories.

    Returns:
        The line count as an int.

    Raises:
        ValueError: via list_to_value, if the shell output is not exactly one
            line or that line is not an integer.
    """
    list_ = ! $root/bin/bedtools subtract -A -f $overlap_fraction -r \
      -a $output/truvari-pacbio-manta/$manta_callset".vcf" \
      -b $output/truvari-pacbio-trfermikit.unitigSupport.thinned/tp-base.vcf \
    | wc -l 
    return list_to_value(list_)

def check(manta_callset):
    """Validate that ``manta_callset`` names a supported manta call set.

    Args:
        manta_callset: must be either 'fn' or 'tp-base'.

    Raises:
        ValueError: if ``manta_callset`` is any other value. ValueError is a
            subclass of Exception, so callers catching Exception still work.
    """
    allowed = ('fn', 'tp-base')
    if manta_callset not in allowed:
        raise ValueError(
            'manta_callset must be one of {}, got {!r}'.format(allowed, manta_callset)
        )
        
def get_manta_fraction(manta_callset, experiment):
    """Return the fraction of a manta call set that overlaps trfermikit true positives.

    The fraction is ``overlap / (overlap + manta_exclusive)``, where the two
    counts come from ``tr_fermikit_TP_intersect_manta`` and
    ``manta_less_tr_fermikit_TP`` run on ``data/<experiment>``.

    Args:
        manta_callset: 'fn' or 'tp-base' (validated by ``check``).
        experiment: name of the experiment directory under ``data/``.

    Raises:
        ZeroDivisionError: if both counts are zero.
    """
    check(manta_callset)
    experiment_dir = 'data/{}'.format(experiment)
    shared_count = tr_fermikit_TP_intersect_manta(manta_callset, experiment_dir)
    manta_only_count = manta_less_tr_fermikit_TP(manta_callset, experiment_dir)
    manta_total = float(shared_count + manta_only_count)
    return shared_count / manta_total

def get_manta_fractions_configs():
    """Collect manta-overlap fractions and configuration for every experiment under ``data/``.

    Returns:
        A ``(fractions_of_manta_TP, fractions_of_manta_FN, configs)`` tuple of
        three parallel lists with one entry per experiment, in sorted
        experiment-name order. The fractions are ``get_manta_fraction`` results
        for the 'tp-base' and 'fn' manta call sets respectively.

    Raises:
        FileNotFoundError: if the ``data`` directory does not exist.
    """
    # Local import so this cell does not depend on another cell having imported os.
    import os

    # Pure-Python replacement for `! ls data`: fails loudly when `data` is
    # missing instead of treating the shell's error text as an experiment name.
    # Like a plain `ls`, entries starting with '.' are skipped and names are sorted.
    experiments = sorted(
        name for name in os.listdir('data') if not name.startswith('.')
    )

    fractions_of_manta_TP = []
    fractions_of_manta_FN = []
    configs = []

    for experiment in experiments:
        fractions_of_manta_TP.append(get_manta_fraction(manta_callset='tp-base', experiment=experiment))
        fractions_of_manta_FN.append(get_manta_fraction(manta_callset='fn', experiment=experiment))
        configs.append(get_config(experiment))

    return fractions_of_manta_TP, fractions_of_manta_FN, configs

def fractions_of_manta_graph(outer_key, inner_key):
    """Show a scatter plot of manta-overlap fractions, one marker per experiment.

    The x axis holds the 'tp-base' fraction and the y axis the 'fn' fraction
    returned by ``get_manta_fractions_configs``. Each marker is coloured by
    ``color(config[outer_key][inner_key])`` and its hover text is produced by
    ``format_config``.

    Args:
        outer_key: configuration section holding the parameter used for colouring.
        inner_key: name of that parameter within the section.
    """
    tp_fractions, fn_fractions, experiment_configs = get_manta_fractions_configs()

    marker_colors = [color(cfg[outer_key][inner_key]) for cfg in experiment_configs]
    hover_texts = [format_config(cfg, outer_key, inner_key) for cfg in experiment_configs]

    scatter = go.Scatter(
        x=tp_fractions,
        y=fn_fractions,
        mode='markers',
        marker_color=marker_colors,
        hovertemplate='%{text}',
        text=hover_texts,
    )

    fig = go.Figure(data=scatter)
    fig.update_xaxes(title_text='fraction of manta TPs recovered by trfermikit')
    fig.update_yaxes(title_text='fraction of manta FNs recovered by trfermikit')
    fig.show()



## "minUnitigBlockLength" controls the trade-off between precision and recall

Hover over a data point to see the `trfermikit` configuration parameters used for that experiment (the `general` section is omitted).

In [14]:
# Precision vs. recall for trfermikit and manta; markers are coloured by each experiment's filterCalls.minUnitigBlockLength.
precision_recall_graph(outer_key='filterCalls', inner_key='minUnitigBlockLength') 

## TODO: put conclusions here

In [22]:
# Manta TP vs. FN fractions per experiment; markers are coloured by makeCalls.gapOpenPenalties.
fractions_of_manta_graph(outer_key='makeCalls', inner_key='gapOpenPenalties')

In [20]:
# Same plot as the gapOpenPenalties view, with markers coloured by filterCalls.minUnitigBlockLength instead.
fractions_of_manta_graph(outer_key='filterCalls', inner_key='minUnitigBlockLength')