## Fig 1a

In [1]:
import json

def get_TPR_FDR(experiment, caller, svtype):
    """Compute recall (TPR) and false discovery rate (FDR) from a truvari summary.

    Reads `<experiment>/truvari-<svtype>-pacbio-<caller>/summary.txt`, parses it
    as JSON, and uses its 'TP-base', 'FN' and 'FP' entries.

    Returns:
        (TPR, FDR), where TPR = TP-base/(TP-base + FN) and
        FDR = FP/(TP-base + FP).

    Raises:
        ZeroDivisionError: if either denominator is zero.
    """
    summary_path = '{}/truvari-{}-pacbio-{}/summary.txt'.format(experiment, svtype, caller)
    with open(summary_path) as summary_file:
        counts = json.load(summary_file)

    true_positives = counts['TP-base']
    recall = true_positives / float(true_positives + counts['FN'])
    false_discovery_rate = counts['FP'] / float(true_positives + counts['FP'])
    return recall, false_discovery_rate

def get_config(experiment):
    """Load and return the parsed JSON content of `<experiment>/config.json`."""
    config_path = '{}/config.json'.format(experiment)
    with open(config_path) as config_file:
        config = json.load(config_file)
    return config

def get_TPR_FDR_config(experiment, caller, svtype):
    """Return `(TPR, FDR, config)` for one experiment directory.

    TPR and FDR come from `get_TPR_FDR` for the given caller and SV type;
    config is whatever `get_config` loads for the same experiment.
    """
    recall, false_discovery_rate = get_TPR_FDR(experiment, caller, svtype)
    return recall, false_discovery_rate, get_config(experiment)
    
def get_experiments(): 
    """List the experiment directories whose results are plotted.

    Runs `ls -d` through IPython's `!` shell capture over the two `data/*`
    globs below and returns the captured output lines (one path per line).
    The globs are relative, so the result depends on the notebook's working
    directory.
    """
    # some experiments correspond to the same trfermikit configuration, e.g. 
    # INS/singleBaseMatchReward_singleBaseMismatchPenalty_gapOpenPenalties_gapExtensionPenalties/data/gapExtensionPenalties=1,0_gapOpenPenalties=6,26_singleBaseMatchReward=10_singleBaseMismatchPenalty=12/config.json
    # optimized_for_DELs/minCoverage_gapOpenPenalties_minUnitigMappingQuality_minUnitigBlockLength/data/minCoverage=0_gapOpenPenalties=6,26_minUnitigMappingQuality=0_minUnitigBlockLength=25/config.json
    # but do not yield the same trfermikit callset, 
    # perhaps because of fermikit's stochasticity: https://github.com/lh3/fermikit#limitations
    experiments = ! ls -d \
        INS/singleBaseMatchReward_singleBaseMismatchPenalty_gapOpenPenalties_gapExtensionPenalties/data/* \
        optimized_for_DELs/minCoverage_gapOpenPenalties_minUnitigMappingQuality_minUnitigBlockLength/data/*
    return experiments

def get_TPRs_FDRs_configs(caller, svtype):
    """Collect TPRs, FDRs and configs across every experiment.

    Returns:
        A tuple `(TPRs, FDRs, configs)` of three parallel tuples, one entry
        per directory returned by `get_experiments()`.
    """
    per_experiment = [
        get_TPR_FDR_config(experiment, caller, svtype)
        for experiment in get_experiments()
    ]
    # Transpose the list of (TPR, FDR, config) rows into three columns.
    return tuple(zip(*per_experiment))

In [6]:
import plotly.graph_objects as go

def write(fig, sub_directory, svtype, data=None):
    """Save a figure as SVG, and optionally its data as JSON, under `paper_figures/`.

    Args:
        fig: object exposing `write_image(path)` (a plotly figure in this notebook).
        sub_directory: folder created beneath `paper_figures/` if it is missing.
        svtype: used as the file stem, giving `<svtype>.svg` and `<svtype>.json`.
        data: optional JSON-serializable payload. It is written whenever it is
            not None, so an empty list/dict still produces (and refreshes) the
            JSON file instead of silently leaving a stale one behind.
    """
    import pathlib

    directory = 'paper_figures/{}'.format(sub_directory)
    # exist_ok=True already tolerates an existing directory, so a separate
    # existence check beforehand is redundant and only adds a race window.
    pathlib.Path(directory).mkdir(parents=True, exist_ok=True)
    fig.write_image('{}/{}.svg'.format(directory, svtype))

    if data is not None:
        with open('{}/{}.json'.format(directory, svtype), 'w') as data_file:
            json.dump(data, data_file)
    

def get_marker_core(symbol):
    """Return the base marker style: given symbol, size 16, thin black outline."""
    outline = {'width': 1, 'color': 'black'}
    return {'symbol': symbol, 'line': outline, 'size': 16}
   
def get_font():
    """Return a fresh dict describing the font shared by all figure text."""
    return {'family': 'Arial', 'color': 'black', 'size': 20}

def update_axes(fig, axis, text, linewidth=1.5):
    """Apply the notebook's common axis styling to one axis of `fig`.

    Args:
        fig: figure exposing `update_xaxes` and `update_yaxes`.
        axis: 'x' or 'y'; any other value raises KeyError.
        text: axis title.
        linewidth: width used for both the axis line and its ticks.
    """
    updaters = dict(x=fig.update_xaxes, y=fig.update_yaxes)
    apply_update = updaters[axis]

    axis_style = dict(
        title={'text': text, 'font': get_font()},
        showgrid=False,
        showline=True,
        linewidth=linewidth,
        linecolor='black',
        ticks='outside',
        tickwidth=linewidth,
        tickcolor='black',
        ticklen=10,
        tickfont=get_font(),
    )
    apply_update(**axis_style)
    
def plot_TPRs_FDRs(svtype, trfermikit_flavor):
    """Scatter recall against FDR for trfermikit and manta across all experiments.

    The figure is saved through `write` as `paper_figures/TPRs_FDRs/<svtype>.svg`
    and then displayed.
    """
    trfermikit_recalls, trfermikit_fdrs, _ = get_TPRs_FDRs_configs(trfermikit_flavor, svtype)
    manta_recalls, manta_fdrs, _ = get_TPRs_FDRs_configs('manta', svtype)

    trfermikit_trace = go.Scatter(
        x=trfermikit_fdrs,
        y=trfermikit_recalls,
        mode='markers',
        name='trfermikit',
        marker=get_marker_core('circle'),
    )
    fig = go.Figure(data=trfermikit_trace)

    axis_titles = (
        ('x', 'False Discovery Rate, FP/(TP + FP)'),
        ('y', 'Recall, TP/(TP + FN)'),
    )
    for axis, axis_title in axis_titles:
        update_axes(fig, axis, axis_title)
    fig.update_layout(plot_bgcolor='white')

    manta_trace = go.Scatter(
        x=manta_fdrs,
        y=manta_recalls,
        mode='markers',
        name='manta',
        marker=get_marker_core('square'),
    )
    fig.add_trace(manta_trace)

    write(fig, sub_directory='TPRs_FDRs', svtype=svtype)

    fig.show()
    
# Render and save Fig 1a for deletions, using the 'trfermikit.unitigSupport.thinned' callset.
plot_TPRs_FDRs(svtype='DEL', trfermikit_flavor='trfermikit.unitigSupport.thinned')

## Fig 1b

In [3]:
def check(manta_callset):
    """Validate that `manta_callset` names a supported manta callset.

    Only 'fn' and 'tp-base' are accepted; they select `fn.vcf` and
    `tp-base.vcf` in the manta truvari output directory.

    Raises:
        ValueError: if `manta_callset` is anything else. ValueError subclasses
            Exception, so callers catching Exception are unaffected.
    """
    allowed_callsets = ('fn', 'tp-base')
    if manta_callset not in allowed_callsets:
        raise ValueError(
            'manta_callset must be one of {}, got {!r}'.format(allowed_callsets, manta_callset)
        )

def list_to_value(list_):
    """Convert a one-element sequence to an int.

    Raises ValueError when `list_` does not hold exactly one item, or when
    that item cannot be parsed by `int`.
    """
    [sole_entry] = list_
    return int(sole_entry)

def tr_fermikit_TP_intersect_manta(manta_callset, experiment, svtype, trfermikit_flavor, overlap_fraction, root): 
    """Count manta records that overlap a trfermikit true-positive record.

    Runs `$root/bin/bedtools intersect -u -wb -f <overlap_fraction> -r` with
    manta's `<manta_callset>.vcf` as `-a` and trfermikit's `tp-base.vcf` as
    `-b`, pipes the result to `wc -l`, and converts the single captured line
    to an int.

    Per the bedtools documentation, `-u` reports each `-a` record at most once
    and `-f`/`-r` require the overlap fraction to hold reciprocally, so the
    line count is the number of manta records with a qualifying overlap.
    """
    list_ = ! $root/bin/bedtools intersect -u -wb -f $overlap_fraction -r \
      -a $experiment/truvari-$svtype-pacbio-manta/$manta_callset".vcf" \
      -b $experiment/truvari-$svtype-pacbio-$trfermikit_flavor/tp-base.vcf \
      | wc -l 
    return list_to_value(list_)

def manta_less_tr_fermikit_TP(manta_callset, experiment, svtype, trfermikit_flavor, overlap_fraction, root):
    """Count manta records that do not overlap any trfermikit true-positive record.

    Runs `$root/bin/bedtools subtract -A -f <overlap_fraction> -r` with
    manta's `<manta_callset>.vcf` as `-a` and trfermikit's `tp-base.vcf` as
    `-b`, pipes the result to `wc -l`, and converts the single captured line
    to an int.

    Per the bedtools documentation, `-A` drops an `-a` record entirely once it
    has a qualifying overlap, so the line count is the number of manta records
    left without one.
    """
    list_ = ! $root/bin/bedtools subtract -A -f $overlap_fraction -r \
      -a $experiment/truvari-$svtype-pacbio-manta/$manta_callset".vcf" \
      -b $experiment/truvari-$svtype-pacbio-$trfermikit_flavor/tp-base.vcf \
    | wc -l 
    return list_to_value(list_)

def get_counts(
    manta_callset,
    experiment,
    svtype,
    trfermikit_flavor,
    overlap_fraction=0.9,
    root="/scratch/ucgd/lustre-work/quinlan/u6018199/chaisson_2019/analysis/locally_assemble_short_reads/trfermikit",
):
    """Count manta records shared with, and exclusive of, trfermikit true positives.

    Args:
        manta_callset: stem of the manta VCF to examine (e.g. 'fn' or 'tp-base').
        experiment: experiment directory holding the truvari outputs.
        svtype: SV type used in the truvari directory names.
        trfermikit_flavor: trfermikit callset name used in the directory names.
        overlap_fraction: overlap threshold passed to bedtools `-f`. Defaults
            to the 0.9 that was previously fixed in the body.
        root: installation directory containing `bin/bedtools`. Defaults to
            the cluster path that was previously fixed in the body; override
            it to run elsewhere.

    Returns:
        `(overlap_count, manta_exclusive_count)`.
    """
    overlap_count = tr_fermikit_TP_intersect_manta(
        manta_callset, experiment, svtype, trfermikit_flavor, overlap_fraction, root)
    manta_exclusive_count = manta_less_tr_fermikit_TP(
        manta_callset, experiment, svtype, trfermikit_flavor, overlap_fraction, root)
    return overlap_count, manta_exclusive_count
    
def get_manta_fraction(manta_callset, experiment, svtype, trfermikit_flavor):
    """Return the fraction of a manta callset that overlaps trfermikit true positives.

    The callset name is validated by `check` first. The fraction is
    overlap/(overlap + manta-exclusive), using the counts from `get_counts`.
    """
    check(manta_callset)
    shared, manta_only = get_counts(manta_callset, experiment, svtype, trfermikit_flavor)
    total = float(shared + manta_only)
    return shared / total

def get_normalized_trfermikit_FDR(experiment, svtype, trfermikit_flavor):
    """Return trfermikit's FDR divided by manta's FDR for one experiment."""
    trfermikit_fdr = get_TPR_FDR(experiment, trfermikit_flavor, svtype)[1]
    manta_fdr = get_TPR_FDR(experiment, 'manta', svtype)[1]
    return trfermikit_fdr / manta_fdr

def get_normalizedTrfermikitFDR_mantaFraction_config(experiment, svtype, trfermikit_flavor):
    """Return `(normalized trfermikit FDR, fraction of manta FNs overlapped, config)`.

    The fraction is computed on manta's 'fn' callset for the given experiment.
    """
    fdr_ratio = get_normalized_trfermikit_FDR(experiment, svtype, trfermikit_flavor)
    recovered_fraction = get_manta_fraction('fn', experiment, svtype, trfermikit_flavor)
    return fdr_ratio, recovered_fraction, get_config(experiment)
    
def get_normalizedTrfermikitFDRs_mantaFractions_configs(svtype, trfermikit_flavor):
    """Collect normalized FDRs, manta-FN fractions and configs across every experiment.

    Returns:
        A tuple of three parallel tuples, one entry per directory returned by
        `get_experiments()`.
    """
    rows = [
        get_normalizedTrfermikitFDR_mantaFraction_config(experiment, svtype, trfermikit_flavor)
        for experiment in get_experiments()
    ]
    # Transpose the per-experiment rows into three columns.
    return tuple(zip(*rows))

In [5]:
def get_marker_extra(color, title):
    """Return marker settings that colour points on a Viridis scale with a titled colorbar."""
    colorbar = {
        'title': {'text': title, 'font': get_font()},
        'tickfont': get_font(),
    }
    return {
        'color': color,
        'colorscale': 'Viridis',
        'showscale': True,
        'colorbar': colorbar,
    }
    
def get_marker(color, title):
    """Return the circle base marker merged with the colour-scale settings.

    Keys from `get_marker_extra` win if both dicts define the same key.
    """
    marker = dict(get_marker_core('circle'))
    marker.update(get_marker_extra(color, title))
    return marker
    
def plot_manta_complementarity(svtype, trfermikit_flavor):
    """Plot how well trfermikit complements manta across all experiments.

    Each point is one experiment: x is trfermikit FDR divided by manta FDR,
    y is the fraction of manta false negatives that overlap trfermikit true
    positives, and the colour is trfermikit recall.

    The figure is saved through `write` as
    `paper_figures/manta_complementarity/<svtype>.svg`, alongside a JSON file
    of the plotted points and their configs sorted by increasing recall, and
    is then displayed.

    Args:
        svtype: SV type to analyse (e.g. 'DEL').
        trfermikit_flavor: trfermikit callset name used in the truvari
            directory names.
    """
    # Use the caller's arguments here; previously 'DEL' and a fixed flavor were
    # hard-coded, so other inputs mixed statistics from different callsets.
    normalized_trfermikit_FDRs, fractions_of_manta_FN, configs = (
        get_normalizedTrfermikitFDRs_mantaFractions_configs(
            svtype=svtype,
            trfermikit_flavor=trfermikit_flavor,
        )
    )
    trfermikit_TPRs, _, _ = get_TPRs_FDRs_configs(trfermikit_flavor, svtype)

    fig = go.Figure(data=go.Scatter(
        x=normalized_trfermikit_FDRs,
        y=fractions_of_manta_FN,
        mode='markers',
        showlegend=False,
        marker=get_marker(color=trfermikit_TPRs, title='trfermikit recall'),
    ))
    update_axes(fig, 'x', '(trfermikit FDR)/(default-manta FDR)')
    update_axes(fig, 'y', 'fraction of manta FNs recovered by trfermikit')
    fig.update_layout(plot_bgcolor='white')

    # Sort on the three numeric fields only. Config dicts are not orderable, so
    # letting ties fall through to them would raise TypeError.
    records = sorted(
        zip(trfermikit_TPRs, normalized_trfermikit_FDRs, fractions_of_manta_FN, configs),
        key=lambda record: record[:3],
    )

    write(
        fig,
        sub_directory='manta_complementarity',
        svtype=svtype,
        data=[
            {
                'trfermikit_TPR': trfermikit_TPR,
                'normalized_trfermikit_FDR': normalized_trfermikit_FDR,
                'fraction_of_manta_FN': fraction_of_manta_FN,
                'config': config
            }
            for trfermikit_TPR, normalized_trfermikit_FDR, fraction_of_manta_FN, config
            in records
        ]
    )

    fig.show()

# Render and save Fig 1b for deletions, using the 'trfermikit.unitigSupport.thinned' callset.
plot_manta_complementarity(svtype='DEL', trfermikit_flavor='trfermikit.unitigSupport.thinned')

## Encode three operating points into the software using a single CLI flag

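The entries in `DEL.json` are sorted by increasing trfermikit recall, so use the following to parse out the configs for low, medium, and high recall (entries 30, 85, and the last entry, respectively):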
```
jq '.[30].config | del(.general)' experiments/paper_figures/manta_complementarity/DEL.json    
jq '.[85].config | del(.general)' experiments/paper_figures/manta_complementarity/DEL.json  
jq '. | last | .config | del(.general)' experiments/paper_figures/manta_complementarity/DEL.json
```
