In [1]:
from tensorflow import enable_eager_execution
enable_eager_execution()


## Google Colab set up

Make sure that you're using a GPU: 

```Runtime > Change Runtime Type > Hardware Accelerator > GPU```

In [2]:
# import tensorflow as tf

# if tf.test.gpu_device_name() != '/device:GPU:0':
#   print('WARNING: GPU device not found.')
# else:
#   print('SUCCESS: Found GPU: {}'.format(tf.test.gpu_device_name()))

Download code dependencies:

In [3]:
# !rm load_preprocess_data.py 
# !wget https://raw.githubusercontent.com/petermchale/denoising_coverage_profiles/master/load_preprocess_data.py
# !wget https://raw.githubusercontent.com/petermchale/denoising_coverage_profiles/master/utility.py
# !pip install pyfaidx
# !pip install biopython

# !wget https://github.com/arq5x/bedtools2/releases/download/v2.28.0/bedtools
# !chmod +x ./bedtools



Download data dependencies: 

In [4]:
# !rm chr2.fa chr2.fa.gz
# !wget http://hgdownload.soe.ucsc.edu/goldenPath/hg19/chromosomes/chr2.fa.gz
# !gunzip chr2.fa.gz
# # change the chromosome label in the FASTA to match that in Hao's call file
# !sed -ie 's/chr2/2/' chr2.fa

# !rm chr22.fa chr22.fa.gz
# !wget http://hgdownload.soe.ucsc.edu/goldenPath/hg19/chromosomes/chr22.fa.gz
# !gunzip chr22.fa.gz
# !sed -ie 's/chr22/22/' chr22.fa

# !wget https://gist.githubusercontent.com/petermchale/efb4140dcebf6e2a7b59760fab482b5d/raw/480c656cfe791dda4564c290f32c267860c6d7c0/download_google_drive.sh
# !chmod +x download_google_drive.sh
# !./download_google_drive.sh 1yJhUJYfnv_VnRKNPPFkj5BbaG-w9W-WI 100.multicov.int32.bin
# !./download_google_drive.sh 1DIOO65uVUQU9gt6BoiVMGZZCkk_Zu-TL 1.multicov.int32.bin

# !wget http://home.chpc.utah.edu/~u0875014/call.filtered.bed

# !rm limo-truth.vcf
# !wget http://home.chpc.utah.edu/~u6018199/limo-truth.vcf.gz
# !gunzip limo-truth.vcf.gz

  

## SV calls from Hao's naive caller

Hao applied his naive SV caller (of deletions only) to read-fragment-corrected depth-of-coverage data from hg002 (GRCh37; chr2; http://home.chpc.utah.edu/~u0875014/hg002.bam). 




In [5]:
from collections import OrderedDict    
import pandas as pd
pd.set_option('max_colwidth', 100)
import numpy as np 

def calls_to_dataframe(calls):
    chromosomes, starts, ends, sequences, number_of_Ns = [], [], [], [], []
    for call in calls: 
        chromosome, start, end, sequence = call.replace('-', '\t').replace(':', '\t').split('\t')
        chromosomes.append(chromosome)
        starts.append(start)
        ends.append(end)
        sequences.append(sequence)
        number_of_Ns.append(sequence.upper().count('N'))
        
    return pd.DataFrame(OrderedDict([
        ('chromosome', chromosomes),
        ('start', starts),
        ('end', ends),
        ('sequence', sequences),
        ('number_of_Ns', number_of_Ns)
    ]))    

def path(file): 
#     return './' + file
    return '../data/temp/' + file

def get_naive_calls(): 
    call_set = path('call.filtered.bed')
    chr2 = path('chr2.fa')
    calls = !bedtools getfasta -bed $call_set -fi $chr2 -fo /dev/stdout -tab 2>info 
    return calls_to_dataframe(calls)

naive_calls = get_naive_calls()
print('number of naive calls: {}'.format(len(naive_calls)))
naive_calls.head()


number of naive calls: 5840


Unnamed: 0,chromosome,start,end,sequence,number_of_Ns
0,2,1,10183,NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN...,9999
1,2,10890,11120,ggcgcagagaggcgcaccgcgcccgcgcaggcgcagagaggcgcaccgcgcccgcgcaggcgcagagaggcgcaccgcgcccgcgcaggcgcagag...,0
2,2,133229,133339,aaaattatcctggcatggtggcgtatgcctgtagtcccaaccgcacaggaggcttaggtgggaggatccactgaactcaggaggtggtggatgcag...,0
3,2,197732,198231,GCCTGTTGACGTGGAGACGCGCGGGGCCTTTTCCCTTCTGTCTGCCTTTCCGGCCTGTTGACGTGGAGACGCGCGGGGCGTTTTCCCTTCTGTCTG...,0
4,2,198238,198395,TGTCTGCCTTTCCGGCCTGTTGACGTGGAGACGCGCGGGGCCTTTTCCCTTCTGTCTGCCTTTCCGGCCTGTTGACGTGGAGACGCGCGGGGCCTT...,0


## False calls from naive SV caller

Calls that are not present in a truth set for this sample are deemed "false calls". 


In [6]:
def get_false_naive_calls(): 
    call_set = path('call.filtered.bed')
    false_call_set = path('call.filtered.false.bed')
    truth_set = path('limo-truth.vcf')
    chr2 = path('chr2.fa')
    !bedtools intersect -v -a $call_set -b $truth_set > $false_call_set 2>> info
    calls = !bedtools getfasta -bed $false_call_set -fi $chr2 -fo /dev/stdout -tab 2>> info
    df = calls_to_dataframe(calls)
    df['length'] = df['sequence'].map(lambda s: len(s))
    return df

false_naive_calls = get_false_naive_calls()
print('number of false naive calls: {}'.format(len(false_naive_calls)))
false_naive_calls.head()


number of false naive calls: 4369


Unnamed: 0,chromosome,start,end,sequence,number_of_Ns,length
0,2,1,10183,NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN...,9999,10182
1,2,10890,11120,ggcgcagagaggcgcaccgcgcccgcgcaggcgcagagaggcgcaccgcgcccgcgcaggcgcagagaggcgcaccgcgcccgcgcaggcgcagag...,0,230
2,2,133229,133339,aaaattatcctggcatggtggcgtatgcctgtagtcccaaccgcacaggaggcttaggtgggaggatccactgaactcaggaggtggtggatgcag...,0,110
3,2,197732,198231,GCCTGTTGACGTGGAGACGCGCGGGGCCTTTTCCCTTCTGTCTGCCTTTCCGGCCTGTTGACGTGGAGACGCGCGGGGCGTTTTCCCTTCTGTCTG...,0,499
4,2,198238,198395,TGTCTGCCTTTCCGGCCTGTTGACGTGGAGACGCGCGGGGCCTTTTCCCTTCTGTCTGCCTTTCCGGCCTGTTGACGTGGAGACGCGCGGGGCCTT...,0,157


## False SV calls containing AT repeats

[Ross et al 2013](https://genomebiology.biomedcentral.com/articles/10.1186/gb-2013-14-5-r51) (Table 2), [Smith et al 2015](https://peerj.com/articles/836/) (Fig 3), and we (CEPH data) have established that sequences containing AT dinucleotide repeats tend to have lower read depths than random sequences. This systematic reduction in depth could have fooled Hao's caller into making a call where no SV exists.  

First, let's get false calls containing AT repeats: 



In [7]:
def sequences_containing_motif(df, motif_):
    motif_in_sequence = df['sequence'].map(lambda sequence: motif_.upper() in sequence.upper())
    return df[motif_in_sequence]

false_naive_calls_containing_AT = sequences_containing_motif(false_naive_calls, 'AT'*10)
false_naive_calls_containing_AT


Unnamed: 0,chromosome,start,end,sequence,number_of_Ns,length
1022,2,87778896,87778998,ATTTTGGCCTGAGTTCTGCCTTGGATTTATGTTTTTTGTtacatatatatatatatatatatatatattttttttttttttttttttttttttttt...,0,102
1779,2,92206632,92222959,ACTCTTAATGATACAACAGCTAAATATAGGTCTAATGCTCATTCCGTGTGGACAACAATAGCAGCCATTCCCACAAATGGCTGATTTGTGGGAAGT...,0,16327
2432,2,110746778,110746954,gcattgtttaaatgagttataagattgagcatgttttcatgtttattggctgcttgtatatcttctttggagaaatgtctattcagattgtctccc...,0,176
2433,2,110746930,110747194,gtatatatatatatatatatatatttttttttttttttttttttgagacggagtcttactctgttgccaggctggagtgcagtggcacgatcttgg...,0,264
3359,2,131299237,131305361,GCCATATCTGACCCTTTCTCTCTGTCTAAGGAAATGGGGCAAGGCAGGGGAAGCAGGCCCAGTGGTGCACAGTGGGCTGTTGGTGTCAGGAGGCTG...,0,6124
3361,2,131306037,131316741,aattcctggactcatacaatcctcccaccttagcctcccaagtactaggattacaggtgtgagccatcacacccggcTCCATTTAGCTATTTCAAA...,0,10704
3796,2,175459741,175459872,ccagggtgggaagatagcttgagcccaagagtttgagaccatcctgggcaaacatagtgggaccctcatctctacaaaaaaaaaaaaaaaaaaaaa...,0,131


Next, let's compute the depths along each call:

In [8]:
from utility import named_tuple
import os 

def fetch_chromosome(df):
    return int(df['chromosome'].iloc[0])

def read_depths_new(args):
    sample, chromosome, per_base, tool, dtype, suffix = os.path.basename(args.depth_file_name).split('.')
    assert (chromosome == 'chr' + str(args.target_chromosome_number))
    assert (per_base == 'per-base')
    assert (tool == args.target_tool)
    assert (suffix == 'bin')
    return np.fromfile(args.depth_file_name, dtype=getattr(np, dtype))

def add_depths(df, depth_file_name): 
    args = named_tuple({
        'depth_file_name': depth_file_name,
        'target_chromosome_number': fetch_chromosome(df), 
        'target_tool': 'mosdepth'
    })
    depths = read_depths_new(args)

    def depths_and_meanDepth(row): 
        start = np.int32(row['start'])
        end = np.int32(row['end'])
        depths_slice = depths[start:end]
        uncorrected_depths = depths_slice/np.mean(depths)
        mean_uncorrected_depths = np.mean(uncorrected_depths)
        return pd.Series(
            [depths_slice, uncorrected_depths, mean_uncorrected_depths], 
            index=['observed_depths', 'uncorrected_depths', 'mean_uncorrected_depths']
        )
    
    series = df.apply(depths_and_meanDepth, axis=1)
    return pd.concat([df, series], axis=1)
    
false_naive_calls_containing_AT = add_depths(
    false_naive_calls_containing_AT, 
    depth_file_name='../data/temp/hg002.chr2.per-base.mosdepth.int32.bin'
)
false_naive_calls_containing_AT


Unnamed: 0,chromosome,start,end,sequence,number_of_Ns,length,observed_depths,uncorrected_depths,mean_uncorrected_depths
1022,2,87778896,87778998,ATTTTGGCCTGAGTTCTGCCTTGGATTTATGTTTTTTGTtacatatatatatatatatatatatatattttttttttttttttttttttttttttt...,0,102,"[14, 14, 14, 14, 14, 13, 13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 11, 11, 11, 10, 10,...","[0.550422702575531, 0.550422702575531, 0.550422702575531, 0.550422702575531, 0.550422702575531, ...",0.391232
1779,2,92206632,92222959,ACTCTTAATGATACAACAGCTAAATATAGGTCTAATGCTCATTCCGTGTGGACAACAATAGCAGCCATTCCCACAAATGGCTGATTTGTGGGAAGT...,0,16327,"[31, 32, 32, 32, 32, 32, 32, 32, 31, 31, 32, 31, 31, 30, 30, 30, 30, 29, 29, 28, 29, 29, 28, 29,...","[1.218793127131533, 1.2581090344583568, 1.2581090344583568, 1.2581090344583568, 1.25810903445835...",1.191399
2432,2,110746778,110746954,gcattgtttaaatgagttataagattgagcatgttttcatgtttattggctgcttgtatatcttctttggagaaatgtctattcagattgtctccc...,0,176,"[27, 28, 28, 29, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, 28, 27, 27, 27, 26, 26, 26, 27,...","[1.0615294978242384, 1.100845405151062, 1.100845405151062, 1.1401613124778858, 1.218793127131533...",0.626374
2433,2,110746930,110747194,gtatatatatatatatatatatatttttttttttttttttttttgagacggagtcttactctgttgccaggctggagtgcagtggcacgatcttgg...,0,264,"[3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 4,...","[0.11794772198047093, 0.11794772198047093, 0.11794772198047093, 0.0786318146536473, 0.0786318146...",0.633671
3359,2,131299237,131305361,GCCATATCTGACCCTTTCTCTCTGTCTAAGGAAATGGGGCAAGGCAGGGGAAGCAGGCCCAGTGGTGCACAGTGGGCTGTTGGTGTCAGGAGGCTG...,0,6124,"[12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,...","[0.47179088792188373, 0.47179088792188373, 0.47179088792188373, 0.47179088792188373, 0.471790887...",0.597128
3361,2,131306037,131316741,aattcctggactcatacaatcctcccaccttagcctcccaagtactaggattacaggtgtgagccatcacacccggcTCCATTTAGCTATTTCAAA...,0,10704,"[15, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 19, 18, 18, 18, 17, 16, 16, 16, 16, 16, 16, 16, 16,...","[0.5897386099023547, 0.5897386099023547, 0.5897386099023547, 0.6290545172291784, 0.6290545172291...",0.818255
3796,2,175459741,175459872,ccagggtgggaagatagcttgagcccaagagtttgagaccatcctgggcaaacatagtgggaccctcatctctacaaaaaaaaaaaaaaaaaaaaa...,0,131,"[11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9...","[0.4324749805950601, 0.4324749805950601, 0.4324749805950601, 0.39315907326823646, 0.393159073268...",0.282714


Next, let's plot the depth profile across each call (in order of increasing effects on depth):

In [9]:
my_df = false_naive_calls_containing_AT[false_naive_calls_containing_AT['length']<200]
my_df

Unnamed: 0,chromosome,start,end,sequence,number_of_Ns,length,observed_depths,uncorrected_depths,mean_uncorrected_depths
1022,2,87778896,87778998,ATTTTGGCCTGAGTTCTGCCTTGGATTTATGTTTTTTGTtacatatatatatatatatatatatatattttttttttttttttttttttttttttt...,0,102,"[14, 14, 14, 14, 14, 13, 13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 11, 11, 11, 10, 10,...","[0.550422702575531, 0.550422702575531, 0.550422702575531, 0.550422702575531, 0.550422702575531, ...",0.391232
2432,2,110746778,110746954,gcattgtttaaatgagttataagattgagcatgttttcatgtttattggctgcttgtatatcttctttggagaaatgtctattcagattgtctccc...,0,176,"[27, 28, 28, 29, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, 28, 27, 27, 27, 26, 26, 26, 27,...","[1.0615294978242384, 1.100845405151062, 1.100845405151062, 1.1401613124778858, 1.218793127131533...",0.626374
3796,2,175459741,175459872,ccagggtgggaagatagcttgagcccaagagtttgagaccatcctgggcaaacatagtgggaccctcatctctacaaaaaaaaaaaaaaaaaaaaa...,0,131,"[11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9...","[0.4324749805950601, 0.4324749805950601, 0.4324749805950601, 0.39315907326823646, 0.393159073268...",0.282714


In [10]:
def g(row, number_rows):     
    return dict(
        label = row['mean_uncorrected_depths'],
        method = 'update', 
        args = [
            {'visible': [False] * number_rows},
            {'title': row['sequence']}
        ]
    ) 
import functools
g_partial = functools.partial(g, number_rows=len(my_df))
list(my_df.apply(g_partial, axis=1))

[{'args': [{'visible': [False, False, False]},
   {'title': 'ATTTTGGCCTGAGTTCTGCCTTGGATTTATGTTTTTTGTtacatatatatatatatatatatatatatttttttttttttttttttttttttttttttttga'}],
  'label': 0.39123182290907854,
  'method': 'update'},
 {'args': [{'visible': [False, False, False]},
   {'title': 'gcattgtttaaatgagttataagattgagcatgttttcatgtttattggctgcttgtatatcttctttggagaaatgtctattcagattgtctccccattttaaaatcgtgttattttcttttttactattgacttgtgaatgttcaaacatgtatatatatatatatatatatat'}],
  'label': 0.6263738871841678,
  'method': 'update'},
 {'args': [{'visible': [False, False, False]},
   {'title': 'ccagggtgggaagatagcttgagcccaagagtttgagaccatcctgggcaaacatagtgggaccctcatctctacaaaaaaaaaaaaaaaaaaaaaatatatatatatatatatatatatatatataatta'}],
  'label': 0.28271438703715934,
  'method': 'update'}]

Unnamed: 0,chromosome,start,end,sequence,number_of_Ns,length,observed_depths,uncorrected_depths,mean_uncorrected_depths
3796,2,175459741,175459872,ccagggtgggaagatagcttgagcccaagagtttgagaccatcctgggcaaacatagtgggaccctcatctctacaaaaaaaaaaaaaaaaaaaaa...,0,131,"[11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9...","[0.4324749805950601, 0.4324749805950601, 0.4324749805950601, 0.39315907326823646, 0.393159073268...",0.282714
1022,2,87778896,87778998,ATTTTGGCCTGAGTTCTGCCTTGGATTTATGTTTTTTGTtacatatatatatatatatatatatatattttttttttttttttttttttttttttt...,0,102,"[14, 14, 14, 14, 14, 13, 13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 11, 11, 11, 10, 10,...","[0.550422702575531, 0.550422702575531, 0.550422702575531, 0.550422702575531, 0.550422702575531, ...",0.391232
2432,2,110746778,110746954,gcattgtttaaatgagttataagattgagcatgttttcatgtttattggctgcttgtatatcttctttggagaaatgtctattcagattgtctccc...,0,176,"[27, 28, 28, 29, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, 28, 27, 27, 27, 26, 26, 26, 27,...","[1.0615294978242384, 1.100845405151062, 1.100845405151062, 1.1401613124778858, 1.218793127131533...",0.626374


In [47]:
my_df

Unnamed: 0,chromosome,start,end,sequence,number_of_Ns,length,observed_depths,uncorrected_depths,mean_uncorrected_depths
1022,2,87778896,87778998,ATTTTGGCCTGAGTTCTGCCTTGGATTTATGTTTTTTGTtacatatatatatatatatatatatatattttttttttttttttttttttttttttt...,0,102,"[14, 14, 14, 14, 14, 13, 13, 13, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 11, 11, 11, 10, 10,...","[0.550422702575531, 0.550422702575531, 0.550422702575531, 0.550422702575531, 0.550422702575531, ...",0.391232
2432,2,110746778,110746954,gcattgtttaaatgagttataagattgagcatgttttcatgtttattggctgcttgtatatcttctttggagaaatgtctattcagattgtctccc...,0,176,"[27, 28, 28, 29, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, 28, 27, 27, 27, 26, 26, 26, 27,...","[1.0615294978242384, 1.100845405151062, 1.100845405151062, 1.1401613124778858, 1.218793127131533...",0.626374
3796,2,175459741,175459872,ccagggtgggaagatagcttgagcccaagagtttgagaccatcctgggcaaacatagtgggaccctcatctctacaaaaaaaaaaaaaaaaaaaaa...,0,131,"[11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9...","[0.4324749805950601, 0.4324749805950601, 0.4324749805950601, 0.39315907326823646, 0.393159073268...",0.282714


In [55]:
import plotly
import plotly.graph_objs as go

def f(row): 
    uncorrected_depths = row['uncorrected_depths']    
    positions = np.arange(len(uncorrected_depths))
    return go.Bar(
        visible=False,
        x=positions,
        y=uncorrected_depths
    )

def create_data(df, active): 
    data = list(df.apply(f, axis=1))
    data[active]['visible'] = True
    return data

def g(row, number_rows):  
    nucleotides = list(row['sequence'])
    positions = np.arange(len(nucleotides))    
    return dict(
        label = row['mean_uncorrected_depths'],
        method = 'update', 
        args = [
            {
                'visible': [False] * number_rows
            },
            {
                'xaxis.ticktext': nucleotides, 
                'xaxis.tickvals': positions
            }
        ]
    ) 

def create_steps(df): 
    import functools
    g_partial = functools.partial(g, number_rows=len(df))
    steps = list(df.apply(g_partial, axis=1))
    for counter, step in enumerate(steps):
        step['args'][0]['visible'][counter] = True # Toggle i'th trace to "visible"
    return steps 

def plot_depth_profile(df, active=0):
    df = df.sort_values('mean_uncorrected_depths')
    
    data = create_data(df, active)

    # https://plot.ly/python/ipython-notebook-tutorial/#plot-controls--ipython-widgets    
    sliders = [dict(
        active=active,
        currentvalue={"prefix": "mean_uncorrected_depths: "},
        pad={"t": 50},
        steps=create_steps(df)
    )]
    
    nucleotides = list(df.iloc[active]['sequence'])
    print(len(nucleotides))
    positions = np.arange(len(nucleotides))
    print(len(positions))
    
    layout = go.Layout(
        sliders=sliders,
        yaxis=go.layout.YAxis(
            fixedrange=True
        ),
        xaxis=go.layout.XAxis(
            ticktext=nucleotides,
            tickvals=positions,
            tickangle=0
        )
    )

    fig = go.Figure(data=data, layout=layout) 
    plotly.offline.init_notebook_mode(connected=True)
    plotly.offline.iplot(fig, config={'scrollZoom': True})

plot_depth_profile(my_df)

131
131


In [15]:
# data = [dict(
#         visible = False,
#         line=dict(color='black', width=6),
#         name = '𝜈 = '+str(step),
#         x = np.arange(0,10,0.01),
#         y = np.sin(step*np.arange(0,10,0.01))) for step in np.arange(0,5,0.1)]
# data[10]['visible'] = True

# steps = []
# for i in range(len(data)):
#     step = dict(
#         method = 'update', # 'restyle',
# #         args = ['visible', [False] * len(data)],
#         args = [
#             {'visible': [False] * len(data)},
#             {'title': '{}'.format(i)}
#         ]
#     )
# #     step['args'][1][i] = True # Toggle i'th trace to "visible"
#     step['args'][0]['visible'][i] = True # Toggle i'th trace to "visible"
#     steps.append(step)

# sliders = [dict(
#     active = 10,
#     currentvalue = {"prefix": "Frequency: "},
#     pad = {"t": 50},
#     steps = steps
# )]

# layout = dict(sliders=sliders)
# print(layout)
# fig = dict(data=data, layout=layout)

# plotly.offline.iplot(fig, filename='Sine Wave Slider')

## To Do 

* make calls on more samples so that we can restrict false SV calls to those that recur in multiple individuals
