In [1]:
"""
    Show a simple visualization of variants relative to a reference.
"""
from biograph import BioGraph, Reference, find_variants, visualize

In [2]:
# Open the BioGraph stored at the given path.
# NOTE(review): the variable is named na12878, but the path points at the
# HG002 / NA24385 dataset -- confirm which sample is intended.
na12878 = BioGraph("/share/datasets/HG002/HG002-NA24385-50x.bg/")

In [3]:
# Open the reference stored at the given path (human_g1k_v37, bound to the name grch37)
grch37 = Reference("/share/reference/human_g1k_v37/")

In [4]:
# Find variants between the BioGraph and the reference,
# restricted to the region 1:245822567-245824567
na12878_vs_grch37 = find_variants(
    na12878,
    grch37,
    "1",
    245822567,
    245824567,
)

In [6]:
# Render every assembly returned by find_variants, one after another.
# ascii=False is passed to visualize; the captured output below is drawn
# with Unicode box-drawing characters.
for assembly in na12878_vs_grch37:
    visualize(assembly, ascii=False)

                                .
                                .
                                .
1           :   245822860   41 T│ 
1           :   245822861   41 A│ 
1           :   245822862   41 C│ 
                            0  ├───╮  41
1           :   245822863    0 C│   │T 41
                            0  ├───╯  41
1           :   245822864   41 G│ 
1           :   245822865   41 G│ 
1           :   245822866   41 G│ 
                                .
                                .
                                .
1           :   245823075   34 T│ 
1           :   245823076   32 C│ 
1           :   245823077   32 A│ 
                            16  ├───╮  16
1           :   245823078   16 C│   │T 16
                            0  ├───────────╮  16
1           :   245823079    0 A│   │G 16  │G 16
                            0  ├───╯  16   │    
                            16  ├───────────╯  16
1           :   245823080   32 C│ 
1           :   245823081   32 C│ 
1     

In [8]:
# List all of the variants attached to the first assembly in the result.
# The bare expression is left as the last line so the notebook displays its repr.
na12878_vs_grch37[0].variants

[<biograph.Variant 1:245824428_1:1>,
 <biograph.Variant 1:245824292_0:1>,
 <biograph.Variant 1:245823631-_3865_1:245823568->,
 <biograph.Variant 1:245823078_2:2>,
 <biograph.Variant 1:245824141_1:1>,
 <biograph.Variant 1:245823079_1:1>,
 <biograph.Variant 1:245822863_1:1>]

In [9]:
# The large deletion is a structural variant with both breakends in the reverse direction.
# It is picked by position (index 2) in the variants list of the first assembly.
# NOTE(review): the listing above does not appear to be position-sorted, so the
# hard-coded index may not be stable across runs -- confirm.
sv = na12878_vs_grch37[0].variants[2]
# Format explicitly so the cell prints "True False False" under both Python 2 and
# Python 3 (a bare `print a, b, c` statement is a SyntaxError on Python 3, and
# `print(a, b, c)` would print a tuple on Python 2).
print("%s %s %s" % (sv.is_structural, sv.left_forward, sv.right_forward))

True False False


In [10]:
# Since both breakends are in the reverse direction, show the reverse complement of the sequence.
# print(...) with a single argument behaves the same on Python 2 and Python 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print(sv.flip().sequence)

CTGAGGATCTAAGTACAGGGGAGTTCTGGGCATGCATTAGTGCTCTAAGGAGAGGCCTTCGCTGGTTAATGTCCCCAGGTTTGGGTTGGTAGCCTACCATTTGGTATCATACTGAGCTCCCTAATTTTGAGCTACATCTAGAAGACCACGTGCCTGGTGCCAAGAAGGACACTTAGACTATGTGTGGATGGAGCTTCATCAGCTGTTCTAATGAACGTTTTGCAAAAGACCATTGTGAGTGAGATGGGAGTTTCTCTGTTTTCAAGCCCAGGGCCCTGCCTGCTGGTGTGTCCCTCTTCTGAGTCATTCAGCCTTGACCCACTCTAGGGAACCGGAGGAATCCACAGCCCAGAGCTCTCCCATGGTGGCCTGTTTGACAGTCACATTCTGAAAAAGCAAACCTCTTGGCTTGATTTCTTGCTGGAATAATGAAACCCAGCTCGGCCCCTGCTTGATAATTGCCTTTGGGAAAAAGGTAATAGCCACCTTGAAACACCATTCAGTTGGACCTTTCCAAAAGCCACGGGTTTTTCTTCACCAAAGTCCTTCATTTGAGCCTATAATCTGTGAATGGAAAGAAGCGATGATGCCTCTAAAAGCCTTTTGGTTGGAGTCAGAGTCACCCTTTGAGCACTGAGTTCTCTTTCCAGGGACCTCTCCCATGTGGTCTTGGAGCCATGGGTGTTGTACAAAAGCAACAGGCCTAGCAGAGCTACCAGTGTGTGAGCGTTTGCTGTGTGAGCCCAGGCTTTCCACAGGACGAAGATTCTCTCACCAGAGTGTGAGCATTTGCAGTGTGAGCCCAGGCTCTCTACAGGACGAAGATTCTCTTACCAGCATGTGAACATTTGCAGTGTGAGCCCAGGCTCTCTACAGGACGAAGATTCTCTCACCAGAGTGTGAGCATTTGCTAGTGTGAGCCCAGGCTCTCTACAGGACGAAGATTCTCTTACCAGCATGTGAGCATTTGCAGTGTGAGCCCAGGCTCTCCACAGGACAA

What is this sequence? We could <a href="http://blast.ncbi.nlm.nih.gov/Blast.cgi">BLAST</a> it to find out <a href="http://www.ncbi.nlm.nih.gov/nucleotide/568815500?report=genbank&log$=nuclalign&blast_rank=1&RID=K493Z58P014">what it really is</a>.

...or see the <a href='Multiple_references.ipynb'>Multiple references</a> demonstration.
