## Reader

In [None]:
import os
import time
import timeit
import vcf
from pprint import pprint
from nest.parsers.vcfReader import Reader
from nest.parsers.vcf import Vcf

In [None]:
# Open the VCF, read its header, then print the CHROM and UID attributes of
# each record yielded by readvcf().
vcf_reader = Reader('local/tmp_freebayes_gatk.vcf')
samples, lines = 0, 0
vcf_reader.readheader()
for record in vcf_reader.readvcf():
    chrom, uid = record.CHROM, record.UID
    print(chrom, uid)


In [None]:
def bareread():
    """Count the non-header lines of the annotated VCF with plain file I/O.

    The file is iterated line by line and every line starting with '#' is
    skipped. The count is only accumulated, not returned; the function
    returns None.
    """
    lines = 0
    # Use a context manager so the handle is closed on every call: this
    # function is invoked repeatedly by %timeit, and the previous version
    # never closed the file it opened.
    with open('local/MaRS_test/SRR6463548/SRR6463548_variants_merged_annotated.vcf') as vcf_reader:
        for line in vcf_reader:
            if line.startswith('#'):
                continue
            lines += 1

%timeit -r 5 -n 1000 bareread()

In [None]:
def oldread():
    """Exhaust the iterable returned by ``Vcf.Reader(...).read()``.

    Every item is counted, but the count is discarded and the function
    returns None.
    """
    record_iter = Vcf.Reader('local/MaRS_test/SRR6463548/SRR6463548_variants_merged_annotated.vcf').read()
    count = 0
    for _ in record_iter:
        count += 1
        
%timeit -r 5 -n 1000 oldread()  

In [None]:
def pyvcf():
    """Exhaust a ``vcf.Reader`` opened on the annotated VCF.

    Every record is counted, but the count is discarded and the function
    returns None.
    """
    count = 0
    parser = vcf.Reader(filename='local/MaRS_test/SRR6463548/SRR6463548_variants_merged_annotated.vcf')
    for _ in parser:
        count += 1

%timeit -r 5 -n 1000 pyvcf()

In [None]:
# Attributes printed for each entry of a list-valued header category.
entry_attrs = {
    'info': ('id', 'number', 'type', 'description', 'version', 'source'),
    'filter': ('id', 'description'),
    'format': ('id', 'number', 'type', 'description'),
    'alt': ('id', 'description'),
    'contig': ('id', 'url', 'assembly', 'length'),
    'other': ('key', 'value'),
}
# Header categories whose value is printed as-is.
plain_categories = ('fileFormat', 'samples', 'fields')

# Dump every header category: plain categories are printed whole, the
# others entry by entry using the attribute table above. Unknown
# categories only get their heading line.
for category, content in vcf_reader.header.items():
    print('Heading category : {0}'.format(category))
    if category in plain_categories:
        print(content)
    elif category in entry_attrs:
        for entry in content:
            print(*(getattr(entry, attr) for attr in entry_attrs[category]))


## Writer

In [None]:
from nest.parsers.vcfReader import Reader
from nest.parsers.vcfwriter import Writer
# Round-trip demo: read the annotated VCF and write its header and records
# to 'test.vcf' through Writer.
vcf_reader = Reader('local/MaRS_test/SRR6463548/SRR6463548_variants_merged_annotated.vcf')
# presumably populates vcf_reader.header, which is handed to the writer
# below - confirm against Reader.readheader
vcf_reader.readheader()
reader = vcf_reader.readvcf()
writer = Writer('test.vcf')
writer.writeHeaders(vcf_reader.header)
# Forward each record from the reader to the writer, one at a time.
# NOTE(review): the writer is never explicitly closed in this cell - confirm
# whether Writer needs a close/flush call.
for rec in reader:
    writer.writeRecords(rec)

## Merge

In [1]:
from nest.parsers.vcfmerge import Merge
tmp_dir = 'local'
# Maps each input VCF path to a label; insertion order determines the order
# of the path list handed to splitter().
vcf_dict = {
    'local/MaRS_test/SRR6463548/SRR6463548_variants_samtools.vcf': 'samtools',
    'local/MaRS_test/SRR6463548/SRR6463548_variants_gatk.vcf': 'gatk',
    'local/MaRS_test/SRR6463548/SRR6463548_variants_freebayes.vcf': 'freebayes',
}
merger = Merge(tmp_dir, vcf_dict)
merger.splitter(list(vcf_dict))

['local/MaRS_test/SRR6463548/SRR6463548_variants_gatk.vcf'] ['local/MaRS_test/SRR6463548/SRR6463548_variants_freebayes.vcf']
GT ['0/1']
AD [39, 15]
DP [54]
GQ [99]
PL [257, 0, 867]
RO [39]
QR [1480]
AO [15]
QA [571]
GL [-33.415, 0.0, -109.734]
GT ['0/1']
DP [17]
AD [15, 2]
RO [15]
QR [536]
AO [2]
QA [76]
GL [-1.826, 0.0, -41.0327]
GT ['0/1']
AD [13, 11]
DP [24]
GQ [99]
PL [199, 0, 408]
GT ['0/0']
DP [96]
AD [89, 7]
RO [89]
QR [3396]
AO [7]
QA [260]
GL [0.0, -15.4202, -264.722]
GT ['0/1']
AD [5, 5]
DP [10]
GQ [99]
PL [195, 0, 195]
GT ['0/1']
AD [5, 5]
DP [10]
GQ [99]
PL [195, 0, 195]
RO [0]
QR [0]
AO [5]
QA [183]
GL [-14.1411, -1.50515, 0.0]
GT ['0/1']
AD [5, 5]
DP [10]
GQ [99]
PL [195, 0, 195]
GT ['1/1']
DP [6]
AD [0, 6]
RO [0]
QR [0]
AO [6]
QA [229]
GL [-17.8085, -1.80618, 0.0]
GT ['0/1']
AD [18, 5]
DP [23]
GQ [99]
PL [156, 0, 741]
GT ['0/1']
DP [20]
AD [16, 3]
RO [16]
QR [610]
AO [3]
QA [112]
GL [-0.444874, 0.0, -44.683]
GT ['0/1']
DP [21]
AD [17, 4]
RO [17]
QR [655]
AO [4]
QA [154]


QA [2516]
GL [-190.19, 0.0, -52.4581]
PfMDR1 551
GT ['0/1']
PL [255, 0, 255]
AD [52, 70]
DP [122]
GQ [99]
RO [52]
QR [1923]
AO [70]
QA [2690]
GL [-195.442, 0.0, -130.314]
PfMDR1 1924
GT ['0/1']
PL [139, 0, 255]
AD [62, 19]
DP [81]
GQ [99]
RO [58]
QR [2178]
AO [29]
QA [1107]
GL [-67.0867, 0.0, -161.25]
GT ['0/1']
PL [255, 0, 217]
GT ['0/1']
AD [48, 22]
DP [70]
GQ [99]
PL [775, 0, 1921]
PfMDR1 1954
GT ['1/1']
PL [255, 42, 0]
AD [34, 40]
DP [74]
GQ [99]
PfMDR1 2157
GT ['0/1']
PL [255, 0, 249]
AD [26, 48]
DP [74]
GQ [99]
RO [26]
QR [996]
AO [48]
QA [1837]
GL [-136.579, 0.0, -63.9804]


['local/tmp_samtools_gatk_freebayes.vcf']

## Annotate

In [1]:
from nest.parsers.vcfannotate import Annotate
# Run getAnnotation on the merged VCF; the five positional arguments are a
# .bed file, the .vcf file, a .fa file, the 'local/' directory and a .bam
# file (roles presumed from the extensions - confirm against
# Annotate.getAnnotation).
annotater = Annotate()
out_file = annotater.getAnnotation(
    'ref/pfalciparum/mdr.bed',
    'local/tmp_samtools_gatk_freebayes.vcf',
    'ref/pfalciparum/mdr.fa',
    'local/',
    'local/MaRS_test/SRR6463548/alignments/output_FM_SR_DD_RG.bam',
)

GT ['0/1']
PL [150, 0, 239]
AD [39, 15]
DP [54]
GQ [99]
RO [39]
QR [1480]
AO [15]
QA [571]
GL [-33.415, 0.0, -109.734]
GT ['0/1']
DP [17]
AD [15, 2]
RO [15]
QR [536]
AO [2]
QA [76]
GL [-1.826, 0.0, -41.0327]
GT ['0/1']
AD [13, 11]
DP [24]
GQ [99]
PL [199, 0, 408]
GT ['0/0']
DP [96]
AD [89, 7]
RO [89]
QR [3396]
AO [7]
QA [260]
GL [0.0, -15.4202, -264.722]
GT ['0/1']
AD [5, 5]
DP [10]
GQ [99]
PL [195, 0, 195]
GT ['0/1']
AD [5, 5]
DP [10]
GQ [99]
PL [195, 0, 195]
RO [0]
QR [0]
AO [5]
QA [183]
GL [-14.1411, -1.50515, 0.0]
GT ['0/1']
AD [5, 5]
DP [10]
GQ [99]
PL [195, 0, 195]
GT ['1/1']
PL [42, 3, 0]
GT ['0/1']
AD [18, 5]
DP [23]
GQ [99]
PL [156, 0, 741]
GT ['0/1']
DP [20]
AD [16, 3]
RO [16]
QR [610]
AO [3]
QA [112]
GL [-0.444874, 0.0, -44.683]
GT ['0/1']
DP [21]
AD [17, 4]
RO [17]
QR [655]
AO [4]
QA [154]
GL [-2.14301, 0.0, -48.1121]
GT ['0/1']
DP [23]
AD [19, 4]
RO [19]
QR [731]
AO [4]
QA [156]
GL [-1.54664, 0.0, -54.0042]
GT ['0/1']
DP [23]
AD [19, 4]
RO [19]
QR [733]
AO [4]
QA [92]
GL [