In [175]:
from collections import OrderedDict
from collections import Counter

# Fixed VCF columns, in the order they appear on each tab-separated data line.
# parse() copies the first seven (CHROM..FILTER) by name and reads the INFO and
# FORMAT columns by position (indices 7 and 8).
VCF_HEADER = ['CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO', 'FORMAT']

def lines(filename):
    """Yield one parsed record per data line of a VCF file.

    Lines starting with '#' (meta-information and the column header) and
    blank lines are skipped; every other line is handed to parse().

    Args:
        filename: Path of the VCF file to read.

    Yields:
        The value returned by parse() for each data line, in file order.
    """
    with open(filename) as fh:
        for line in fh:
            # A blank line (e.g. a stray newline at the end of the file) has
            # no columns and would make parse() fail with IndexError.
            if line.startswith('#') or not line.strip():
                continue
            yield parse(line)

# Parse one VCF data line into an ordered dictionary: the fixed columns first, then every INFO and FORMAT entry as a key/value pair.
def parse(line):
    """Parse one VCF data line into an ordered dictionary.

    The result holds, in this order: the seven fixed columns named by
    VCF_HEADER[:7], one entry per ';'-separated INFO item, then one entry per
    FORMAT key paired with the matching value of the first sample column.
    Every value is normalised with get_value().

    NOTE: fixed columns, INFO entries and FORMAT entries share one key space,
    so a key that occurs more than once keeps only the value written last.

    Args:
        line: A tab-separated VCF data line (not a '#' header line).

    Returns:
        OrderedDict mapping column / INFO / FORMAT names to their values.

    Raises:
        IndexError: If the line has fewer than eight tab-separated columns.
    """
    result = OrderedDict()
    # Strip only the line terminator: a bare rstrip() would also remove
    # trailing tabs and so drop empty columns at the end of the line.
    fields = line.rstrip('\r\n').split('\t')

    for i, col in enumerate(VCF_HEADER[:7]):
        result[col] = get_value(fields[i])

    for i, info in enumerate(fields[7].split(';'), 1):
        # Split on the first '=' only so values that contain '=' stay intact.
        key, sep, value = info.partition('=')
        if not sep:
            # Entry without '=' (a flag): store it under a positional name.
            key = 'INFO{}'.format(i)
            value = info
        result[key] = get_value(value)

    # The FORMAT and sample columns are optional (absent on sites-only lines).
    if len(fields) > 8:
        format_keys = fields[8].split(':')
        format_values = fields[9].split(':') if len(fields) > 9 else []
        for i, key in enumerate(format_keys):
            # The sample may list fewer sub-fields than FORMAT declares;
            # treat the missing ones as absent values instead of failing.
            raw = format_values[i] if i < len(format_values) else ''
            result[key] = get_value(raw)

    return result

# Return None when the value is missing ('', '.' or 'NA'); split comma-separated values into a list.
def get_value(value):
    """Normalise a raw field value.

    Returns None for a missing value (falsy, '.' or 'NA'), a list of the
    comma-separated parts when the value contains a comma, and the value
    itself otherwise.
    """
    is_missing = not value or value in ('', '.', 'NA')
    if is_missing:
        return None
    return value.split(',') if ',' in value else value

# Return a copy of dictionary d without the given keys.
def without_keys(d, keys):
    """Return a new plain dict with the entries of d whose key is not in keys.

    d itself is left untouched; entries keep d's iteration order.
    """
    kept = {}
    for name in d:
        if name in keys:
            continue
        kept[name] = d[name]
    return kept

In [184]:
# Combine the VCF data: one merged row for every site (CHROM, POS) that is
# present in both the freebayes and the VarScan calls.

def _site_key(record):
    """Return a hashable (CHROM, POS) key for a parsed VCF record."""
    # get_value() turns comma-containing fields into lists, which cannot be
    # used as dictionary keys, so lists are converted to tuples.
    return tuple(
        tuple(part) if isinstance(part, list) else part
        for part in (record['CHROM'], record['POS'])
    )


fixed_columns = VCF_HEADER[:7]

# Read the VarScan file once and index it by site instead of re-reading the
# whole file for every freebayes record. setdefault keeps the FIRST VarScan
# record of a site, matching the previous "break on first match" behaviour.
# This holds all VarScan records in memory.
varscan_by_site = {}
for vs in lines('varscan_raw.vcf'):
    varscan_by_site.setdefault(_site_key(vs), vs)

new_vcf_list = []
for fb in lines('freebayes_raw.vcf'):
    vs = varscan_by_site.get(_site_key(fb))
    if vs is None:
        continue

    # Fields reported by both callers (other than the fixed columns) are kept
    # twice, prefixed with the caller name. Listing them in record order makes
    # the column order reproducible between runs.
    shared = [key for key in fb if key in vs and key not in fixed_columns]
    shared_lookup = set(shared)

    new_vcf = OrderedDict()
    # Fixed columns and freebayes-only fields keep their names.
    for key, value in fb.items():
        if key not in shared_lookup:
            new_vcf[key] = value
    for key in shared:
        new_vcf['freebayes_' + key] = fb[key]
    # Keep only the fields that differ from the other VCF; the fixed columns
    # are taken from the freebayes record.
    for key, value in vs.items():
        if key not in shared_lookup and key not in fixed_columns:
            new_vcf[key] = value
    for key in shared:
        new_vcf['varscan_' + key] = vs[key]
    new_vcf['TOOL'] = 'both'

    new_vcf_list.append(new_vcf)



In [186]:
import csv

# Rows may carry different INFO/FORMAT fields, so the header is the union of
# every row's keys (in first-seen order) rather than the first row's keys only;
# DictWriter raises ValueError for a row with a key missing from the header.
# An empty new_vcf_list now yields a file with an empty header line instead
# of an IndexError.
keys = []
seen_keys = set()
for row in new_vcf_list:
    for key in row:
        if key not in seen_keys:
            seen_keys.add(key)
            keys.append(key)

# newline='' lets the csv module control line endings (no blank rows on Windows).
# NOTE: list values produced by get_value() are written with str(), e.g. "['A', 'T']".
with open('merged_VCF.csv', 'w', newline='') as outfile:
    writer = csv.DictWriter(outfile, fieldnames=keys)
    writer.writeheader()
    writer.writerows(new_vcf_list)