In [1]:
''' Manipulate Multi-Sample VCF files in Pandas (Python3). 
'''
from collections import OrderedDict
import pandas as pd
import numpy as np
import re
import sys
import os
import collections

# path to this %%file
if sys.platform == "win32":
    file_path = os.path.dirname(os.path.abspath("__file__"))+"\\"
else:
    file_path = os.path.dirname(os.path.abspath("__file__"))+"/"

pd.set_option('display.max_columns', 500)
pd.set_option('display.max_colwidth', -1)


def vcf2dataframe(filename, genotype_level=True, info_level=True, UID=False):
    '''Open a VCF file and returns a MultiIndex pandas.DataFrame.
    Args:
        filename: vcf file to be converted to a dataframe
        genotype_level: place the genotype information into a second level column index
        info_level: place the info IDs into a second level column index
        UID: rename index to a unique variant identifier
    Notes:
        having any of these variables set to True will result
        in the DataFrame being generated very slowly. This is
        especially true for the UID variable.
    '''
    if filename.endswith(".gz"):
        raise IOError("pdVCF does not support compressed VCF files.")

    # get INFO fields and Headers as lists
    VCF_HEADER = get_vcf_header(filename)
    INFO_FIELDS = get_info_fields(filename)
    print(INFO_FIELDS)

    # Count how many comment lines should be skipped.
    comments = count_comments(filename)

    # Return a simple dataframe representative of the VCF data.
    df = pd.read_table(filename, skiprows=comments,
                       names=VCF_HEADER, usecols=range(len(VCF_HEADER)))

    if genotype_level:
        df = get_genotype_data(df)

    if info_level:
        df = get_info_data(df, INFO_FIELDS)

    if UID:
        df = index2UID(df)
    
    # replace empty cells
    df = df.replace("", np.nan)
    
    return df


def get_vcf_header(filename):
    ''' Get all header names from a given VCF file and return as a list.
    '''
    with open(filename) as input_file:
        row = [x for x in input_file if not x.startswith('##')] # skip unwanted headers
        head = next(iter(row))    # generator to deal with the header line only.
        split_head = [re.sub(r'#|\n', '', x) for x in head.split("\t")]
        return split_head


def get_info_fields(filename):
    ''' Get all ID names in the given VCFs INFO field and return as a list.
    '''
    with open(filename) as input_file:
        row = [x for x in input_file if x.startswith('##INFO')]
        info_fields = [x[11:].split(',')[0] for x in row]
        return info_fields



def count_comments(filename):
    ''' Count all lines in a given VCF file starting with #.
    '''
    comments = 0
    with open(filename) as f:
        for line in f:
            if line.startswith('#'):
                comments += 1

            else:
                break

    return comments



def replace_series_strings(df, col, dic, substring):
    ''' Replace the the keys with the items of the given
        dictionary for all strings or substrings in a
        given column
    Args:
        col: column name to replace strings
        dic: dictionary where the key is the string to replace with the item
        substrings: search and replace for either substrings (True) or exact strings (False)
    Returns:
        dataframe with the given column having all the
        entries identified as the key in the given dict
        replaced with the item in said dict
    '''
    if not isinstance(substring, bool):
        raise TypeError("substring argument must equal True or False")

    for string, correction in dic.items():
        if substring is True:
            df[col] = df[col].str.replace(string, correction)
        elif substring is False:
            df[col] = df[col].replace(string, correction, regex=True)

    return df



def get_genotype_data(df):
    ''' Give each sample column a second level column for every field
        detailed in the FORMAT column and return as a MultIndex dataframe.
    Args:
        df: DataFrame deriving from a VCF via vcf2dataframe()
    '''
    # contain the variant columns and the sample names in seperate lists
    normal = list(df.iloc[:, :9].columns)
    samples = list(df.iloc[:, 9:].columns)
    form = df['FORMAT'].str.split(":")[0]

    # These columns remain the same
    remain = pd.DataFrame(data=df[normal].values,
                          columns=pd.MultiIndex.from_tuples(
                            [(x, '') for x in normal] ))

    # list of dataframes where every sample has sub columns for each genotype info
    sams = [pd.DataFrame(data=list(df[col].str.split(':').dropna()),
                         columns=pd.MultiIndex.from_product([ [col], form ]))
            for col in samples]
    
    # add allele balance to sample genotype information
    sams = [calc_AB(sam) for sam in sams]
    
    # concat all dfs in the list
    df2 = pd.concat([remain] + sams, axis=1)

    return df2



def calc_AB(vcf):
    ''' Calculate allele balance for all samples in a given 
        pdVCF. Also converts DP & GQ to numeric type.
    
    Args:
        vcf: pdVCF with genotype information extracted
        
    Notes:
        ONLY WORKS FOR BIALLELIC VARIANTS
    '''
    sam = vcf.columns.levels[0][0]
    vcf[sam,'DP'] = pd.to_numeric(vcf[sam,'DP'])
    vcf[sam,'GQ'] = pd.to_numeric(vcf[sam,'GQ'])
    AD = vcf.xs('AD', level=1, axis=1).unstack().str.split(",", n=2)
    DP = vcf.xs('DP', level=1, axis=1).unstack()
    AB = round(pd.to_numeric(AD.str[1]) / pd.to_numeric(DP), 2)
    vcf[sam, 'AB'] = AB.tolist()
        
    return vcf



def get_info_data(df, info_fields):
    ''' Transform the INFO IDs into second level column indexes and return
        the df as a MultiIndex dataframe.
    Args:
        df: DataFrame deriving from a VCF via vcf2dataframe()
        info_fields: a list of all the INFO IDs in the given df
    '''
    # Alter Info field for some variables that don't work well
    df['INFO'] = df['INFO'].str.replace(";DB",";DB=1")
    df['INFO'] = df['INFO'].str.replace(";STR",";STR=1")

    # identify Info fields not present in each row and fill them with a 0
    for name in info_fields:
        if name == info_fields[0]:
            name = "{}=".format(name)
        else:
            name = ";{}=".format(name)

        not_present = df['INFO'][~df.INFO.str.contains(name)].add("{}0".format(name))
        present = df['INFO'][df.INFO.str.contains(name)]
        df['INFO'] = not_present.append(present).sort_index()

    # reorder INFO fields so they are are all in the same order
    df['INFO'] = df['INFO'].apply(lambda x: ';'.join(elem for elem in sorted(x.split(";"))))

    # remove all info_field names from the info values, starting with the info field with the longest name first
    unwanted = info_fields + ['=']
    unwanted.sort(key=len, reverse=True)
    remove = collections.OrderedDict([(x, '') for x in unwanted])
    df = replace_series_strings(df, col='INFO', dic=remove, substring=True)

    # create a new multi-index df containing only the info fields with the IDs as the second level
    info = pd.DataFrame(data=list(df['INFO'].str.split(';')),
                        columns=pd.MultiIndex.from_product([ ['INFO'], info_fields]))

    if not isinstance(df.columns, pd.MultiIndex):
        # create another multi-index df without the info fields where the second level is nothing
        df = pd.DataFrame(data=df.drop('INFO', axis=1).values,
                              columns=pd.MultiIndex.from_tuples(
                                [(x, '') for x in list(df.drop('INFO', axis=1).columns)] ))

    else:
        df = df.drop('INFO', axis=1)

    variant = df.iloc[:, :8]
    samples = df.iloc[:, 8:]

    # replace the info fields in the original df with the multi-index df created above
    final_df = pd.concat([variant] + [info] + [samples], axis=1)

    # MQ and MQ0 are in the wrong order so name swapping is required
    if 'MQ0' in info_fields and 'MQ' in info_fields:
        final_df = final_df.rename(columns={'MQ0': 'TEMP', 'MQ': 'MQ0'})
        final_df = final_df.rename(columns={'TEMP': 'MQ'})

    return final_df



def index2UID(df):
    ''' Replace the index with a unique variant identifier.
    '''
    if isinstance(df.columns, pd.MultiIndex):
        UID = df.apply(lambda x: "{}:{}-{}/{}".format(x['CHROM'][0], x['POS'][0],
                                                      x['REF'][0], x['ALT'][0]), axis=1)
    else:
         UID = df.apply(lambda x: "{}:{}-{}/{}".format(x['CHROM'], x['POS'],
                                                  x['REF'], x['ALT']), axis=1)

    df['UID'] = UID

    if df['UID'].value_counts()[0] > 1:
        raise ValueError("The UID is not unique.")
        
    # remove UID column (UID now only accessible via index)  
    df = df.drop('UID', axis=1)

    return df.rename(UID)


In [32]:
j = "var.both.taadUkJan2017.filters.vcf"
o = "var.both.taadUkOctoberRepeatLibrariesDec2016.filters.vcf"
t = "26PL1207.oct.noInDels.recode.vcf"
print(type(t))
print(type(test_vcf.vcf))

if isinstance(test_vcf.vcf, pd.DataFrame):
    print(1)

<class 'str'>
<class 'pandas.core.frame.DataFrame'>
1


In [6]:
import time
start = time.time()

vcf2dataframe(j, genotype_level=False, 
             info_level=False, UID=False)

done = time.time() - start
print(done)

1.4514596462249756


In [137]:
vcf = vcf2dataframe(o, genotype_level=True, 
             info_level=True, UID=True)

#vcf[(vcf['21AI1224']['AB'] > 0.3) & (vcf['21AI1224']['DP'] > 49) & (vcf['21AI1224']['GQ'] > 29)]['21AI1224'].head()
vcf['INFO'].apply(pd.to_numeric, errors='ignore')['AF'].head()

#vcf['INFO']['AF'] > 12

['AC', 'AF', 'AN', 'BaseQRankSum', 'DB', 'DP', 'DS', 'Dels', 'ExcessHet', 'FS', 'HaplotypeScore', 'InbreedingCoeff', 'MLEAC', 'MLEAF', 'MQ', 'MQ0', 'MQRankSum', 'QD', 'RPA', 'RU', 'ReadPosRankSum', 'SOR', 'STR', 'set']




1:2160444-A/G    0.341
1:2160448-G/A    0.032
1:2160449-C/T    0.010
1:2160451-G/A    0.021
1:2160455-C/T    0.010
Name: AF, dtype: object

In [97]:

    

vcf = vcf2dataframe(t, genotype_level=True, 
              info_level=True, UID=True)
vcf



Unnamed: 0_level_0,CHROM,POS,ID,REF,ALT,QUAL,FILTER,FORMAT,INFO,INFO,26PL1207,26PL1207,26PL1207,26PL1207,26PL1207,26PL1207,22MI1099,22MI1099,22MI1099,22MI1099,22MI1099,22MI1099
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,AC,LOLZ,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB
1:1000-A/G,1,1000,.,A,G,24.89,LowQual,GT:AD:DP:GQ:PL,2,9,0/1,101,11,3,30197,0.09,./.,,,,,
1:2234385-C/T,1,2234385,.,C,T,9953.67,PASS,GT:AD:DP:GQ:PL,1,90,0/1,511,16,99,2890103,0.69,0/1,101.0,11.0,3.0,30197.0,0.09
1:2235243-C/T,1,2235243,.,C,T,82.06,PASS,GT:AD:DP:GQ:PL,0,0,0/1,41,5,16,160102,0.2,0/1,40100.0,140.0,50.0,15001888.0,0.71
1:2235792-A/G,1,2235792,.,A,G,436.66,QDfilterSNV,GT:AD:DP:GQ:PL,0,0,0/1,9317,110,99,14501894,0.15,0/0,5050.0,100.0,89.0,1000100.0,0.5
1:2235901-A/G,1,2235901,.,A,G,24.89,LowQual,GT:AD:DP:GQ:PL,0,0,0/1,101,11,3,30197,0.09,1/1,200200.0,400.0,90.0,1000100.0,0.5
"1:2239999-A/G,T",1,2239999,.,A,"G,T",24.89,LowQual,GT:AD:DP:GQ:PL,0,0,0/1,1014,15,3,30197,0.07,0/1,10020100.0,220.0,90.0,10005001000.0,0.09
1:2235501-A/GGT,1,2235501,.,A,GGT,24.89,LowQual,GT:AD:DP:GQ:PL,0,0,0/1,101,11,3,30197,0.09,1/1,200200.0,400.0,90.0,1000100.0,0.5
1:2239901-GA/G,1,2239901,.,GA,G,24.89,LowQual,GT:AD:DP:GQ:PL,0,0,0/1,101,11,3,30197,0.09,1/1,200200.0,400.0,90.0,1000100.0,0.5


In [207]:
import numpy as np
import logging


class VCF(object):
    ''' A VCF file stored as a Pandas DataFrame
    
    Atrributes:
        vcf: vcf file to be converted to a Pandas DataFrame or a VCF object
        genotype_level: place the genotype information into a second level column index
        info_level: place the info IDs into a second level column index
        UID: rename index to a unique variant identifier
        
    Notes:
        it is not recommended to alter the boolean attributes when initilising
        a VCF object, as it may break method functionality and limit data 
        manipulation of the resulting object.
    '''
    def __init__(self, vcf, genotype_level=True, info_level=True, UID=True):
        
        if isinstance(vcf, pd.DataFrame):
            self.vcf = vcf
        else:
            logging.info("Creating pdVCF DataFrame...")
            self.vcf = vcf2dataframe(vcf, genotype_level=genotype_level,
                                     info_level=info_level, UID=UID)
    
    def get_samples(self):
        ''' Get all sample names within the vcf and return as a list
        '''
        return self.vcf.xs('DP', level=1, axis=1).columns.tolist()


    def get_genotype(self, gen):
        ''' Access specific genotype information across samples
            in the vcf.
        Args:
            gen: genotype attribute of interest in string format e.g 'DP'
        '''
        return self.vcf.xs(gen, level=1, axis=1)

    
    def get_info(self, info):
        ''' Return INFO field of interest e.g. 'AC'
        '''
        return self.vcf['INFO'][info]
    
    


class FilterVCF(VCF):
    ''' A VCF file which can be readily filtered
    
    Attributes:
        vcf: vcf file to be converted to a Pandas DataFrame or a VCF object
    '''
    def __init__(self, vcf):
        VCF.__init__(self, vcf)


    def subset(self, sams, exclude_ref=False, remove_uncalled=True):
        ''' Subset a multisample VCF by a given samples.
        Args:
            vcf: Pandas DataFrame VCF
            sams: list of samples to subset the vcf for
            exlude_ref: remove variant if all GT values for subset are 0/0
            remove_uncalled: remove variant if all GT values for subset are ./.

        Returns:
            subsetted Pandas DataFrame VCF
        '''
        # split variant and genotype information 
        genotype = self.vcf[sams]
        num_info = self.vcf['INFO'].columns.shape[0]
        variant = self.vcf.ix[:,:8+num_info]

        GT = genotype.xs('GT', level=1, axis=1)
        uncalled= []

        if remove_uncalled:
            uncalled = GT[GT[sams] == './.'].dropna().index.tolist() 

        if exclude_ref:
            uncalled += GT[GT[sams] == '0/0'].dropna().index.tolist() 

        sub = pd.concat([variant, genotype], axis=1)
        self.vcf = sub.drop(uncalled)
        return self.vcf

    
    def filter_genotype(self, minDP=None, minGQ=None, minAB=None):
        ''' Filter for variants in which all the samples in the given vcf 
            meet the minimum genotype values given.
        
        Args:
            minDP: minimum variant depth
            minGQ: minimum genotype quality
            minAB: minimum allele balance
        
        Notes:
            Doesn't handle multiallelic information properly and
            will filter for this first ALT value e.g. if DP = 12,1,100
            it will be filtered out even if minDP=30.
        '''
        # split variant and genotype information
        num_info = self.vcf['INFO'].columns.shape[0]
        variant = self.vcf.ix[:,:8+num_info]
        genotype = self.vcf.ix[:,9+num_info:]

        # store all variants that don't meet the minimum value given for the args here
        below_min = []
        
        if minDP:
            DP = genotype.xs('DP', level=1, axis=1).fillna(0)
            above_min = DP[DP >= minDP] 
            below_min += DP[above_min.isnull().any(axis=1)].index.tolist()
            
        if minGQ:
            GQ = genotype.xs('GQ', level=1, axis=1).fillna(0)
            above_min = GQ[GQ >= minGQ]
            below_min +=  GQ[above_min.isnull().any(axis=1)].index.tolist()
        
        if minAB:
            AB = genotype.xs('AB', level=1, axis=1).fillna(0)
            above_min = AB[AB >= minAB]
            below_min +=  AB[above_min.isnull().any(axis=1)].index.tolist()
        
        # remove variants that don't meet the requirements from the vcf
        self.vcf = self.vcf.drop(below_min)
        return self.vcf
    
    
    def filter_info(self, field, value):
        ''' Filter for variants that are above the given value
            (if value is number) or are equal to the given value
            (if value is string).
            
        Args:
            field: INFO field of interest
            value: string or int value to test the field with
            
        Notes:
            Doesn't handle multiallelic information properly and
            will filter any thing that has this e.g. if AC = 12,34
            it will be filtered out even if value=1 as the AC with
            a comma cant be converted to an int so becomes np.nan
        '''
        if isinstance(value, int) or isinstance(value, float):
            mask = pd.to_numeric(self.vcf['INFO'][field], errors='coerrce') >= value
            self.vcf = self.vcf[mask]
            return self.vcf
        
        elif isinstance(value, str):
            mask= self.vcf['INFO'][field] == value
            self.vcf = self.vcf[mask]
            return self.vcf
        
    
    def remove_indels(self):
        ''' Remove indels from vcf.
        '''
        alt_mask = (self.vcf.ALT.str.len() == 1) | (self.vcf.ALT.str.contains(','))
        ref_mask = (self.vcf.REF.str.len() == 1) | (self.vcf.REF.str.contains(','))
        return(self.vcf[alt_mask & ref_mask])
    
    
    def biallelic(self):
        ''' Filter for biallelic variants only.
        '''
        self.vcf = self.vcf[self.vcf.ALT.str.split(',').str.len() == 1]
        return self.vcf
    
    
    def multiallelic(self):
        ''' Filter for multiallelic variants only.
        '''
        self.vcf = self.vcf[self.vcf.ALT.str.split(',').str.len() > 1]
        return self.vcf
    
    
    def positions(self, positions, include=True):
        ''' Include or exclude variants in the given position(s)
            or position ranges.
        
        Args:
            positions: a position, position ranges or list of the two e.g.
                            ['1:2234385', '1:2235901-2240000']
            include: include in the vcf if true, otherwise exclude
        '''
        positions = positions if isinstance(positions, list) else [positions]
        
        # get the variants named in positions
        selected_variants = []
        
        # get the indexes of the varants within pos from the vcf 
        for pos in positions:
            pos = FilterVCF.pos2range(pos)
            chrom, start, end = [int(x) for x in re.split(r'[:-]', pos)]
            mask = (self.vcf['CHROM'] == chrom) & (self.vcf['POS'] >= start) & (self.vcf['POS'] <= end)
            variants = self.vcf[mask].index.tolist()
            selected_variants.append(variants)
            
        # flatten
        selected_variants = [y for x in selected_variants for y in x]
        
        if not include:
            selected_variants = list(set(self.vcf.index.tolist()) - set(selected_variants))
            
        self.vcf = self.vcf.loc[FilterVCF.natural_sort(selected_variants)] 

        return self.vcf
    
    
    @staticmethod
    def natural_sort(l): 
        ''' Sort a list in human natural alphanumerical
            order.
        '''
        convert = lambda text: int(text) if text.isdigit() else text.lower() 
        alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ] 
        return sorted(l, key = alphanum_key)
    
    
    @staticmethod
    def pos2range(pos, num=0): 
        ''' Alter a genomic position to a genomic range
            e.g. 2:1234 becomes 2:1234-1234
        '''
        if "-" not in pos:
            split = pos.split(":")
            pos = "{}:{}-{}".format(split[0], split[1], int(split[1])+num)
            return pos
        else:
            return pos
        
        
    

def common_variants(vcf1, vcf2):
    ''' Find common variants between two VCF objects
        and return common variants in a list.
    '''
    return list(set.intersection(set(vcf1.vcf.index.values), set(vcf2.vcf.index.values)))    
        
    

    
    #artifacts = guv.check_vcf([(x.split("-")[0], x.split("-")[1]) for x in mismatch], dbSNP)     
    #mismatch = list(set(mismatch)-set(artifacts))
    return len(match)



In [225]:
first = VCF(t)
second = VCF(first.vcf.head(3))

common_indexes = common_variants(first, second)

gt1 = second.get_genotype('GT')
gt2 = first.get_genotype('GT')[2:]


match = []
mismatch = []
for sam in first.get_samples():
    
    gt1_dict = gt1.to_dict().get(sam)
    gt2_dict = gt2.to_dict().get(sam)
    
    for n in range(0, len(common_indexes)):
        if gt1_dict.get(common_indexes[n]) != gt2_dict.get(common_indexes[n]):
            mismatch.append(common_indexes[n])
        else:
            match.append(common_indexes[n])
    
mismatch

#first.vcf.xs('DP', level=1, axis=1).columns.tolist()

#first.get_samples()

['AC', 'LOLZ']




['1:2234385-C/T', '1:1000-A/G', '1:2234385-C/T', '1:1000-A/G']

In [221]:
print(gt1)
gt2

              26PL1207 22MI1099
1:1000-A/G     0/1      ./.    
1:2234385-C/T  0/1      0/1    
1:2235243-C/T  0/1      0/1    


Unnamed: 0,26PL1207,22MI1099
1:2235243-C/T,0/1,0/1
1:2235792-A/G,0/1,0/0
1:2235901-A/G,0/1,1/1
"1:2239999-A/G,T",0/1,0/1
1:2235501-A/GGT,0/1,1/1
1:2239901-GA/G,0/1,1/1


In [119]:
v = VCF(o, genotype_level=True, 
    info_level=True, UID=True)


test_vcf = FilterVCF(v.vcf)

#mask = (test_vcf.vcf.REF.str.len() == 1) & (test_vcf.vcf.ALT.str.len() == 1)
#test_vcf.vcf[mask]
#test_vcf.vcf[test_vcf.vcf.ALT.str.split(',').str.len() == 1]
#test_vcf.remove_indels()
#test_vcf.vcf['INFO'].apply(pd.to_numeric)
#test_vcf.get_genotype('DP')['22MI1099']

range_pos = ['20:45354612-45364737', '1:2160444']


test_vcf.positions(range_pos, include=True)

['AC', 'AF', 'AN', 'BaseQRankSum', 'DB', 'DP', 'DS', 'Dels', 'ExcessHet', 'FS', 'HaplotypeScore', 'InbreedingCoeff', 'MLEAC', 'MLEAF', 'MQ', 'MQ0', 'MQRankSum', 'QD', 'RPA', 'RU', 'ReadPosRankSum', 'SOR', 'STR', 'set']




Unnamed: 0_level_0,CHROM,POS,ID,REF,ALT,QUAL,FILTER,FORMAT,INFO,INFO,INFO,INFO,INFO,INFO,INFO,INFO,INFO,INFO,INFO,INFO,INFO,INFO,INFO,INFO,INFO,INFO,INFO,INFO,INFO,INFO,INFO,INFO,21AI1224,21AI1224,21AI1224,21AI1224,21AI1224,21AI1224,21IB1281,21IB1281,21IB1281,21IB1281,21IB1281,21IB1281,21IL1279,21IL1279,21IL1279,21IL1279,21IL1279,21IL1279,21JH1219,21JH1219,21JH1219,21JH1219,21JH1219,21JH1219,21JL1278,21JL1278,21JL1278,21JL1278,21JL1278,21JL1278,21KC1280,21KC1280,21KC1280,21KC1280,21KC1280,21KC1280,21PB1303,21PB1303,21PB1303,21PB1303,21PB1303,21PB1303,21PS1217,21PS1217,21PS1217,21PS1217,21PS1217,21PS1217,21QA1105,21QA1105,21QA1105,21QA1105,21QA1105,21QA1105,21WG1262,21WG1262,21WG1262,21WG1262,21WG1262,21WG1262,22AW1185,22AW1185,22AW1185,22AW1185,22AW1185,22AW1185,22BB1282,22BB1282,22BB1282,22BB1282,22BB1282,22BB1282,22BW1284,22BW1284,22BW1284,22BW1284,22BW1284,22BW1284,22GK1188,22GK1188,22GK1188,22GK1188,22GK1188,22GK1188,22KF1283,22KF1283,22KF1283,22KF1283,22KF1283,22KF1283,22VW1151,22VW1151,22VW1151,22VW1151,22VW1151,22VW1151,26BH1161,26BH1161,26BH1161,26BH1161,26BH1161,26BH1161,26BS1176,26BS1176,26BS1176,26BS1176,26BS1176,26BS1176,26BW1201,26BW1201,26BW1201,26BW1201,26BW1201,26BW1201,26CB1202,26CB1202,26CB1202,26CB1202,26CB1202,26CB1202,26CS1154,26CS1154,26CS1154,26CS1154,26CS1154,26CS1154,26CS1165,26CS1165,26CS1165,26CS1165,26CS1165,26CS1165,26DA1193,26DA1193,26DA1193,26DA1193,26DA1193,26DA1193,26DL1132,26DL1132,26DL1132,26DL1132,26DL1132,26DL1132,26ED1139,26ED1139,26ED1139,26ED1139,26ED1139,26ED1139,26EE1157,26EE1157,26EE1157,26EE1157,26EE1157,26EE1157,26ES1140,26ES1140,26ES1140,26ES1140,26ES1140,26ES1140,26GH1181,26GH1181,26GH1181,26GH1181,26GH1181,26GH1181,26GW1196,26GW1196,26GW1196,26GW1196,26GW1196,26GW1196,26ID1179,26ID1179,26ID1179,26ID1179,26ID1179,26ID1179,26IT1169,26IT1169,26IT1169,26IT1169,26IT1169,26IT1169,26JB1205,26JB1205,26JB1205,26JB1205,26JB1205,26JB1205,26JG1215,26JG1215,26JG1215,26JG1215,26JG1215,26JG1215,26JM1155,26JM1155,26JM1155,26JM1155,26JM1155,26JM1155,26JW1182,26JW1182,26JW1182,26JW1182,26JW1182,26JW1182,26KE1137,26KE1137,26KE1137,26KE1137,26KE1137,26KE1137,26LG1175,26LG1175,26LG1175,26LG1175,26LG1175,26LG1175,26LM1174,26LM1174,26LM1174,26LM1174,26LM1174,26LM1174,26MH1213,26MH1213,26MH1213,26MH1213,26MH1213,26MH1213,26PH1135,26PH1135,26PH1135,26PH1135,26PH1135,26PH1135,26PL1207,26PL1207,26PL1207,26PL1207,26PL1207,26PL1207,26RG1162,26RG1162,26RG1162,26RG1162,26RG1162,26RG1162,26RN1152,26RN1152,26RN1152,26RN1152,26RN1152,26RN1152,26SH1138,26SH1138,26SH1138,26SH1138,26SH1138,26SH1138,26SH1192,26SH1192,26SH1192,26SH1192,26SH1192,26SH1192,26SS1216,26SS1216,26SS1216,26SS1216,26SS1216,26SS1216,26TS1203,26TS1203,26TS1203,26TS1203,26TS1203,26TS1203,Blank-0-161011,Blank-0-161011,Blank-0-161011,Blank-0-161011,Blank-0-161011,Blank-0-161011
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,AC,AF,AN,BaseQRankSum,DB,DP,DS,Dels,ExcessHet,FS,HaplotypeScore,InbreedingCoeff,MLEAC,MLEAF,MQ0,MQ,MQRankSum,QD,RPA,RU,ReadPosRankSum,SOR,STR,set,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB,GT,AD,DP,GQ,PL,AB
1:2160444-A/G,1,2160444,.,A,G,391.52,PASS,GT:AD:DP:GQ:PL,15,0.341,44,-2.391,0,53,0,0.0,0.4046,0.0,1.2037,-0.1143,12,0.273,0,59.44,0.0,27.96,0,0,-3.524,0.237,0,variant,0/0,10.0,1.0,3.0,335.0,0.0,0/1,12.0,3.0,25.0,25028.0,0.67,0/1,11.0,2.0,24.0,25024.0,0.5,./.,,,,,,0/0,31.0,5.0,9.0,998.0,0.2,./.,,,,,,0/0,30.0,4.0,6.0,669.0,0.0,0/0,10.0,1.0,3.0,330.0,0.0,./.,,,,,,./.,,,,,,./.,,,,,,./.,.,1.0,,,,0/0,10.0,1.0,3.0,334.0,0.0,0/1,21.0,3.0,23.0,23056.0,0.33,./.,,,,,,0/0,10.0,1.0,3.0,334.0,0.0,./.,,,,,,1/1,1.0,1.0,3.0,3230.0,1.0,0/1,11.0,2.0,26.0,26029.0,0.5,./.,,,,,,0/0,20.0,2.0,3.0,331.0,0.0,./.,.,2.0,,,,./.,,,,,,./.,,,,,,./.,,,,,,1/1,1.0,1.0,3.0,3130.0,1.0,0/1,11.0,2.0,25.0,25029.0,0.5,./.,.,1.0,,,,./.,,,,,,0/1,11,5.0,25.0,25029,0.2,./.,,,,,,./.,,,,,,./.,,,,,,0/0,10.0,2.0,3.0,335.0,0.0,./.,,,,,,./.,,,,,,0/0,10.0,1.0,3.0,335.0,0.0,0/1,11,2.0,25.0,25028.0,0.5,1/1,1.0,1.0,3.0,3130.0,1.0,./.,,,,,,./.,,,,,,0/0,10.0,2.0,3.0,335.0,0.0,1/1,1.0,1.0,3.0,3130.0,1.0,0/0,20.0,2.0,3.0,335.0,0.0,./.,,,,,,./.,,,,,,./.,,,,,,./.,,,,,
20:45354702-GGA/G,20,45354702,.,GGA,G,153.27,PASS,GT:AD:DP:GQ:PL,11,0.12,92,1.262,0,117,0,0.0,0.2512,0.0,0.0,-0.0377,11,0.12,0,59.46,0.894,6.39,21,GA,-2.852,0.04,1,variant2,0/0,50.0,5.0,15.0,15219.0,0.0,0/0,10.0,1.0,3.0,353.0,0.0,0/0,10.0,3.0,3.0,339.0,0.0,0/1,41.0,5.0,30.0,300202.0,0.2,0/0,10.0,1.0,3.0,316.0,0.0,0/1,11.0,2.0,20.0,39020.0,0.5,0/0,40.0,4.0,12.0,12167.0,0.0,0/0,30.0,4.0,8.0,859.0,0.0,0/0,10.0,1.0,2.0,28.0,0.0,0/0,20.0,2.0,6.0,674.0,0.0,0/0,40.0,4.0,12.0,12162.0,0.0,0/0,20,2.0,6.0,687.0,0.0,0/0,10.0,1.0,3.0,333.0,0.0,0/0,30.0,4.0,9.0,9109.0,0.0,0/0,10,1.0,3.0,350.0,0.0,0/1,21.0,3.0,36.0,36075.0,0.33,0/0,10.0,1.0,3.0,352.0,0.0,0/1,41.0,5.0,4.0,40206.0,0.2,0/1,11.0,2.0,15.0,15048.0,0.5,0/0,20.0,2.0,6.0,668.0,0.0,0/0,20.0,2.0,6.0,654.0,0.0,0/0,20,2.0,6.0,6107.0,0.0,0/0,20.0,2.0,6.0,6105.0,0.0,0/0,10.0,1.0,3.0,355.0,0.0,0/0,10.0,1.0,3.0,354.0,0.0,0/0,20.0,2.0,6.0,6101.0,0.0,1/1,1.0,2.0,3.0,4530.0,0.5,0/0,40,4.0,12.0,12216.0,0.0,0/0,10.0,1.0,3.0,354.0,0.0,0/0,20,3.0,6.0,669,0.0,0/0,60.0,6.0,17.0,17247.0,0.0,0/0,30.0,4.0,9.0,9140.0,0.0,0/0,10.0,1.0,3.0,356.0,0.0,0/0,10.0,1.0,3.0,351.0,0.0,0/0,40.0,4.0,12.0,12184.0,0.0,0/0,30,3.0,9.0,9136.0,0.0,0/0,20.0,3.0,6.0,663.0,0.0,0/0,30,3.0,9.0,9116.0,0.0,0/0,20.0,3.0,6.0,694.0,0.0,0/0,30,3.0,9.0,9135.0,0.0,0/1,11.0,2.0,35.0,39035.0,0.5,1/1,1.0,1.0,3.0,4530.0,1.0,0/1,21.0,3.0,12.0,12080.0,0.33,0/0,10.0,1.0,3.0,352.0,0.0,0/0,40.0,4.0,12.0,12178.0,0.0,0/0,20.0,2.0,6.0,682.0,0.0,./.,,,,,,./.,,,,,
20:45354715-T/G,20,45354715,.,T,G,136.69,PASS,GT:AD:DP:GQ:PL,5,0.104,48,2.207,0,60,0,0.0,0.4885,0.0,2.2619,-0.0934,4,0.083,0,59.71,0.485,11.39,0,0,0.855,0.058,0,variant,0/0,20.0,2.0,3.0,330.0,0.0,./.,,,,,,1/1,1.0,1.0,3.0,3030.0,1.0,0/0,40.0,4.0,12.0,12113.0,0.0,./.,,,,,,0/0,20.0,2.0,6.0,658.0,0.0,0/0,10.0,1.0,3.0,328.0,0.0,0/1,31.0,4.0,21.0,21050.0,0.25,0/0,10.0,1.0,3.0,329.0,0.0,0/0,20.0,2.0,3.0,329.0,0.0,0/0,20.0,3.0,6.0,657.0,0.0,./.,,,,,,./.,,,,,,0/0,10.0,1.0,3.0,330.0,0.0,./.,.,1.0,,,,0/0,10.0,1.0,3.0,330.0,0.0,./.,,,,,,0/1,41.0,6.0,19.0,19080.0,0.17,0/0,30.0,3.0,6.0,657.0,0.0,./.,,,,,,./.,,,,,,0/1,11,2.0,23.0,24023.0,0.5,./.,,,,,,./.,,,,,,./.,,,,,,./.,,,,,,./.,,,,,,0/0,10,1.0,3.0,331.0,0.0,./.,,,,,,0/0,10,1.0,3.0,330,0.0,0/0,20.0,3.0,6.0,658.0,0.0,0/0,20.0,2.0,3.0,328.0,0.0,./.,,,,,,./.,,,,,,0/0,20.0,2.0,3.0,330.0,0.0,./.,,,,,,0/0,20.0,2.0,6.0,655.0,0.0,./.,.,1.0,,,,./.,,,,,,0/0,10,1.0,3.0,329.0,0.0,0/0,10.0,1.0,3.0,329.0,0.0,0/0,30.0,3.0,9.0,983.0,0.0,0/0,20.0,3.0,6.0,655.0,0.0,./.,,,,,,./.,,,,,,./.,,,,,,./.,,,,,,./.,,,,,
20:45354724-ACTC/A,20,45354724,.,ACTC,A,218.97,PASS,GT:AD:DP:GQ:PL,7,0.269,26,0.713,0,19,0,0.0,0.3951,0.0,0.0,-0.046,7,0.269,0,60.0,0.0,30.07,21,CTC,2.406,0.223,1,variant2,1/1,1.0,1.0,3.0,5430.0,1.0,./.,,,,,,0/0,10.0,1.0,3.0,362.0,0.0,./.,,,,,,./.,,,,,,./.,,,,,,./.,,,,,,0/0,10.0,1.0,3.0,360.0,0.0,0/0,10.0,1.0,3.0,336.0,0.0,0/0,20.0,2.0,6.0,6123.0,0.0,0/0,10.0,1.0,3.0,363.0,0.0,./.,,,,,,./.,,,,,,./.,,,,,,./.,,,,,,./.,,,,,,./.,,,,,,0/0,10.0,1.0,3.0,364.0,0.0,./.,,,,,,./.,,,,,,0/1,12.0,3.0,54.0,99054.0,0.67,./.,,,,,,./.,,,,,,./.,,,,,,0/1,11.0,2.0,24.0,24056.0,0.5,./.,,,,,,./.,,,,,,./.,,,,,,./.,,,,,,0/1,11,2.0,47.0,48047,0.5,./.,,,,,,0/0,10.0,1.0,3.0,325.0,0.0,./.,,,,,,./.,,,,,,./.,,,,,,./.,,,,,,./.,,,,,,./.,,,,,,./.,,,,,,./.,,,,,,./.,,,,,,1/1,1.0,1.0,3.0,5430.0,1.0,./.,,,,,,./.,,,,,,0/0,20.0,2.0,6.0,6127.0,0.0,./.,,,,,,./.,,,,,,./.,,,,,
20:45354737-T/C,20,45354737,.,T,C,563.53,PASS,GT:AD:DP:GQ:PL,19,0.279,68,-2.096,0,81,0,0.0,0.1224,0.0,0.5162,-0.0226,16,0.235,0,60.0,0.0,26.83,0,0,-4.059,0.185,0,variant,0/0,20.0,2.0,6.0,665.0,0.0,0/0,10.0,1.0,3.0,331.0,0.0,0/1,11.0,3.0,25.0,25027.0,0.33,./.,,,,,,0/1,21.0,3.0,22.0,22052.0,0.33,0/0,10.0,1.0,3.0,333.0,0.0,./.,,,,,,0/0,30.0,3.0,9.0,997.0,0.0,./.,,,,,,0/0,30.0,4.0,9.0,996.0,0.0,0/0,10.0,2.0,3.0,321.0,0.0,0/0,10,2.0,3.0,329.0,0.0,./.,,,,,,0/0,10.0,1.0,3.0,332.0,0.0,./.,,,,,,0/1,11.0,2.0,25.0,25027.0,0.5,0/1,13.0,4.0,19.0,80019.0,0.75,0/0,30.0,4.0,9.0,997.0,0.0,1/1,2.0,2.0,6.0,6360.0,1.0,0/1,23.0,5.0,49.0,49055.0,0.6,0/0,30.0,4.0,9.0,991.0,0.0,0/0,10,1.0,3.0,331.0,0.0,0/0,10.0,1.0,3.0,331.0,0.0,./.,,,,,,0/0,30.0,3.0,6.0,662.0,0.0,0/0,10.0,1.0,3.0,331.0,0.0,0/0,20.0,3.0,6.0,661.0,0.0,0/0,20,2.0,6.0,661.0,0.0,./.,,,,,,0/1,32,6.0,46.0,46080,0.33,./.,,,,,,./.,,,,,,./.,,,,,,0/0,10.0,1.0,3.0,331.0,0.0,0/0,10.0,1.0,3.0,331.0,0.0,./.,,,,,,1/1,1.0,1.0,3.0,3230.0,1.0,./.,,,,,,1/1,2.0,2.0,6.0,6160.0,1.0,1/1,01,1.0,3.0,3230.0,1.0,0/0,10.0,1.0,3.0,333.0,0.0,0/1,12.0,3.0,24.0,54024.0,0.67,0/1,21.0,3.0,22.0,22055.0,0.33,./.,,,,,,0/0,20.0,3.0,6.0,665.0,0.0,1/1,1.0,1.0,3.0,3130.0,1.0,0/1,11.0,2.0,25.0,25025.0,0.5,./.,,,,,
20:45354829-C/G,20,45354829,rs79849424,C,G,3152.4,PASS,GT:AD:DP:GQ:PL,1,0.011,94,12.87,1,11749,0,0.0,3.0103,0.0,13.8239,-0.0108,1,0.011,0,59.98,-0.128,12.61,0,0,0.415,0.47,0,variant,0/0,2490.0,250.0,99.0,7077220.0,0.0,0/0,2490.0,250.0,99.0,7067077.0,0.0,0/0,2490.0,250.0,99.0,7037053.0,0.0,0/0,2500.0,250.0,99.0,7157171.0,0.0,0/0,2500.0,250.0,99.0,7067028.0,0.0,0/0,2500.0,250.0,99.0,7157132.0,0.0,0/0,2490.0,250.0,99.0,7157093.0,0.0,0/1,119131.0,250.0,99.0,319202706.0,0.52,0/0,2490.0,249.0,99.0,6886917.0,0.0,0/0,2490.0,250.0,99.0,7227420.0,0.0,0/0,2500.0,250.0,99.0,7127107.0,0.0,0/0,2500,250.0,99.0,7216918.0,0.0,0/0,2500.0,250.0,99.0,7157141.0,0.0,0/0,2470.0,249.0,99.0,7127108.0,0.0,0/0,2500,250.0,99.0,7247232.0,0.0,0/0,2500.0,250.0,99.0,7217124.0,0.0,0/0,2500.0,250.0,99.0,7187143.0,0.0,0/0,2500.0,250.0,99.0,7157201.0,0.0,0/0,2490.0,250.0,99.0,7217222.0,0.0,0/0,2490.0,250.0,99.0,6916927.0,0.0,0/0,2490.0,250.0,99.0,7187173.0,0.0,0/0,2490,250.0,99.0,7277213.0,0.0,0/0,2500.0,250.0,99.0,7157279.0,0.0,0/0,2500.0,250.0,99.0,5776458.0,0.0,0/0,2500.0,250.0,99.0,7157108.0,0.0,0/0,2490.0,249.0,99.0,7187056.0,0.0,0/0,2490.0,249.0,99.0,7006979.0,0.0,0/0,2491,250.0,99.0,6637038.0,0.0,0/0,2500.0,250.0,99.0,7277264.0,0.0,0/0,2491,250.0,99.0,7127115,0.0,0/0,2500.0,250.0,99.0,7187141.0,0.0,0/0,2490.0,250.0,99.0,7217212.0,0.0,0/0,2480.0,249.0,99.0,7127156.0,0.0,0/0,2500.0,250.0,99.0,7197290.0,0.0,0/0,2500.0,250.0,99.0,7157117.0,0.0,0/0,2500,250.0,99.0,7197339.0,0.0,0/0,2490.0,250.0,99.0,7127033.0,0.0,0/0,2500,250.0,99.0,6766873.0,0.0,0/0,2480.0,249.0,99.0,7107231.0,0.0,0/0,2500,250.0,99.0,7127044.0,0.0,0/0,2490.0,250.0,99.0,7187169.0,0.0,0/0,2500.0,250.0,99.0,7097112.0,0.0,0/0,2500.0,250.0,99.0,7187271.0,0.0,0/0,2500.0,250.0,99.0,7307124.0,0.0,0/0,2500.0,250.0,99.0,7127091.0,0.0,0/0,2490.0,249.0,99.0,7187136.0,0.0,0/0,2500.0,250.0,99.0,7067013.0,0.0,./.,,,,,
20:45354973-G/A,20,45354973,rs76315093,G,A,3678.37,PASS,GT:AD:DP:GQ:PL,1,0.01,96,17.396,1,11750,0,0.0,3.0103,0.0,12.8205,-0.0126,1,0.01,0,59.99,0.027,14.71,0,0,1.167,0.491,0,variant,0/0,2500.0,250.0,99.0,6747229.0,0.0,0/0,2500.0,250.0,99.0,6016729.0,0.0,0/0,2500.0,250.0,99.0,6917312.0,0.0,0/0,2500.0,250.0,99.0,6887237.0,0.0,0/0,2491.0,250.0,99.0,6557197.0,0.0,0/0,2500.0,250.0,99.0,6406983.0,0.0,0/0,2480.0,249.0,99.0,6346829.0,0.0,0/0,2500.0,250.0,99.0,7407520.0,0.0,0/0,2490.0,250.0,99.0,6717167.0,0.0,0/0,2500.0,250.0,99.0,6807363.0,0.0,0/0,2500.0,250.0,99.0,6837224.0,0.0,0/0,2500,250.0,99.0,6647081.0,0.0,0/0,2490.0,250.0,99.0,6466973.0,0.0,0/0,2481.0,250.0,99.0,6737056.0,0.0,0/0,2491,250.0,99.0,6136951.0,0.0,0/0,2500.0,250.0,99.0,6687147.0,0.0,0/0,2490.0,250.0,99.0,6767102.0,0.0,0/0,2491.0,250.0,99.0,6617218.0,0.0,0/0,2490.0,250.0,99.0,6927285.0,0.0,0/0,2490.0,250.0,99.0,6867219.0,0.0,0/0,2490.0,249.0,99.0,6797203.0,0.0,0/0,2500,250.0,99.0,6797145.0,0.0,0/0,2500.0,250.0,99.0,6497049.0,0.0,0/0,2500.0,250.0,99.0,6837245.0,0.0,0/0,2500.0,250.0,99.0,6737092.0,0.0,0/0,2500.0,250.0,99.0,6947240.0,0.0,0/0,2500.0,250.0,99.0,6737107.0,0.0,0/0,2500,250.0,99.0,6627193.0,0.0,0/0,2490.0,250.0,99.0,6507052.0,0.0,0/0,2500,250.0,99.0,6797138,0.0,0/0,2490.0,250.0,99.0,6496939.0,0.0,0/0,2500.0,250.0,99.0,6617102.0,0.0,0/0,2500.0,250.0,99.0,6316856.0,0.0,0/0,2490.0,249.0,99.0,6747196.0,0.0,0/0,2500.0,250.0,99.0,6736729.0,0.0,0/0,2500,250.0,99.0,6657245.0,0.0,0/0,2500.0,250.0,99.0,6737097.0,0.0,0/0,2491,250.0,99.0,6076921.0,0.0,0/0,2500.0,250.0,99.0,6827211.0,0.0,0/0,2500,250.0,99.0,6647057.0,0.0,0/0,2500.0,250.0,99.0,6737125.0,0.0,0/0,2491.0,250.0,99.0,6426868.0,0.0,0/1,103147.0,250.0,99.0,371802156.0,0.59,0/0,2500.0,250.0,99.0,6467009.0,0.0,0/0,2500.0,250.0,99.0,6917212.0,0.0,0/0,2500.0,250.0,99.0,6747138.0,0.0,0/0,2500.0,250.0,99.0,6747145.0,0.0,0/0,30.0,3.0,6.0,649.0,0.0
20:45354999-TTCTACC/T,20,45354999,rs377475615,TTCTACC,T,5804.3,PASS,GT:AD:DP:GQ:PL,1,0.01,96,-4.625,1,11676,0,0.0,3.0103,0.0,0.0,-0.0117,1,0.01,0,59.98,0.212,11.61,0,0,5.979,3.978,0,variant2,0/0,2490.0,249.0,99.0,75023919.0,0.0,0/0,2490.0,249.0,99.0,75023918.0,0.0,0/0,2470.0,248.0,99.0,74423833.0,0.0,0/0,2460.0,246.0,99.0,74124099.0,0.0,0/0,2500.0,250.0,99.0,75324188.0,0.0,0/0,2500.0,250.0,99.0,75324773.0,0.0,0/0,2480.0,248.0,99.0,74723718.0,0.0,0/1,16981.0,250.0,99.0,5853015188.0,0.32,0/0,2480.0,248.0,99.0,74723896.0,0.0,0/0,2500.0,250.0,99.0,75324364.0,0.0,0/0,2480.0,248.0,99.0,74724515.0,0.0,0/0,2480,248.0,99.0,74724312.0,0.0,0/0,2500.0,250.0,99.0,75324147.0,0.0,0/0,2470.0,247.0,99.0,74423765.0,0.0,0/0,2490,249.0,99.0,75023470.0,0.0,0/0,2470.0,247.0,99.0,74423214.0,0.0,0/0,2480.0,248.0,99.0,74723700.0,0.0,0/0,2470.0,247.0,99.0,74424693.0,0.0,0/0,2470.0,247.0,99.0,74424359.0,0.0,0/0,2480.0,248.0,99.0,74724784.0,0.0,0/0,2490.0,249.0,99.0,75024679.0,0.0,0/0,2500,250.0,99.0,75324340.0,0.0,0/0,2480.0,248.0,99.0,74723979.0,0.0,0/0,2490.0,249.0,99.0,75024639.0,0.0,0/0,2490.0,249.0,99.0,75024363.0,0.0,0/0,2500.0,250.0,99.0,75323597.0,0.0,0/0,2470.0,247.0,99.0,74424417.0,0.0,0/0,2460,246.0,99.0,74124364.0,0.0,0/0,2460.0,246.0,99.0,74124066.0,0.0,0/0,2480,248.0,99.0,74723770,0.0,0/0,2480.0,248.0,99.0,74724474.0,0.0,0/0,2500.0,250.0,99.0,75324014.0,0.0,0/0,2490.0,249.0,99.0,75024831.0,0.0,0/0,2480.0,248.0,99.0,74723651.0,0.0,0/0,2490.0,249.0,99.0,75024505.0,0.0,0/0,2480,248.0,99.0,74723893.0,0.0,0/0,2480.0,248.0,99.0,74724842.0,0.0,0/0,2480,248.0,99.0,74724003.0,0.0,0/0,2500.0,250.0,99.0,75324775.0,0.0,0/0,2490,249.0,99.0,75024055.0,0.0,0/0,2480.0,248.0,99.0,74724546.0,0.0,0/0,2440.0,244.0,99.0,73524049.0,0.0,0/0,2490.0,249.0,99.0,75023715.0,0.0,0/0,2490.0,249.0,99.0,75023945.0,0.0,0/0,2500.0,250.0,99.0,75324727.0,0.0,0/0,2490.0,249.0,99.0,75024383.0,0.0,0/0,2480.0,248.0,99.0,74724101.0,0.0,0/0,30.0,3.0,9.0,9222.0,0.0
20:45355007-C/T,20,45355007,rs534222573,C,T,1919.42,PASS,GT:AD:DP:GQ:PL,1,0.01,96,15.635,1,11682,0,0.0,3.0103,0.0,21.5185,-0.0101,1,0.01,0,59.99,0.015,7.71,0,0,5.849,2.935,0,variant,0/0,2471.0,248.0,99.0,6777312.0,0.0,0/0,2490.0,249.0,99.0,6076865.0,0.0,0/0,2480.0,248.0,99.0,6807378.0,0.0,0/0,2450.0,245.0,99.0,6807360.0,0.0,0/0,2490.0,249.0,99.0,6867442.0,0.0,0/0,2491.0,250.0,99.0,6127055.0,0.0,0/0,2490.0,249.0,99.0,6477115.0,0.0,0/1,17079.0,250.0,99.0,195904527.0,0.32,0/0,2480.0,248.0,99.0,6677169.0,0.0,0/0,2500.0,250.0,99.0,6837586.0,0.0,0/0,2470.0,247.0,99.0,6747225.0,0.0,0/0,2490,249.0,99.0,6747284.0,0.0,0/0,2480.0,249.0,99.0,6507101.0,0.0,0/0,2461.0,247.0,99.0,6487156.0,0.0,0/0,2480,248.0,99.0,6507248.0,0.0,0/0,2480.0,248.0,99.0,6717236.0,0.0,0/0,2462.0,249.0,99.0,6297116.0,0.01,0/0,2470.0,247.0,99.0,6837393.0,0.0,0/0,2480.0,248.0,99.0,7017627.0,0.0,0/0,2470.0,247.0,99.0,6837412.0,0.0,0/0,2480.0,248.0,99.0,6777238.0,0.0,0/0,2481,249.0,99.0,6467210.0,0.0,0/0,2480.0,248.0,99.0,6447028.0,0.0,0/0,2500.0,250.0,99.0,6807243.0,0.0,0/0,2490.0,249.0,99.0,6747211.0,0.0,0/0,2490.0,249.0,99.0,6957317.0,0.0,0/0,2470.0,247.0,99.0,6717237.0,0.0,0/0,2470,247.0,99.0,6627405.0,0.0,0/0,2470.0,247.0,99.0,6477160.0,0.0,0/0,2480,248.0,99.0,6747304,0.0,0/0,2470.0,247.0,99.0,6687226.0,0.0,0/0,2500.0,250.0,99.0,6687440.0,0.0,0/0,2500.0,250.0,99.0,6387072.0,0.0,0/0,2480.0,249.0,99.0,6807506.0,0.0,0/0,2500.0,250.0,99.0,6927346.0,0.0,0/0,2480,248.0,99.0,6717407.0,0.0,0/0,2490.0,249.0,99.0,6747226.0,0.0,0/0,2480,248.0,99.0,6387162.0,0.0,0/0,2500.0,250.0,99.0,6867595.0,0.0,0/0,2480,248.0,99.0,6747196.0,0.0,0/0,2482.0,250.0,99.0,6297154.0,0.01,0/0,2432.0,245.0,99.0,6207194.0,0.01,0/0,2500.0,250.0,99.0,6807231.0,0.0,0/0,2480.0,249.0,99.0,6467044.0,0.0,0/0,2500.0,250.0,99.0,6897286.0,0.0,0/0,2490.0,249.0,99.0,6777231.0,0.0,0/0,2470.0,249.0,99.0,6747233.0,0.0,0/0,20.0,3.0,3.0,324.0,0.0
20:45355437-T/C,20,45355437,.,T,C,1489.25,PASS,GT:AD:DP:GQ:PL,46,0.605,76,-5.194,0,104,0,0.0,5.1683,0.0,0.6292,-0.3412,36,0.474,0,60.0,0.0,34.63,0,0,-6.953,1.055,0,variant,0/1,11.0,2.0,26.0,26026.0,0.5,./.,,,,,,0/1,11.0,2.0,25.0,25026.0,0.5,0/0,30.0,3.0,9.0,9101.0,0.0,0/1,21.0,5.0,22.0,22059.0,0.2,1/1,1.0,2.0,3.0,3030.0,0.5,1/1,1.0,1.0,3.0,3130.0,1.0,0/1,12.0,3.0,23.0,53023.0,0.67,0/1,22.0,4.0,51.0,51055.0,0.5,1/1,1.0,1.0,3.0,3230.0,1.0,0/1,11.0,2.0,26.0,26028.0,0.5,1/1,01,2.0,3.0,3030.0,0.5,1/1,1.0,1.0,3.0,3230.0,1.0,1/1,2.0,2.0,6.0,6360.0,1.0,0/0,10,1.0,3.0,330.0,0.0,0/1,11.0,2.0,25.0,26025.0,0.5,0/1,21.0,4.0,22.0,22058.0,0.25,./.,,,,,,0/1,11.0,3.0,26.0,26026.0,0.33,0/0,10.0,2.0,3.0,335.0,0.0,0/1,22.0,4.0,23.0,54023.0,0.5,1/1,02,3.0,6.0,6160.0,0.67,0/0,10.0,1.0,3.0,331.0,0.0,1/1,2.0,2.0,6.0,6360.0,1.0,0/1,21.0,4.0,23.0,23054.0,0.25,./.,,,,,,1/1,1.0,2.0,3.0,3230.0,0.5,1/1,01,2.0,3.0,3330.0,0.5,1/1,3.0,3.0,9.0,9290.0,1.0,0/1,11,3.0,26.0,26027,0.33,1/1,1.0,1.0,3.0,3230.0,1.0,0/1,11.0,2.0,25.0,25028.0,0.5,0/1,22.0,4.0,51.0,51055.0,0.5,0/1,12.0,3.0,26.0,53026.0,0.67,0/0,10.0,1.0,3.0,331.0,0.0,./.,.,2.0,,,,0/1,55.0,10.0,99.0,1300137.0,0.5,./.,,,,,,./.,,,,,,./.,.,2.0,,,,0/1,11.0,2.0,26.0,26027.0,0.5,./.,,,,,,./.,,,,,,0/1,12.0,3.0,24.0,54024.0,0.67,1/1,3.0,3.0,9.0,9590.0,1.0,0/1,11.0,2.0,25.0,25027.0,0.5,0/1,11.0,2.0,26.0,26027.0,0.5,./.,,,,,


In [241]:

v = VCF(t, genotype_level=True, 
    info_level=True, UID=True)


test_vcf = FilterVCF(v.vcf)
test_vcf.get_genotype('DP')

#test_vcf.filter_genotype(minDP=10, minAB=0.1)#, minAB=0.1)
test_vcf.vcf#.loc[['1:1000-A/G']]

test_vcf.get_genotype('DP')

test_vcf.subset(['22MI1099'], remove_uncalled=True, exclude_ref=True)
test_vcf.filter_genotype(minDP=12, minGQ=51)
test_vcf.positions('1:2235000-2235600', include=False)
test_vcf.biallelic()

test_vcf.remove_indels()

['AC', 'LOLZ']




Unnamed: 0_level_0,CHROM,POS,ID,REF,ALT,QUAL,FILTER,FORMAT,INFO,INFO,22MI1099,22MI1099,22MI1099,22MI1099,22MI1099,22MI1099
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,AC,LOLZ,GT,AD,DP,GQ,PL,AB
1:2235901-A/G,1,2235901,.,A,G,24.89,LowQual,GT:AD:DP:GQ:PL,0,0,1/1,200200,400.0,90.0,1000100,0.5


{'1:2235243-C/T', '1:2235501-A/GGT', '1:2239901-GA/G', '1:2239999-A/G,T'}