In [1]:
%run 08222019_functions.ipynb

In [2]:
chromosomes = ['3L', '3R']

In [3]:
calldata_chri= h5py.File('data/chri_ac_phase2_map.h5', mode='r')
calldata_chri.keys()

<KeysViewHDF5 ['2L', '2R', '3L', '3R', 'X']>

In [4]:
def abba_baba_test(chrom, a, b, c, out_ac, blen=10000, plot=False, ax=None):
    for chrom in chromosomes:
        
        ###### loading phase2 metadata ######
        metadata = pd.read_csv("samples.meta.txt", sep="\t")
        pop_select = metadata.population.isin({a, b, c}).values
        pop_subset = metadata[pop_select]
        
        ###### loading phase2 genome and subset ######
        pos_phase1 = allel.SortedIndex(calldata_phase1[chrom]["variants/POS"][:])
        pos_phase2 = allel.SortedIndex(calldata_phase2[chrom]["variants/POS"][:])
        loc1, loc2 = pos_phase2.locate_intersection(pos_phase1)
        pos_p2_sel = pos_phase2.compress(loc1)
        genotypes_phase2_call = calldata_phase2[chrom]["calldata/GT"]
        genotypes_phase2 = allel.GenotypeChunkedArray(genotypes_phase2_call)
        geno_p2_subset = genotypes_phase2.subset(sel0=loc1)

            
        ###### perform allele count to my subpopulations ######
        grp = pop_subset.groupby("population")
        grp_indices = grp.indices
        ac_subpops = geno_p2_subset.count_alleles_subpops(grp_indices)
        

        # run D test
        d, d_se, d_z, d_vb, d_vj = allel.average_patterson_d(ac_subpops[a], ac_subpops[b], ac_subpops[c], out_ac[chrom], blen=blen)
            
    return d, d_se, d_z, d_vb, d_vj


def abba_baba_tbl(As, Bs, Cs, Ds, chroms=chromosomes, blen=10000):
    
    # normalise inputs
    if not isinstance(chroms, (list, tuple)):
        chroms = [chroms]
    if not isinstance(As, (list, tuple)):
        As = [As]
    if not isinstance(Bs, (list, tuple)):
        Bs = [Bs]
    if not isinstance(Cs, (list, tuple)):
        Cs = [Cs]
    if not isinstance(Ds, (list, tuple)):
        Ds = [Ds]
        
    # setup output table
    tbl = [['chromosome', 'test', 'D', 'SE', 'Z']]
    for chrom in chroms:
        for A in As:
            for B in Bs:
                if A != B:
                    for C in Cs:
                        for D in Ds:
                            if C != D:
                                d, d_se, d_z, _, _ = abba_baba_test(chrom, A, B, C, D, blen=blen)
                                test = 'D(%s, %s; %s, %s)' % (A, B, C, D)
                                row = [chrom, test, d, d_se, d_z]
                                tbl += [row]
    
    # display results
    (etl
     .wrap(tbl)
     .interpolate('D', '%.3f')
     .interpolate('SE', '%.4f')
     .interpolate('Z', '%.1f')
     .displayall(index_header=False, tr_style=lambda row: 'background-color: %s' % ('#afa' if float(row.Z) > 5 else '#aaf' if float(row.Z) < -5 else 'white')))
    
    #append
    import csv
    with open('file.csv', 'a') as csvFile:
        writer = csv.writer(csvFile)
        writer.writerow(tbl)
        csvFile.close()

Performing ABBA-BABA Test with AOcol population for each An.gambiae population:

In [5]:
abba_baba_tbl('GHgam', 'GAgam', 'AOcol', calldata_chri, chroms=['3L, 3R'])

chromosome,test,D,SE,Z
"3L, 3R","D(GHgam, GAgam; AOcol, )",-0.014,0.0051,-2.8


In [6]:
abba_baba_tbl('BFgam', 'GAgam', 'AOcol', calldata_chri, chroms=['3L, 3R'])

  vb = num_bsum / den_bsum


chromosome,test,D,SE,Z
"3L, 3R","D(BFgam, GAgam; AOcol, )",-0.03,0.0051,-5.8


In [5]:
abba_baba_tbl('CMgam', 'GAgam', 'AOcol', calldata_chri, chroms=['3L, 3R'])

  vb = num_bsum / den_bsum


chromosome,test,D,SE,Z
"3L, 3R","D(CMgam, GAgam; AOcol, )",-0.027,0.0048,-5.7


AttributeError: 'NoneType' object has no attribute 'tocsv'

In [6]:
abba_baba_tbl('FRgam', 'GAgam', 'AOcol', calldata_chri, chroms=['3L, 3R'])

  vb = num_bsum / den_bsum


chromosome,test,D,SE,Z
"3L, 3R","D(FRgam, GAgam; AOcol, )",-0.019,0.0083,-2.3


In [7]:
abba_baba_tbl('GNgam', 'GAgam', 'AOcol', calldata_chri, chroms=['3L, 3R'])

chromosome,test,D,SE,Z
"3L, 3R","D(GNgam, GAgam; AOcol, )",-0.022,0.005,-4.5


In [8]:
abba_baba_tbl('GQgam', 'GAgam', 'AOcol', calldata_chri, chroms=['3L, 3R'])

chromosome,test,D,SE,Z
"3L, 3R","D(GQgam, GAgam; AOcol, )",-0.018,0.0065,-2.8


----------------------------------------

Performing ABBA-BABA Test with BFcol population for each <i>An.gambiae</i> population:

In [9]:
abba_baba_tbl('UGgam', 'GAgam', 'BFcol', calldata_chri, chroms=['3L, 3R'])

chromosome,test,D,SE,Z
"3L, 3R","D(UGgam, GAgam; BFcol, )",-0.012,0.0046,-2.5


In [10]:
abba_baba_tbl('GHgam', 'GAgam', 'BFcol', calldata_chri, chroms=['3L, 3R'])

chromosome,test,D,SE,Z
"3L, 3R","D(GHgam, GAgam; BFcol, )",0.003,0.0053,0.5


In [5]:
abba_baba_tbl('CMgam', 'GAgam', 'BFcol', calldata_chri, chroms=['3L, 3R'])

  vb = num_bsum / den_bsum


chromosome,test,D,SE,Z
"3L, 3R","D(CMgam, GAgam; BFcol, )",-0.015,0.0047,-3.2


In [6]:
abba_baba_tbl('BFgam', 'GAgam', 'BFcol', calldata_chri, chroms=['3L, 3R'])

chromosome,test,D,SE,Z
"3L, 3R","D(BFgam, GAgam; BFcol, )",0.009,0.0004,20.9


In [5]:
abba_baba_tbl('FRgam', 'GAgam', 'BFcol', calldata_chri, chroms=['3L, 3R'])

chromosome,test,D,SE,Z
"3L, 3R","D(FRgam, GAgam; BFcol, )",0.032,0.0014,22.1


In [None]:
abba_baba_tbl('GNgam', 'GAgam', 'BFcol', calldata_chri, chroms=['3L, 3R'])

In [5]:
abba_baba_tbl('GQgam', 'GAgam', 'BFcol', calldata_chri, chroms=['3L, 3R'])

chromosome,test,D,SE,Z
"3L, 3R","D(GQgam, GAgam; BFcol, )",0.023,0.0012,18.7


---------------------------------------------------------------------------

Performing ABBA-BABA Test with CIcol population for each <i>An.gambiae</i> population:

In [None]:
abba_baba_tbl('UGgam', 'GAgam', 'CIcol', calldata_chri, chroms=['3L, 3R'])

In [None]:
abba_baba_tbl('GHgam', 'GAgam', 'CIcol', calldata_chri, chroms=['3L, 3R'])

In [None]:
abba_baba_tbl('CMgam', 'GAgam', 'CIcol', calldata_chri, chroms=['3L, 3R'])

In [None]:
abba_baba_tbl('BFgam', 'GAgam', 'CIcol', calldata_chri, chroms=['3L, 3R'])

In [None]:
abba_baba_tbl('FRgam', 'GAgam', 'CIcol', calldata_chri, chroms=['3L, 3R'])

In [None]:
abba_baba_tbl('GNgam', 'GAgam', 'CIcol', calldata_chri, chroms=['3L, 3R'])

In [None]:
abba_baba_tbl('GQgam', 'GAgam', 'CIcol', calldata_chri, chroms=['3L, 3R'])

--------------------------------------------------------------

Performing ABBA-BABA Test with GHcol population for each <i>An.gambiae</i> population:

In [None]:
abba_baba_tbl('UGgam', 'GAgam', 'GHcol', calldata_chri, chroms=['3L, 3R'])

In [None]:
abba_baba_tbl('GHgam', 'GAgam', 'GHcol', calldata_chri, chroms=['3L, 3R'])

In [None]:
abba_baba_tbl('CMgam', 'GAgam', 'GHcol', calldata_chri, chroms=['3L, 3R'])

In [None]:
abba_baba_tbl('BFgam', 'GAgam', 'GHcol', calldata_chri, chroms=['3L, 3R'])

In [None]:
abba_baba_tbl('FRgam', 'GAgam', 'GHcol', calldata_chri, chroms=['3L, 3R'])

In [None]:
abba_baba_tbl('GNgam', 'GAgam', 'GHcol', calldata_chri, chroms=['3L, 3R'])

In [None]:
abba_baba_tbl('GQgam', 'GAgam', 'GHcol', calldata_chri, chroms=['3L, 3R'])

--------------------------------------------------------

Performing ABBA-BABA Test with GNcol population for each <i>An.gambiae</i> population:

In [None]:
abba_baba_tbl('UGgam', 'GAgam', 'GNcol', calldata_chri, chroms=['3L, 3R'])

In [None]:
abba_baba_tbl('GHgam', 'GAgam', 'GNcol', calldata_chri, chroms=['3L, 3R'])

In [None]:
abba_baba_tbl('CMgam', 'GAgam', 'GNcol', calldata_chri, chroms=['3L, 3R'])

In [None]:
abba_baba_tbl('BFgam', 'GAgam', 'GNcol', calldata_chri, chroms=['3L, 3R'])

In [None]:
abba_baba_tbl('FRgam', 'GAgam', 'GNcol', calldata_chri, chroms=['3L, 3R'])

In [None]:
abba_baba_tbl('GNgam', 'GAgam', 'GNcol', calldata_chri, chroms=['3L, 3R'])

In [None]:
abba_baba_tbl('GQgam', 'GAgam', 'GNcol', calldata_chri, chroms=['3L, 3R'])

--------------------------------------------------------

In [6]:
agam = 'GAgam','UGgam'

In [7]:
agam[0]

'GAgam'

In [8]:
acol = 'AOcol','BFcol','CIcol','GHcol','GNcol'

In [None]:
abba_baba_tbl(agam[0], agam[1], acol, calldata_chri, chroms=['3L'])

  vb = num_bsum / den_bsum
