In [1]:
import ipyrad.analysis as ipa
import toytree
import toyplot

In [2]:
# report the versions of the Python libraries used in this notebook
print('ipyrad', ipa.__version__)
print('toytree', toytree.__version__)
# IPython shell escape: ask the treemix binary for its version and keep only
# the line containing the 'TreeMix v. ' banner
! treemix --version | grep 'TreeMix v. '

ipyrad 0.9.43
toytree 1.1.2
TreeMix v. 1.13


In [3]:
# the path to your HDF5 formatted snps file
# (a relative path, so it is resolved against the notebook's working directory)
data = "H_st_11rm.snps.hdf5"

In [4]:
# imap: group (population) name -> list of the sample names assigned to it.
# The group names are reused later as the keys of minmap and as the treemix
# root ("Puru").
# NOTE(review): a few samples sit in a group that differs from their name
# suffix (e.g. "H_oc_T355_jigu" under "Mach"; "H_ro_A409_ma" and
# "H_st_A9955_pa" under "RoArSuTa") -- presumably deliberate; confirm.
imap = {
#"ref": ["reference"],
"Puru": ["H_pe_A24346_pu"],
"JiGu": [ "H_oc_T15847_jigu","H_oc_A311_jigu"],
"Mach": [ "H_oc_T355_jigu","H_ro_A547_ma", "H_ro_J213_ma", "H_ro_J305_ma", "H_ro_J508_ma", "H_ro_J774_ma", "H_ro_J775_ma", "H_ro_J796_ma", "H_ro_T1842_ma", "H_ro_T366_ma", "H_ro_T385_ma", "H_ro_T471_ma"],#"H_ro_A8296_ma",
"RoArSuTa": ["H_st_J364_roar", "H_st_J621_roar", "H_st_J664_roar", "H_st_J665_roar", "H_st_J711_roar", "H_st_J762_roar", "H_st_J765_roar", "H_st_J368_roar", "H_st_J370_roar", "H_st_J374_roar", "H_st_J408_roar","H_ro_A409_ma", "H_ro_A551_ma", "H_ro_A410_ma", "H_ro_A521_ma","H_st_77860_arsu", "H_st_78249_arsu", "H_st_J525_arsu", "H_st_J530_arsu", "H_st_J536_arsu", "H_st_J572_arsu", "H_st_80727_arsu", "H_st_80800_arsu", "H_st_A272_arsu", "H_st_A273_arsu","H_st_81143_arsu","H_st_85680_arsu", "H_st_85970_arsu","H_st_T10207_suta", "H_st_T11900_suta", "H_st_A9955_pa", "H_st_T12194_suta","H_st_A4899_suta","H_st_81279_suta",  "H_st_86405_suta","H_st_A14546_suta","H_st_T24564_suta", "H_st_T7114_suta"],
"Para": ["H_st_A16571_pa","H_st_A7597_pa", "H_st_T16744_pa", "H_st_T17858_pa","H_st_A11597_pa", "H_st_A15208_pa", "H_st_A15210_pa"]
}

# minimum proportion of samples (0.5 = 50%) that must be present in each SNP
# from each group; the same threshold is applied to every group in imap
minmap = {i: 0.5 for i in imap}

In [5]:
# Set up one TreeMix analysis on the SNP file: groups come from imap, the
# per-group coverage filter from minmap, the tree is rooted on "Puru", m=2
# admixture edges are requested, and the seed is fixed.
tmx1 = ipa.treemix(data=data, imap=imap, minmap=minmap, root="Puru", m=2, seed=123456)

Samples: 60
Sites before filtering: 1702661
Filtered (indels): 0
Filtered (bi-allel): 44386
Filtered (mincov): 0
Filtered (minmap): 1500155
Filtered (combined): 1507589
Sites after filtering: 195072
Sites containing missing values: 192105 (98.48%)
Missing values in SNP matrix: 1605124 (13.71%)
subsampled 16194 unlinked SNPs


In [6]:
# print the command string that will be called and run it
# (printed first, so the exact invocation is on record before the fit starts)
print(tmx1.command)
tmx1.run()

/home/lmusher/array1/miniconda3/envs/treemix/bin/treemix -i /array1/lmusher/rio_roosevelt_outfiles/H_st_11rm_outfiles/analysis-treemix/test.treemix.in.gz -o /array1/lmusher/rio_roosevelt_outfiles/H_st_11rm_outfiles/analysis-treemix/test -m 2 -seed 123456 -root Puru


In [7]:
# draw the resulting tree
# (the trailing semicolon suppresses the text repr of the returned value)
tmx1.draw_tree();

In [8]:
# draw the covariance matrix
# (the trailing semicolon suppresses the text repr of the returned value)
tmx1.draw_cov();

# We now randomly sample one SNP per locus over 250 replicates (see `boots`) to see how the likelihood changes.
# Each replicate also draws a random minimum sample coverage per group, from 0.50 to 0.94 (randrange(50, 95) / 100), so the datasets vary in how much missing data they allow.
# We do this for m = 0 through 3 admixture edges.

In [9]:
import random
# replicate indices (250 of them) iterated by each of the replicate model-fit loops
boots = range(250)

In [10]:
# Replicate fits with m=0 admixture edges. For every replicate index in
# `boots`, tests0 stores the fitted log-likelihood and samp0 stores the
# coverage threshold that was drawn for that replicate.
tests0, samp0 = {}, {}
for i in boots:
    # per-group minimum coverage for this replicate: 0.50 .. 0.94
    minSamp = random.randrange(50, 95) / 100

    # init a treemix analysis object with a random (no) seed; every group
    # in imap gets the same threshold
    tmx = ipa.treemix(
        data=data,
        imap=imap,
        minmap=dict.fromkeys(imap, minSamp),
        root="Puru",
        global_=True,
        m=0,
        quiet=True,
    )

    # fit the model, then record its likelihood next to the threshold used
    tmx.run()
    tests0[i] = tmx.results.llik
    samp0[i] = minSamp

In [11]:
# Replicate fits with m=1 admixture edge. For every replicate index in
# `boots`, tests1 stores the fitted log-likelihood and samp1 stores the
# coverage threshold that was drawn for that replicate.
tests1, samp1 = {}, {}
for i in boots:
    # per-group minimum coverage for this replicate: 0.50 .. 0.94
    minSamp = random.randrange(50, 95) / 100

    # init a treemix analysis object with a random (no) seed; every group
    # in imap gets the same threshold
    tmx = ipa.treemix(
        data=data,
        imap=imap,
        minmap=dict.fromkeys(imap, minSamp),
        root="Puru",
        global_=True,
        m=1,
        quiet=True,
    )

    # fit the model, then record its likelihood next to the threshold used
    tmx.run()
    tests1[i] = tmx.results.llik
    samp1[i] = minSamp

In [12]:
# Replicate fits with m=2 admixture edges. For every replicate index in
# `boots`, tests2 stores the fitted log-likelihood and samp2 stores the
# coverage threshold that was drawn for that replicate.
tests2, samp2 = {}, {}
for i in boots:
    # per-group minimum coverage for this replicate: 0.50 .. 0.94
    minSamp = random.randrange(50, 95) / 100

    # init a treemix analysis object with a random (no) seed; every group
    # in imap gets the same threshold
    tmx = ipa.treemix(
        data=data,
        imap=imap,
        minmap=dict.fromkeys(imap, minSamp),
        root="Puru",
        global_=True,
        m=2,
        quiet=True,
    )

    # fit the model, then record its likelihood next to the threshold used
    tmx.run()
    tests2[i] = tmx.results.llik
    samp2[i] = minSamp

In [13]:
# Replicate fits with m=3 admixture edges. For every replicate index in
# `boots`, tests3 stores the fitted log-likelihood and samp3 stores the
# coverage threshold that was drawn for that replicate.
tests3, samp3 = {}, {}
for i in boots:
    # per-group minimum coverage for this replicate: 0.50 .. 0.94
    minSamp = random.randrange(50, 95) / 100

    # init a treemix analysis object with a random (no) seed; every group
    # in imap gets the same threshold
    tmx = ipa.treemix(
        data=data,
        imap=imap,
        minmap=dict.fromkeys(imap, minSamp),
        root="Puru",
        global_=True,
        m=3,
        quiet=True,
    )

    # fit the model, then record its likelihood next to the threshold used
    tmx.run()
    tests3[i] = tmx.results.llik
    samp3[i] = minSamp

In [14]:
import numpy

# Mean log-likelihood over all replicates, one value per number of admixture
# edges (test0 -> m=0, ..., test3 -> m=3).
test0, test1, test2, test3 = (
    numpy.mean([llik_by_rep[rep] for rep in boots])
    for llik_by_rep in (tests0, tests1, tests2, tests3)
)

# ordered by m, so the list position equals the number of admixture edges
tests = [test0, test1, test2, test3]

In [15]:
# Line plot of the mean ln(likelihood) against the number of admixture edges;
# the x values are simply the positions 0..len(tests)-1 in `tests`.
toyplot.plot(
    range(len(tests)),
    list(tests),
    xlabel="# admixture edges",
    ylabel="ln(likelihood)",
    color="steelblue",
    stroke_width=4,
    width=350,
    height=275,
);

In [16]:
import pandas as pd

# Per-replicate values in replicate order: testN holds the log-likelihoods for
# m=N and sampsN the coverage thresholds drawn for those same replicates.
test0, test1, test2, test3 = (
    [llik_by_rep[rep] for rep in boots]
    for llik_by_rep in (tests0, tests1, tests2, tests3)
)
samps0, samps1, samps2, samps3 = (
    [cov_by_rep[rep] for rep in boots]
    for cov_by_rep in (samp0, samp1, samp2, samp3)
)

# One row per replicate; columns alternate likelihood (mN) and threshold (sN).
df = pd.DataFrame({
    'm0': test0, 's0': samps0,
    'm1': test1, 's1': samps1,
    'm2': test2, 's2': samps2,
    'm3': test3, 's3': samps3,
})

# save the table next to the other treemix outputs
df.to_csv(r'./analysis-treemix/G_cy_nadmix_tests.csv')

In [17]:
# preview the first rows of the per-replicate results table
df.head()

Unnamed: 0,m0,s0,m1,s1,m2,s2,m3,s3
0,99.276,0.53,106.188,0.59,107.196,0.66,107.211,0.57
1,101.432,0.65,107.091,0.6,106.996,0.59,106.935,0.66
2,100.569,0.68,106.826,0.63,93.677,0.91,103.646,0.84
3,105.852,0.63,104.142,0.75,107.359,0.58,104.446,0.78
4,98.806,0.6,93.5,0.9,109.388,0.5,103.949,0.79


In [18]:
# Fit an m=1 model at each minimum-coverage threshold and draw the resulting
# trees side by side, one panel per threshold.
thresholds = [0.5, 0.6, 0.7, 0.8, 0.9, 0.99]

# a gridded canvas to plot trees on: one row, 200px per panel. The size is
# derived from the threshold list so the layout stays in sync when thresholds
# are added or removed.
canvas = toyplot.Canvas(width=200 * len(thresholds), height=200)

# iterate over the coverage thresholds; `panel` is the grid cell index
for panel, threshold in enumerate(thresholds):

    # init a treemix analysis object with a random (no) seed, applying the
    # same minimum sample coverage to every group in imap
    tmx = ipa.treemix(
        data=data,
        imap=imap,
        minmap={group: threshold for group in imap},
        root="Puru",
        global_=True,
        m=1,
        quiet=True
    )

    # run model fit
    tmx.run()

    # select a plot grid axis and add tree to axes
    axes = canvas.cartesian(grid=(1, len(thresholds), panel))
    tmx.draw_tree(axes)