In [1]:
import ipyrad.analysis as ipa
import toytree
import toyplot

In [2]:
print('ipyrad', ipa.__version__)
print('toytree', toytree.__version__)
! treemix --version | grep 'TreeMix v. '

ipyrad 0.9.43
toytree 1.1.2
TreeMix v. 1.13


In [3]:
# the path to your HDF5 formatted snps file
# (relative path; it is passed as `data=` to every ipa.treemix call in this notebook)
data = "M_ru_3rm_v9.snps.hdf5"

In [20]:
# Population map: population label -> list of sample names assigned to it.
imap = {
    # "ref": ["reference"],  # reference sample left out of the analysis
    "Inam": [
        'M_ru_A7875_In',
        'M_ru_T14456_In',
        'M_ru_T23245_In',
        'M_ru_T23246_In',
        'M_ru_T23416_In',
        'M_ru_T23478_In',
    ],
    "Puru": [
        'M_ru_A10311_pu',
        'M_ru_A10329_pu',
        'M_ru_A1380_pu',
        'M_ru_A2741_pu',
        'M_ru_A436_pu',
        'M_ru_A440_pu',
    ],
    "JiMa": [
        'M_ru_A207_jigu',
        'M_ru_T3228_jigu',
        'M_ru_T7634_jigu',
        'M_ru_T13253_ma',
        'M_ru_A474_ma',
        'M_ru_T3164_ma',
        'M_ru_J265_ma',
    ],
    "MaArTa": [
        'M_ru_T368_ma',
        'M_ru_T381_ma',
        'M_ru_T476_ma',
        'M_ru_T494_ma',
        'M_ru_J640_roar',
        'M_ru_J676_roar',
        'M_ru_80819_arsu',
        'M_ru_81347_arsu',
        'M_ru_85426_arsu',
        'M_ru_85919_suta',
        'M_ru_77750_suta',
        'M_ru_78182_suta',
        'M_ru_A11834_suta',
        'M_ru_A15176_suta',
        'M_ru_A5487_suta',
        'M_ru_T10184_suta',
        'M_ru_T11780_suta',
        'M_ru_T14532_suta',
        'M_ru_T14622_suta',
        'M_ru_T753_suta',
        # NOTE(review): this sample carries the '_pa' suffix used by the "Para"
        # group but is assigned to "MaArTa" -- confirm this is intentional.
        'M_ru_A9903_pa',
    ],
    "Para": [
        'M_ru_A16195_pa',
        'M_ru_A9235_pa',
        'M_ru_T11079_pa',
        'M_ru_T11238_pa',
        'M_ru_T12541_pa',
        'M_ru_T1649_pa',
        'M_ru_T16553_pa',
        'M_ru_T19782_pa',
        'M_ru_T6500_pa',
        'M_ru_T6577_pa',
    ],
}

# minimum fraction (0-1) of samples that must be present in each SNP from each
# group; the same threshold, 0.5, is applied to every population
minmap = dict.fromkeys(imap, 0.5)

In [30]:
# Set up a single, seeded TreeMix analysis on the full population map.
tmx1 = ipa.treemix(
    # input SNP file and population assignment / coverage filters
    data=data,
    imap=imap,
    minmap=minmap,
    # model settings; these show up as flags in the printed treemix command
    m=2,                     # -m: number of admixture edges
    root="Inam,Puru,JiMa",   # -root: populations used to root the tree
    global_=True,            # -global
    seed=123456,             # -seed: fixed so this run can be repeated
)

Samples: 50
Sites before filtering: 819934
Filtered (indels): 0
Filtered (bi-allel): 14244
Filtered (mincov): 0
Filtered (minmap): 595019
Filtered (combined): 600784
Sites after filtering: 219150
Sites containing missing values: 186562 (85.13%)
Missing values in SNP matrix: 1302795 (11.89%)
subsampled 21951 unlinked SNPs


In [31]:
# print the command string that will be called and run it
print(tmx1.command)
# execute the analysis; results are then read from `tmx1.results`
tmx1.run()
# last expression of the cell: display the fitted model's log-likelihood
tmx1.results.llik

/home/lmusher/array1/miniconda3/envs/treemix/bin/treemix -i /array1/lmusher/rio_roosevelt_outfiles/M_ru_3rm_v9_outfiles/analysis-treemix/test.treemix.in.gz -o /array1/lmusher/rio_roosevelt_outfiles/M_ru_3rm_v9_outfiles/analysis-treemix/test -m 2 -seed 123456 -root Inam,Puru,JiMa -global


113.3

In [32]:
# draw the resulting tree for the seeded m=2 run (tmx1);
# the trailing `;` suppresses the text repr of the returned value
tmx1.draw_tree();

In [19]:
# draw the covariance matrix for the same run (tmx1);
# the trailing `;` suppresses the text repr of the returned value
tmx1.draw_cov();

#We now refit TreeMix repeatedly, randomly sampling 1 SNP per locus in each of 250 replicates, to see how the likelihood changes
#Each replicate also uses a randomly drawn minimum sample coverage per population, from 50% up to (but not including) 95%, so the datasets vary in how much missing data they contain
#We do this for m = 0 to 3 admixture edges

In [9]:
# `random` is used to draw the per-replicate coverage threshold in the loops
# below; no seed is set in the visible cells, so the draws differ between runs
import random
# replicate indices shared by every replicate loop (250 replicates per value of m)
boots = range(250)

In [10]:
# Replicate TreeMix fits with m=0 (no admixture edges).
# Each replicate draws its own coverage threshold and is fitted without a seed.
tests0 = {}  # replicate index -> log-likelihood of the fitted model
samp0 = {}   # replicate index -> coverage threshold used for that replicate
for rep in boots:
    # randrange excludes the stop value, so thresholds fall in 0.50-0.94
    min_cov = random.randrange(50, 95) / 100

    # init a treemix analysis object with a random (no) seed;
    # the same threshold is applied to every population in imap
    tmx = ipa.treemix(
        data=data,
        imap=imap,
        minmap={pop: min_cov for pop in imap},
        root="Para,MaArTa",
        global_=True,
        m=0,
        quiet=True,
    )

    # run model fit and record the result alongside the threshold that produced it
    tmx.run()
    tests0[rep] = tmx.results.llik
    samp0[rep] = min_cov

In [11]:
# Replicate TreeMix fits with m=1 (one admixture edge).
# Each replicate draws its own coverage threshold and is fitted without a seed.
tests1 = {}  # replicate index -> log-likelihood of the fitted model
samp1 = {}   # replicate index -> coverage threshold used for that replicate
for rep in boots:
    # randrange excludes the stop value, so thresholds fall in 0.50-0.94
    min_cov = random.randrange(50, 95) / 100

    # init a treemix analysis object with a random (no) seed;
    # the same threshold is applied to every population in imap
    tmx = ipa.treemix(
        data=data,
        imap=imap,
        minmap={pop: min_cov for pop in imap},
        root="Para,MaArTa",
        global_=True,
        m=1,
        quiet=True,
    )

    # run model fit and record the result alongside the threshold that produced it
    tmx.run()
    tests1[rep] = tmx.results.llik
    samp1[rep] = min_cov

In [12]:
# Replicate TreeMix fits with m=2 (two admixture edges).
# Each replicate draws its own coverage threshold and is fitted without a seed.
tests2 = {}  # replicate index -> log-likelihood of the fitted model
samp2 = {}   # replicate index -> coverage threshold used for that replicate
for rep in boots:
    # randrange excludes the stop value, so thresholds fall in 0.50-0.94
    min_cov = random.randrange(50, 95) / 100

    # init a treemix analysis object with a random (no) seed;
    # the same threshold is applied to every population in imap
    tmx = ipa.treemix(
        data=data,
        imap=imap,
        minmap={pop: min_cov for pop in imap},
        root="Para,MaArTa",
        global_=True,
        m=2,
        quiet=True,
    )

    # run model fit and record the result alongside the threshold that produced it
    tmx.run()
    tests2[rep] = tmx.results.llik
    samp2[rep] = min_cov

In [13]:
# Replicate TreeMix fits with m=3 (three admixture edges).
# Each replicate draws its own coverage threshold and is fitted without a seed.
tests3 = {}  # replicate index -> log-likelihood of the fitted model
samp3 = {}   # replicate index -> coverage threshold used for that replicate
for rep in boots:
    # randrange excludes the stop value, so thresholds fall in 0.50-0.94
    min_cov = random.randrange(50, 95) / 100

    # init a treemix analysis object with a random (no) seed;
    # the same threshold is applied to every population in imap
    tmx = ipa.treemix(
        data=data,
        imap=imap,
        minmap={pop: min_cov for pop in imap},
        root="Para,MaArTa",
        global_=True,
        m=3,
        quiet=True,
    )

    # run model fit and record the result alongside the threshold that produced it
    tmx.run()
    tests3[rep] = tmx.results.llik
    samp3[rep] = min_cov

In [14]:
import numpy

# Mean log-likelihood across all replicates, one value per number of
# admixture edges (m = 0, 1, 2, 3), read in replicate order from each dict.
test0, test1, test2, test3 = (
    numpy.mean([llik_by_rep[rep] for rep in boots])
    for llik_by_rep in (tests0, tests1, tests2, tests3)
)

# position in this list == number of admixture edges
tests = [test0, test1, test2, test3]

In [15]:
# Line plot of the mean ln(likelihood) against the number of admixture edges;
# x is simply the position of each mean within `tests`.
toyplot.plot(
    range(len(tests)),
    list(tests),
    width=350,
    height=275,
    stroke_width=4,
    color="steelblue",
    xlabel="# admixture edges",
    ylabel="ln(likelihood)",
);

In [16]:
import pandas as pd

# Per-replicate log-likelihoods (test*) in replicate order, one list per m
test0, test1, test2, test3 = (
    [llik_by_rep[rep] for rep in boots]
    for llik_by_rep in (tests0, tests1, tests2, tests3)
)
# Matching per-replicate coverage thresholds (samps*), one list per m
samps0, samps1, samps2, samps3 = (
    [cov_by_rep[rep] for rep in boots]
    for cov_by_rep in (samp0, samp1, samp2, samp3)
)

# One row per replicate; 'mK' is the log-likelihood with K admixture edges
# and 'sK' the coverage threshold used for that fit.
df = pd.DataFrame({
    'm0': test0, 's0': samps0,
    'm1': test1, 's1': samps1,
    'm2': test2, 's2': samps2,
    'm3': test3, 's3': samps3,
})

# Persist the table (the row index is written as the first, unnamed column)
df.to_csv(r'./analysis-treemix/M_ru_nadmix_tests.csv')

In [17]:
# preview the first rows of the per-replicate likelihood / threshold table
df.head()

Unnamed: 0,m0,s0,m1,s1,m2,s2,m3,s3
0,105.397,0.85,112.279,0.52,110.928,0.67,106.26,0.86
1,108.065,0.64,112.885,0.51,104.327,0.91,109.514,0.78
2,107.974,0.85,108.558,0.85,110.099,0.83,112.15,0.58
3,107.15,0.77,110.931,0.67,110.284,0.77,112.247,0.62
4,108.812,0.75,110.193,0.73,110.071,0.74,111.913,0.59


In [7]:
# Compare the fitted m=1 tree across a range of minimum-coverage thresholds,
# drawing one tree per threshold side by side on a single canvas.
min_coverages = [0.5, 0.6, 0.7, 0.8, 0.9, 0.99]

# a gridded canvas to plot trees on
canvas = toyplot.Canvas(width=1200, height=200)

# iterate over the coverage thresholds; `col` is the grid cell for each tree
for col, min_cov in enumerate(min_coverages):

    # init a treemix analysis object with a random (no) seed;
    # the same threshold is applied to every population in imap
    tmx = ipa.treemix(
        data=data,
        imap=imap,
        minmap={pop: min_cov for pop in imap},
        root="Para,MaArTa",
        global_=True,
        m=1,
        quiet=True,
    )

    # run model fit
    tmx.run()

    # select a plot grid axis and add tree to axes; the number of grid columns
    # follows the threshold list so the two cannot drift apart
    axes = canvas.cartesian(grid=(1, len(min_coverages), col))
    tmx.draw_tree(axes)