In [1]:
import ipyrad.analysis as ipa
import toytree
import toyplot

In [2]:
# Record the versions of the tools this analysis was run with.
print('ipyrad', ipa.__version__)
print('toytree', toytree.__version__)
# IPython shell escape: ask the treemix binary for its version and keep
# only the line that carries the version string.
! treemix --version | grep 'TreeMix v. '

ipyrad 0.9.43
toytree 1.1.2
TreeMix v. 1.13


In [3]:
# Path to the HDF5-formatted SNPs file passed as `data=` to every
# ipa.treemix() call in this notebook. The path is relative, so it is
# resolved against the kernel's current working directory.
data = "G_cy_5rm_v9.snps.hdf5"

In [4]:
# Population map: population label -> names of the samples assigned to it.
imap = {
    # "ref": ["reference"],  # disabled entry, kept for reference
    "Inam": [
        "G_cy_T12385_In", "G_cy_T310_In", "G_cy_T23196_In", "G_cy_T7636_In",
        "G_cy_T3343_jigu", "G_cy_T3384_jigu", "G_cy_T3385_jigu",
    ],
    "Puru": [
        "G_cy_82508_pu", "G_cy_T12275_pu", "G_cy_T12279_pu", "G_cy_T12392_pu",
        "G_cy_T13184_pu", "G_cy_T26228_pu", "G_cy_T26229_pu", "G_cy_T26252_pu",
    ],
    "MaRo": [
        "G_cy_J296_ma", "G_cy_J477_ma", "G_cy_J773_ma", "G_cy_T13251_ma",
        "G_cy_T363_ma", "G_cy_T364_ma", "G_cy_J691_roar", "G_cy_J694_roar",
    ],
    "ArSuTa": [
        "G_cy_80582_arsu", "G_cy_85678_arsu", "G_cy_80701_arsu", "G_cy_80801_arsu",
        "G_cy_80826_arsu", "G_cy_81108_arsu", "G_cy_81118_arsu", "G_cy_85499_arsu",
        "G_cy_85356_suta", "G_cy_86297_suta", "G_cy_86321_suta", "G_cy_86458_suta",
        "G_cy_86478_suta", "G_cy_T14558_suta", "G_cy_T16693_suta", "G_cy_T18563_suta",
        "G_cy_T18620_suta",
    ],
    "Para": [
        "G_cy_T10897_pa", "G_cy_T11062_pa", "G_cy_T16771_pa", "G_cy_T1705_pa",
        "G_cy_T18744_pa", "G_cy_T19429_pa", "G_cy_T19520_pa", "G_cy_T19765_pa",
        "G_cy_T2497_pa", "G_cy_T6579_pa", "G_cy_T9133_pa",
    ],
}

# Minimum fraction (0.5 = 50%) of each population's samples that must be
# present at a SNP; the same threshold is applied to every population.
minmap = dict.fromkeys(imap, 0.5)

In [5]:
# Set up the reference TreeMix analysis: two admixture edges (m=2), rooted
# on the Inam and Puru populations, with a fixed seed so the run can be
# repeated (the seed is forwarded to treemix as `-seed`).
tmx1 = ipa.treemix(
    data=data,
    imap=imap,
    minmap=minmap,
    root="Inam,Puru",
    m=2,
    seed=123456,
)

Samples: 51
Sites before filtering: 1130537
Filtered (indels): 0
Filtered (bi-allel): 21079
Filtered (mincov): 0
Filtered (minmap): 786521
Filtered (combined): 794722
Sites after filtering: 335815
Sites containing missing values: 328643 (97.86%)
Missing values in SNP matrix: 2548759 (14.88%)
subsampled 32472 unlinked SNPs


In [6]:
# Show the exact treemix command line that will be executed, run the
# analysis, then display the log-likelihood of the fitted model as the
# cell's output (it must stay the last expression to be displayed).
print(tmx1.command)
tmx1.run()
tmx1.results.llik

/home/lmusher/array1/miniconda3/envs/treemix/bin/treemix -i /array1/lmusher/rio_roosevelt_outfiles/G_cy_5rm_v9_outfiles/analysis-treemix/test.treemix.in.gz -o /array1/lmusher/rio_roosevelt_outfiles/G_cy_5rm_v9_outfiles/analysis-treemix/test -m 2 -seed 123456 -root Inam,Puru


115.964

In [7]:
# Draw the tree from the tmx1 (m=2) fit; the trailing ';' keeps the
# return value's repr out of the cell output.
tmx1.draw_tree();

In [14]:
# Draw the covariance matrix for the tmx1 (m=2) fit; the trailing ';'
# keeps the return value's repr out of the cell output.
tmx1.draw_cov();

In [9]:
import random

# Seed Python's RNG once so that the minmap thresholds drawn with
# random.randrange() in the replicate loops are identical on every
# Restart & Run All. This does not seed treemix itself: those loops pass
# no `seed=` to ipa.treemix().
RANDOM_SEED = 123456
random.seed(RANDOM_SEED)

# Replicate indices; each replicate loop performs one model fit per entry.
boots = range(250)

In [10]:
# Replicate fits with no admixture edges (m=0).
# tests0: replicate index -> log-likelihood of the fit
# samp0:  replicate index -> minmap threshold used for that fit
tests0, samp0 = {}, {}

for rep in boots:
    # per-replicate threshold drawn from 0.50, 0.51, ..., 0.94
    minSamp = random.randrange(50, 95) / 100

    # no seed is passed, so every replicate is an independent treemix setup
    tmx = ipa.treemix(
        data=data,
        imap=imap,
        minmap=dict.fromkeys(imap, minSamp),
        root="Inam,Puru",
        global_=True,
        m=0,
        quiet=True,
    )

    # fit the model and record its likelihood together with the threshold
    tmx.run()
    tests0[rep] = tmx.results.llik
    samp0[rep] = minSamp

In [11]:
# Replicate fits with one admixture edge (m=1).
# tests1: replicate index -> log-likelihood of the fit
# samp1:  replicate index -> minmap threshold used for that fit
tests1, samp1 = {}, {}

for rep in boots:
    # per-replicate threshold drawn from 0.50, 0.51, ..., 0.94
    minSamp = random.randrange(50, 95) / 100

    # no seed is passed, so every replicate is an independent treemix setup
    tmx = ipa.treemix(
        data=data,
        imap=imap,
        minmap=dict.fromkeys(imap, minSamp),
        root="Inam,Puru",
        global_=True,
        m=1,
        quiet=True,
    )

    # fit the model and record its likelihood together with the threshold
    tmx.run()
    tests1[rep] = tmx.results.llik
    samp1[rep] = minSamp

In [12]:
# Replicate fits with two admixture edges (m=2).
# tests2: replicate index -> log-likelihood of the fit
# samp2:  replicate index -> minmap threshold used for that fit
tests2, samp2 = {}, {}

for rep in boots:
    # per-replicate threshold drawn from 0.50, 0.51, ..., 0.94
    minSamp = random.randrange(50, 95) / 100

    # no seed is passed, so every replicate is an independent treemix setup
    tmx = ipa.treemix(
        data=data,
        imap=imap,
        minmap=dict.fromkeys(imap, minSamp),
        root="Inam,Puru",
        global_=True,
        m=2,
        quiet=True,
    )

    # fit the model and record its likelihood together with the threshold
    tmx.run()
    tests2[rep] = tmx.results.llik
    samp2[rep] = minSamp

In [13]:
# Replicate fits with three admixture edges (m=3).
# tests3: replicate index -> log-likelihood of the fit
# samp3:  replicate index -> minmap threshold used for that fit
tests3, samp3 = {}, {}

for rep in boots:
    # per-replicate threshold drawn from 0.50, 0.51, ..., 0.94
    minSamp = random.randrange(50, 95) / 100

    # no seed is passed, so every replicate is an independent treemix setup
    tmx = ipa.treemix(
        data=data,
        imap=imap,
        minmap=dict.fromkeys(imap, minSamp),
        root="Inam,Puru",
        global_=True,
        m=3,
        quiet=True,
    )

    # fit the model and record its likelihood together with the threshold
    tmx.run()
    tests3[rep] = tmx.results.llik
    samp3[rep] = minSamp

In [14]:
import numpy

# Mean log-likelihood over all replicates for each number of admixture
# edges. `tests` is ordered by m (0, 1, 2, 3), so its list index equals m.
tests = [
    numpy.mean([llik_by_rep[rep] for rep in boots])
    for llik_by_rep in (tests0, tests1, tests2, tests3)
]

# Individual names for the four means.
test0, test1, test2, test3 = tests

In [15]:
# Mean log-likelihood (y) against the number of admixture edges m (x);
# the position of each value in `tests` is its m.
toyplot.plot(
    range(len(tests)),
    list(tests),
    xlabel="# admixture edges",
    ylabel="ln(likelihood)",
    color="steelblue",
    stroke_width=4,
    width=350,
    height=275,
);

In [16]:
import pandas as pd

# Flatten the per-replicate dictionaries into lists ordered by replicate
# index: test<m> holds log-likelihoods, samps<m> the minmap thresholds.
test0, test1, test2, test3 = (
    [llik_by_rep[rep] for rep in boots]
    for llik_by_rep in (tests0, tests1, tests2, tests3)
)
samps0, samps1, samps2, samps3 = (
    [thresh_by_rep[rep] for rep in boots]
    for thresh_by_rep in (samp0, samp1, samp2, samp3)
)

# One row per replicate; column m<k> is the log-likelihood with k admixture
# edges and s<k> is the minmap threshold used for that fit.
df = pd.DataFrame(
    {
        'm0': test0, 's0': samps0,
        'm1': test1, 's1': samps1,
        'm2': test2, 's2': samps2,
        'm3': test3, 's3': samps3,
    },
    columns=['m0', 's0', 'm1', 's1', 'm2', 's2', 'm3', 's3'],
)

# Save the table to the analysis-treemix output directory.
df.to_csv(r'./analysis-treemix/G_cy_nadmix_tests.csv')

In [17]:
# Preview the first rows of the per-replicate results table.
df.head()

Unnamed: 0,m0,s0,m1,s1,m2,s2,m3,s3
0,101.265,0.77,107.099,0.79,108.223,0.77,112.872,0.63
1,95.718,0.78,109.221,0.68,114.563,0.6,97.189,0.89
2,93.895,0.91,93.716,0.92,114.871,0.57,112.525,0.64
3,100.264,0.8,110.76,0.75,111.418,0.74,112.325,0.71
4,102.921,0.79,112.345,0.64,113.038,0.68,111.542,0.74


In [8]:
# Fit an m=1 model at each minmap threshold and draw the resulting trees
# side by side, one panel per threshold (left to right, in list order).
thresholds = [0.5, 0.6, 0.7, 0.8, 0.9]

# a single-row canvas to plot the trees on
canvas = toyplot.Canvas(width=1200, height=200)

for panel, threshold in enumerate(thresholds):

    # init a treemix analysis object; no seed is passed to treemix here
    tmx = ipa.treemix(
        data=data,
        imap=imap,
        minmap=dict.fromkeys(imap, threshold),
        root="Inam,Puru",
        global_=True,
        m=1,
        quiet=True,
    )

    # run model fit
    tmx.run()

    # grid=(rows, columns, index): the column count follows the number of
    # thresholds so that no empty panel is left on the canvas
    axes = canvas.cartesian(grid=(1, len(thresholds), panel))
    tmx.draw_tree(axes)