# TreeMix analysis

In [1]:
## imports
import ipyrad.analysis as ipa
import toytree
import toyplot
import ipyrad as ip
import ipyparallel as ipp
import numpy as np

In [2]:
##print versions
print('ipyrad', ipa.__version__)
print('toytree', toytree.__version__)
! treemix --version | grep 'TreeMix v. '

ipyrad 0.9.84
toytree 2.0.5
TreeMix v. 1.13


In [4]:
# Attach to the ipyparallel cluster; it must already be running before this cell is executed.
ipyclient = ipp.Client()

# cluster_info() prints the connection summary itself and returns None (the
# previously saved output showed a stray "None" line), so it is called directly
# instead of being wrapped in print().
ip.cluster_info(ipyclient)


Parallel connection | Cristaria: 8 cores
None


In [5]:
## Path to the SNP database (.snps.hdf5) that is passed as `data` to the TreeMix analysis below.
## Nothing is read here; this cell only stores the path.
## NOTE(review): absolute, machine-specific path — adjust before running on another machine.
data = "/home/marianna/Documents/Phacelia/Phac_Assembly/min12_clust90.snps.hdf5"

### Group individuals into populations according to the STRUCTURE analysis


In [6]:
# Population map: population label -> sample IDs assigned to it, following the
# groups recovered by my first structure analysis.
imap = dict(
    sin=["W6368", "W6376"],
    pinn_peru=[
        "W5599", "W6027", "W6028", "W6037",
        "W6078", "W6024", "W5637", "W5636",
    ],
    nana=["W6373"],
    argent=["W5610", "W6374", "W6080", "W6369"],
    set_arg=["W6375", "W6031", "W6370"],
    set_ch=["W5145", "W5612"],
    set_pe=["W6001", "W6079", "W6029"],
)

## Require that 50% of the samples in each group have data: the same 0.5
## threshold is assigned to every population in imap.
minmap = dict.fromkeys(imap, 0.5)

### Finding the best value for m

Initialize a TreeMix analysis object with the chosen parameter arguments.

In [11]:
# Settings for the initial TreeMix run. A fixed seed is passed; root and m are
# forwarded to the treemix command line as -root and -m (see the printed
# command in the next cell).
treemix_settings = dict(
    data=data,
    imap=imap,
    minmap=minmap,
    seed=123456,
    root="sin",
    m=3,
)

# Build the analysis object; it prints the SNP filtering summary on creation.
tmx = ipa.treemix(**treemix_settings)

Samples: 23
Sites before filtering: 19359
Filtered (indels): 1020
Filtered (bi-allel): 210
Filtered (mincov): 0
Filtered (minmap): 5685
Filtered (subsample invariant): 6133
Filtered (minor allele frequency): 0
Filtered (combined): 10771
Sites after filtering: 8587
Sites containing missing values: 5548 (64.61%)
Missing values in SNP matrix: 13587 (6.88%)
SNPs (total): 8587
SNPs (unlinked): 4200
subsampled 4200 unlinked SNPs


In [12]:
# Show the exact treemix command line built from the tmx parameters (input file,
# output prefix, -m, -seed, -root), so the run can be checked before it starts.
print(tmx.command)
# Execute that command; later cells draw the tree and covariance matrix from this tmx object.
tmx.run()

/home/marianna/anaconda3/envs/treemix/bin/treemix -i /home/marianna/Documents/Phacelia/analysis-treemix/test.treemix.in.gz -o /home/marianna/Documents/Phacelia/analysis-treemix/test -m 3 -seed 123456 -root sin


In [19]:
# Draw the tree from the tmx object fitted above, without supplying axes.
# The trailing ';' suppresses the text repr of the return value in the cell output.
tmx.draw_tree();



In [23]:
# Canvas for a single-panel tree figure; this is the object exported to PDF in the next cell.
canvas = toyplot.Canvas(height=700, width=600)

# One cartesian area covering almost the whole canvas; bounds are (x1, x2, y1, y2).
ax0 = canvas.cartesian(bounds=("2%", "97%", "5%", "97.5%"))

# Draw the fitted tree onto that area instead of letting draw_tree() make its own.
tmx.draw_tree(axes=ax0);

In [24]:
# toyplot.pdf is imported here, at the point of use, rather than in the import cell.
import toyplot.pdf
# Write the current `canvas` (the single-panel tree drawn in the cell above) to a PDF file.
# NOTE(review): absolute, machine-specific output path; the Figures directory must already exist — confirm.
toyplot.pdf.render(canvas, "/home/marianna/Documents/Phacelia/Figures/Treemix_m3_noiteration_20221130.pdf");

In [25]:
# Draw the covariance matrix from the tmx object.
# NOTE(review): the drawing is displayed only; its return value is not assigned to
# any variable, so no later cell can export it by name.
tmx.draw_cov();

In [51]:
import toyplot.pdf
# NOTE(review): likely bug. `canvas` is never assigned from tmx.draw_cov() anywhere in
# this notebook; read top to bottom it is still the canvas holding the single-panel
# tree figure, so this file would contain that tree rather than the covariance
# matrix its name promises. Confirm what draw_cov() returns, keep a reference to
# that drawing, and render it here instead.
# The execution count of this cell is also out of sequence with its neighbours,
# so the saved PDF may have been produced from yet another canvas state.
toyplot.pdf.render(canvas, "/home/marianna/Documents/Phacelia/Figures/Treemix_covariancematrix_20221130.pdf");

In [8]:
# Numbers of admixture edges (m) to compare: 0 through 9.
nadmix = list(range(10))

# Refit the existing tmx object once per value of m and keep the likelihood
# reported by each fit, keyed by m.
# Note: tmx is modified in place, so after this loop it holds the m=9 fit.
tests = {}
for n_edges in nadmix:
    tmx.params.m = n_edges
    tmx.run()
    tests[n_edges] = tmx.results.llik

In [9]:
# Likelihood values in the same order as nadmix, used as the y-series.
llik_by_m = [tests[m] for m in nadmix]

# Line plot of likelihood against the number of admixture edges, for choosing m.
toyplot.plot(
    nadmix,
    llik_by_m,
    xlabel="n admixture edges",
    ylabel="ln(likelihood)",
    stroke_width=3,
    width=350,
    height=275,
);


### Iterate over different subsamples of SNPs 

In [15]:
# Canvas that will hold a 3 x 3 grid of trees, one panel per replicate run.
canvas = toyplot.Canvas(width=600, height=700)
grid_rows, grid_cols = 3, 3

# Settings shared by every replicate (m=2). No seed is passed, so each analysis
# object is created with a random seed and the replicates cover multiple sets
# of SNPs.
replicate_settings = dict(
    data=data,
    imap=imap,
    minmap=minmap,
    root="sin",
    global_=True,
    m=2,
    quiet=True,
)

for panel in range(grid_rows * grid_cols):

    # new analysis object for this replicate, then fit the model
    tmx = ipa.treemix(**replicate_settings)
    tmx.run()

    # give the replicate its own grid cell and draw its tree there
    axes = canvas.cartesian(grid=(grid_rows, grid_cols, panel))
    tmx.draw_tree(axes)



In [16]:
import toyplot.pdf
# Write the current `canvas` (the 3 x 3 grid of m=2 replicate trees built in the cell above) to a PDF file.
# NOTE(review): absolute, machine-specific output path.
toyplot.pdf.render(canvas, "/home/marianna/Documents/Phacelia/Figures/Treemix_m2_20221121.pdf");

In [17]:
# Canvas that will hold a 3 x 3 grid of trees, one panel per replicate run.
canvas = toyplot.Canvas(width=600, height=700)
grid_rows, grid_cols = 3, 3

# Settings shared by every replicate (m=3). No seed is passed, so each analysis
# object is created with a random seed and the replicates cover multiple sets
# of SNPs.
replicate_settings = dict(
    data=data,
    imap=imap,
    minmap=minmap,
    root="sin",
    global_=True,
    m=3,
    quiet=True,
)

for panel in range(grid_rows * grid_cols):

    # new analysis object for this replicate, then fit the model
    tmx = ipa.treemix(**replicate_settings)
    tmx.run()

    # give the replicate its own grid cell and draw its tree there
    axes = canvas.cartesian(grid=(grid_rows, grid_cols, panel))
    tmx.draw_tree(axes)

In [18]:
import toyplot.pdf
# Write the current `canvas` (the 3 x 3 grid of m=3 replicate trees built in the cell above) to a PDF file.
# NOTE(review): absolute, machine-specific output path.
toyplot.pdf.render(canvas, "/home/marianna/Documents/Phacelia/Figures/Treemix_m3_20221121.pdf");