
# STRUCTURE

In [1]:
import ipyrad.analysis as ipa
import toyplot
import ipyparallel as ipp

In [2]:
## look for running ipcluster instance, and create load-balancer
ipyclient = ipp.Client()
print("{} engines found".format(len(ipyclient)))

24 engines found


In [3]:
# the path to your HDF5 formatted snps file
data = "./T_ae_8rm_v9.snps.hdf5"

In [4]:
imap = {
#"ref": ["reference"],
"Inam": ["T_ae_T14205_In", "T_ae_A7968_In", "T_ae_A8087_In", "T_ae_28095_In", "T_ae_75520_In"],
"Puru": ["T_ae_T12313_pu", "T_ae_T13219_pu", "T_ae_A3559_pu", "T_ae_A439_pu", "T_ae_82491_pu", "T_ae_82606_pu", "T_ae_82608_pu", "T_ae_A21562_pu", "T_ae_A2720_pu", "T_ae_A2833_pu"], 
"JiGu": ["T_ae_T3376_jigu"],
#"JiGu": ["T_ae_T3376_jigu","T_ae_T3237_jigu", "T_ae_T3260_jigu", "T_ae_A317_jigu", "T_ae_A324_jigu"],
#"Mach": ["T_ae_T13278_ma", "T_ae_A458_ma", "T_ae_T2166_ma", "T_ae_T2207_ma", "T_ae_T4355_ma", "T_ae_A509_ma", "T_ae_J249_ma", "T_ae_J252_ma", "T_ae_J261_ma", "T_ae_J298_ma"],
#"Roar": ["T_ae_J319_roar", "T_ae_J419_roar", "T_ae_J678_roar", "T_ae_J683_roar"],
#"ArSu": ["T_ae_80508_arsu", "T_ae_80716_arsu", "T_ae_81278_arsu",   "T_ae_A522_ma",  "T_ae_86147_arsu", "T_ae_86229_arsu",  "T_ae_J598_arsu", "T_ae_J616_arsu","T_ae_86569_arsu","T_ae_81338_arsu"],#"T_ae_81290_arsu",
#"SuTa": ["T_ae_T14601_suta", "T_ae_T16625_suta", "T_ae_T16704_suta", "T_ae_85274_suta", "T_ae_A15069_suta", "T_ae_T742_suta", "T_ae_T10229_suta", "T_ae_T19436_suta", "T_ae_A16041_suta","T_ae_T7941_suta","T_ae_T24591_suta"],
#"Para": ["T_ae_A15279_pa", "T_ae_T10679_pa", "T_ae_T13575_pa", "T_ae_T17307_pa", "T_ae_T19420_pa", "T_ae_T8268_pa"]
}

# minimum % of samples that must be present in each SNP from each group
minmap = {i: 0.5 for i in imap}

In [5]:
# init analysis object with input data and (optional) parameter options
struct = ipa.structure(
    name="T_ae_str_west",
    data=data,
    imap=imap,
    minmap=minmap,
    mincov=0.95,
)

Samples: 16
Sites before filtering: 1641207
Filtered (indels): 0
Filtered (bi-allel): 14881
Filtered (mincov): 1622509
Filtered (minmap): 1622509
Filtered (combined): 1622761
Sites after filtering: 18446
Sites containing missing values: 0 (0.00%)
Missing values in SNP matrix: 0 (0.00%)


In [6]:
struct.mainparams.burnin = 50000
struct.mainparams.numreps = 200000
struct.write_structure_files(abs)

('/array1/lmusher/rio_roosevelt_outfiles/T_ae_8rm_v9_outfiles/analysis-structure/tmp-T_ae_str_west-<built-in function abs>-1.mainparams.txt',
 '/array1/lmusher/rio_roosevelt_outfiles/T_ae_8rm_v9_outfiles/analysis-structure/tmp-T_ae_str_west-<built-in function abs>-1.extraparams.txt',
 '/array1/lmusher/rio_roosevelt_outfiles/T_ae_8rm_v9_outfiles/analysis-structure/tmp-T_ae_str_west-<built-in function abs>-1.strfile.txt')

In [7]:
struct.run(nreps=10, kpop=[2, 3, 4, 5, 6, 7, 8], ipyclient=ipyclient, force=True)

Parallel connection | amnh-gen-001.internal.amnh.org: 24 cores
[####################] 100% 2:33:21 | running 70 structure jobs 


In [8]:
struct = ipa.structure(
    data=data, 
    name="T_ae_str_west", 
    workdir="analysis-structure",
    imap=imap,
    load_only=True,
)

70 previous results loaded for run [T_ae_str_west]


In [9]:
etable = struct.get_evanno_table([2, 3, 4, 5, 6, 7, 8])
etable

Unnamed: 0,Nreps,deltaK,estLnProbMean,estLnProbStdev,lnPK,lnPPK
2,10,0.0,-4713.87,165.752,0.0,0.0
3,10,4.535,-4743.85,138.641,-29.98,628.77
4,10,1.118,-5402.6,785.396,-658.75,877.7
5,10,0.119,-5183.65,470.216,218.95,56.1
6,10,2.706,-5020.8,184.325,162.85,498.7
7,10,0.185,-5356.65,774.483,-335.85,143.13
8,10,0.0,-5549.37,884.737,-192.72,0.0


In [10]:
# get canvas object and set size
canvas = toyplot.Canvas(width=400, height=300)

# plot the mean log probability of the models in red
axes = canvas.cartesian(ylabel="estLnProbMean")
axes.plot(etable.estLnProbMean * -1, color="darkred", marker="o")
axes.y.spine.style = {"stroke": "darkred"}

# plot delta K with its own scale bar of left side and in blue
axes = axes.share("x", ylabel="deltaK", ymax=etable.deltaK.max() + etable.deltaK.max() * .25)
axes.plot(etable.deltaK, color="steelblue", marker="o");
axes.y.spine.style = {"stroke": "steelblue"}

# set x labels
axes.x.ticks.locator = toyplot.locator.Explicit(range(len(etable.index)), etable.index)
axes.x.label.text = "K (N ancestral populations)"

In [11]:
k = 2
table = struct.get_clumpp_table(k)

[K2] 10/10 results permuted across replicates (max_var=0).


In [12]:
# sort list by columns
table.sort_values(by=list(range(k)), inplace=True)

# or, sort by a list of names (here taken from imap)
import itertools
onames = list(itertools.chain(*imap.values()))
table = table.loc[onames]

In [13]:
# build barplot
canvas = toyplot.Canvas(width=500, height=250)
axes = canvas.cartesian(bounds=("10%", "90%", "10%", "45%"))
axes.bars(table)

# add labels to x-axis
ticklabels = [i for i in table.index.tolist()]
axes.x.ticks.locator = toyplot.locator.Explicit(labels=ticklabels)
axes.x.ticks.labels.angle = -60
axes.x.ticks.show = True
axes.x.ticks.labels.offset = 10
axes.x.ticks.labels.style = {"font-size": "12px"}

In [14]:
k = 3
table = struct.get_clumpp_table(k)

[K3] 10/10 results permuted across replicates (max_var=0).


In [15]:
# sort list by columns
table.sort_values(by=list(range(k)), inplace=True)

# or, sort by a list of names (here taken from imap)
import itertools
onames = list(itertools.chain(*imap.values()))
table = table.loc[onames]

In [16]:
# build barplot
canvas = toyplot.Canvas(width=500, height=250)
axes = canvas.cartesian(bounds=("10%", "90%", "10%", "45%"))
axes.bars(table)

# add labels to x-axis
ticklabels = [i for i in table.index.tolist()]
axes.x.ticks.locator = toyplot.locator.Explicit(labels=ticklabels)
axes.x.ticks.labels.angle = -60
axes.x.ticks.show = True
axes.x.ticks.labels.offset = 10
axes.x.ticks.labels.style = {"font-size": "12px"}

In [17]:
k = 4
table = struct.get_clumpp_table(k)

[K4] 10/10 results permuted across replicates (max_var=0).


In [18]:
# sort list by columns
table.sort_values(by=list(range(k)), inplace=True)

# or, sort by a list of names (here taken from imap)
import itertools
onames = list(itertools.chain(*imap.values()))
table = table.loc[onames]

In [19]:
# build barplot
canvas = toyplot.Canvas(width=500, height=250)
axes = canvas.cartesian(bounds=("10%", "90%", "10%", "45%"))
axes.bars(table)

# add labels to x-axis
ticklabels = [i for i in table.index.tolist()]
axes.x.ticks.locator = toyplot.locator.Explicit(labels=ticklabels)
axes.x.ticks.labels.angle = -60
axes.x.ticks.show = True
axes.x.ticks.labels.offset = 10
axes.x.ticks.labels.style = {"font-size": "12px"}

In [20]:
k = 5
table = struct.get_clumpp_table(k)

[K5] 10/10 results permuted across replicates (max_var=0).


In [21]:
# sort list by columns
table.sort_values(by=list(range(k)), inplace=True)

# or, sort by a list of names (here taken from imap)
import itertools
onames = list(itertools.chain(*imap.values()))
table = table.loc[onames]

In [22]:
# build barplot
canvas = toyplot.Canvas(width=500, height=250)
axes = canvas.cartesian(bounds=("10%", "90%", "10%", "45%"))
axes.bars(table)

# add labels to x-axis
ticklabels = [i for i in table.index.tolist()]
axes.x.ticks.locator = toyplot.locator.Explicit(labels=ticklabels)
axes.x.ticks.labels.angle = -60
axes.x.ticks.show = True
axes.x.ticks.labels.offset = 10
axes.x.ticks.labels.style = {"font-size": "12px"}

In [23]:
k = 7
table = struct.get_clumpp_table(k)

[K7] 10/10 results permuted across replicates (max_var=0).


In [24]:
# sort list by columns
table.sort_values(by=list(range(k)), inplace=True)

# or, sort by a list of names (here taken from imap)
import itertools
onames = list(itertools.chain(*imap.values()))
table = table.loc[onames]

In [25]:
# build barplot
canvas = toyplot.Canvas(width=500, height=250)
axes = canvas.cartesian(bounds=("10%", "90%", "10%", "45%"))
axes.bars(table)

# add labels to x-axis
ticklabels = [i for i in table.index.tolist()]
axes.x.ticks.locator = toyplot.locator.Explicit(labels=ticklabels)
axes.x.ticks.labels.angle = -60
axes.x.ticks.show = True
axes.x.ticks.labels.offset = 10
axes.x.ticks.labels.style = {"font-size": "12px"}