In [2]:
import ipyrad.analysis as ipa
import toyplot
import pandas as pd

In [3]:
# HDF5 SNP file that is later handed to ipa.structure() as its `data` input.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
data = (
    "/home/iovercast/hoplo_assembly/Hoplo-PE_outfiles/"
    "Hoplonew.snps.hdf5"
)

In [4]:
# Load the per-sample metadata table, indexed by the "Seq ID" column.
metadata = pd.read_csv("Hoplo_metadata_FIXED.csv", index_col="Seq ID")

# Alternative exclusion list (currently disabled):
#metadata= metadata.drop (['HOOC0024', 'HOOC0039', 'HOOC0047'])

# Samples removed from the metadata before the population map is built.
excluded_samples = [
    'HOOC0005', 'HOOC0006', 'HOOC0014', 'HOOC0015', 'HOOC0021',
    'HOOC0022', 'HOOC0023', 'HOOC0029', 'HOOC0030', 'HOOC0038',
    'HOOC0040', 'HOOC0046', 'HOOC0053', 'HOOC0054', 'HOOC0056',
    'HOOC0057', 'HOOC0062', 'HOOC0084', 'HOOC0085', 'HOOC0086',
    'HOOC0087', 'HOOC0088', 'HOOC0089', 'HOOC0090', 'HOOC0091',
    'HOOC0093',
]
metadata = metadata.drop(excluded_samples)

# Population map: vegetation zone -> list of sample IDs, in the fixed zone
# order given below (this order is also the insertion order of the dict).
# NOTE(review): "Giunea savanna" looks like a misspelling of "Guinea savanna",
# but the key must match the value stored in the CSV, so it is left as-is.
zones = ["Mangrove Forest", "Rainforest", "Derived savanna", "Giunea savanna"]
zone_groups = metadata.groupby("Vegetation zone").groups
imap = {zone: zone_groups[zone].tolist() for zone in zones}

# require that 50% of the samples in each group have data
minmap = {zone: 0.5 for zone in imap}

# display the population map
imap


{'Mangrove Forest': ['HOOC0001',
  'HOOC0002',
  'HOOC0003',
  'HOOC0009',
  'HOOC0010',
  'HOOC0011',
  'HOOC0017',
  'HOOC0018',
  'HOOC0019',
  'HOOC0025',
  'HOOC0026',
  'HOOC0027',
  'HOOC0033',
  'HOOC0034',
  'HOOC0035',
  'HOOC0041',
  'HOOC0042',
  'HOOC0049',
  'HOOC0050',
  'HOOC0058',
  'HOOC0065',
  'HOOC0066',
  'HOOC0073',
  'HOOC0074',
  'HOOC0081',
  'HOOC0082'],
 'Rainforest': ['HOOC0004',
  'HOOC0012',
  'HOOC0013',
  'HOOC0020',
  'HOOC0028',
  'HOOC0036',
  'HOOC0037',
  'HOOC0043',
  'HOOC0044',
  'HOOC0045',
  'HOOC0051',
  'HOOC0052',
  'HOOC0059',
  'HOOC0060',
  'HOOC0061',
  'HOOC0067',
  'HOOC0068',
  'HOOC0069',
  'HOOC0075',
  'HOOC0076',
  'HOOC0077',
  'HOOC0083',
  'HOOC0092'],
 'Derived savanna': ['HOOC0007',
  'HOOC0008',
  'HOOC0016',
  'HOOC0031',
  'HOOC0055',
  'HOOC0063',
  'HOOC0070',
  'HOOC0071',
  'HOOC0078',
  'HOOC0079',
  'HOOC0094',
  'HOOC0095'],
 'Giunea savanna': ['HOOC0032',
  'HOOC0048',
  'HOOC0064',
  'HOOC0072',
  'HOOC0080',
  '

In [9]:
# Create the ipyrad STRUCTURE analysis object from the SNP file, the
# population map and the filtering options. The `name` identifies this
# analysis; the cell output reports previous results being loaded for it.
struct = ipa.structure(
    # run identity and input
    name="denovohoplostructurePE",
    data=data,
    # population grouping and the per-group minimum data proportion
    imap=imap,
    minmap=minmap,
    # NOTE(review): presumably the minimum proportion of all samples that
    # must have data at a site ("Filtered (mincov)" in the output) — confirm.
    mincov=0.9,
)

12 previous results loaded for run [denovohoplostructurePE]
Samples: 67
Sites before filtering: 934813
Filtered (indels): 56284
Filtered (bi-allel): 88837
Filtered (mincov): 725190
Filtered (minmap): 566019
Filtered (subsample invariant): 222203
Filtered (minor allele frequency): 0
Filtered (combined): 593705
Sites after filtering: 180651
Sites containing missing values: 134413 (74.40%)
Missing values in SNP matrix: 389885 (3.22%)
SNPs (total): 180651
SNPs (unlinked): 54199


In [10]:
# Set the two run-length parameters on the analysis object's main params.
# NOTE(review): presumably these map to STRUCTURE's BURNIN and NUMREPS
# settings (burn-in length and number of sampled iterations) — confirm.
mainparams = struct.mainparams
mainparams.burnin = 5000
mainparams.numreps = 10000

In [11]:
# Request 12 cores in the cluster settings, then display the full settings dict.
struct.ipcluster["cores"] = 12
struct.ipcluster

{'cluster_id': '',
 'profile': 'default',
 'engines': 'Local',
 'quiet': 0,
 'timeout': 60,
 'cores': 12,
 'threads': 2,
 'pids': {}}

In [14]:
# Run 3 replicates for each K in 2..5 (4 x 3 = 12 jobs, matching the
# "12 finished jobs" message in the cell output).
# NOTE(review): auto=True presumably lets ipyrad start and manage the
# parallel cluster itself — confirm against the ipyrad documentation.
struct.run(nreps=3, kpop=[2, 3, 4, 5], auto=True)

12 finished jobs. No further jobs to run.


In [15]:
# Build the Evanno summary table for K = 2..5 from the finished replicates
# (columns include Nreps, estLnProbMean and deltaK) and display it.
etable = struct.get_evanno_table([2, 3, 4, 5])
etable

  tab.loc[kpop, "lnPK"] = tab.loc[kpop, "estLnProbMean"] \
  tab.loc[kpop, "lnPPK"] = abs(tab.loc[kpop+1, "lnPK"]
  tab.loc[kpop, "deltaK"] = (abs(


Unnamed: 0,Nreps,lnPK,lnPPK,deltaK,estLnProbMean,estLnProbStdev
2,3,0.0,0.0,0.0,-130700000.0,108600000.0
3,3,104600000.0,82790000.0,1.944,-26080000.0,42600000.0
4,3,21840000.0,30000000.0,56.295,-4247000.0,532800.0
5,3,-8159000.0,0.0,0.0,-12410000.0,18650000.0


In [30]:
# Canvas for the Evanno summary figure.
canvas = toyplot.Canvas(width=400, height=300)

# First y-axis (dark red): mean log probability of each K, multiplied by -1.
# NOTE(review): the axis label reads "estLnProbMean" although the plotted
# values are negated — confirm the label is as intended.
lnprob_axes = canvas.cartesian(ylabel="estLnProbMean")
lnprob_axes.plot(etable.estLnProbMean * -1, color="darkred", marker="o")
lnprob_axes.y.spine.style = {"stroke": "darkred"}

# Second y-axis (steel blue) sharing the same x-axis: deltaK, with 25% of
# headroom above its largest value.
delta_k_peak = etable.deltaK.max()
axes = lnprob_axes.share("x", ylabel="deltaK", ymax=delta_k_peak + delta_k_peak * .25)
axes.plot(etable.deltaK, color="steelblue", marker="o")
axes.y.spine.style = {"stroke": "steelblue"}

# Put one x tick per table row and label it with the K value from the index.
axes.x.ticks.locator = toyplot.locator.Explicit(range(len(etable.index)), etable.index)
axes.x.label.text = "K (N ancestral populations)"

In [26]:
# Number of ancestral populations (K) to summarise and plot.
# NOTE(review): in the Evanno table deltaK is largest at K=4 (56.295), not at
# K=3 (1.944) — confirm that K=3 is the intended choice here.
k = 3
# Combine the replicate runs for this K into a single table (the output
# reports "[K3] 3/3 results permuted across replicates").
table = struct.get_clumpp_table(k)

[K3] 3/3 results permuted across replicates (max_var=0).


  table = pd.read_csv(ofile, delim_whitespace=True, header=None)


In [27]:
import itertools

# Order the rows (samples) by population: flatten the sample lists in `imap`
# (in the dict's insertion order) and re-index the table with them, so that
# samples of the same vegetation zone sit next to each other.
# No sort on the ancestry columns is done beforehand: `.loc[onames]` fully
# determines the row order, so any earlier sort would be discarded.
onames = list(itertools.chain.from_iterable(imap.values()))
table = table.loc[onames]

In [28]:
# Build the barplot of the table (one bar position per row / sample).
# A dedicated canvas name is used on purpose: the PNG export cell renders
# `canvas`, and its file name refers to the K graph (Evanno plot). Rebinding
# `canvas` here would make a top-to-bottom run export this barplot under the
# K-graph file name instead.
barplot_canvas = toyplot.Canvas(width=1000, height=250)
axes = barplot_canvas.cartesian(bounds=("10%", "90%", "10%", "45%"))
axes.bars(table)

# Label the x ticks with the sample names from the table index, rotated by
# -60 degrees and offset from the axis.
ticklabels = table.index.tolist()
axes.x.ticks.locator = toyplot.locator.Explicit(labels=ticklabels)
axes.x.ticks.labels.angle = -60
axes.x.ticks.show = True
axes.x.ticks.labels.offset = 10
axes.x.ticks.labels.style = {"font-size": "12px"}

In [31]:
# Export a toyplot canvas to a PNG file in the working directory.
# NOTE(review): `canvas` is whichever Canvas was bound to that name most
# recently; the file name suggests the Evanno K plot is intended — confirm
# that is the canvas in scope when this cell runs.
import toyplot.png
toyplot.png.render(canvas,'ddRADstructurePE_K_graph.png')