### imports

In [1]:
import scipy.stats as st
import numpy as np
import scipy.integrate as integrate
import toyplot

import poolparty

## define a continuous pdf to sample from.

### make a cool-shaped recombination map with all values greater than 0.

In [2]:
# Evaluate the unscaled map on 1000 evenly spaced points over [0, 1] and plot it.
map_x = np.linspace(0, 1, num=1000)
map_y = 1 + np.cos(21 * map_x + np.sin(60 * map_x))  # cool equation here
toyplot.scatterplot(map_x, map_y, width=500, height=300);

### does it integrate to 1 (the pdf underlying our recombination map should...)?

In [3]:
# Area under the unscaled map on [0, 1]. quad returns (integral, error estimate);
# keep only the integral and display it as the cell's result.
unscaled_area = integrate.quad(lambda pos: 1 + np.cos(21 * pos + np.sin(60 * pos)), 0, 1)[0]
unscaled_area

1.01519239365713

### nope (no surprise there), so let's find a scalar that rescales it to integrate to 1.

In [4]:
# Normalising constant: the reciprocal of the area under the unscaled map on [0, 1].
area_before_scaling = integrate.quad(lambda pos: 1 + np.cos(21 * pos + np.sin(60 * pos)), 0, 1)[0]
scalar = 1 / area_before_scaling

# Sanity check, displayed as the cell's result: the rescaled map should integrate to 1.
integrate.quad(lambda pos: (1 + np.cos(21 * pos + np.sin(60 * pos))) * scalar, 0, 1)[0]

1.0

### now look at it scaled:

In [5]:
# Plot the map on 1000 evenly spaced points over [0, 1], multiplied by `scalar`
# so that the plotted curve is the normalised one.
scaled_x = np.linspace(0, 1, num=1000)
scaled_y = (1 + np.cos(21 * scaled_x + np.sin(60 * scaled_x))) * scalar  # cool equation here
toyplot.scatterplot(scaled_x, scaled_y, width=500, height=300);

### let's set this equation as our pdf.

In [6]:
class my_pdf(st.rv_continuous):
    """Continuous distribution shaped like the normalised recombination map.

    The density is ``(1 + cos(21*x + sin(60*x))) * scalar``, where ``scalar``
    is the notebook-level normalising constant computed over [0, 1].
    Instantiate with ``a=0, b=1`` so the support matches that interval.
    """

    def _pdf(self, x):
        # `scalar` rescales the curve so its area over [0, 1] is 1; it does not
        # limit the values that get drawn.
        unscaled = 1 + np.cos(21 * x + np.sin(60 * x))
        return unscaled * scalar

In [7]:
# Draw 10,000 positions from the map-shaped distribution on [0, 1].
# A fixed seed makes the draw, and every figure or number derived from it,
# repeatable across kernel restarts.
samps = my_pdf(a=0, b=1).rvs(size=10000, random_state=12345)
np.max(samps)  # largest sampled x-coordinate; expected to stay within the upper bound b=1

0.9999497888244988

In [8]:
# Bin the samples into 100 bins and draw the resulting (counts, edges) pair as bars.
sample_hist = np.histogram(samps, bins=100)
toyplot.bars(sample_hist, width=500, height=300);

## Now let's make an array of gametes!

In [2]:
def _unscaled_map(x):
    """Unnormalised recombination map, 1 + cos(21*x + sin(60*x))."""
    return 1 + np.cos(21 * x + np.sin(60 * x))


# Normalising constant: reciprocal of the map's area over [0, 1]. It is computed
# from the same function the pdf uses, rather than pasted from a printed value,
# so the two cannot drift apart if the equation is edited.
scalar = 1 / integrate.quad(_unscaled_map, 0, 1)[0]


class my_pdf(st.rv_continuous):
    """Continuous distribution shaped like the normalised recombination map.

    The density is ``_unscaled_map(x) * scalar``. Instantiate with
    ``a=0, b=1`` so the support matches the interval used to compute ``scalar``.
    """

    def _pdf(self, x):
        # `scalar` rescales the curve so its area over [0, 1] is 1; it does not
        # limit the values that get drawn.
        return _unscaled_map(x) * scalar

In [9]:
# Full-size run. num_gams and num_reads are written as products of the other
# settings so the arithmetic behind them stays visible.
# NOTE(review): the first argument is a machine-specific absolute path on an
# external volume, presumably the output location; confirm before re-running
# on another machine.
full_gpa = 20
full_nali = 500
full_ncutsites = 1000
full_coverage = 2  # multiplier applied to the 1x read count

sim_obj = poolparty.Sim_Gamete_Sequencing(
    '/Volumes/My Passport/poolparty_sims/20gpa_500nali_1000loci_20e6reads',
    pdf=my_pdf(a=0, b=1),
    num_gams=full_gpa * full_nali,
    gpa=full_gpa,
    nali=full_nali,
    ncutsites=full_ncutsites,
    # amount for 1x coverage at each locus, times coverage amount
    num_reads=(full_gpa * full_nali * full_ncutsites) * full_coverage,
)

In [10]:
# Run the full-size simulation configured above.
# NOTE: the recorded output shows this run was stopped with a KeyboardInterrupt
# after the "Sequencing gametes..." message was printed.
sim_obj.sim_gametes_and_sequencing()

Simulating gametes...
Sequencing gametes...


KeyboardInterrupt: 

In [3]:
# Small test run written to ./test/, with num_gams expressed as gpa * nali.
# NOTE(review): by the formula used for the full-size run
# (num_gams * ncutsites reads for 1x coverage) 1x here would be 50 * 100 = 5000
# reads, so 1000 reads is below 1x. Confirm this is intended for the test.
test_gpa = 5
test_nali = 10

sim_obj = poolparty.Sim_Gamete_Sequencing(
    './test/',
    pdf=my_pdf(a=0, b=1),
    num_gams=test_gpa * test_nali,
    gpa=test_gpa,
    nali=test_nali,
    ncutsites=100,
    num_reads=1000,
)

In [4]:
# Run the small test simulation configured above.
# The recorded output shows it progressing through gamete simulation,
# sequencing, demultiplexing and haplotype assignment.
sim_obj.sim_gametes_and_sequencing()

Simulating gametes...
Sequencing gametes...
Demultiplexing...


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Assigning haplotypes to loci -- this might take a while...


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


