In [7]:
import pymc
import numpy as np
import pandas as pd

In [8]:
# Some data
n = 5 * np.ones(4, dtype=int)
x = np.array([-.86, -.3, -.05, .73])

# Priors on unknown parameters
alpha = pymc.Normal('alpha', mu=0, tau=.01)
beta = pymc.Normal('beta', mu=0, tau=.01)

# Arbitrary deterministic function of parameters
@pymc.deterministic
def theta(a=alpha, b=beta):
    """theta = logit^{-1}(a+b)"""
    return pymc.invlogit(a + b * x)

# Binomial likelihood for data
d = pymc.Binomial('d', n=n, p=theta, value=np.array([0., 1., 3., 5.]),
                  observed=True)

In [48]:
n_ind = 30
n_loci = 5
depth = 15
ploidy = 4

epsilon = 0.01
allele_freqs = np.random.random(n_loci)
allele_freqs

array([ 0.41867446,  0.48266555,  0.30948485,  0.08973674,  0.10687713])

In [51]:
def generate_genotype(afreq, ploidy):
    genotype = np.sum(np.random.random(ploidy) < afreq)
    return(genotype)

In [52]:
def generate_genotype_df(allele_freqs, n_ind, ploidy):
    my_df = pd.DataFrame([[generate_genotype(afreq, ploidy) for ind in range(n_ind)] for afreq in allele_freqs]).T
    return(my_df)

In [57]:
sim_genotypes = generate_genotype_df(allele_freqs, n_ind = n_ind, ploidy = ploidy)
sim_genotypes.head()

Unnamed: 0,0,1,2,3,4
0,3,2,2,0,1
1,0,3,1,1,0
2,2,1,1,0,0
3,4,2,0,0,0
4,1,1,1,0,0


## simulate read data

In [54]:
mean_depth = 15
# use negative binomial to simulate read data
def nb(n):
    return(np.random.negative_binomial(n=n, p=.5))

depth_df = sim_genotypes.copy()
depth_df[depth_df>-10] = mean_depth
depth_df=depth_df.applymap(nb)
depth_df.head()

Unnamed: 0,0,1,2,3,4
0,14,16,12,23,18
1,12,10,20,19,20
2,31,25,12,10,18
3,25,8,24,12,10
4,15,14,15,21,17


In [55]:
depth_df.dtypes

0    int64
1    int64
2    int64
3    int64
4    int64
dtype: object

In [58]:
a1_depth = pd.DataFrame(np.random.binomial(n=depth_df.astype(np.int32), p=sim_genotypes/ploidy))
a1_depth.head()

Unnamed: 0,0,1,2,3,4
0,11,8,7,0,3
1,0,8,4,5,0
2,17,6,4,0,0
3,25,4,0,0,0
4,3,2,5,0,0


In [None]:
a1

In [None]:
# Some data
n = 5 * np.ones(4, dtype=int)
x = np.array([-.86, -.3, -.05, .73])

# Priors on unknown parameters
alpha = pymc.Normal('alpha', mu=0, tau=.01)
beta = pymc.Normal('beta', mu=0, tau=.01)

# Arbitrary deterministic function of parameters
@pymc.deterministic
def theta(a=alpha, b=beta):
    """theta = logit^{-1}(a+b)"""
    return pymc.invlogit(a + b * x)

# Binomial likelihood for data
d = pymc.Binomial('d', n=n, p=theta, value=np.array([0., 1., 3., 5.]),
                  observed=True)