In [8]:
import numpy as np
from numpy.random import exponential
import matplotlib.pyplot as plt
import pandas as pd

### Algorithm sketch

* Carriers appear de novo with rate `mu`*`rho`
* Carriers give birth (split) with rate `1-s`
* Carriers die at rate `1`
* Take samples at rate `r`

Implement via Gillespie algorithm. At each step:
1. Draw waiting time to next event
2. Update locations of living carriers
3. Select event type and apply it

Data structures:
* During simulation, store individual level-data (continuously updated)
* At each sample point update the SFS distribution (array w/ pre-specified length)
* Output SFS distribution

In [4]:
### helper functions ###

def time_to_next(k,s,mu,rho,r):
    return exponential(k*(1-s)+k+mu*rho+r)

def choose_event(k,s,mu,rho,r):
    tot = k*(1-s)+k+mu*rho+r
    event = np.random.choice(['b','d','m','s'],p=[(k*(1-s)/tot),(k/tot),(mu*rho/tot),(r/tot)])
    return event # 0=birth, 1=death

In [232]:
### function to run simulation ###

def run_sim_spatial(s,mu,rho,r,sigma,num_iter,max_ind,L=50,sfs_len=100):
    # initialize array for SFS distribution
    sfs_dist = np.zeros(sfs_len)
    # keep track of time steps
    counter = 1 #initialize at 1
    # keep track of individual level data
    # [alive/dead, x coord, y coord, time @ birth, time @ death]
    ind_data = np.zeros((max_ind, 5)) 
    # initialize current time at 0
    curr_time=0
    while counter <= num_iter:
        k = np.sum(ind_data[:,0]) # number of alive particles
        # draw time to next event 
        t_next = time_to_next(k,s,mu,rho,r)
        # update time
        curr_time += t_next
        # draw event type
        e_type = choose_event(k,s,mu,rho,r)
        
        ### update spatial coordinates
        indices = np.where(ind_data[:, 0] == 1)[0]
        if len(indices)>0:
            ind_data[indices, 1] += np.random.normal(loc=0, scale=0.2*t_next, size=len(indices)) # update x
            ind_data[indices, 2] += np.random.normal(loc=0, scale=0.2*t_next, size=len(indices)) # update y

        ### mutation
        if e_type == 'm':
            # find next empty row
            empty_row_indices = np.where(np.all(ind_data == 0, axis=1))[0]
            if len(empty_row_indices)>0: # check that there is a row available
                next_row = empty_row_indices[0] # choose the first available row
            else:
                print("ERROR: ran out of room in array!") # print error message and stop
                break
            # add row for new lineage at random location
            ind_data[next_row,:] = [1,np.random.uniform(0,L),np.random.uniform(0,L),curr_time,0]
            
        ### death
        elif e_type == 'd':
            ## choose individual who dies
            indices = np.where(ind_data[:, 0] == 1) # all alive individuals
            random_index = np.random.choice(indices[0]) # choose one at random
            ind_data[random_index,0] = 0 # mark first column to dead
            ind_data[random_index,4] = curr_time # mark time of death
            
        ### birth
        elif e_type == 'b':
            # find next empty row
            empty_row_indices = np.where(np.all(ind_data == 0, axis=1))[0]
            if len(empty_row_indices)>0: # check that there is a row available
                next_row = empty_row_indices[0] # choose the first available row
            else:
                print("ERROR: ran out of room in array!") # print error message and stop
                break
            # choose parent at random from alive individuals
            indices = np.where(ind_data[:, 0] == 1)
            random_index = np.random.choice(indices[0]) 
            # add row for new (split) lineage with location of parent
            ind_data[next_row,:] = [1,ind_data[random_index,1],ind_data[random_index,2],curr_time,0]

        ### sample NOTE: WILL WANT TO UPDATE TO SPATIAL SAMPLING
        ### currently counts number of extant lineages & updates SFS
        elif e_type == 's':
            if int(k)<sfs_len:
                sfs_dist[int(k)]+=1
                # print for debugging
                if k>0:
                    print(k)
            else:
                print("Error: SFS entry out of bounds")
            
        # update counter
        counter+=1
        
    return sfs_dist, ind_data


In [248]:
counts,df = run_sim_spatial(s=1e-2,mu=1e-4,rho=2,r=0.1,sigma=0.2,num_iter=1000,max_ind=1000,L=50,sfs_len=1000)

1.0
2.0


In [249]:
counts[0:10]

array([948.,   1.,   1.,   0.,   0.,   0.,   0.,   0.,   0.,   0.])

In [250]:
df

array([[ 0.        , 39.34718477,  7.05375915, 10.01159898, 15.31347995],
       [ 0.        , 41.8367739 ,  0.6946392 , 11.36879145, 45.6173186 ],
       [ 0.        , 45.76539137,  0.50247148, 25.27989839, 65.25709843],
       ...,
       [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ]])