## Setup

In [1]:
# Run-wide settings read by the cells below.
seed = 42                    # root entropy for the NumPy SeedSequence
number_droplets = 3_000_000  # same value as int(3e6)
number_batches = 5           # forwarded to dropletSimulation(number_batches=...)

In [2]:
from pathlib import Path

import numpy as np

from simulations.utils import random_copy_numbers
from simulations import dropletSimulation

In [3]:
# Derive a single child SeedSequence from the root seed. It seeds the
# notebook-level Generator here and is also passed to dropletSimulation(seed=...)
# in the simulation cell.
seed_sequence = np.random.SeedSequence(seed)
(simulation_seed,) = seed_sequence.spawn(1)
rng = np.random.default_rng(simulation_seed)

In [4]:
# Four base abundances one decade apart, each scaled by 1x, 2x and 5x, with
# every resulting value repeated 10 times (4 * 3 * 10 = 120 entries).
base_relative_abundances = [1e-5, 1e-4, 1e-3, 1e-2]

relative_abundances = [
    base * multiplier
    for base in base_relative_abundances
    for multiplier in (1, 2, 5)
    for _ in range(10)
]

# One final entry takes whatever mass is left so the vector sums to 1.
relative_abundances.append(1 - sum(relative_abundances))
relative_abundances = np.array(relative_abundances)

In [5]:
# One row/column of A and one entry of beta per abundance entry.
size = relative_abundances.shape[0]
# Interaction matrix: uniform draws on [0, 1) shifted and scaled to [-1, 1).
A = (rng.random((size, size)) - 0.5) * 2
# Multiply element-wise by random 0/1 draws to make interactions more sparse,
# so scientifically more interesting/plausible.
A *= rng.integers(0, 2, size=(size, size))
# Base-rate vector: uniform draws on [0, 1).
beta = rng.random(size)

In [6]:
# Build the output-path template. The doubled braces survive .format() as a
# single '{}' placeholder, which is filled in later with the file's label.
results_dir_name = 'simulation_results'
base_simulation_filename = (
    results_dir_name
    + '/{{}}.{}_strains.seed_{}.{}_droplets.npz'.format(size, seed, number_droplets)
)
print(base_simulation_filename)

simulation_results/{}.121_strains.seed_42.3000000_droplets.npz


## Run Simulation

In [7]:
%%time
# Configure the droplet simulation. The remarks next to merging_error and
# pcr_noise are the author's rationale for those settings.
simulation = dropletSimulation(
    number_species=size,
    number_droplets=number_droplets,
    number_batches=number_batches,
    copy_numbers=random_copy_numbers(size, rng),
    frequency_vector=relative_abundances,
    glv_interaction_coefficients=A,
    glv_baserate_coefficients=beta,
    noise_scale=8,
    # NOTE(review): this same SeedSequence also seeded `rng` in the setup
    # cells -- confirm dropletSimulation derives streams that do not overlap.
    seed=simulation_seed,
    timestep=0.0001,
    batch_window=200,
    carrying_capacity=10000,
    # NON-ZERO MERGING ERROR, plus PCR amplification errors generated.
    merging_error=.1,
    spikein_rate=300,
    # PCR noise is kept small because the author does not really trust the
    # log-normal model of differential PCR amplification, and does not want
    # it to swamp out the more important point that the error from the
    # Poisson normalization part should not be very large.
    pcr_noise=0.1,
)

# Run the simulation; results are written under results_dir_name.
simulation.run_simulation(
    number_processes=56,
    maxtasksperchild=10,
    chunksize=100,
    results_dir_name=results_dir_name,
    large_batches=True,
)

CPU times: user 55min 43s, sys: 8h 31min 14s, total: 9h 26min 58s
Wall time: 10h 19min 12s


## Wrap-Up

In [8]:
# Collect the ground-truth parameters held by the simulation object and save
# them together in one compressed .npz archive.
truth = simulation.glv_interaction_coefficients
true_baserates = simulation.glv_baserate_coefficients
copy_numbers = simulation.copy_numbers

simulation_filename = base_simulation_filename.format('simulation_info')

# np.savez_compressed does not create missing parent directories. Make sure the
# results directory exists so the long simulation run above cannot end in a
# FileNotFoundError at the very last step.
Path(simulation_filename).parent.mkdir(parents=True, exist_ok=True)

np.savez_compressed(
    simulation_filename,
    truth=truth,
    true_baserates=true_baserates,
    copy_numbers=copy_numbers,
)