In [1]:
from pathlib import Path

import numpy as np

from simulations.utils import random_copy_numbers
from simulations import dropletSimulation

In [2]:
base_relative_abundances = [1e-4, 1e-3, 1e-2]

relative_abundances = [relative_abundance * number
                       for relative_abundance 
                       in base_relative_abundances
                       for number in (1,2,5) 
                       for repeat in range(10)]

relative_abundances += [1-sum(relative_abundances)]
relative_abundances = np.array(relative_abundances)

In [3]:
results_dir_name = 'simulation_results'
number_droplets = 100000
number_simulations = 100
seed = 42
size = len(relative_abundances)
number_batches = 5
number_last_completed_simulation = 0

In [4]:
base_simulation_filename = results_dir_name + '/{}_strains.seed_{}.{}_droplets.iteration_{}.npz'

In [5]:
seed_sequence = np.random.SeedSequence(seed)
Path("./" + results_dir_name).mkdir(parents=True, exist_ok=True)

We can set `number_last_completed_simulation` to be the number of the last simulation (1-indexed) that was completed in the case that was disrupted.

In [6]:
if number_last_completed_simulation > 0:
    # have seed sequence go through `number_last_completed_simulation` spawn keys
    # so that next simulation starts at the correct spawn key,
    # namely the `number_last_completed_simulation + 1`th
    seed_sequence.spawn(number_last_completed_simulation)
    # not necessary though if we're not resuming a previously interrupted simulation

In [7]:
%%time
for simulation_number in range(number_last_completed_simulation, number_simulations):
    # Python is 0-indexed, so the `number_last_completed_simulation +1`th simulation
    # has the index `number_last_completed_simulation`
    simulation_seed = seed_sequence.spawn(1)[0]
    rng = np.random.default_rng(simulation_seed)
    
    A = 2*(rng.random((size,size)) - 0.5) # random numbers in (-1,1)
    A *= rng.integers(low=0, high=2, size=A.shape) # make interactions more sparse, so scientifically more interesting/plausible
    beta = rng.random(size) # random numbers in (0,1)

    simulation = dropletSimulation(number_species=size, number_droplets=number_droplets, 
                   number_batches=number_batches, copy_numbers=random_copy_numbers(size, rng), 
                   frequency_vector=relative_abundances, glv_interaction_coefficients=A, 
                   glv_baserate_coefficients=beta,  noise_scale=8, seed=simulation_seed,
                    timestep = 0.0001, batch_window=200, carrying_capacity=10000,
                    merging_error=.1, spikein_rate=300, pcr_noise=0.1) 
                    # NON-ZERO MERGING ERROR, plus PCR amplification errors generated
                    # Made PCR noise small b/c I don't really trust the log-normal model
                    # of differential PCR amplification, and don't want to swamp out important
                    # point that error from Poisson normalization part should not be very large

    simulation.run_simulation(number_processes=56)

    truth = simulation.glv_interaction_coefficients
    true_baserates = simulation.glv_baserate_coefficients
    copy_numbers = simulation.copy_numbers

    # making copies might be an over-abundance of caution but oh well
    true_cell_results = simulation.cells.counts.copy()
    raw_read_results = simulation.reads.counts.copy()
    pcr_errors = simulation.pcr_errors.counts
    merged_cells = simulation.create_merging_errors(simulation.cells.counts)
    merged_reads = simulation.create_merging_errors(simulation.reads.counts)
    
    # Save intermediate results in case there's a crash, so can resume progress by using the
    # `number_last_completed_simulation` variable defined at top of notebook above
    simulation_filename = base_simulation_filename.format(
        size,seed,number_droplets,simulation_number+1)
        
    np.savez_compressed(simulation_filename, 
                        truth=truth, true_baserates=true_baserates, copy_numbers=copy_numbers,
                        true_cell_results=true_cell_results, raw_read_results=raw_read_results,
                        pcr_errors=pcr_errors, 
                        merged_cells=merged_cells, merged_reads=merged_reads
                       )
    print('Computed and save results for simulation # {}.\n'.format(simulation_number))

Computed and save results for simulation # 0.

Computed and save results for simulation # 1.

Computed and save results for simulation # 2.

Computed and save results for simulation # 3.

Computed and save results for simulation # 4.

Computed and save results for simulation # 5.

Computed and save results for simulation # 6.

Computed and save results for simulation # 7.

Computed and save results for simulation # 8.

Computed and save results for simulation # 9.

Computed and save results for simulation # 10.

Computed and save results for simulation # 11.

Computed and save results for simulation # 12.

Computed and save results for simulation # 13.

Computed and save results for simulation # 14.

Computed and save results for simulation # 15.

Computed and save results for simulation # 16.

Computed and save results for simulation # 17.

Computed and save results for simulation # 18.

Computed and save results for simulation # 19.

Computed and save results for simulation # 20.

Co