In [None]:
# Run parameters for the simulation sweep.
# NOTE(review): the zero/empty values look like placeholders that are presumably
# overridden externally before a real run (e.g. by a notebook parameterization tool)
# -- TODO confirm.

# --- reproducibility / resuming ---
# Entropy fed to `np.random.SeedSequence`; each simulation gets its own spawned child seed.
seed = 0
# Number (1-indexed) of the last simulation that finished in a previous, interrupted run;
# leave at 0 to start from the first simulation.
number_last_completed_simulation = 0

# --- sweep size ---
# Upper bound of the simulation loop (total number of simulations to have on disk).
number_simulations = 0

# --- per-simulation settings ---
# Number of species: passed as `number_species`, and the dimension of the interaction matrix.
size = 0
# Frequency assigned to every species except the last, which receives the remainder so
# the vector sums to 1. Also appears (as `100*pct`) in the results directory name.
pct = 0.0
# Forwarded unchanged to `dropletSimulation`.
number_droplets = 0
number_batches = 0

# --- output location ---
# Sub-directory (under `pct<100*pct>/`) that receives the `.npz` result files.
results_dir_base_name = ''

In [None]:
from itertools import product
from pathlib import Path

import numpy as np

from simulations.utils import random_copy_numbers
from simulations import dropletSimulation

In [None]:
# Results are written under `pct<100*pct>/<results_dir_base_name>`.
# Keep `results_dir_name` free of a trailing `/`: the filename template below already
# starts with the separator. (If you do make it end in `/`, drop the leading `/` from
# the template.)
# NOTE(review): `100*pct` is stringified as-is, so with a float `pct` the directory name
# carries float formatting (e.g. a trailing `.0`) -- keep that in mind when locating results.
percent_label = str(100*pct)
results_dir_name = 'pct{}/{}'.format(percent_label, results_dir_base_name)

# Placeholders are filled per simulation with, in order:
# number of species, seed, number of droplets, 1-indexed simulation number.
filename_template = '/{}_strains.seed_{}.{}_droplets.iteration_{}.npz'
base_simulation_filename = results_dir_name + filename_template

In [None]:
# Root of the seed hierarchy: every simulation draws its own child seed from this
# sequence via `spawn`, so each run is reproducible from `seed` alone.
seed_sequence = np.random.SeedSequence(seed)

# Make sure the output directory exists (creating missing parents; no error if it is
# already there, e.g. when resuming an interrupted run).
results_dir_path = Path("./" + results_dir_name)
results_dir_path.mkdir(parents=True, exist_ok=True)

If a previous run was interrupted, set `number_last_completed_simulation` to the number (1-indexed) of the last simulation that completed, so that this run resumes from the next simulation.

In [None]:
# Only relevant when resuming a previously interrupted run; a fresh run skips this.
resuming_interrupted_run = number_last_completed_simulation > 0
if resuming_interrupted_run:
    # Burn through the child seeds that the already-completed simulations consumed.
    # The spawned children are deliberately discarded: the call is made only to advance
    # the sequence's internal spawn counter, so that the next `spawn` hands out the seed
    # belonging to simulation number `number_last_completed_simulation + 1`.
    seed_sequence.spawn(number_last_completed_simulation)

In [None]:
%%time
# Run every simulation that has not been completed yet and save each one's results to its
# own compressed `.npz` file, so that an interrupted run can be resumed.
for simulation_number in range(number_last_completed_simulation, number_simulations):
    # Python is 0-indexed, so the `number_last_completed_simulation + 1`th simulation
    # has the index `number_last_completed_simulation`
    simulation_seed = seed_sequence.spawn(1)[0]
    rng = np.random.default_rng(simulation_seed)

    # Every species gets frequency `pct`, except the last one, which absorbs the
    # remainder so that the frequencies sum to 1.
    frequency_vector = pct*np.ones(size)
    frequency_vector[-1] = (1 - np.sum(frequency_vector[:-1]))
    # Compare with a tolerance: the sum is computed in floating point, so an exact
    # `== 1` check can fail from rounding alone even when the vector is valid.
    assert np.isclose(np.sum(frequency_vector), 1), \
        'frequency_vector must sum to 1, got {}'.format(np.sum(frequency_vector))

    # Interaction coefficients drawn uniformly from [-1, 1) ...
    A = 2*(rng.random((size,size)) - 0.5)
    # ... then multiplied entrywise by a random 0/1 mask
    A *= rng.integers(low=0, high=2, size=A.shape) # make interactions more sparse, so scientifically more interesting/plausible
    # Base rates drawn uniformly from [0, 1)
    beta = rng.random(size)

    # NOTE: `random_copy_numbers(size, rng)` is evaluated here, i.e. after `A` and `beta`
    # were drawn; keep that order so results stay reproducible for a given seed.
    simulation = dropletSimulation(number_species=size, number_droplets=number_droplets, 
                   number_batches=number_batches, copy_numbers=random_copy_numbers(size, rng), 
                   frequency_vector=frequency_vector, glv_interaction_coefficients=A, 
                   glv_baserate_coefficients=beta,  noise_scale=8, seed=simulation_seed,
                    timestep = 0.0001, batch_window=200, carrying_capacity=10000,
                    merging_error=.1, spikein_rate=300, pcr_noise=0.1) 
                    # NON-ZERO MERGING ERROR, plus PCR amplification errors generated
                    # Made PCR noise small b/c I don't really trust the log-normal model
                    # of differential PCR amplification, and don't want to swamp out important
                    # point that error from Poisson normalization part should not be very large

    simulation.run_simulation(number_processes=10)

    # Ground-truth parameters, read back from the simulation object
    truth = simulation.glv_interaction_coefficients
    true_baserates = simulation.glv_baserate_coefficients
    copy_numbers = simulation.copy_numbers

    # making copies might be an over-abundance of caution but oh well
    true_cell_results = simulation.cells.counts.copy()
    raw_read_results = simulation.reads.counts.copy()
    pcr_errors = simulation.pcr_errors.counts
    merged_cells = simulation.create_merging_errors(simulation.cells.counts)
    merged_reads = simulation.create_merging_errors(simulation.reads.counts)
    
    # Save intermediate results in case there's a crash, so can resume progress by using the
    # `number_last_completed_simulation` variable defined at top of notebook above
    completed_simulation_number = simulation_number + 1  # 1-indexed, as used in the filename
    simulation_filename = base_simulation_filename.format(
        size,seed,number_droplets,completed_simulation_number)
        
    np.savez_compressed(simulation_filename, 
                        truth=truth, true_baserates=true_baserates, copy_numbers=copy_numbers,
                        true_cell_results=true_cell_results, raw_read_results=raw_read_results,
                        pcr_errors=pcr_errors, 
                        merged_cells=merged_cells, merged_reads=merged_reads
                       )
    # Report the same 1-indexed number that appears in the filename, so the last number
    # printed before a crash is exactly the value to give `number_last_completed_simulation`.
    print('Computed and saved results for simulation # {}.\n'.format(completed_simulation_number))