In [None]:
# Run parameters for this notebook.
# NOTE(review): `pct` has no value in this source, so the cell is not runnable as-is;
# presumably a value is filled in before execution (e.g. by a templating or
# parameterization step) — confirm. Further down it is used as the frequency assigned
# to every species except the last one, which receives the remainder up to 1.
pct = 
# Number of droplets passed to each `dropletSimulation`.
number_droplets = 100000
# Upper bound of the simulation loop, i.e. total number of simulations to run.
number_simulations = 100
# Entropy for the root `np.random.SeedSequence`; also embedded in the output filenames.
seed = 42
# 1-indexed number of the last simulation already completed; 0 means start from scratch.
number_last_completed_simulation = 0

In [None]:
from itertools import product
from pathlib import Path

import numpy as np

from simulations.utils import random_copy_numbers
from simulations.parallel import dropletSimulation

In [None]:
# Dimensions shared by every simulation in this notebook.
number_batches = 5
size = 11

# Directory that receives one `.npz` archive per simulation. Keep the name free of a
# trailing `/`: the separator is supplied when the filename template is assembled below
# (adjust that template if a trailing `/` is ever added here).
results_dir_name = 'results'

# Template for the per-simulation archive path. The placeholders are filled, in order,
# with `size`, `seed`, `number_droplets` and the 1-indexed simulation number.
base_simulation_filename = ''.join(
    (results_dir_name, '/{}_strains.seed_{}.{}_droplets.iteration_{}.npz')
)

In [None]:
# Root seed sequence; each simulation later takes its own child from it via `spawn`.
seed_sequence = np.random.SeedSequence(entropy=seed)

# Make sure the output directory exists (no error if it is already there).
results_dir = Path("./" + results_dir_name)
results_dir.mkdir(exist_ok=True, parents=True)

If a previous run was interrupted, set `number_last_completed_simulation` to the (1-indexed) number of the last simulation that completed, so that the run resumes from the next one.

In [None]:
# Position the root seed sequence for a fresh or resumed run.
#
# Every simulation takes the next child of `seed_sequence`, so resuming after
# `number_last_completed_simulation` finished simulations requires exactly that many
# children to have been consumed, making the next child the
# `number_last_completed_simulation + 1`th.
#
# The root is rebuilt from `seed` first so that this cell is idempotent: `spawn` advances
# an internal counter, so re-running the cell (or running it after part of the loop has
# already executed) on the old object would skip additional children and silently give
# later simulations the wrong seeds.
seed_sequence = np.random.SeedSequence(seed)
if number_last_completed_simulation > 0:
    # The spawned children are discarded; only the advanced spawn counter matters.
    seed_sequence.spawn(number_last_completed_simulation)

In [None]:
%%time
for simulation_number in range(number_last_completed_simulation, number_simulations):
    # `simulation_number` is 0-indexed, so on a resumed run the first iteration (index
    # `number_last_completed_simulation`) is the `number_last_completed_simulation + 1`th
    # simulation.
    simulation_seed = seed_sequence.spawn(1)[0]
    # NOTE(review): `rng` and the `seed=` argument below are both derived from the same
    # SeedSequence. If `dropletSimulation` builds its own generator directly from `seed`,
    # the two streams would coincide — confirm this is intended.
    rng = np.random.default_rng(simulation_seed)

    # Every species gets frequency `pct`, except the last one, which takes the remainder.
    frequency_vector = pct*np.ones(size)
    frequency_vector[-1] = 1 - np.sum(frequency_vector[:-1])
    # Compare with a tolerance: exact floating-point equality can fail purely because of
    # rounding in the two summations, even when the vector is correct.
    assert np.isclose(np.sum(frequency_vector), 1), "frequency vector must sum to 1"

    # Interaction coefficients drawn uniformly from [-1, 1) ...
    A = 2*(rng.random((size,size)) - 0.5)
    # ... then multiplied by a random 0/1 mask to make interactions more sparse, so
    # scientifically more interesting/plausible.
    A *= rng.integers(low=0, high=2, size=A.shape)
    beta = rng.random(size)

    simulation = dropletSimulation(number_species=size, number_droplets=number_droplets,
                   number_batches=number_batches, copy_numbers=random_copy_numbers(size, rng),
                   frequency_vector=frequency_vector, glv_interaction_coefficients=A,
                   glv_baserate_coefficients=beta, noise_scale=5, seed=simulation_seed,
                   timestep=0.01, batch_window=2, merging_error=.1) # NON-ZERO MERGING ERROR

    simulation.run_simulation()
    # `simulation.group_droplets()` is deliberately not called: it is not necessary for
    # phenopath (though not a big time sink for a small number of species).

    truth = simulation.glv_interaction_coefficients

    ### Prepare 'Cells' results
    # Stack the slices along the last axis of `simulation.cells` in front and then merge
    # the first two axes, so that the rows of the 2-D result are grouped slice by slice.
    # This is done 'manually' because a plain reshape in either 'C' or 'F' order gives the
    # wrong grouping.
    # assumes `simulation.cells` is indexed (droplet, species, batch) — TODO confirm
    cell_counts = np.array([simulation.cells[...,i] for i in range(simulation.cells.shape[-1])])
    cell_counts = cell_counts.reshape((number_batches*number_droplets, size))

    cells_non_zero = (cell_counts != 0)
    cell_init_vectors = cells_non_zero.astype(int)
    # Adding the "is zero" mask turns every 0 into 1 before the log, so the result is
    # 0 where the count is 0 and log(x) where x > 0.
    cell_log_counts = np.log(cell_counts + ~cells_non_zero)

    # Sanity check that the reshaping does what it is supposed to do. The comparison is
    # asserted so that a mismatch actually stops the run instead of being discarded.
    cell_rows_per_slice = simulation.cells.shape[0]
    for i in range(simulation.cells.shape[-1]):
        slice_rows = slice(i*cell_rows_per_slice, (i+1)*cell_rows_per_slice)
        assert np.array_equal(cell_counts[slice_rows], simulation.cells[:,:,i]), \
            "reshaped cell counts do not match slice {} of simulation.cells".format(i)

    ### Prepare 'Reads' results (same procedure as for the cells)
    read_counts = np.array([simulation.reads[...,i] for i in range(simulation.reads.shape[-1])])
    read_counts = read_counts.reshape((number_batches*simulation.reads.shape[0], size))

    reads_non_zero = (read_counts != 0)
    read_init_vectors = reads_non_zero.astype(int)
    read_log_counts = np.log(read_counts + ~reads_non_zero)

    read_rows_per_slice = simulation.reads.shape[0]
    for i in range(simulation.reads.shape[-1]):
        slice_rows = slice(i*read_rows_per_slice, (i+1)*read_rows_per_slice)
        assert np.array_equal(read_counts[slice_rows], simulation.reads[:,:,i]), \
            "reshaped read counts do not match slice {} of simulation.reads".format(i)

    # Save each simulation's results as soon as it finishes, so that after a crash the run
    # can be resumed via the `number_last_completed_simulation` variable defined at the
    # top of the notebook. Filenames use the 1-indexed simulation number.
    simulation_filename = base_simulation_filename.format(
        size, seed, number_droplets, simulation_number+1)

    np.savez_compressed(simulation_filename,
                        cell_log_counts=cell_log_counts,
                        read_log_counts=read_log_counts,
                        cell_init_vectors=cell_init_vectors,
                        read_init_vectors=read_init_vectors,
                        truth=truth)