# Simulating multiple large populations

In [7]:
import pickle
import gzip
import numpy as np
import pandas as pd

from ThesisScripts.MyModel import Population, Clone
from ThesisScripts.Analyses import *

The previous simulation run with selection = 6 was interrupted before completion (**738** populations had been simulated); the remaining **262 populations** are therefore simulated separately here.

The `run_simulations()` function is adapted so that the indices used in the saved file names start at 738 and run up to 999, completing the set of 1000 populations.

In [8]:
def run_simulations(path_prefix, n, size_lim, mutprob, mutfactor, death_frac, selection,
                    start_index=738):
    '''
    !!! ADJUSTED VERSION !!!
    To be used only in `Large-selection_6-1000_simulations.ipynb`

    Runs multiple Population simulations with the same parameters and pickles
    the result of each one as soon as it has been simulated.

    Parameters:
    -----------
    * path_prefix : str, prefix of the filepath to target folder to save data;
                    it is concatenated directly with the file name, so it has to
                    end with a path separator
    * n : int, number of simulations to run
    * size_lim : int, size limit of populations
    * mutprob : float, starting mutation rate for populations
    * mutfactor : float, factor with which to raise the mutation rate for each subclone
    * death_frac : float, death rate for populations
    * selection : float, selection factor for populations
    * start_index : int, index used in the file name of the first simulated
                    population (default 738, i.e. resuming the interrupted run);
                    subsequent populations are numbered consecutively

    Returns:
    --------
    * data_list : list, contains the final_data dataframe of each simulated population

    Side effects:
    -------------
    * writes one gzip-compressed pickle per population to
      `<path_prefix>population_<index>.pkl.gz`
    * prints a progress message after every 100th simulated population
    '''

    data_list = []

    # iterate directly over the indices that end up in the file names
    for index in range(start_index, start_index + n):
        pop = Population(size_lim, mutprob, mutfactor, death_frac, selection)
        pop.simulate()
        data = final_data(pop)
        # pickle right away so finished populations are on disk even if the run is interrupted
        file_path = path_prefix + 'population_' + str(index) + '.pkl.gz'
        data.to_pickle(file_path, compression='gzip')
        data_list.append(data)
        if len(data_list) % 100 == 0:   # get an update after each 100th population simulated
            print("Populations simulated:", len(data_list))

    return data_list


## Simulations (1000 simulations of populations with size limit $10^8$)

In [9]:
# parameters shared by all populations simulated in this notebook
# (passed positionally to run_simulations() further down)
l = 10**8        # size_lim: size limit of each population
q = 10**(-3)     # mutprob: starting mutation rate
qfac = 1         # mutfactor: factor with which the mutation rate is raised for each subclone
alpha = 0.3      # death_frac: death rate

### Selective populations (selection = 6)

In [10]:
# list what is currently stored in the simulation output folder
%ls saved_simulations/

[1m[34mlarge-neutral-1000_MultiSim[m[m/         [1m[34mlarge-selection_1p5-1000_MultiSim[m[m/
large-neutral-100_MultiSim.pkl.gz    [1m[34mlarge-selection_3-1000_MultiSim[m[m/
large-selection-100_MultiSim.pkl.gz  [1m[34mlarge-selection_6-1000_MultiSim[m[m/
[1m[34mlarge-selection_0p3-1000_MultiSim[m[m/


In [11]:
# Target folder for the pickled selection = 6 populations.
# The trailing '/' is required: run_simulations() builds each file path by
# plain string concatenation of this prefix and the file name.
prefix2 = './saved_simulations/large-selection_6-1000_MultiSim/'

### 262 simulations (each population is pickled as soon as it has been simulated)


In [12]:
%%time
# Simulate the 262 remaining populations with selection factor 6; the files are
# written as population_738 ... population_999 under `prefix2`.
# Note: the name `selection` is bound here to the returned list of per-population
# data, not to the selection factor itself.
selection = run_simulations(prefix2, 262, l, q, qfac, alpha, selection=6)

Populations simulated: 100
Populations simulated: 200
CPU times: user 42min 36s, sys: 36.7 s, total: 43min 13s
Wall time: 43min 15s
