# Simulating multiple large populations

In [1]:
import os
import sys
# Put the parent directory on sys.path (only once) so the project imports below can
# resolve; presumably the `ThesisScripts` package lives there — TODO confirm.
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from tqdm import tqdm
import pickle
import gzip
import numpy as np
import pandas as pd

from ThesisScripts.MyModel import Population, Clone
# NOTE(review): the star import hides where names come from; `final_data`, called in
# `run_simulations`, is presumably provided here — verify and import it explicitly.
from ThesisScripts.Analyses import *

## Simulations (1000 populations, each with a size limit of $10^8$)

In [2]:
# Settings shared by both populations; passed positionally to `run_simulations`.
l = 10 ** 8      # size limit of each population
q = 10 ** -3     # starting mutation rate
qfac = 1         # factor applied to the mutation rate for each subclone
alpha = 0.3      # death rate

### Selective populations (selection = 0.15)

In [3]:
# List the contents of the saved-simulations folder to find the target directory.
%ls ../saved_simulations/

[1m[34mSAMPLED-Results-Large-Neutral_vs_Selection-HIGH_MUTRATE[m[m/
[1m[34mlarge-selection_0p1-1000_MultiSim[m[m/
[1m[34mlarge-selection_0p15-1000_MultiSim[m[m/


In [4]:
# Target folder for the pickled populations. The trailing slash matters: the file name
# is appended to this string by plain concatenation in `run_simulations`.
prefix = '../saved_simulations/large-selection_0p15-1000_MultiSim/'

### Simulations (each population is pickled as soon as its simulation finishes)

Only **932** populations were simulated in the first run, because the disk ran out of space.

The remaining **68** populations are simulated below.

In [5]:
def run_simulations(path_prefix, n, size_lim, mutprob, mutfactor, death_frac, selection,
                    start_index=932):
    '''
    !!! ADJUSTED VERSION !!!
    To be used only in `Large-selection_0p15-1000_simulations.ipynb`

    Runs multiple Population simulations with the same parameters and pickles the
    final data of each population to disk as soon as its simulation has finished.

    Output files are named `<path_prefix>population_<k>.pkl.gz`, with `k` running from
    `start_index` to `start_index + n - 1`. `path_prefix` is concatenated directly with
    the file name, so it has to end with a path separator when it names a folder.

    Parameters:
    -----------
    * path_prefix : prefix of the filepath to target folder to save data
    * n : int, number of simulations to run
    * size_lim : int, size limit of populations
    * mutprob : float, starting mutation rate for populations
    * mutfactor : float, factor with which to raise the mutation rate for each subclone
    * death_frac : float, death rate for populations
    * selection : float, selection factor for populations
    * start_index : int, number used in the file name of the first simulated population;
                    defaults to 932 so that the files continue after the 932 populations
                    that were already saved

    Returns:
    --------
    * data_list : list, contains the final_data dataframe of each simulated population
    '''

    data_list = []

    for i in tqdm(range(n)):
        pop = Population(size_lim, mutprob, mutfactor, death_frac, selection)
        pop.simulate()
        data = final_data(pop)
        # pickle right away so that finished populations are kept if the run is interrupted
        file_path = path_prefix + 'population_' + str(start_index + i) + '.pkl.gz'
        data.to_pickle(file_path, compression='gzip')
        data_list.append(data)
        if len(data_list) % 100 == 0:   # get an update after each 100th population simulated
            print("Populations simulated:", len(data_list))

    return data_list


### Remaining 68 populations

In [6]:
%%time
# Simulate the 68 populations that are still missing; each result is pickled under
# `prefix` as it completes and the dataframes are also collected in `selection`.
selection = run_simulations(prefix, 68, l, q, qfac, alpha, selection=0.15)

100%|██████████| 68/68 [12:32<00:00, 11.07s/it]

CPU times: user 12min 18s, sys: 11.1 s, total: 12min 29s
Wall time: 12min 32s



