In [1]:
import os
import itertools
import multiprocessing as mpp
from multiprocessing import Pool 
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
from statsmodels.sandbox.stats.multicomp import multipletests
from typing import List
import screen_simulation as ss

### List of conditions you want to test

In [2]:
# Simulation grid. Apart from `n_guides`, `mean_coverages` and `measures`,
# each list is one axis of the sweep combined in the next cell.
n_guides = 700 * 5  # equals n_targets[0] * n_guides_per_target[0]
n_targets = [700]
n_guides_per_target = [5]
mean_coverages = [100]
# Total cells per run = mean coverage x number of guides.
n_total_cells = [coverage * n_guides for coverage in mean_coverages]
n_bulk_cells = [None]
sorting_mode = ["topbot", "bins"]
nreps = [1]
n_genome_per_sample = [100_000]
n_reads_per_sample = [1_000_000]
effect_size_distribution = [None]
edit_rate_distribution = ["data"]
guide_coverage_distribution = ["data"]
has_reporter = [True]
# Not referenced by any cell visible in this part of the notebook.
measures = ["guide", "target_edit", "reporter_edit"]

In [4]:
# Sweep axes, listed in the positional order in which each combination is
# later unpacked into `get_mageck_sensitivity(*combo, ...)`.
params = [
    n_targets,
    n_guides_per_target,
    n_total_cells,
    n_bulk_cells,
    sorting_mode,
    nreps,
    n_genome_per_sample,
    n_reads_per_sample,
    effect_size_distribution,
    guide_coverage_distribution,
    edit_rate_distribution,
    has_reporter,
]
# Cartesian product of all axes: one tuple per parameter combination.
zipped_params = [combo for combo in itertools.product(*params)]

### Run simulation and write read counts & mageck results

In [8]:
from screen_simulation.run_mageck import get_mageck_sensitivity

In [9]:
# Run only the first parameter combination, in this process.
sens, res = get_mageck_sensitivity(
    *zipped_params[0],
    rerun=True,
)

In [None]:
from screen_simulation import istarmap  # import to apply patch
from multiprocessing import Pool 

def fn(*args):
    """Pool worker: compute MAGeCK sensitivity for one parameter combination.

    Parameters
    ----------
    *args
        One tuple from ``zipped_params``, forwarded positionally to
        ``get_mageck_sensitivity``.

    Returns
    -------
    tuple
        ``(sens, res)`` exactly as returned by ``get_mageck_sensitivity``.

    Notes
    -----
    Only ``IOError`` is handled; any other exception propagates to the caller.
    """
    try:
        sens, res = get_mageck_sensitivity(*args, rerun=True)
    except IOError:
        # The fallback previously called a bare `run_mageck`, a name that no
        # cell in this notebook imports or defines, so this branch raised
        # NameError instead of recovering.
        # NOTE(review): assumes `run_mageck` is exported by
        # `screen_simulation.run_mageck`, next to `get_mageck_sensitivity`,
        # and accepts the same positional arguments plus `rerun` - confirm.
        from screen_simulation.run_mageck import run_mageck

        run_mageck(*args, rerun=True)
        sens, res = get_mageck_sensitivity(*args)
    return sens, res

# Run every parameter combination in a worker pool and collect the
# (sens, res) tuples returned by `fn`.
reses = []
with Pool(20) as pool:
    # `tqdm` is already the progress-bar class (`from tqdm.notebook import
    # tqdm`), so it is called directly; `tqdm.tqdm(...)` raised AttributeError.
    # `total` is passed explicitly because the pool iterator has no length.
    progress = tqdm(pool.istarmap(fn, zipped_params), total=len(zipped_params))
    # Avoid `_` as the loop variable: IPython uses it for the last cell output.
    for result in progress:
        reses.append(result)


In [None]:
# Column labels for the parameter table; same order as the entries of each
# tuple in `zipped_params`.
PARAM_COLUMNS = [
    "n_targets",
    "n_guides_per_target",
    "n_total_cells",
    "n_bulk_cells",
    "sorting_mode",
    "nreps",
    "n_genome_per_sample",
    "n_reads_per_sample",
    "effect_size_distribution",
    "guide_coverage_distribution",
    "edit_rate_distribution",
    "has_reporter",
]
# Number of copies made of each parameter row.
# NOTE(review): presumably the number of rows in each sensitivity table
# returned by `get_mageck_sensitivity` - confirm before changing.
ROWS_PER_RUN = 11

ix_df = pd.DataFrame(data=zipped_params, columns=PARAM_COLUMNS)
# Repeat each row ROWS_PER_RUN times consecutively. Repeating through the
# index (instead of `np.repeat(ix_df.values, ...)`) avoids the round trip
# through an object ndarray, so every column keeps its original dtype.
ix_df = ix_df.loc[ix_df.index.repeat(ROWS_PER_RUN)].reset_index(drop=True)

In [None]:
# Transpose the list of (sens, res) pairs into two parallel tuples.
sensitivities, mageck_results = zip(*reses)

In [None]:
# Stack the per-run sensitivity tables, moving their index into columns,
# then attach the parameter columns side by side (aligned on row position).
stacked_sens = pd.concat(sensitivities).reset_index()
sens_df = pd.concat([stacked_sens, ix_df], axis=1)

In [None]:
# Display the combined sensitivity/parameter table as the cell output.
sens_df

In [None]:
# Add cell and read counts expressed in thousands, truncated to integers.
sens_df["ncells(K)"] = sens_df["n_total_cells"].div(1000).astype(int)
sens_df["nreads(K)"] = sens_df["n_reads_per_sample"].div(1000).astype(int)

### Plot

In [None]:
import matplotlib
import matplotlib.pyplot as plt

In [None]:
# Global font settings applied to every figure created after this cell.
# 'normal' is a font *weight/style* value, not a font family: with it
# Matplotlib warns "findfont: Font family ... not found" and falls back to
# its default. The generic 'sans-serif' family is used instead.
font = {
    'family': 'sans-serif',
    'weight': 'bold',
    'size': 10,
}

matplotlib.rc('font', **font)