From cfaffb8739f3d60484d88a6a8a83fa4f89eb75eb Mon Sep 17 00:00:00 2001
From: Tamas Nagy <tamas@tamasnagy.com>
Date: Sat, 21 May 2016 16:46:17 -0700
Subject: [PATCH 1/2] initial implementation of crispr cutting code

Fixes #3. This some gnarly and complex stuff, needs lots of validation and
testing
---
 src/designs.jl      |  8 +++---
 src/library.jl      | 62 +++++++++++++++++++++++++++++++++++++--------
 src/selection.jl    | 36 ++++++++++++++++----------
 src/transfection.jl | 62 +++++++++++++++++++++++++++++++++++----------
 test/runtests.jl    | 45 ++++++++++++++++++++++++++++++++
 5 files changed, 171 insertions(+), 42 deletions(-)

diff --git a/src/designs.jl b/src/designs.jl
index feff841..d005057 100644
--- a/src/designs.jl
+++ b/src/designs.jl
@@ -28,9 +28,9 @@ function run_exp(setup::FacsScreen, lib::Library, processing_func::Function; run
     min_perc = minimum([range[2] - range[1] for (binname, range) in setup.bin_info])
     expand_to = round(Int64, (setup.num_cells_per_bin * length(guides))/min_perc)
 
-    cells = transfect(guides, guide_freqs_dist, length(guides)*setup.representation, setup.moi, expand_to)
+    cells, cell_phenotypes = transfect(lib, guides, guide_freqs_dist, length(guides)*setup.representation, setup.moi, expand_to)
 
-    bin_cells = facs_sort(cells, guides, setup.bin_info, setup.σ)
+    bin_cells = facs_sort(cells, cell_phenotypes, guides, setup.bin_info, setup.σ)
 
     [teardown_screen(setup, guides, bin_cells, processing_func)...; as_array(setup)...; run_idx]
 end
@@ -43,9 +43,9 @@ function run_exp(setup::GrowthScreen, lib::Library, processing_func::Function; r
 
     guides, guide_freqs_dist = setup_screen(setup, lib)
 
-    cells, num_doublings = transfect(setup, guides, guide_freqs_dist)
+    cells, cell_phenotypes = transfect(setup, lib, guides, guide_freqs_dist)
 
-    bin_cells = growth_assay(cells, guides, setup.num_bottlenecks, setup.bottleneck_representation)
+    bin_cells = growth_assay(cells, cell_phenotypes, guides, setup.num_bottlenecks, setup.bottleneck_representation)
 
     [teardown_screen(setup, guides, bin_cells, processing_func)...; as_array(setup)...; run_idx]
 end
diff --git a/src/library.jl b/src/library.jl
index c8e7d88..6032eae 100644
--- a/src/library.jl
+++ b/src/library.jl
@@ -36,12 +36,24 @@ function response(sig::Sigmoidal)
     (x, l) -> sigmoid(x, l, slope, inflection)
 end
 
+abstract Cas9Behavior
+
+type CRISPRi <: Cas9Behavior end
+
+type CRISPRKO <: Cas9Behavior
+    knockout_dist::Categorical
+
+    CRISPRKO(dist::Categorical) = new(dist)
+end
+CRISPRKO() = CRISPRKO(Categorical([1/9, 4/9, 4/9]))
+
 """
 Wrapper containing all library construction parameters
 """
 type Library
     "Distribution of guide knockdown efficiencies"
-    knockdown_dist::Distribution
+    knockdown_dist::Dict{Int64, Tuple{Symbol, Sampleable}}
+    knockdown_probs::Categorical
 
     """
     Maximum phenotype categories mapped to their probability of being
@@ -57,36 +69,63 @@ type Library
     kd_phenotype_relationships::Dict{Int64, Tuple{Symbol, KDPhenotypeRelationship}}
     relationship_probs::Categorical
 
-    function Library(knockdown_dist::Distribution,
+    """
+    Whether this library is CRISPRi or CRISPR cutting.
+    """
+    cas9_behavior::Cas9Behavior
+
+    function Library(knockdown_dist::Dict{Symbol, Tuple{Float64, Sampleable}},
                      max_phenotype_dists::Dict{Symbol, Tuple{Float64, Sampleable}},
-                     kd_phenotype_relationships::Dict{Symbol, Tuple{Float64, KDPhenotypeRelationship}})
+                     kd_phenotype_relationships::Dict{Symbol, Tuple{Float64, KDPhenotypeRelationship}},
+                     cas9_behavior::Cas9Behavior)
 
+        kd = unroll(knockdown_dist)
         max_p = unroll(max_phenotype_dists)
         rela = unroll(kd_phenotype_relationships)
-        new(knockdown_dist, max_p[1], max_p[2], rela[1], rela[2])
+        new(kd[1], kd[2], max_p[1], max_p[2], rela[1], rela[2], cas9_behavior)
     end
 end
 
-function Library()
+function Library(cas9_behavior::Cas9Behavior)
     max_phenotype_dists = Dict{Symbol, Tuple{Float64, Sampleable}}(
         :inactive => (0.75, Delta(0.0)),
         :negcontrol => (0.05, Delta(0.0)),
         :increasing => (0.1, TruncatedNormal(0.55, 0.2, 0.1, 1)),
         :decreasing => (0.1, TruncatedNormal(-0.55, 0.2, -1, -0.1))
     )
-    Library(max_phenotype_dists)
+    Library(max_phenotype_dists, cas9_behavior)
 end
 
-function Library(max_phenotype_dists::Dict{Symbol, Tuple{Float64, Sampleable}})
+function Library(max_phenotype_dists::Dict{Symbol, Tuple{Float64, Sampleable}},
+                 cas9_behavior::CRISPRi)
     # Assuming a high quality library has mostly good guides with some bad ones
-    knockdown_dist = MixtureModel([TruncatedNormal(0.90, 0.1, 0, 1),
-                                   TruncatedNormal(0.05, 0.07, 0, 1)], [0.9, 0.1])
+    knockdown_dist = Dict{Symbol, Tuple{Float64, Sampleable}}(
+        :high => (0.9, TruncatedNormal(0.90, 0.1, 0, 1)),
+        :low => (0.1, TruncatedNormal(0.05, 0.07, 0, 1))
+    )
+    Library(max_phenotype_dists, knockdown_dist, cas9_behavior)
+end
+
+function Library(max_phenotype_dists::Dict{Symbol, Tuple{Float64, Sampleable}},
+                 cas9_behavior::CRISPRKO)
+    # For CRISPR KO assume that if guide is "high quality" than it will a
+    # maximum knockdown of 100%
+    knockdown_dist = Dict{Symbol, Tuple{Float64, Sampleable}}(
+        :high => (0.9, Delta(1.0)),
+        :low => (0.1, TruncatedNormal(0.05, 0.07, 0, 1))
+    )
+    Library(max_phenotype_dists, knockdown_dist, cas9_behavior)
+end
+
+function Library(max_phenotype_dists::Dict{Symbol, Tuple{Float64, Sampleable}},
+                 knockdown_dist::Dict{Symbol, Tuple{Float64, Sampleable}},
+                 cas9_behavior::Cas9Behavior)
 
     kd_phenotype_relationships = Dict{Symbol, Tuple{Float64, KDPhenotypeRelationship}}(
         :linear => (0.75, Linear()),
         :sigmoidal => (0.25, Sigmoidal())
     )
-    Library(knockdown_dist, max_phenotype_dists, kd_phenotype_relationships)
+    Library(knockdown_dist, max_phenotype_dists, kd_phenotype_relationships, cas9_behavior)
 end
 
 function unroll{T}(data::Dict{Symbol, Tuple{Float64, T}})
@@ -121,7 +160,8 @@ function construct_library(lib::Library, N::Int64, coverage::Int64)
         kd_response = response(relationship)
 
         for i in 1:coverage
-            barcode_knockdown = rand(lib.knockdown_dist)
+            barcode_quality, barcode_knockdown_dist = lib.knockdown_dist[rand(lib.knockdown_probs)]
+            barcode_knockdown = rand(barcode_knockdown_dist)
             # phenotype of barcode given its knockdown efficiency
             barcode_phenotype = kd_response(barcode_knockdown, max_phenotype)
             push!(barcodes, Barcode(gene, barcode_knockdown, barcode_phenotype,
diff --git a/src/selection.jl b/src/selection.jl
index a482be1..f24a008 100644
--- a/src/selection.jl
+++ b/src/selection.jl
@@ -3,15 +3,16 @@ Given `cells`, a vector of integers, and `guides`, a vector of barcodes
 performs simulated facs sorting of the cells into `bins` with the given
 cutoffs. `σ` refers to the spread of the phenotype during the FACS screen.
 """
-function facs_sort(cells::Vector{Int64}, guides::Vector{Barcode},
+function facs_sort(cells::Vector{Int64}, cell_phenotypes::Vector{Float64},
+                   guides::Vector{Barcode},
                    bins::Dict{Symbol, Tuple{Float64, Float64}}, σ::Float64)
 
+    @assert size(cells) == size(cell_phenotypes)
     n_cells = length(cells)
     observed = zeros(n_cells)
-    for i in 1:n_cells
-        cell = cells[i]
-        observed[i] = rand(Normal(guides[cell].theo_phenotype, σ))
-        guides[cell].obs_phenotype = observed[i]
+    @inbounds for i in 1:n_cells
+        observed[i] = rand(Normal(cell_phenotypes[i], σ))
+        guides[cells[i]].obs_phenotype = observed[i]
     end
     indices = sortperm(observed)
     cells = cells[indices]
@@ -25,19 +26,22 @@ function facs_sort(cells::Vector{Int64}, guides::Vector{Barcode},
     results
 end
 
-function grow!(cells::AbstractArray{Int64}, guides::Vector{Barcode}, output)
+function grow!(cells::AbstractArray{Int64}, cell_phenotypes::AbstractArray{Float64},
+               output_c::AbstractArray{Int64}, output_p::AbstractArray{Float64})
     num_inserted::Int = 0
     @inbounds for i in 1:length(cells)
-        id::Int64 = cells[i]
-        ρ::Float64 = guides[id].theo_phenotype
+        ρ::Float64 = cell_phenotypes[i]
         decision = abs(ρ) < rand() ? 2 : 2^trunc(Int, 1 + sign(ρ))
-        output[num_inserted+1:num_inserted+decision] = id
+        rng = num_inserted+1:num_inserted+decision
+        output_c[rng] = cells[i]
+        output_p[rng] = ρ
         num_inserted+=decision
     end
     num_inserted
 end
 
 function growth_assay(initial_cells::AbstractArray{Int64},
+                      initial_cell_phenotypes::AbstractArray{Float64},
                       guides::Vector{Barcode},
                       num_bottlenecks::Int64,
                       bottleneck_representation::Int64;
@@ -45,13 +49,19 @@ function growth_assay(initial_cells::AbstractArray{Int64},
 
     # all cells at all timepoints
     cellmat = zeros(Int64, length(guides)*bottleneck_representation, num_bottlenecks)
-    output = Array(Int64, length(initial_cells)*4);
+    cpmat = zeros(Float64, size(cellmat))
+    output_c = Array(Int64, length(initial_cells)*4);
+    output_p = Array(Float64, size(output_c));
     cells = initial_cells # 1st timepoint slice
+    picked = Array(Int64, size(cellmat, 1))
+    cell_phenotypes = initial_cell_phenotypes
 
     for k in 1:num_bottlenecks
-        num_inserted = grow!(cells, guides, output)
-        cells = sub(cellmat, :, k)
-        sample!(sub(output, 1:num_inserted), cells, replace=false)
+        num_inserted = grow!(cells, cell_phenotypes, output_c, output_p)
+        cells, cell_phenotypes = sub(cellmat, :, k), sub(cpmat, :, k)
+        sample!(collect(1:num_inserted), picked, replace=false)
+        copy!(sub(cellmat, :, k), output_c[picked])
+        copy!(sub(cpmat, :, k), output_p[picked])
     end
     if debug
         d = Dict([symbol("bin", i+1)=>cellmat[:, i] for i in 1:num_bottlenecks])
diff --git a/src/transfection.jl b/src/transfection.jl
index f051111..2dfa743 100644
--- a/src/transfection.jl
+++ b/src/transfection.jl
@@ -1,18 +1,47 @@
-function transfect(guides::Vector{Barcode}, guide_freqs_dist::Distributions.Categorical,
+function build_cells(::CRISPRi, guides::Vector{Barcode},
+                     guide_freq_dist::Categorical, n::Int64)
+    cells = rand(guide_freq_dist, n)
+    phenotypes = Array(Float64, n)
+    @inbounds @fastmath for i in 1:n
+        phenotypes[i] = guides[cells[i]].theo_phenotype
+    end
+    cells, phenotypes
+end
+
+function build_cells(behav::CRISPRKO, guides::Vector{Barcode},
+                     guide_freq_dist::Categorical, n::Int64)
+
+    phenotypes = Array(Float64, n);
+    cells = rand(guide_freq_dist, n)
+    ko_dist = behav.knockout_dist
+    dist = rand(ko_dist, n)
+    @inbounds @fastmath for i in 1:n
+        phenotypes[i] = guides[cells[i]].theo_phenotype*(dist[i] - 1)/2
+    end
+    cells, phenotypes
+end
+
+function transfect(lib::Library, guides::Vector{Barcode}, guide_freqs_dist::Categorical,
                    cell_count::Int64, moi::Float64, expand_to::Int64)
-    cells = rand(guide_freqs_dist, round(Int64, pdf(Poisson(moi), 1)*cell_count))
+
+    cells, cell_phenotypes = build_cells(lib.cas9_behavior, guides, guide_freqs_dist,
+                                         round(Int64, pdf(Poisson(moi), 1)*cell_count) )
     num_cells = length(cells)
 
     multiples = 1
     if expand_to > num_cells
         multiples = ceil(Int64, expand_to/num_cells)
-        expansion = Array(Int64, num_cells*multiples)
+        expansion_c = Array(Int64, num_cells*multiples)
+        expansion_p = Array(Float64, num_cells*multiples)
         for rep in 1:multiples
-            @inbounds expansion[(rep-1)*num_cells+1:rep*num_cells] = cells
+            rng = (rep-1)*num_cells+1:rep*num_cells
+            expansion_c[rng] = cells
+            expansion_p[rng] = cell_phenotypes
         end
-        cells = expansion
+        cells, cell_phenotypes = expansion_c, expansion_p
     else
-        cells = sample(cells, expand_to)
+        picked = sample(collect(1:num_cells), expand_to, replace=false)
+        cells, cell_phenotypes = cells[picked], cell_phenotypes[picked]
     end
 
     initial_freqs = StatsBase.counts(cells, 1:length(guides)) ./ length(cells)
@@ -20,30 +49,35 @@ function transfect(guides::Vector{Barcode}, guide_freqs_dist::Distributions.Cate
     for i in 1:length(guides)
         @inbounds guides[i].initial_freq = initial_freqs[i]
     end
-    cells
+    cells, cell_phenotypes
 end
 
 function transfect(setup::GrowthScreen,
+                   lib::Library,
                    guides::Vector{Barcode},
                    guide_freqs_dist::Categorical)
 
     num_guides = length(guides)
     cell_count = num_guides * setup.representation
-    initial_cells = rand(guide_freqs_dist, round(Int64, pdf(Poisson(setup.moi), 1)*cell_count))
+    initial_cells, cell_phenotypes = build_cells(lib.cas9_behavior, guides, guide_freqs_dist,
+                                         round(Int64, pdf(Poisson(setup.moi), 1)*cell_count) )
     target = num_guides * setup.bottleneck_representation
 
     if target < length(initial_cells)
-        cells = sample(initial_cells, target)
+        picked = sample(collect(1:length(initial_cells)), target, replace=false)
+        cells, cell_phenotypes = initial_cells[picked], cell_phenotypes[picked]
         num_doublings = -1
     else
-        cells = copy(initial_cells)
+        cells, cell_phenotypes = copy(initial_cells), copy(cell_phenotypes)
         num_inserted = length(cells)
         num_doublings = 0
-        output = Array(Int64, target*4);
+        output_c = Array(Int64, target*4)
+        output_p = Array(Float64, target*4)
 
         while num_inserted < target
-            num_inserted = grow!(cells, guides, output)
-            cells = copy(sub(output, 1:num_inserted))
+            num_inserted = grow!(cells, cell_phenotypes, output_c, output_p)
+            cells = copy(sub(output_c, 1:num_inserted))
+            cell_phenotypes = copy(sub(output_p, 1:num_inserted))
             num_doublings += 1
         end
     end
@@ -53,5 +87,5 @@ function transfect(setup::GrowthScreen,
     for i in 1:length(guides)
         @inbounds guides[i].initial_freq = initial_freqs[i]
     end
-    cells, num_doublings
+    cells, cell_phenotypes
 end
diff --git a/test/runtests.jl b/test/runtests.jl
index 51bdba9..75d86d7 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,4 +1,5 @@
 using Base.Test
+using DataStructures
 
 include("../src/run.jl")
 
@@ -16,4 +17,48 @@ sigmoidal_response = response(Sigmoidal())
 @test sigmoidal_response(0.0, -1.0) > -0.001
 @test sigmoidal_response(1.0, -1.0) < -0.999
 
+# Test the two different CRISPR types
+lib = Library(CRISPRi())
+setup = FacsScreen()
+guides, guide_freqs_dist = setup_screen(setup, lib)
+cells, cell_phenotypes = build_cells(lib.cas9_behavior, guides, guide_freqs_dist, 10^7)
+
+function convert_cells_to_pop(cells, cell_phenotypes)
+    cells_to_phenotypes = [DefaultDict(Float64, Int64, 0) for _ in 1:length(guides)]
+
+    @inbounds for i in eachindex(cells)
+        cells_to_phenotypes[cells[i]][cell_phenotypes[i]] += 1
+    end
+
+    cells_to_phenotypes
+end
+
+# In a CRISPRi screen all cells should have the same phenotype
+@test all(map(length, convert_cells_to_pop(cells, cell_phenotypes)) .== 1)
+
+lib = Library(CRISPRKO())
+guides, guide_freqs_dist = setup_screen(setup, lib)
+cells, cell_phenotypes = build_cells(lib.cas9_behavior, guides, guide_freqs_dist, 10^7)
+
+arr = convert_cells_to_pop(cells, cell_phenotypes);
+nonzeros = arr[find(x->length(x) != 1, arr)]
+results = zeros(CRISPRKO().knockout_dist.K, length(nonzeros))
+for (idx, guide) in enumerate(nonzeros)
+    if -0.0 in keys(guide)
+        @assert guide[0.0] == 0
+        delete!(guide, 0.0)
+        ks = sort(collect(keys(guide)))
+        tot = sum(values(guide))
+        results[:, idx] = [guide[key]/tot for key in ks]
+    else
+        @assert guide[0.0] != 0
+        ks = sort(collect(keys(guide)), rev=true)
+        tot = sum(values(guide))
+        results[:, idx] = [guide[key]/tot for key in ks]
+    end
+end
+
+@test abs(mean(results[1, :] ./ results[2, :]) - 1) < 0.025
+@test abs(mean(results[1, :] ./ results[3, :])/4 - 1) < 0.025
+
 println("All tests pass")

From 05c0bfe9eb40991d8b0fc0e78399ad1e735b35e8 Mon Sep 17 00:00:00 2001
From: Tamas Nagy <tamas@tamasnagy.com>
Date: Sat, 21 May 2016 18:06:53 -0700
Subject: [PATCH 2/2] completely generalize run_exp() finally closing #10

---
 src/designs.jl      | 53 ++++++++++-----------------------------------
 src/library.jl      |  6 +++--
 src/selection.jl    | 26 ++++++++++++++--------
 src/transfection.jl | 16 ++++++++++----
 4 files changed, 44 insertions(+), 57 deletions(-)

diff --git a/src/designs.jl b/src/designs.jl
index d005057..a2c9114 100644
--- a/src/designs.jl
+++ b/src/designs.jl
@@ -1,51 +1,20 @@
-function setup_screen(setup::ScreenSetup, lib::Library)
-    guides, guide_freqs = construct_library(lib, setup.num_genes, setup.coverage)
-    guide_freqs_dist = Categorical(guide_freqs)
-    guides, guide_freqs_dist
-end
-
-function teardown_screen(setup::ScreenSetup,
-                         guides::Vector{Barcode},
-                         bin_cells::Dict{Symbol,Vector{Int64}},
-                         processing_func::Function)
-
-    freqs = counts_to_freqs(bin_cells, length(guides))
-    # uniform for now for all bins
-    seq_depths = Dict{Symbol, Int64}([binname=>setup.seq_depth for binname in keys(bin_cells)])
-    raw_data = sequencing(seq_depths, guides, freqs)
-    genes = differences_between_bins(raw_data)
-    processing_func(genes)
-end
-
-"""
-Runs a FACS screen given the parameters specified in `setup` using the
-library `lib` and applies the `processing_func` function to the result.
-"""
-function run_exp(setup::FacsScreen, lib::Library, processing_func::Function; run_idx=-1)
-
-    guides, guide_freqs_dist = setup_screen(setup, lib)
-
-    min_perc = minimum([range[2] - range[1] for (binname, range) in setup.bin_info])
-    expand_to = round(Int64, (setup.num_cells_per_bin * length(guides))/min_perc)
-
-    cells, cell_phenotypes = transfect(lib, guides, guide_freqs_dist, length(guides)*setup.representation, setup.moi, expand_to)
-
-    bin_cells = facs_sort(cells, cell_phenotypes, guides, setup.bin_info, setup.σ)
-
-    [teardown_screen(setup, guides, bin_cells, processing_func)...; as_array(setup)...; run_idx]
-end
-
 """
-Runs a growth screen given the parameters specified in `setup` using the
+Runs a screen given the parameters specified in `setup` using the
 library `lib` and applies the `processing_func` function to the result.
 """
-function run_exp(setup::GrowthScreen, lib::Library, processing_func::Function; run_idx=-1)
+function run_exp(setup::ScreenSetup, lib::Library, processing_func::Function; run_idx=-1)
 
-    guides, guide_freqs_dist = setup_screen(setup, lib)
+    guides, guide_freqs_dist = construct_library(setup, lib)
 
     cells, cell_phenotypes = transfect(setup, lib, guides, guide_freqs_dist)
 
-    bin_cells = growth_assay(cells, cell_phenotypes, guides, setup.num_bottlenecks, setup.bottleneck_representation)
+    bin_cells = select(setup, cells, cell_phenotypes, guides)
+
+    freqs = counts_to_freqs(bin_cells, length(guides))
+    # uniform for now for all bins
+    seq_depths = Dict{Symbol, Int64}([binname=>setup.seq_depth for binname in keys(bin_cells)])
+    raw_data = sequencing(seq_depths, guides, freqs)
+    genes = differences_between_bins(raw_data)
 
-    [teardown_screen(setup, guides, bin_cells, processing_func)...; as_array(setup)...; run_idx]
+    [processing_func(genes)...; as_array(setup)...; run_idx]
 end
diff --git a/src/library.jl b/src/library.jl
index 6032eae..00ebcca 100644
--- a/src/library.jl
+++ b/src/library.jl
@@ -150,8 +150,10 @@ end
 Constructs the guide library for `N` genes with `coverage` number of guides per
 gene. Returns a tuple of guides and their relative frequencies (assigned randomly).
 """
-function construct_library(lib::Library, N::Int64, coverage::Int64)
+function construct_library(setup::ScreenSetup, lib::Library)
     barcodes = Barcode[]
+    N = setup.num_genes
+    coverage = setup.coverage
 
     for gene in 1:N
 
@@ -174,7 +176,7 @@ function construct_library(lib::Library, N::Int64, coverage::Int64)
     # a 10-fold difference between the 95th/5th percentiles
     vals = 10.^rand(Normal(0, 1/3.29), N*coverage)
     guide_freqs = vals/sum(vals)
-    barcodes, guide_freqs
+    barcodes, Categorical(guide_freqs)
 end
 
 """
diff --git a/src/selection.jl b/src/selection.jl
index f24a008..3fba08c 100644
--- a/src/selection.jl
+++ b/src/selection.jl
@@ -3,10 +3,14 @@ Given `cells`, a vector of integers, and `guides`, a vector of barcodes
 performs simulated facs sorting of the cells into `bins` with the given
 cutoffs. `σ` refers to the spread of the phenotype during the FACS screen.
 """
-function facs_sort(cells::Vector{Int64}, cell_phenotypes::Vector{Float64},
-                   guides::Vector{Barcode},
-                   bins::Dict{Symbol, Tuple{Float64, Float64}}, σ::Float64)
+function select(setup::FacsScreen,
+                cells::Vector{Int64},
+                cell_phenotypes::Vector{Float64},
+                guides::Vector{Barcode};
+                debug = false)
 
+    σ = setup.σ
+    bins = setup.bin_info
     @assert size(cells) == size(cell_phenotypes)
     n_cells = length(cells)
     observed = zeros(n_cells)
@@ -40,13 +44,17 @@ function grow!(cells::AbstractArray{Int64}, cell_phenotypes::AbstractArray{Float
     num_inserted
 end
 
-function growth_assay(initial_cells::AbstractArray{Int64},
-                      initial_cell_phenotypes::AbstractArray{Float64},
-                      guides::Vector{Barcode},
-                      num_bottlenecks::Int64,
-                      bottleneck_representation::Int64;
-                      debug=false)
+"""
+Growth Screen selection
+"""
+function select(setup::GrowthScreen,
+                initial_cells::AbstractArray{Int64},
+                initial_cell_phenotypes::AbstractArray{Float64},
+                guides::Vector{Barcode};
+                debug=false)
 
+    bottleneck_representation = setup.bottleneck_representation
+    num_bottlenecks = setup.num_bottlenecks
     # all cells at all timepoints
     cellmat = zeros(Int64, length(guides)*bottleneck_representation, num_bottlenecks)
     cpmat = zeros(Float64, size(cellmat))
diff --git a/src/transfection.jl b/src/transfection.jl
index 2dfa743..2738ad8 100644
--- a/src/transfection.jl
+++ b/src/transfection.jl
@@ -21,8 +21,16 @@ function build_cells(behav::CRISPRKO, guides::Vector{Barcode},
     cells, phenotypes
 end
 
-function transfect(lib::Library, guides::Vector{Barcode}, guide_freqs_dist::Categorical,
-                   cell_count::Int64, moi::Float64, expand_to::Int64)
+function transfect(setup::FacsScreen,
+                   lib::Library,
+                   guides::Vector{Barcode},
+                   guide_freqs_dist::Categorical)
+
+    moi = setup.moi
+    num_guides = length(guides)
+    cell_count = num_guides * setup.representation
+    min_perc = minimum([range[2] - range[1] for (binname, range) in setup.bin_info])
+    expand_to = round(Int64, (setup.num_cells_per_bin * length(guides))/min_perc)
 
     cells, cell_phenotypes = build_cells(lib.cas9_behavior, guides, guide_freqs_dist,
                                          round(Int64, pdf(Poisson(moi), 1)*cell_count) )
@@ -44,9 +52,9 @@ function transfect(lib::Library, guides::Vector{Barcode}, guide_freqs_dist::Cate
         cells, cell_phenotypes = cells[picked], cell_phenotypes[picked]
     end
 
-    initial_freqs = StatsBase.counts(cells, 1:length(guides)) ./ length(cells)
+    initial_freqs = StatsBase.counts(cells, 1:num_guides) ./ length(cells)
 
-    for i in 1:length(guides)
+    for i in 1:num_guides
         @inbounds guides[i].initial_freq = initial_freqs[i]
     end
     cells, cell_phenotypes