From cfaffb8739f3d60484d88a6a8a83fa4f89eb75eb Mon Sep 17 00:00:00 2001 From: Tamas Nagy Date: Sat, 21 May 2016 16:46:17 -0700 Subject: [PATCH 1/2] initial implementation of crispr cutting code Fixes #3. This some gnarly and complex stuff, needs lots of validation and testing --- src/designs.jl | 8 +++--- src/library.jl | 62 +++++++++++++++++++++++++++++++++++++-------- src/selection.jl | 36 ++++++++++++++++---------- src/transfection.jl | 62 +++++++++++++++++++++++++++++++++++---------- test/runtests.jl | 45 ++++++++++++++++++++++++++++++++ 5 files changed, 171 insertions(+), 42 deletions(-) diff --git a/src/designs.jl b/src/designs.jl index feff841..d005057 100644 --- a/src/designs.jl +++ b/src/designs.jl @@ -28,9 +28,9 @@ function run_exp(setup::FacsScreen, lib::Library, processing_func::Function; run min_perc = minimum([range[2] - range[1] for (binname, range) in setup.bin_info]) expand_to = round(Int64, (setup.num_cells_per_bin * length(guides))/min_perc) - cells = transfect(guides, guide_freqs_dist, length(guides)*setup.representation, setup.moi, expand_to) + cells, cell_phenotypes = transfect(lib, guides, guide_freqs_dist, length(guides)*setup.representation, setup.moi, expand_to) - bin_cells = facs_sort(cells, guides, setup.bin_info, setup.σ) + bin_cells = facs_sort(cells, cell_phenotypes, guides, setup.bin_info, setup.σ) [teardown_screen(setup, guides, bin_cells, processing_func)...; as_array(setup)...; run_idx] end @@ -43,9 +43,9 @@ function run_exp(setup::GrowthScreen, lib::Library, processing_func::Function; r guides, guide_freqs_dist = setup_screen(setup, lib) - cells, num_doublings = transfect(setup, guides, guide_freqs_dist) + cells, cell_phenotypes = transfect(setup, lib, guides, guide_freqs_dist) - bin_cells = growth_assay(cells, guides, setup.num_bottlenecks, setup.bottleneck_representation) + bin_cells = growth_assay(cells, cell_phenotypes, guides, setup.num_bottlenecks, setup.bottleneck_representation) [teardown_screen(setup, guides, bin_cells, processing_func)...; as_array(setup)...; run_idx] end diff --git a/src/library.jl b/src/library.jl index c8e7d88..6032eae 100644 --- a/src/library.jl +++ b/src/library.jl @@ -36,12 +36,24 @@ function response(sig::Sigmoidal) (x, l) -> sigmoid(x, l, slope, inflection) end +abstract Cas9Behavior + +type CRISPRi <: Cas9Behavior end + +type CRISPRKO <: Cas9Behavior + knockout_dist::Categorical + + CRISPRKO(dist::Categorical) = new(dist) +end +CRISPRKO() = CRISPRKO(Categorical([1/9, 4/9, 4/9])) + """ Wrapper containing all library construction parameters """ type Library "Distribution of guide knockdown efficiencies" - knockdown_dist::Distribution + knockdown_dist::Dict{Int64, Tuple{Symbol, Sampleable}} + knockdown_probs::Categorical """ Maximum phenotype categories mapped to their probability of being @@ -57,36 +69,63 @@ type Library kd_phenotype_relationships::Dict{Int64, Tuple{Symbol, KDPhenotypeRelationship}} relationship_probs::Categorical - function Library(knockdown_dist::Distribution, + """ + Whether this library is CRISPRi or CRISPR cutting. + """ + cas9_behavior::Cas9Behavior + + function Library(knockdown_dist::Dict{Symbol, Tuple{Float64, Sampleable}}, max_phenotype_dists::Dict{Symbol, Tuple{Float64, Sampleable}}, - kd_phenotype_relationships::Dict{Symbol, Tuple{Float64, KDPhenotypeRelationship}}) + kd_phenotype_relationships::Dict{Symbol, Tuple{Float64, KDPhenotypeRelationship}}, + cas9_behavior::Cas9Behavior) + kd = unroll(knockdown_dist) max_p = unroll(max_phenotype_dists) rela = unroll(kd_phenotype_relationships) - new(knockdown_dist, max_p[1], max_p[2], rela[1], rela[2]) + new(kd[1], kd[2], max_p[1], max_p[2], rela[1], rela[2], cas9_behavior) end end -function Library() +function Library(cas9_behavior::Cas9Behavior) max_phenotype_dists = Dict{Symbol, Tuple{Float64, Sampleable}}( :inactive => (0.75, Delta(0.0)), :negcontrol => (0.05, Delta(0.0)), :increasing => (0.1, TruncatedNormal(0.55, 0.2, 0.1, 1)), :decreasing => (0.1, TruncatedNormal(-0.55, 0.2, -1, -0.1)) ) - Library(max_phenotype_dists) + Library(max_phenotype_dists, cas9_behavior) end -function Library(max_phenotype_dists::Dict{Symbol, Tuple{Float64, Sampleable}}) +function Library(max_phenotype_dists::Dict{Symbol, Tuple{Float64, Sampleable}}, + cas9_behavior::CRISPRi) # Assuming a high quality library has mostly good guides with some bad ones - knockdown_dist = MixtureModel([TruncatedNormal(0.90, 0.1, 0, 1), - TruncatedNormal(0.05, 0.07, 0, 1)], [0.9, 0.1]) + knockdown_dist = Dict{Symbol, Tuple{Float64, Sampleable}}( + :high => (0.9, TruncatedNormal(0.90, 0.1, 0, 1)), + :low => (0.1, TruncatedNormal(0.05, 0.07, 0, 1)) + ) + Library(max_phenotype_dists, knockdown_dist, cas9_behavior) +end + +function Library(max_phenotype_dists::Dict{Symbol, Tuple{Float64, Sampleable}}, + cas9_behavior::CRISPRKO) + # For CRISPR KO assume that if guide is "high quality" than it will a + # maximum knockdown of 100% + knockdown_dist = Dict{Symbol, Tuple{Float64, Sampleable}}( + :high => (0.9, Delta(1.0)), + :low => (0.1, TruncatedNormal(0.05, 0.07, 0, 1)) + ) + Library(max_phenotype_dists, knockdown_dist, cas9_behavior) +end + +function Library(max_phenotype_dists::Dict{Symbol, Tuple{Float64, Sampleable}}, + knockdown_dist::Dict{Symbol, Tuple{Float64, Sampleable}}, + cas9_behavior::Cas9Behavior) kd_phenotype_relationships = Dict{Symbol, Tuple{Float64, KDPhenotypeRelationship}}( :linear => (0.75, Linear()), :sigmoidal => (0.25, Sigmoidal()) ) - Library(knockdown_dist, max_phenotype_dists, kd_phenotype_relationships) + Library(knockdown_dist, max_phenotype_dists, kd_phenotype_relationships, cas9_behavior) end function unroll{T}(data::Dict{Symbol, Tuple{Float64, T}}) @@ -121,7 +160,8 @@ function construct_library(lib::Library, N::Int64, coverage::Int64) kd_response = response(relationship) for i in 1:coverage - barcode_knockdown = rand(lib.knockdown_dist) + barcode_quality, barcode_knockdown_dist = lib.knockdown_dist[rand(lib.knockdown_probs)] + barcode_knockdown = rand(barcode_knockdown_dist) # phenotype of barcode given its knockdown efficiency barcode_phenotype = kd_response(barcode_knockdown, max_phenotype) push!(barcodes, Barcode(gene, barcode_knockdown, barcode_phenotype, diff --git a/src/selection.jl b/src/selection.jl index a482be1..f24a008 100644 --- a/src/selection.jl +++ b/src/selection.jl @@ -3,15 +3,16 @@ Given `cells`, a vector of integers, and `guides`, a vector of barcodes performs simulated facs sorting of the cells into `bins` with the given cutoffs. `σ` refers to the spread of the phenotype during the FACS screen. """ -function facs_sort(cells::Vector{Int64}, guides::Vector{Barcode}, +function facs_sort(cells::Vector{Int64}, cell_phenotypes::Vector{Float64}, + guides::Vector{Barcode}, bins::Dict{Symbol, Tuple{Float64, Float64}}, σ::Float64) + @assert size(cells) == size(cell_phenotypes) n_cells = length(cells) observed = zeros(n_cells) - for i in 1:n_cells - cell = cells[i] - observed[i] = rand(Normal(guides[cell].theo_phenotype, σ)) - guides[cell].obs_phenotype = observed[i] + @inbounds for i in 1:n_cells + observed[i] = rand(Normal(cell_phenotypes[i], σ)) + guides[cells[i]].obs_phenotype = observed[i] end indices = sortperm(observed) cells = cells[indices] @@ -25,19 +26,22 @@ function facs_sort(cells::Vector{Int64}, guides::Vector{Barcode}, results end -function grow!(cells::AbstractArray{Int64}, guides::Vector{Barcode}, output) +function grow!(cells::AbstractArray{Int64}, cell_phenotypes::AbstractArray{Float64}, + output_c::AbstractArray{Int64}, output_p::AbstractArray{Float64}) num_inserted::Int = 0 @inbounds for i in 1:length(cells) - id::Int64 = cells[i] - ρ::Float64 = guides[id].theo_phenotype + ρ::Float64 = cell_phenotypes[i] decision = abs(ρ) < rand() ? 2 : 2^trunc(Int, 1 + sign(ρ)) - output[num_inserted+1:num_inserted+decision] = id + rng = num_inserted+1:num_inserted+decision + output_c[rng] = cells[i] + output_p[rng] = ρ num_inserted+=decision end num_inserted end function growth_assay(initial_cells::AbstractArray{Int64}, + initial_cell_phenotypes::AbstractArray{Float64}, guides::Vector{Barcode}, num_bottlenecks::Int64, bottleneck_representation::Int64; @@ -45,13 +49,19 @@ function growth_assay(initial_cells::AbstractArray{Int64}, # all cells at all timepoints cellmat = zeros(Int64, length(guides)*bottleneck_representation, num_bottlenecks) - output = Array(Int64, length(initial_cells)*4); + cpmat = zeros(Float64, size(cellmat)) + output_c = Array(Int64, length(initial_cells)*4); + output_p = Array(Float64, size(output_c)); cells = initial_cells # 1st timepoint slice + picked = Array(Int64, size(cellmat, 1)) + cell_phenotypes = initial_cell_phenotypes for k in 1:num_bottlenecks - num_inserted = grow!(cells, guides, output) - cells = sub(cellmat, :, k) - sample!(sub(output, 1:num_inserted), cells, replace=false) + num_inserted = grow!(cells, cell_phenotypes, output_c, output_p) + cells, cell_phenotypes = sub(cellmat, :, k), sub(cpmat, :, k) + sample!(collect(1:num_inserted), picked, replace=false) + copy!(sub(cellmat, :, k), output_c[picked]) + copy!(sub(cpmat, :, k), output_p[picked]) end if debug d = Dict([symbol("bin", i+1)=>cellmat[:, i] for i in 1:num_bottlenecks]) diff --git a/src/transfection.jl b/src/transfection.jl index f051111..2dfa743 100644 --- a/src/transfection.jl +++ b/src/transfection.jl @@ -1,18 +1,47 @@ -function transfect(guides::Vector{Barcode}, guide_freqs_dist::Distributions.Categorical, +function build_cells(::CRISPRi, guides::Vector{Barcode}, + guide_freq_dist::Categorical, n::Int64) + cells = rand(guide_freq_dist, n) + phenotypes = Array(Float64, n) + @inbounds @fastmath for i in 1:n + phenotypes[i] = guides[cells[i]].theo_phenotype + end + cells, phenotypes +end + +function build_cells(behav::CRISPRKO, guides::Vector{Barcode}, + guide_freq_dist::Categorical, n::Int64) + + phenotypes = Array(Float64, n); + cells = rand(guide_freq_dist, n) + ko_dist = behav.knockout_dist + dist = rand(ko_dist, n) + @inbounds @fastmath for i in 1:n + phenotypes[i] = guides[cells[i]].theo_phenotype*(dist[i] - 1)/2 + end + cells, phenotypes +end + +function transfect(lib::Library, guides::Vector{Barcode}, guide_freqs_dist::Categorical, cell_count::Int64, moi::Float64, expand_to::Int64) - cells = rand(guide_freqs_dist, round(Int64, pdf(Poisson(moi), 1)*cell_count)) + + cells, cell_phenotypes = build_cells(lib.cas9_behavior, guides, guide_freqs_dist, + round(Int64, pdf(Poisson(moi), 1)*cell_count) ) num_cells = length(cells) multiples = 1 if expand_to > num_cells multiples = ceil(Int64, expand_to/num_cells) - expansion = Array(Int64, num_cells*multiples) + expansion_c = Array(Int64, num_cells*multiples) + expansion_p = Array(Float64, num_cells*multiples) for rep in 1:multiples - @inbounds expansion[(rep-1)*num_cells+1:rep*num_cells] = cells + rng = (rep-1)*num_cells+1:rep*num_cells + expansion_c[rng] = cells + expansion_p[rng] = cell_phenotypes end - cells = expansion + cells, cell_phenotypes = expansion_c, expansion_p else - cells = sample(cells, expand_to) + picked = sample(collect(1:num_cells), expand_to, replace=false) + cells, cell_phenotypes = cells[picked], cell_phenotypes[picked] end initial_freqs = StatsBase.counts(cells, 1:length(guides)) ./ length(cells) @@ -20,30 +49,35 @@ function transfect(guides::Vector{Barcode}, guide_freqs_dist::Distributions.Cate for i in 1:length(guides) @inbounds guides[i].initial_freq = initial_freqs[i] end - cells + cells, cell_phenotypes end function transfect(setup::GrowthScreen, + lib::Library, guides::Vector{Barcode}, guide_freqs_dist::Categorical) num_guides = length(guides) cell_count = num_guides * setup.representation - initial_cells = rand(guide_freqs_dist, round(Int64, pdf(Poisson(setup.moi), 1)*cell_count)) + initial_cells, cell_phenotypes = build_cells(lib.cas9_behavior, guides, guide_freqs_dist, + round(Int64, pdf(Poisson(setup.moi), 1)*cell_count) ) target = num_guides * setup.bottleneck_representation if target < length(initial_cells) - cells = sample(initial_cells, target) + picked = sample(collect(1:length(initial_cells)), target, replace=false) + cells, cell_phenotypes = initial_cells[picked], cell_phenotypes[picked] num_doublings = -1 else - cells = copy(initial_cells) + cells, cell_phenotypes = copy(initial_cells), copy(cell_phenotypes) num_inserted = length(cells) num_doublings = 0 - output = Array(Int64, target*4); + output_c = Array(Int64, target*4) + output_p = Array(Float64, target*4) while num_inserted < target - num_inserted = grow!(cells, guides, output) - cells = copy(sub(output, 1:num_inserted)) + num_inserted = grow!(cells, cell_phenotypes, output_c, output_p) + cells = copy(sub(output_c, 1:num_inserted)) + cell_phenotypes = copy(sub(output_p, 1:num_inserted)) num_doublings += 1 end end @@ -53,5 +87,5 @@ function transfect(setup::GrowthScreen, for i in 1:length(guides) @inbounds guides[i].initial_freq = initial_freqs[i] end - cells, num_doublings + cells, cell_phenotypes end diff --git a/test/runtests.jl b/test/runtests.jl index 51bdba9..75d86d7 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,4 +1,5 @@ using Base.Test +using DataStructures include("../src/run.jl") @@ -16,4 +17,48 @@ sigmoidal_response = response(Sigmoidal()) @test sigmoidal_response(0.0, -1.0) > -0.001 @test sigmoidal_response(1.0, -1.0) < -0.999 +# Test the two different CRISPR types +lib = Library(CRISPRi()) +setup = FacsScreen() +guides, guide_freqs_dist = setup_screen(setup, lib) +cells, cell_phenotypes = build_cells(lib.cas9_behavior, guides, guide_freqs_dist, 10^7) + +function convert_cells_to_pop(cells, cell_phenotypes) + cells_to_phenotypes = [DefaultDict(Float64, Int64, 0) for _ in 1:length(guides)] + + @inbounds for i in eachindex(cells) + cells_to_phenotypes[cells[i]][cell_phenotypes[i]] += 1 + end + + cells_to_phenotypes +end + +# In a CRISPRi screen all cells should have the same phenotype +@test all(map(length, convert_cells_to_pop(cells, cell_phenotypes)) .== 1) + +lib = Library(CRISPRKO()) +guides, guide_freqs_dist = setup_screen(setup, lib) +cells, cell_phenotypes = build_cells(lib.cas9_behavior, guides, guide_freqs_dist, 10^7) + +arr = convert_cells_to_pop(cells, cell_phenotypes); +nonzeros = arr[find(x->length(x) != 1, arr)] +results = zeros(CRISPRKO().knockout_dist.K, length(nonzeros)) +for (idx, guide) in enumerate(nonzeros) + if -0.0 in keys(guide) + @assert guide[0.0] == 0 + delete!(guide, 0.0) + ks = sort(collect(keys(guide))) + tot = sum(values(guide)) + results[:, idx] = [guide[key]/tot for key in ks] + else + @assert guide[0.0] != 0 + ks = sort(collect(keys(guide)), rev=true) + tot = sum(values(guide)) + results[:, idx] = [guide[key]/tot for key in ks] + end +end + +@test abs(mean(results[1, :] ./ results[2, :]) - 1) < 0.025 +@test abs(mean(results[1, :] ./ results[3, :])/4 - 1) < 0.025 + println("All tests pass") From 05c0bfe9eb40991d8b0fc0e78399ad1e735b35e8 Mon Sep 17 00:00:00 2001 From: Tamas Nagy Date: Sat, 21 May 2016 18:06:53 -0700 Subject: [PATCH 2/2] completely generalize run_exp() finally closing #10 --- src/designs.jl | 53 ++++++++++----------------------------------- src/library.jl | 6 +++-- src/selection.jl | 26 ++++++++++++++-------- src/transfection.jl | 16 ++++++++++---- 4 files changed, 44 insertions(+), 57 deletions(-) diff --git a/src/designs.jl b/src/designs.jl index d005057..a2c9114 100644 --- a/src/designs.jl +++ b/src/designs.jl @@ -1,51 +1,20 @@ -function setup_screen(setup::ScreenSetup, lib::Library) - guides, guide_freqs = construct_library(lib, setup.num_genes, setup.coverage) - guide_freqs_dist = Categorical(guide_freqs) - guides, guide_freqs_dist -end - -function teardown_screen(setup::ScreenSetup, - guides::Vector{Barcode}, - bin_cells::Dict{Symbol,Vector{Int64}}, - processing_func::Function) - - freqs = counts_to_freqs(bin_cells, length(guides)) - # uniform for now for all bins - seq_depths = Dict{Symbol, Int64}([binname=>setup.seq_depth for binname in keys(bin_cells)]) - raw_data = sequencing(seq_depths, guides, freqs) - genes = differences_between_bins(raw_data) - processing_func(genes) -end - -""" -Runs a FACS screen given the parameters specified in `setup` using the -library `lib` and applies the `processing_func` function to the result. -""" -function run_exp(setup::FacsScreen, lib::Library, processing_func::Function; run_idx=-1) - - guides, guide_freqs_dist = setup_screen(setup, lib) - - min_perc = minimum([range[2] - range[1] for (binname, range) in setup.bin_info]) - expand_to = round(Int64, (setup.num_cells_per_bin * length(guides))/min_perc) - - cells, cell_phenotypes = transfect(lib, guides, guide_freqs_dist, length(guides)*setup.representation, setup.moi, expand_to) - - bin_cells = facs_sort(cells, cell_phenotypes, guides, setup.bin_info, setup.σ) - - [teardown_screen(setup, guides, bin_cells, processing_func)...; as_array(setup)...; run_idx] -end - """ -Runs a growth screen given the parameters specified in `setup` using the +Runs a screen given the parameters specified in `setup` using the library `lib` and applies the `processing_func` function to the result. """ -function run_exp(setup::GrowthScreen, lib::Library, processing_func::Function; run_idx=-1) +function run_exp(setup::ScreenSetup, lib::Library, processing_func::Function; run_idx=-1) - guides, guide_freqs_dist = setup_screen(setup, lib) + guides, guide_freqs_dist = construct_library(setup, lib) cells, cell_phenotypes = transfect(setup, lib, guides, guide_freqs_dist) - bin_cells = growth_assay(cells, cell_phenotypes, guides, setup.num_bottlenecks, setup.bottleneck_representation) + bin_cells = select(setup, cells, cell_phenotypes, guides) + + freqs = counts_to_freqs(bin_cells, length(guides)) + # uniform for now for all bins + seq_depths = Dict{Symbol, Int64}([binname=>setup.seq_depth for binname in keys(bin_cells)]) + raw_data = sequencing(seq_depths, guides, freqs) + genes = differences_between_bins(raw_data) - [teardown_screen(setup, guides, bin_cells, processing_func)...; as_array(setup)...; run_idx] + [processing_func(genes)...; as_array(setup)...; run_idx] end diff --git a/src/library.jl b/src/library.jl index 6032eae..00ebcca 100644 --- a/src/library.jl +++ b/src/library.jl @@ -150,8 +150,10 @@ end Constructs the guide library for `N` genes with `coverage` number of guides per gene. Returns a tuple of guides and their relative frequencies (assigned randomly). """ -function construct_library(lib::Library, N::Int64, coverage::Int64) +function construct_library(setup::ScreenSetup, lib::Library) barcodes = Barcode[] + N = setup.num_genes + coverage = setup.coverage for gene in 1:N @@ -174,7 +176,7 @@ function construct_library(lib::Library, N::Int64, coverage::Int64) # a 10-fold difference between the 95th/5th percentiles vals = 10.^rand(Normal(0, 1/3.29), N*coverage) guide_freqs = vals/sum(vals) - barcodes, guide_freqs + barcodes, Categorical(guide_freqs) end """ diff --git a/src/selection.jl b/src/selection.jl index f24a008..3fba08c 100644 --- a/src/selection.jl +++ b/src/selection.jl @@ -3,10 +3,14 @@ Given `cells`, a vector of integers, and `guides`, a vector of barcodes performs simulated facs sorting of the cells into `bins` with the given cutoffs. `σ` refers to the spread of the phenotype during the FACS screen. """ -function facs_sort(cells::Vector{Int64}, cell_phenotypes::Vector{Float64}, - guides::Vector{Barcode}, - bins::Dict{Symbol, Tuple{Float64, Float64}}, σ::Float64) +function select(setup::FacsScreen, + cells::Vector{Int64}, + cell_phenotypes::Vector{Float64}, + guides::Vector{Barcode}; + debug = false) + σ = setup.σ + bins = setup.bin_info @assert size(cells) == size(cell_phenotypes) n_cells = length(cells) observed = zeros(n_cells) @@ -40,13 +44,17 @@ function grow!(cells::AbstractArray{Int64}, cell_phenotypes::AbstractArray{Float num_inserted end -function growth_assay(initial_cells::AbstractArray{Int64}, - initial_cell_phenotypes::AbstractArray{Float64}, - guides::Vector{Barcode}, - num_bottlenecks::Int64, - bottleneck_representation::Int64; - debug=false) +""" +Growth Screen selection +""" +function select(setup::GrowthScreen, + initial_cells::AbstractArray{Int64}, + initial_cell_phenotypes::AbstractArray{Float64}, + guides::Vector{Barcode}; + debug=false) + bottleneck_representation = setup.bottleneck_representation + num_bottlenecks = setup.num_bottlenecks # all cells at all timepoints cellmat = zeros(Int64, length(guides)*bottleneck_representation, num_bottlenecks) cpmat = zeros(Float64, size(cellmat)) diff --git a/src/transfection.jl b/src/transfection.jl index 2dfa743..2738ad8 100644 --- a/src/transfection.jl +++ b/src/transfection.jl @@ -21,8 +21,16 @@ function build_cells(behav::CRISPRKO, guides::Vector{Barcode}, cells, phenotypes end -function transfect(lib::Library, guides::Vector{Barcode}, guide_freqs_dist::Categorical, - cell_count::Int64, moi::Float64, expand_to::Int64) +function transfect(setup::FacsScreen, + lib::Library, + guides::Vector{Barcode}, + guide_freqs_dist::Categorical) + + moi = setup.moi + num_guides = length(guides) + cell_count = num_guides * setup.representation + min_perc = minimum([range[2] - range[1] for (binname, range) in setup.bin_info]) + expand_to = round(Int64, (setup.num_cells_per_bin * length(guides))/min_perc) cells, cell_phenotypes = build_cells(lib.cas9_behavior, guides, guide_freqs_dist, round(Int64, pdf(Poisson(moi), 1)*cell_count) ) @@ -44,9 +52,9 @@ function transfect(lib::Library, guides::Vector{Barcode}, guide_freqs_dist::Cate cells, cell_phenotypes = cells[picked], cell_phenotypes[picked] end - initial_freqs = StatsBase.counts(cells, 1:length(guides)) ./ length(cells) + initial_freqs = StatsBase.counts(cells, 1:num_guides) ./ length(cells) - for i in 1:length(guides) + for i in 1:num_guides @inbounds guides[i].initial_freq = initial_freqs[i] end cells, cell_phenotypes