Skip to content

Commit

Permalink
Add Recombination mutation_positions
Browse files Browse the repository at this point in the history
  • Loading branch information
soldatmat committed Mar 13, 2024
1 parent fba2810 commit 0154290
Show file tree
Hide file tree
Showing 2 changed files with 149 additions and 52 deletions.
37 changes: 26 additions & 11 deletions src/mutagenesis/recombination.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,41 +3,50 @@ using StatsBase
"""
Creates all recombinations of parent sequences.

    Recombination{T}(alphabet_extractor::T, mutation_positions::Union{Vector{Int},Nothing}, n::Union{Int,Nothing})
    Recombination(alphabet_extractor::T, mutation_positions::Union{Vector{Int},Nothing}, n::Union{Int,Nothing})
    Recombination{T}(alphabet_extractor::T; mutation_positions::Union{Vector{Int},Nothing}=nothing, n::Union{Int,Nothing}=nothing)
    Recombination(alphabet_extractor::T; mutation_positions::Union{Vector{Int},Nothing}=nothing, n::Union{Int,Nothing}=nothing)

Constructs `Recombination{T}`.

# Arguments
- `alphabet_extractor::T`: Structure called to obtain positional alphabets from parent sequences.
- `mutation_positions::Union{Vector{Int},Nothing}`: If provided, `parents` will be recombined only at `mutation_positions`.
  Rest of the sequences will be taken from the first parent.
- `n::Union{Int,Nothing}`: If provided, `n` sequences will be sampled randomly from the recombined mutants.

    Recombination(; mutation_positions::Union{Vector{Int},Nothing}=nothing, n::Union{Int,Nothing}=nothing)

Constructs `Recombination{AlphabetExtractor}`.

# Keywords
- `mutation_positions::Union{Vector{Int},Nothing}`: If provided, `parents` will be recombined only at `mutation_positions`.
  Rest of the sequences will be taken from the first parent.
- `n::Union{Int,Nothing}`: If provided, `n` sequences will be sampled randomly from the recombined mutants.
"""
struct Recombination{T} <: Mutagenesis where {T<:AbstractAlphabetExtractor}
# NOTE(review): the `where` clause is attached to the supertype expression rather than
# to the struct parameter, so `T` may not actually be constrained here — confirm whether
# `struct Recombination{T<:AbstractAlphabetExtractor} <: Mutagenesis` was intended.

# Callable applied to the parent sequences (or to their residues at
# `mutation_positions`) to obtain the per-position alphabets.
alphabet_extractor::T
# Positions at which parents are recombined; `nothing` recombines every position.
mutation_positions::Union{Vector{Int},Nothing}
# Number of mutants to sample without replacement; `nothing` keeps all mutants.
n::Union{Int,Nothing}
end

# Keyword constructors. Both `mutation_positions` and `n` default to `nothing` and are
# forwarded to the three-argument positional constructor of the struct.
#
# The earlier `n`-only variants are intentionally gone: they had the same positional
# signatures as the methods below (keywords do not take part in dispatch), so they were
# overwritten anyway, and they forwarded to a two-argument constructor that the
# three-field struct does not provide.
function Recombination{T}(alphabet_extractor::T; mutation_positions=nothing, n=nothing) where {T}
    return Recombination(alphabet_extractor, mutation_positions, n)
end

function Recombination(alphabet_extractor::AbstractAlphabetExtractor; mutation_positions=nothing, n=nothing)
    return Recombination(alphabet_extractor, mutation_positions, n)
end

# Convenience form that uses a default-constructed `AlphabetExtractor`.
function Recombination(; mutation_positions=nothing, n=nothing)
    return Recombination(AlphabetExtractor(); mutation_positions, n)
end

# Recombines `parents` into mutant sequences.
#
# When `m.mutation_positions` is `nothing`, alphabets are extracted from the whole parent
# sequences. Otherwise only the residues at those positions are recombined and every
# other residue is copied from the first parent.
# When `m.n` is not `nothing`, `m.n` mutants are sampled without replacement; the sampling
# call is expected to throw when `m.n` exceeds the number of available mutants.
#
# Returns an empty `Vector{Vector{Char}}` for empty `parents`. `parents` is not modified.
function (m::Recombination)(parents::AbstractVector{Vector{Char}})
    @assert DESilico.same_length_sequences(parents)
    isempty(parents) && return Vector{Vector{Char}}([])

    positions = m.mutation_positions
    # Extract alphabets exactly once, from the residues that actually take part in the
    # recombination (previously they were also extracted from the full parents and the
    # result was thrown away).
    parent_residues = isnothing(positions) ? parents : map(p -> p[positions], parents)
    alphabets = m.alphabet_extractor(parent_residues)
    mutants = _recombine_symbols(alphabets)

    if !isnothing(m.n)
        mutants = sample(mutants, m.n, replace=false)
    end
    if !isnothing(positions)
        # `mutants` only holds the residues at `positions`; splice them into the first
        # parent to obtain full-length sequences.
        mutants = _build_mutants(parents[1], positions, mutants)
    end
    return mutants
end

Expand All @@ -58,3 +67,9 @@ function _recombine_symbols(alphabets::Vector{Set{Char}})
end
return mutant_library
end

"""
    _build_mutants(parent, mutation_positions, mutant_residues)

Build full-length mutant sequences from `parent`.

For every entry of `mutant_residues`, a copy of `parent` is created and its residues at
`mutation_positions` are replaced, in order, by that entry's residues. All other residues
are taken from `parent`, which is left unmodified.

# Arguments
- `parent::AbstractVector{Char}`: Sequence providing the residues outside `mutation_positions`.
- `mutation_positions::AbstractVector{Int}`: Indices into `parent` to overwrite. They do not
  need to be sorted; `mutant_residues[k][i]` is written to `mutation_positions[i]`.
- `mutant_residues::AbstractVector{Vector{Char}}`: One residue vector per mutant, each with
  the same length as `mutation_positions`.

# Returns
A vector with one `Vector{Char}` of `length(parent)` per entry of `mutant_residues`.

# Throws
- `DimensionMismatch`: if a residue vector and `mutation_positions` differ in length.
- `BoundsError`: if a position lies outside `parent`.
"""
function _build_mutants(parent::AbstractVector{Char}, mutation_positions::AbstractVector{Int}, mutant_residues::AbstractVector{Vector{Char}})
    return map(mutant_residues) do residues
        if length(residues) != length(mutation_positions)
            throw(DimensionMismatch(
                "got $(length(residues)) residues for $(length(mutation_positions)) mutation positions",
            ))
        end
        # `collect` always allocates a fresh `Vector{Char}`, so `parent` is never mutated.
        mutant = collect(parent)
        # Indexed assignment places each residue at its own position, so the result does
        # not depend on the positions being sorted and also works for no positions at all.
        mutant[mutation_positions] = residues
        return mutant
    end
end
164 changes: 123 additions & 41 deletions test/unit/mutagenesis/recombination.jl
Original file line number Diff line number Diff line change
@@ -1,100 +1,182 @@
@testset "recombination.jl" begin
@testset "_recombine_symbols" begin
alphabets = [
Set(['A', 'C']),
Set(['A']),
Set(['B', 'D']),
]
mutants = DESilico._recombine_symbols(alphabets)
@test length(mutants) == 4
@test Set(mutants) == Set([
['A', 'A', 'B'],
['A', 'A', 'D'],
['C', 'A', 'B'],
['C', 'A', 'D'],
])
end

@testset "Constructors" begin
m = DESilico.Recombination()
@test typeof(m) == DESilico.Recombination{DESilico.AlphabetExtractor}
@test typeof(m.alphabet_extractor) == DESilico.AlphabetExtractor
@test isnothing(m.mutation_positions)
@test isnothing(m.n)

ae = DESilico.AlphabetExtractor()
m = DESilico.Recombination(ae)
@test typeof(m) == DESilico.Recombination{DESilico.AlphabetExtractor}
@test typeof(m.alphabet_extractor) == DESilico.AlphabetExtractor
@test isnothing(m.mutation_positions)
@test isnothing(m.n)

ae = DESilico.AlphabetExtractor()
m = DESilico.Recombination{DESilico.AlphabetExtractor}(ae)
@test typeof(m) == DESilico.Recombination{DESilico.AlphabetExtractor}
@test typeof(m.alphabet_extractor) == DESilico.AlphabetExtractor
@test isnothing(m.mutation_positions)
@test isnothing(m.n)

m = DESilico.Recombination(; n=3)
@test typeof(m) == DESilico.Recombination{DESilico.AlphabetExtractor}
@test typeof(m.alphabet_extractor) == DESilico.AlphabetExtractor
@test isnothing(m.mutation_positions)
@test m.n == 3

m = DESilico.Recombination(; mutation_positions=[1, 13])
@test typeof(m) == DESilico.Recombination{DESilico.AlphabetExtractor}
@test typeof(m.alphabet_extractor) == DESilico.AlphabetExtractor
@test m.mutation_positions == [1, 13]
@test isnothing(m.n)

m = DESilico.Recombination(; mutation_positions=[1, 13], n=3)
@test typeof(m) == DESilico.Recombination{DESilico.AlphabetExtractor}
@test typeof(m.alphabet_extractor) == DESilico.AlphabetExtractor
@test m.mutation_positions == [1, 13]
@test m.n == 3

# Positional construction takes all three fields. The former two-argument call
# `Recombination(ae, 3)` was dropped: no visible constructor accepts two positional
# arguments, and its result was immediately reassigned anyway.
ae = DESilico.AlphabetExtractor()
m = DESilico.Recombination(ae, [1, 13], 3)
@test typeof(m) == DESilico.Recombination{DESilico.AlphabetExtractor}
@test typeof(m.alphabet_extractor) == DESilico.AlphabetExtractor
@test m.mutation_positions == [1, 13]
@test m.n == 3

ae = DESilico.AlphabetExtractor()
m = DESilico.Recombination(ae; n=3)
@test typeof(m) == DESilico.Recombination{DESilico.AlphabetExtractor}
@test typeof(m.alphabet_extractor) == DESilico.AlphabetExtractor
@test isnothing(m.mutation_positions)
@test m.n == 3

ae = DESilico.AlphabetExtractor()
m = DESilico.Recombination(ae; mutation_positions=[1, 13])
@test typeof(m) == DESilico.Recombination{DESilico.AlphabetExtractor}
@test typeof(m.alphabet_extractor) == DESilico.AlphabetExtractor
@test m.mutation_positions == [1, 13]
@test isnothing(m.n)

ae = DESilico.AlphabetExtractor()
m = DESilico.Recombination(ae; mutation_positions=[1, 13], n=3)
@test typeof(m) == DESilico.Recombination{DESilico.AlphabetExtractor}
@test typeof(m.alphabet_extractor) == DESilico.AlphabetExtractor
@test m.mutation_positions == [1, 13]
@test m.n == 3

# Explicitly parameterized positional construction takes all three fields. The former
# two-argument call `Recombination{AlphabetExtractor}(ae, 3)` was dropped: no visible
# constructor accepts two positional arguments, and its result was immediately reassigned.
ae = DESilico.AlphabetExtractor()
m = DESilico.Recombination{DESilico.AlphabetExtractor}(ae, [1, 13], 3)
@test typeof(m) == DESilico.Recombination{DESilico.AlphabetExtractor}
@test typeof(m.alphabet_extractor) == DESilico.AlphabetExtractor
@test m.mutation_positions == [1, 13]
@test m.n == 3

ae = DESilico.AlphabetExtractor()
m = DESilico.Recombination{DESilico.AlphabetExtractor}(ae; n=3)
@test typeof(m) == DESilico.Recombination{DESilico.AlphabetExtractor}
@test typeof(m.alphabet_extractor) == DESilico.AlphabetExtractor
@test isnothing(m.mutation_positions)
@test m.n == 3

ae = DESilico.AlphabetExtractor()
m = DESilico.Recombination{DESilico.AlphabetExtractor}(ae; mutation_positions=[1, 13])
@test typeof(m) == DESilico.Recombination{DESilico.AlphabetExtractor}
@test typeof(m.alphabet_extractor) == DESilico.AlphabetExtractor
@test m.mutation_positions == [1, 13]
@test isnothing(m.n)

ae = DESilico.AlphabetExtractor()
m = DESilico.Recombination{DESilico.AlphabetExtractor}(ae; mutation_positions=[1, 13], n=3)
@test typeof(m) == DESilico.Recombination{DESilico.AlphabetExtractor}
@test typeof(m.alphabet_extractor) == DESilico.AlphabetExtractor
@test m.mutation_positions == [1, 13]
@test m.n == 3
end

@testset "Recombination call" begin
parents = [
['A', 'A', 'B'],
['C', 'A', 'D'],
@testset "_recombine_symbols" begin
alphabets = [
Set(['A', 'C']),
Set(['A']),
Set(['B', 'D']),
]
m = DESilico.Recombination()
mutants = m(parents)
mutants = DESilico._recombine_symbols(alphabets)
@test length(mutants) == 4
@test Set(mutants) == Set([
['A', 'A', 'B'],
['A', 'A', 'D'],
['C', 'A', 'B'],
['C', 'A', 'D'],
])
@test m(Vector{Vector{Char}}([])) == Vector{Vector{Char}}([])
@test parents == [['A', 'A', 'B'], ['C', 'A', 'D']]
end

m = DESilico.Recombination(; n=5)
@test_throws Exception m(parents)
@testset "Recombination call" begin
@testset "without mutation_positions" begin
parents = [
['A', 'A', 'B'],
['C', 'A', 'D'],
]
m = DESilico.Recombination()
mutants = m(parents)
@test length(mutants) == 4
@test Set(mutants) == Set([
['A', 'A', 'B'],
['A', 'A', 'D'],
['C', 'A', 'B'],
['C', 'A', 'D'],
])
@test m(Vector{Vector{Char}}([])) == Vector{Vector{Char}}([])
@test parents == [['A', 'A', 'B'], ['C', 'A', 'D']]

parents = [
['A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'],
['B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B'],
]
m = DESilico.Recombination(; n=1023)
mutants = m(parents)
@test length(mutants) == 1023
@test length(Set(mutants)) == 1023
@test m(Vector{Vector{Char}}([])) == Vector{Vector{Char}}([])
@test parents == [
['A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'],
['B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B'],
]
m = DESilico.Recombination(; n=5)
@test_throws Exception m(parents)

parents = [
['A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'],
['B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B'],
]
m = DESilico.Recombination(; n=1023)
mutants = m(parents)
@test length(mutants) == 1023
@test length(Set(mutants)) == 1023
@test m(Vector{Vector{Char}}([])) == Vector{Vector{Char}}([])
@test parents == [
['A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'],
['B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B'],
]
end
@testset "with mutation_positions" begin
parents = [
['A', 'B', 'C'],
['D', 'E', 'F'],
]
m = DESilico.Recombination(; mutation_positions=[1, 3])
mutants = m(parents)
@test length(mutants) == 4
@test Set(mutants) == Set([
['A', 'B', 'C'],
['A', 'B', 'F'],
['D', 'B', 'C'],
['D', 'B', 'F'],
])
@test m(Vector{Vector{Char}}([])) == Vector{Vector{Char}}([])
@test parents == [['A', 'B', 'C'], ['D', 'E', 'F']]

parents = [
['A', 'B', 'C', 'D', 'E', 'F', 'G'],
['H', 'I', 'J', 'K', 'L', 'M', 'N'],
]
m = DESilico.Recombination(; mutation_positions=[3, 6])
mutants = m(parents)
@test length(mutants) == 4
@test Set(mutants) == Set([
['A', 'B', 'C', 'D', 'E', 'F', 'G'],
['A', 'B', 'C', 'D', 'E', 'M', 'G'],
['A', 'B', 'J', 'D', 'E', 'F', 'G'],
['A', 'B', 'J', 'D', 'E', 'M', 'G'],
])
@test parents == [['A', 'B', 'C', 'D', 'E', 'F', 'G'], ['H', 'I', 'J', 'K', 'L', 'M', 'N'],]
end
end
end

0 comments on commit 0154290

Please sign in to comment.