Skip to content

Commit

Permalink
Add positions argument to alphabet extractors
Browse files Browse the repository at this point in the history
  • Loading branch information
soldatmat committed Mar 13, 2024
1 parent 0154290 commit 93d8c30
Show file tree
Hide file tree
Showing 6 changed files with 27 additions and 18 deletions.
14 changes: 14 additions & 0 deletions src/alphabet_extractor/abstract_alphabet_extractor.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"""
    AbstractAlphabetExtractor

Supertype of callable objects that extract an alphabet for each position of sequences
with the same length.

Structures derived from this type have to implement the following method:

    (::CustomAlphabetExtractor)(sequences::AbstractVector{Vector{Char}}, positions::AbstractVector{Int})

This method should return an alphabet from `sequences` for each position in `positions`
as a subtype of `AbstractVector{Set{Char}}`.
This method can assume that `sequences` have the same length.

To extract alphabets at all positions, the following call can be used:

    (::CustomAlphabetExtractor)(sequences::AbstractVector{Vector{Char}})

It forwards to the two-argument method with every index of the first sequence and
returns an empty `Vector{Set{Char}}` when `sequences` is empty.
"""
abstract type AbstractAlphabetExtractor end

function (ae::AbstractAlphabetExtractor)(sequences::AbstractVector{Vector{Char}})
    # Without any sequence there is no position to index, hence no alphabet to extract.
    isempty(sequences) && return Set{Char}[]
    # `collect` hands the implementation a plain `Vector{Int}` of all positions.
    return ae(sequences, collect(eachindex(first(sequences))))
end
8 changes: 4 additions & 4 deletions src/alphabet_extractor/alphabet_extractor.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@ Constructs `AlphabetExtractor`.
"""
struct AlphabetExtractor <: AbstractAlphabetExtractor end

# Extract the alphabet observed at each requested position.
#
# `sequences` are the input sequences; every entry of `positions` is used to index
# into each of them. The result has one `Set{Char}` per entry of `positions`, in the
# same order as `positions` (so `alphabets[i]` belongs to `positions[i]`, not to
# sequence position `i`).
#
# The sequences-only call is not defined here; it is provided by the
# `AbstractAlphabetExtractor` fallback, which forwards all positions to this method.
function (::AlphabetExtractor)(sequences::AbstractVector{Vector{Char}}, positions::AbstractVector{Int})
    alphabets = Vector{Set{Char}}(undef, length(positions))
    # `slot` is the index into the output, `position` the index into each sequence;
    # the two differ whenever `positions` is not `1:length(sequence)`.
    for (slot, position) in enumerate(positions)
        alphabets[slot] = Set{Char}(sequence[position] for sequence in sequences)
    end
    return alphabets
end
1 change: 1 addition & 0 deletions src/alphabet_extractor/include.jl
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
include("abstract_alphabet_extractor.jl")
include("alphabet_extractor.jl")
5 changes: 2 additions & 3 deletions src/mutagenesis/recombination.jl
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,13 @@ Recombination(; mutation_positions=nothing, n=nothing) = Recombination(AlphabetE
# Recombine the symbols observed in `parents` into mutant sequences.
#
# Steps, as visible in this method:
#   1. assert that all parents have the same length and return an empty
#      `Vector{Vector{Char}}` when there are no parents;
#   2. extract one alphabet per position with `m.alphabet_extractor`, restricted to
#      `m.mutation_positions` when those are set;
#   3. combine the alphabets with `_recombine_symbols`;
#   4. when `m.n` is set, keep `m.n` of the combinations, sampled without replacement;
#   5. when `m.mutation_positions` is set, pass the combinations through
#      `_build_mutants` together with the first parent.
function (m::Recombination)(parents::AbstractVector{Vector{Char}})
    @assert DESilico.same_length_sequences(parents)
    isempty(parents) && return Vector{Char}[]

    # Ask the extractor for the alphabets exactly once; the positions-aware call
    # replaces slicing the parents beforehand.
    alphabets = if isnothing(m.mutation_positions)
        m.alphabet_extractor(parents)
    else
        m.alphabet_extractor(parents, m.mutation_positions)
    end

    mutants = _recombine_symbols(alphabets)
    if !isnothing(m.n)
        mutants = sample(mutants, m.n, replace=false)
    end
    if !isnothing(m.mutation_positions)
        # Applied a single time: at this point `mutants` only holds the symbols for the
        # mutation positions. Presumably `_build_mutants` places them into the first
        # parent at those positions - TODO confirm against its definition.
        mutants = _build_mutants(parents[1], m.mutation_positions, mutants)
    end
    return mutants
end
Expand Down
10 changes: 0 additions & 10 deletions src/types/types.jl
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,3 @@ Structures derived from this type have to implement the following method:
This method should return the sequences' fitness values as a subtype of `AbstractVector{Float64}`.
"""
abstract type Screening end

# NOTE(review): `AbstractAlphabetExtractor` is also declared in
# `src/alphabet_extractor/abstract_alphabet_extractor.jl`, where the required method
# additionally takes `positions`. This copy is listed as deleted from
# `src/types/types.jl` - confirm that only one declaration remains.
"""
    AbstractAlphabetExtractor

Extracts an alphabet for each position of sequences with same length.

Structures derived from this type have to implement the following method:

    (::CustomAlphabetExtractor)(sequences::AbstractVector{Vector{Char}})

This method should return an alphabet for each position in `sequences` as a subtype of `AbstractVector{Set{Char}}`.
This method can assume that `sequences` have the same length.
"""
abstract type AbstractAlphabetExtractor end
7 changes: 6 additions & 1 deletion test/unit/alphabet_extractor/alphabet_extractor.jl
Original file line number Diff line number Diff line change
@@ -1,14 +1,19 @@
# Unit tests for `DESilico.AlphabetExtractor`: construction, extraction at every
# position, and extraction at an explicit (reordered) subset of positions.
@testset "alphabet_extractor.jl" begin
    extractor = DESilico.AlphabetExtractor()
    @test typeof(extractor) == DESilico.AlphabetExtractor

    first_parent = ['A', 'A', 'A']
    second_parent = ['A', 'B', 'C']
    parents = [first_parent, second_parent]

    # Sequences only: one alphabet per position of the parents.
    per_position = extractor(parents)
    @test length(per_position) == 3
    @test per_position[1] == Set("A")
    @test per_position[2] == Set("AB")
    @test per_position[3] == Set("AC")

    # Explicit positions: results follow the order of the requested positions.
    selected = extractor(parents, [3, 1])
    @test length(selected) == 2
    @test selected[1] == Set("AC")
    @test selected[2] == Set("A")
end

0 comments on commit 93d8c30

Please sign in to comment.