# Prototype new concepts before introduction into test and src

In [1]:
##########
#Global Constants
##########
# Scalar mass constants, all stored as Float32.
# NOTE(review): values look like monoisotopic masses in Daltons — confirm units.
# H2O is added to the residue sum for 'y' and 'p' ions in the Frag constructor.
const H2O = Float32(18.010565)
# PROTON is multiplied by the charge in the Frag constructor.
const PROTON = Float32(1.0072764)
# NEUTRON is multiplied by the isotope index in the Frag constructor.
const NEUTRON = Float32(1.00335)
# Lookup from one-letter amino acid code to residue mass; consulted by the
# AA constructor. 'I' and 'L' map to the same value.
const AA_to_mass =
Dict{Char, Float32}(
        'A' => 71.03711,
        'R' => 156.10111,
        'N' => 114.04293,
        'D' => 115.02694,
        'C' => 103.00919,
        'E' => 129.04259,
        'Q' => 128.05858,
        'G' => 57.02146,
        'H' => 137.05891,
        'I' => 113.08406,
        'L' => 113.08406,
        'K' => 128.09496,
        'M' => 131.04049,
        'F' => 147.06841,
        'P' => 97.05276,
        'S' => 87.03203,
        'T' => 101.04768,
        'W' => 186.07931,
        'Y' => 163.06333,
        'V' => 99.06841,
        'U' => 150.95363,
        'O' => 237.14773
)

# Named modification masses, keyed by the text found between the brackets of a
# modified residue such as "C[Carb]". Passed around as `mods_dict`.
const default_mods = 
Dict{String, Float32}(
    "Carb" => 57.021464
)
##########
#Representation of Amino Acid
##########
"""
    AA(aa::Char)

Amino acid identified by its one-letter code `aa`, stored together with the
mass looked up in `AA_to_mass`.

# Throws
- `ErrorException` if `aa` is not a key of `AA_to_mass`.
"""
struct AA
    aa::Char
    mass::Float32
    #Constructor for amino acid. Restrict inputs
    #to valid amino acid symbols and assign correct mass
    function AA(aa::Char)
        # Explicit key check instead of try/catch, so that only a missing key
        # (and no unrelated failure) is reported as an invalid amino acid.
        haskey(AA_to_mass, aa) ||
            throw(ErrorException("The character $aa cannot be interpreted as an amino acid!"))
        return new(aa, AA_to_mass[aa])
    end
end

#Accessors for the two fields of an AA

# Mass stored on the amino acid.
function getMass(aa::AA)
    return aa.mass
end

# One-letter code stored on the amino acid.
function getAA(aa::AA)
    return aa.aa
end

export AA
##########
#Modification. Simple representation of a mass modification
##########
"""
    Mod(name::String, mass::Float32)

A mass modification: the text it was built from (`name`) and the mass it
contributes (`mass`).
"""
struct Mod
    name::String
    mass::Float32
end

"""
    Mod(mod::String, mods_dict::Dict{String, Float32})

Parse a modification string such as `"K[+8.014199]"` or `"C[Carb]"`.

The string is matched against `^[A-Z]\\[(.*)\\]\$`:

- `"K[+8.014199]"`: the bracket content starts with `+`, so the rest of it is
  parsed as the modification mass (`8.014199`).
- `"C[Carb]"`: the bracket content (`"Carb"`) is used as a key into
  `mods_dict`, whose value is the modification mass.
- Anything that does not match the pattern (e.g. `"K"`) yields a modification
  with mass `0`.

In every case the full input string becomes the modification name.

# Throws
- `ErrorException` if the bracket content starts with `+` but is not a number,
  or if it is not a key of `mods_dict`.
"""
function Mod(mod::String, mods_dict::Dict{String, Float32})
    m = match(r"^[A-Z]\[(.*)\]$", mod)

    # No bracketed part: keep the text as the name and add no mass.
    m === nothing && return Mod(mod, 0.0f0)

    label = m[1]
    mass = if startswith(label, "+")
        tryparse(Float32, label[2:end])     # "+8.014199" -> 8.014199
    else
        get(mods_dict, label, nothing)      # "Carb" -> mods_dict["Carb"]
    end

    # Both lookups signal failure with `nothing`; report it the same way.
    mass === nothing && throw(ErrorException("$mod could not be parsed as given"))
    return Mod(mod, mass)
end

#Parse a modification string when no lookup table is supplied
function Mod(mod::String)
    return Mod(mod, Dict{String, Float32}())
end

#Build a modification named after a single character
function Mod(name::Char, mass::Float32)
    return Mod(string(name), mass)
end

#Empty modification: blank name, zero mass
function Mod()
    return Mod("", 0.0)
end

#Accessors for the fields of a Mod
function getMass(mod::Mod)
    return mod.mass
end

function getName(mod::Mod)
    return mod.name
end
export Mod

##########
#Residue. Implementation of amino acid with custom mass modifications
##########
# An amino acid, the modification attached to it, and the stored mass.
struct Residue
    aa::AA
    mod::Mod
    mass::Float32
end

#Residue(AA('A'))
# Unmodified residue: empty Mod, mass taken from the amino acid alone.
Residue(aa::AA) = Residue(aa, Mod(), getMass(aa))

#Residue('A')
function Residue(aa::Char)
    return Residue(AA(aa))
end

# Modified residue: stored mass is modification mass plus amino acid mass.
function Residue(aa::AA, mod::Mod)
    total = getMass(mod) + getMass(aa)
    return Residue(aa::AA, mod, total)
end

"""
    Residue(residue::String, mods_dict::Dict{String, Float32})
    Residue(residue::String)

Build a `Residue` from text such as `"K"`, `"K[+8.014199]"` or `"C[Carb]"`.

The first character is the amino acid. When the string is longer than one
character the whole string is handed to `Mod(residue, mods_dict)`; a single
character gets the empty modification `Mod()`. The one-argument method uses an
empty `mods_dict`.

# Throws
- `ArgumentError` if `residue` is empty.
- Whatever `AA` or `Mod` throw for an unknown amino acid or an unparsable
  modification.
"""
function Residue(residue::String, mods_dict::Dict{String, Float32})
    isempty(residue) &&
        throw(ArgumentError("cannot build a Residue from an empty string"))
    # Build the amino acid first so an invalid letter is reported before any
    # modification problem.
    aa = AA(residue[1])
    # A lone letter carries no modification, so skip parsing entirely.
    mod = length(residue) > 1 ? Mod(residue, mods_dict) : Mod()
    return Residue(aa, mod)
end

# Same parsing without named modifications: delegate with an empty table
# instead of repeating the body.
Residue(residue::String) = Residue(residue, Dict{String, Float32}())


# Residue from a string plus an explicit modification mass. The first character
# is the amino acid; the whole string becomes the modification name.
function Residue(residue::String, mod_mass::Float32)
    return Residue(AA(residue[1]), Mod(residue, mod_mass))
end

# Same, starting from a single character.
function Residue(residue::Char, mod_mass::Float32)
    return Residue(AA(residue), Mod(residue, mod_mass))
end
#    """
#    Residue('K', )
#    
#    """
#    Residue(
#            AA(residue), 
#            Mod(join([residue,"[+", string(mod_mass),"]"]), mod_mass),
#            mod_mass
#            )
#end
#Accessors for the fields of a Residue
function getMass(residue::Residue)
    return residue.mass
end

function getMod(residue::Residue)
    return residue.mod
end

function getAA(residue::Residue)
    return residue.aa
end

export Residue
export getMass
export getMod
export getAA
export default_mods

##########
#Frag
##########
"""
    Frag(residues::Array{Residue, 1}, type::Char, charge::Int32, isotope::Int32)

Fragment ion described by its `charge`, ion `type`, `mz` and `isotope` index.

`mz` is computed as

    (sum of residue masses + PROTON*charge [+ H2O] + isotope*NEUTRON) / charge

where `H2O` is added for `'y'` and `'p'` ions and omitted for `'b'` ions.

# Throws
- `ErrorException` if `type` is not one of `'b'`, `'y'`, `'p'`.
"""
struct Frag
    charge::Int32
    type::Char
    mz::Float32
    isotope::Int32
    function Frag(residues::Array{Residue, 1}, type::Char, charge::Int32, isotope::Int32)
        # Only the water term differs between the supported ion types.
        water = if type == 'b'
            zero(H2O)
        elseif type ∈ ('y', 'p')
            H2O
        #Could add functionality for a/x/c/z ions here
        else
            # Falling through used to make the constructor return `nothing`
            # instead of a Frag; fail loudly instead.
            throw(ErrorException(string("Ion type ", type, " not recognized")))
        end
        mass = sum(getMass, residues) + PROTON*charge + water + isotope*NEUTRON
        return new(charge, type, mass/charge, isotope)
    end
end

# Fragment with isotope index 0.
function Frag(residues::Array{Residue, 1}, type::Char, charge::Int32)
    return Frag(residues, type, charge, Int32(0))
end

#Accessors for the fields of a Frag
function getCharge(frag::Frag)
    return frag.charge
end

function getMZ(frag::Frag)
    return frag.mz
end

function getType(frag::Frag)
    return frag.type
end

function getIso(frag::Frag)
    return frag.isotope
end

export Frag
export getMZ
export getCharge
export getType
export getIso

In [None]:
test = Array{Residue, 1}([Residue('P'), Residue('E'), Residue('P')])

In [None]:
sum(v->getMass(v),test)

In [None]:
t = findall(r"[A-Z]\[(.*?)\]", "C[Carb]TIDEK[+8.014199]")

In [None]:
sequence = "C[Carb]TIDEK[+8.014199]"

In [None]:
 map(mod -> sequence[mod], findall(r"[A-Z]\[(.*?)\]", sequence))

In [None]:
"C[Carb]TIDEK[+8.014199]"[t[1]]

In [None]:
('b', 1, 4)

In [None]:
x = split("b(1-2,1-N);y(1-2,1-N);p(2-3,0-2)", ";")

In [None]:
x[1][1]

In [None]:
x[1]

In [None]:
a = match(r"([a-z])\((.*?),(.*?)\)", x[1])

In [2]:
frags = Array{NamedTuple, 1}([
 (ion_type = 'b', ind = Int32(3), charge = Int32(1)),
 (ion_type = 'b', ind = Int32(4), charge = Int32(2)),
 (ion_type = 'b', ind = Int32(5), charge = Int32(1)),
 (ion_type = 'b', ind = Int32(3), charge = Int32(2)),
 (ion_type = 'b', ind = Int32(4), charge = Int32(1)),
 (ion_type = 'b', ind = Int32(5), charge = Int32(2))])
    
    

6-element Vector{NamedTuple}:
 (ion_type = 'b', ind = 3, charge = 1)
 (ion_type = 'b', ind = 4, charge = 2)
 (ion_type = 'b', ind = 5, charge = 1)
 (ion_type = 'b', ind = 3, charge = 2)
 (ion_type = 'b', ind = 4, charge = 1)
 (ion_type = 'b', ind = 5, charge = 2)

In [3]:
# A peptide: its annotated sequence string, fragment list (filled in later by
# frag!), charge, precursor m/z and the modified residues found in the sequence.
mutable struct Peptide
    sequence::String
    fragments::Array{Frag, 1}
    charge::Int32
    mz::Float32
    mods::Array{String, 1}
    function Peptide(sequence::String, charge::Int32, mods_dict::Dict{String, Float32})
        # Each match is either a modified residue "X[...]" or a bare letter.
        residue_spans = findall(r"[A-Z]\[.*?\]|[A-Z]", sequence)
        residues = map(span -> Residue(sequence[span], mods_dict), residue_spans)
        # The precursor m/z comes from a 'p' fragment over every residue.
        precursor = Frag(residues, 'p', charge)
        # Only the modified residues are recorded in `mods`.
        mod_spans = findall(r"[A-Z]\[.*?\]", sequence)
        mod_names = map(span -> sequence[span], mod_spans)
        return new(sequence, Array{Frag, 1}(), charge, getMZ(precursor), mod_names)
    end

end

# Sequence string stored on the peptide.
function getSequence(peptide::Peptide)
    return peptide.sequence
end

# Split the peptide's sequence into Residue objects, one per regex match
# (a modified residue "X[...]" or a bare letter).
function getResidues(peptide::Peptide, mods_dict::Dict{String, Float32})
    spans = findall(r"[A-Z]\[.*?\]|[A-Z]", getSequence(peptide))
    return map(span -> Residue(getSequence(peptide)[span], mods_dict), spans)
end

"""
    frag!(peptide::Peptide, frags::Vector{NamedTuple}, mods_dict::Dict{String, Float32})

Replace `peptide.fragments` with one `Frag` per entry of `frags` and return the
new fragment vector.

Each entry of `frags` must have the fields `ion_type` and `charge`:

- `ion_type == 'b'`: the first `ind` residues are used.
- `ion_type == 'y'`: the last `ind` residues are used.
- `ion_type == 'p'`: all residues are used; the optional field `isotope` is
  passed on and defaults to `Int32(0)` when absent.

# Throws
- `ErrorException` for any other `ion_type`.
"""
function frag!(peptide::Peptide, frags::Vector{NamedTuple}, mods_dict::Dict{String, Float32})
    # Parse the sequence once; it is the same for every requested fragment.
    residues = getResidues(peptide, mods_dict)

    # Build the Frag described by one (ion_type, ind, charge[, isotope]) tuple.
    function getFrag(frag::NamedTuple)
        if frag.ion_type == 'b'
            return Frag(residues[1:frag.ind], frag.ion_type, frag.charge)
        elseif frag.ion_type == 'y'
            # Suffix of length `ind`, kept in sequence order.
            return Frag(residues[end-frag.ind+1:end], frag.ion_type, frag.charge)
        elseif frag.ion_type == 'p'
            # Fragment specs without an `isotope` field get isotope index 0.
            return Frag(residues, frag.ion_type, frag.charge, get(frag, :isotope, Int32(0)))
        else
            throw(ErrorException(string("Ion type ", frag.ion_type," not recognized")))
        end
    end

    # Typed comprehension keeps the element type even when `frags` is empty.
    peptide.fragments = Frag[getFrag(frag) for frag in frags]
    return peptide.fragments
end

frag! (generic function with 1 method)

In [7]:
all([true, true])

true

In [21]:
# Scratch type with two integer fields; used in the cells below to try out
# nesting one struct inside another.
struct A
    a::Int
    b::Int
end

# Scratch type that holds an A as a field alongside an extra integer.
struct B
    a::A
    c::Int
end

In [22]:
test_A = A(1, 1)

A(1, 1)

In [24]:
B(test_A, 2)

B(A(1, 1), 2)

In [36]:
sequence = "C[Carb]TIDEK[+8.014199]"
a = findall(r"^(\[.*?\])", sequence)

UnitRange{Int64}[]

In [44]:
map(mod -> sequence[mod], findall(r"[A-Z]\[.*?\]", sequence))

2-element Vector{String}:
 "C[Carb]"
 "K[+8.014199]"

In [42]:
replace(sequence, r"(\[.*?\])"=>"")

"CTIDEK"

In [41]:
sequence[a[1]]

LoadError: BoundsError: attempt to access 0-element Vector{UnitRange{Int64}} at index [1]

In [17]:
# Print the string forms of `a` and `b` joined by a dash, followed by a newline.
function test(a, b)
    line = join((string(a), string(b)), '-')
    println(line)
    return nothing
end

test (generic function with 1 method)

In [20]:
test.([1, 2], ['b','a'])

1-b
2-a


2-element Vector{Nothing}:
 nothing
 nothing

In [14]:
Base.product(zip([1, 2], ['a','b'])...)

Base.Iterators.ProductIterator{Tuple{Tuple{Int64, Char}, Tuple{Int64, Char}}}(((1, 'a'), (2, 'b')))

In [4]:
# Parse the example peptide and list its residues.
getResidues(Peptide("C[Carb]TIDEK[+8.014199]", Int32(2), default_mods), default_mods)

6-element Vector{Residue}:
 Residue(AA('C', 103.00919f0), Mod("C[Carb]", 57.021465f0), 160.03065f0)
 Residue(AA('T', 101.04768f0), Mod("", 0.0f0), 101.04768f0)
 Residue(AA('I', 113.08406f0), Mod("", 0.0f0), 113.08406f0)
 Residue(AA('D', 115.02694f0), Mod("", 0.0f0), 115.02694f0)
 Residue(AA('E', 129.04259f0), Mod("", 0.0f0), 129.04259f0)
 Residue(AA('K', 128.09496f0), Mod("K[+8.014199]", 8.014199f0), 136.10916f0)

In [5]:
frag!(Peptide("C[Carb]TIDEK[+8.014199]", Int32(2), default_mods), frags, default_mods)

6-element Vector{Frag}:
 Frag(1, 'b', 375.16968f0, 0)
 Frag(2, 'b', 245.60196f0, 0)
 Frag(1, 'b', 619.2392f0, 0)
 Frag(2, 'b', 188.08849f0, 0)
 Frag(1, 'b', 490.19662f0, 0)
 Frag(2, 'b', 310.12323f0, 0)

In [None]:
reverse(frags)

In [None]:
frags

In [None]:
string("Test", sequence, 10, "bob")

In [None]:
# ErrorException takes a single message; interpolate the value into it.
throw(ErrorException("test, $sequence"))

In [None]:
frags[1].ion_type

In [None]:
a = match(r"([a-z])\(([0-9]|[A-Z]).([0-9]|[A-Z]),([0-9]|[A-Z]).([0-9]|[A-Z])\)", x[1])

In [None]:
a[1]

In [None]:
parse(Int16, a[2])

In [None]:
a[2]

In [None]:
for r in eachindex(sequence)
    println(sequence[1:r])
end

In [None]:
test_str = "C[Carb]TIDEK[+8.014199]"
a = findall(r"([A-Z]\[.*?\]|[A-Z])", test_str)

In [None]:
a = match(r"([A-Z]\[(.*?)\]|[A-Z])", sequence)

In [None]:
match

In [None]:
a

In [None]:
a = map(mod -> sequence[mod], findall(r"([A-Z]\[(.*?)\]|[A-Z])", sequence))

In [None]:
a[1]

In [None]:
a[2]

In [None]:
Residue("K[+8.014199]", default_mods)

In [None]:
for i in 1:10
    print(i)
end

In [None]:
'y'∈("yp")

In [None]:
# Hand-computed 2+ precursor m/z for PEPTIDE; the proton constant is `PROTON`.
(AA_to_mass['P'] + AA_to_mass['E'] + AA_to_mass['P'] +
AA_to_mass['T'] + AA_to_mass['I'] + AA_to_mass['D'] +
AA_to_mass['E'] + PROTON*2 + H2O)/2

In [None]:
# Difference between a reference value and the hand-computed 1+ b ion for PEP.
324.155397 - (AA_to_mass['P'] + AA_to_mass['E'] + AA_to_mass['P'] + PROTON)

In [None]:
const H2O = Float32(18.010565)

In [None]:
# Difference between a reference value and the hand-computed 1+ y ion for TIDE.
477.219119 - (AA_to_mass['T'] + AA_to_mass['I'] + AA_to_mass['D'] +  AA_to_mass['E'] + H2O + PROTON)




In [None]:
# Difference between a reference value and the hand-computed 2+ y ion for TIDE.
239.113198 - (AA_to_mass['T'] + AA_to_mass['I'] + AA_to_mass['D'] +  AA_to_mass['E'] + H2O + PROTON + PROTON)/2


In [None]:
[test_str[ind] for ind in a]

In [None]:
Mod("K", Float32(100.0))

In [None]:
Residue("K", Float32(8.014199))

In [None]:
typeof(AA("K"[1]))

In [None]:
Residue(AA("K"[1]), Mod("K")) 

In [None]:
Residue('A', convert(Float32, 10.0))

In [None]:
join([1, 2])

In [None]:
Mod("K")

In [None]:
Residue('K')

In [None]:
getName(Mod("L"))

In [None]:
getMass(Mod("L"))

In [None]:
Residue(AA("L"[1]), Mod("L"))

In [None]:
AA("L"[1])

In [None]:
Residue("L")

In [None]:
Residue('K', convert(Float32, 10.0))

In [None]:
#Residue('A')
function Residue(aa::Char)
    return Residue(AA(aa))
end

#Residue('C', 'Carb', 57.021464)
# Stored mass is the amino acid mass plus the modification mass.
function Residue(aa::AA, mod_string::String, mod_mass::Float32)
    total_mass = getMass(aa) + mod_mass
    return Residue(aa, total_mass, mod_string)
end

# Unpack a Mod into its name and mass.
function Residue(aa::AA, mod::Mod)
    return Residue(aa, getName(mod), getMass(mod))
end

# First character is the amino acid; the whole string is parsed as the Mod.
function Residue(residue::String, mods_dict::Dict{String, Float32})
    return Residue(AA(residue[1]), Mod(residue, mods_dict))
end

# Parse using the default modification table.
function Residue(residue::String)
    return Residue(residue::String, default_mods)
end

# Residue from a string and an explicit modification mass. The modification
# name is built as "<first char>[+<mass>]", e.g. "K[+8.0]".
function Residue(residue::String, mod_mass::Float32)
    # `join[...]` indexed the function instead of calling it; build the name
    # with `string` instead.
    mod_name = string(residue[1], "[+", mod_mass, "]")
    return Residue(AA(residue[1]), mod_name, mod_mass)
end

In [None]:
# First character is the amino acid; the whole string is parsed as the Mod.
Residue(residue::String, mods_dict::Dict{String, Float32}) =
    Residue(AA(residue[1]), Mod(residue, mods_dict))

In [None]:
Residue("K", convert(Float32, 8.0))

In [None]:
float(8.0)

In [None]:
#export(AA)

In [None]:
# Named modification masses, keyed by the text found between the brackets of a
# modified residue such as "C[Carb]".
const default_mods = 
Dict{String, Float32}(
    "Carb" => 57.021464
)

In [None]:
"""
    AA(aa::Char)

Amino acid identified by its one-letter code `aa`, stored together with the
mass looked up in `AA_to_mass`.

# Throws
- `ErrorException` if `aa` is not a key of `AA_to_mass`.
"""
struct AA
    aa::Char
    mass::Float32
    #Constructor for amino acid. Restrict inputs
    #to valid amino acid symbols and assign correct mass
    function AA(aa::Char)
        # Explicit key check instead of try/catch, so that only a missing key
        # (and no unrelated failure) is reported as an invalid amino acid.
        haskey(AA_to_mass, aa) ||
            throw(ErrorException("The character $aa cannot be interpreted as an amino acid!"))
        return new(aa, AA_to_mass[aa])
    end
end

#Accessors for the two fields of an AA

# Mass stored on the amino acid.
function getMass(aa::AA)
    return aa.mass
end

# One-letter code stored on the amino acid.
function getAA(aa::AA)
    return aa.aa
end

In [None]:
AA('Z')

In [None]:
AA('A')

In [None]:
"""
    Mod(mod::String, mods_dict::Dict{String, Float32})

A mass modification parsed from text such as `"K[+8.014199]"` or `"C[Carb]"`.

The input is matched against `^[A-Z]\\[(.*)\\]\$`. Bracket content starting with
`+` is parsed as the mass; any other bracket content is looked up in
`mods_dict`; input that does not match the pattern gets mass `0`. The full
input string is stored as `name`.

# Throws
- `ErrorException` if the bracket content is neither a `+`-prefixed number nor
  a key of `mods_dict`.
"""
struct Mod
    name::String
    mass::Float32

    function Mod(mod::String, mods_dict::Dict{String, Float32})
        m = match(r"^[A-Z]\[(.*)\]$", mod)
        # Trace through the logging system rather than printing on every call.
        @debug "Entered Mod" mod

        # No bracketed part: keep the text as the name and add no mass.
        m === nothing && return new(mod, 0.0f0)

        label = m[1]
        mass = if startswith(label, "+")
            tryparse(Float32, label[2:end])     # "+8.014199" -> 8.014199
        else
            get(mods_dict, label, nothing)      # "Carb" -> mods_dict["Carb"]
        end

        # Report the offending input string, not the RegexMatch object.
        mass === nothing && throw(ErrorException("$mod could not be parsed as given"))
        return new(mod, mass)
    end
end

#Accessors for the fields of a Mod
function getMass(mod::Mod)
    return mod.mass
end

function getName(mod::Mod)
    return mod.name
end

# Parse a modification string against the default modification table.
function Mod(mod::String)
    return Mod(mod, default_mods)
end

# Empty modification: parse the empty string with an empty table.
function Mod()
    return Mod("", Dict{String, Float32}())
end

In [None]:
Mod("K")

In [None]:
Mod()

In [None]:
join(["A","_",string(8.0)])

In [None]:
Mod("C", default_mods)

In [None]:
# Earlier Residue layout: the amino acid, its mass, and the modification kept
# as a plain string.
struct Residue
    amino_acid::AA
    mass::Float32
    mod::String
end

# Unmodified residue from a one-letter code; the letter doubles as the
# modification string.
function Residue(aa::Char)
    acid = AA(aa)
    return Residue(acid, getMass(acid), string(aa))
end

In [None]:
regext =  r"^[A-Z]\[(.*)\]$"

In [None]:
m = match(regext, "L")
mk = match(regext, "K[+8.014199]")
mc = match(regext, "C[carb]")
mk = match(regext, "K[+8.014199]")

In [None]:
m==nothing

In [None]:
mk

In [None]:
parse(Float32, mk[1][2:end])

In [None]:
startswith(mc[1], "+")

In [None]:
startswith(mk[1], "+")

In [None]:
join(["[","ten", "t"])

In [None]:
Residue('A')

In [None]:
test = AA('A')

In [None]:
getMass(test)

In [None]:
test = AA('A')

In [None]:
test.mass