# Prototype new concepts before introduction into test and src

In [2]:
##########
#Global Constants
##########
#Mass constants, stored as Float32.
#NOTE(review): the values look like monoisotopic masses in Daltons -- confirm.
#H2O is added to the summed residue mass of 'y' and 'p' fragments in Frag.
const H2O = Float32(18.010565)
#PROTON is multiplied by the fragment charge in Frag.
const PROTON = Float32(1.0072764)
#NEUTRON is multiplied by the isotope index in Frag.
const NEUTRON = Float32(1.00335)
#Lookup table from a one-letter amino acid symbol to its mass. The AA constructor
#reads it to validate a symbol and to assign the mass.
#The values are written as Float64 literals and converted to Float32 by the Dict type.
#NOTE(review): 'I' and 'L' share one value; 'U' and 'O' presumably stand for
#selenocysteine and pyrrolysine -- confirm.
const AA_to_mass =
Dict{Char, Float32}(
        'A' => 71.03711,
        'R' => 156.10111,
        'N' => 114.04293,
        'D' => 115.02694,
        'C' => 103.00919,
        'E' => 129.04259,
        'Q' => 128.05858,
        'G' => 57.02146,
        'H' => 137.05891,
        'I' => 113.08406,
        'L' => 113.08406,
        'K' => 128.09496,
        'M' => 131.04049,
        'F' => 147.06841,
        'P' => 97.05276,
        'S' => 87.03203,
        'T' => 101.04768,
        'W' => 186.07931,
        'Y' => 163.06333,
        'V' => 99.06841,
        'U' => 150.95363,
        'O' => 237.14773
)

#Named modification masses. A key is the name written inside the brackets of a
#modification string such as "C[Carb]"; Mod(mod, mods_dict) looks the name up
#when this dictionary is passed as mods_dict.
const default_mods = 
Dict{String, Float32}(
    "Carb" => 57.021464
)
##########
#Representation of Amino Acid
##########
"""
    AA(aa::Char)

Amino acid identified by its one-letter symbol `aa`, paired with the mass
stored for that symbol in `AA_to_mass`.

Throws an `ErrorException` when `aa` is not a key of `AA_to_mass`.
"""
struct AA
    aa::Char
    mass::Float32
    #Constructor for amino acid. Restrict inputs
    #to valid amino acid symbols and assign correct mass
    function AA(aa::Char)
        #Explicit membership check instead of catching the KeyError of a failed lookup
        haskey(AA_to_mass, aa) ||
            throw(ErrorException("The character $aa cannot be interpreted as an amino acid!"))
        return new(aa, AA_to_mass[aa])
    end
end

#Getter methods
#Mass stored on the amino acid
function getMass(aa::AA)
    return aa.mass
end

#One-letter symbol stored on the amino acid
function getAA(aa::AA)
    return aa.aa
end

export AA
##########
#Modification. Simple representation of a mass modification
##########
"""
    Mod(name::String, mass::Float32)

Mass modification: `name` is the label of the modification and `mass` is the
mass it contributes.
"""
struct Mod
    name::String
    mass::Float32
end

"""
    Mod(mod::String, mods_dict::Dict{String, Float32})

Parse a modification string made of one upper-case letter followed by a
bracketed tag, for example "K[+8.014199]" or "C[Carb]". In every case the
whole string `mod` becomes the modification name.

- Tag starting with "+" ("K[+8.014199]"): the text after the "+" is parsed
  as the modification mass (8.014199).
- Any other tag ("C[Carb]"): the tag is a key of `mods_dict`, and
  `mods_dict["Carb"]` is the modification mass.
- No bracketed tag at all ("K"): the modification mass is zero.

Throws an `ErrorException` when the mass after "+" is not a number or when
the tag is not a key of `mods_dict`.
"""
function Mod(mod::String, mods_dict::Dict{String, Float32})
    m = match(r"^[A-Z]\[(.*)\]$", mod)

    #No bracketed tag: keep the string as the name with a zero mass
    isnothing(m) && return Mod(mod, 0.0f0)

    tag = m[1]
    #Both lookups yield `nothing` on failure, so no exception has to be caught
    mass = if startswith(tag, "+")
        tryparse(Float32, tag[2:end])   #"+8.014199" -> 8.014199
    else
        get(mods_dict, tag, nothing)    #"Carb" -> 57.021464
    end

    isnothing(mass) && throw(ErrorException("$mod could not be parsed as given"))
    return Mod(mod, mass)
end

#Optionally parse mods without a mods_dict
#Parse a modification string with no named modifications to look up
function Mod(mod::String)
    return Mod(mod, Dict{String, Float32}())
end

#Modification named by a single character
function Mod(name::Char, mass::Float32)
    return Mod(string(name), mass)
end

#Empty modification: blank name, zero mass
function Mod()
    return Mod("", 0.0)
end

#Getter Functions
#Mass stored on the modification
function getMass(mod::Mod)
    return mod.mass
end

#Name stored on the modification
function getName(mod::Mod)
    return mod.name
end
export Mod

##########
#Residue. Implementation of amino acid with custom mass modifications
##########
#An amino acid paired with a mass modification and a stored mass.
struct Residue
    #The amino acid
    aa::AA
    #The modification attached to it; Residue(aa::AA) passes the empty Mod()
    mod::Mod
    #Stored mass; Residue(aa::AA) passes the amino acid mass and
    #Residue(aa::AA, mod::Mod) passes modification mass plus amino acid mass
    mass::Float32
end

#Unmodified residue from an amino acid, e.g. Residue(AA('A'))
Residue(aa::AA) = Residue(aa, Mod(), getMass(aa))

#Unmodified residue from a one-letter symbol, e.g. Residue('A')
function Residue(aa::Char)
    return Residue(AA(aa))
end

#Residue whose stored mass is the modification mass plus the amino acid mass
function Residue(aa::AA, mod::Mod)
    total = getMass(mod) + getMass(aa)
    return Residue(aa, mod, total)
end

#Residue from a string such as "C[Carb]": the first character selects the
#amino acid and the whole string is parsed as a modification using mods_dict
function Residue(residue::String, mods_dict::Dict{String, Float32})
    return Residue(AA(residue[1]), Mod(residue, mods_dict))
end

#Same as above, parsing the modification without a dictionary
function Residue(residue::String)
    return Residue(AA(residue[1]), Mod(residue))
end

#Residue from a string and an explicit modification mass; the whole string
#becomes the modification name
function Residue(residue::String, mod_mass::Float32)
    return Residue(AA(residue[1]), Mod(residue, mod_mass))
end

#Residue from a one-letter symbol and an explicit modification mass
function Residue(residue::Char, mod_mass::Float32)
    return Residue(AA(residue), Mod(residue, mod_mass))
end
#    """
#    Residue('K', )
#    
#    """
#    Residue(
#            AA(residue), 
#            Mod(join([residue,"[+", string(mod_mass),"]"]), mod_mass),
#            mod_mass
#            )
#end
#Getter methods
#Mass stored on the residue
function getMass(residue::Residue)
    return residue.mass
end

#Modification stored on the residue
function getMod(residue::Residue)
    return residue.mod
end

#Amino acid stored on the residue
function getAA(residue::Residue)
    return residue.aa
end

export Residue
export getMass
export getMod
export getAA
export default_mods

##########
#Frag
##########
"""
    Frag(residues::Array{Residue, 1}, type::Char, charge::Int32, isotope::Int32)

Fragment ion described by its charge, ion type, m/z and isotope index.

The m/z is the summed mass of `residues` plus `PROTON*charge` and
`isotope*NEUTRON`, divided by `charge`. For `type` 'y' or 'p', `H2O` is added
to the mass as well; for 'b' it is not.

Throws an `ArgumentError` for any other `type`.
"""
struct Frag
    charge::Int32
    type::Char
    mz::Float32
    isotope::Int32
    function Frag(residues::Array{Residue, 1}, type::Char, charge::Int32, isotope::Int32)
        #Extra mass that depends on the ion type
        terminal = if type == 'b'
            0.0f0
        elseif type ∈ ('y', 'p')
            H2O
        else
            #Previously an unknown type fell through the `if` and the constructor
            #returned `nothing` instead of a Frag.
            #Could add functionality for a/x/c/z ions here
            throw(ArgumentError("Unsupported fragment type '$type'; expected 'b', 'y' or 'p'"))
        end
        mass = sum(getMass, residues) + PROTON*charge + terminal + isotope*NEUTRON
        return new(charge, type, mass/charge, isotope)
    end
end

#Fragment with the isotope index defaulted to 0
function Frag(residues::Array{Residue, 1}, type::Char, charge::Int32)
    return Frag(residues, type, charge, Int32(0))
end

#Charge stored on the fragment
function getCharge(frag::Frag)
    return frag.charge
end

#m/z stored on the fragment
function getMZ(frag::Frag)
    return frag.mz
end

#Ion type character stored on the fragment
function getType(frag::Frag)
    return frag.type
end

#Isotope index stored on the fragment
function getIso(frag::Frag)
    return frag.isotope
end

export Frag
export getMZ
export getCharge
export getType
export getIso




In [58]:
#Three unmodified residues (P, E, P) in a typed vector
test = Residue[Residue('P'), Residue('E'), Residue('P')]

3-element Vector{Residue}:
 Residue(AA('P', 97.05276f0), Mod("", 0.0f0), 97.05276f0)
 Residue(AA('E', 129.04259f0), Mod("", 0.0f0), 129.04259f0)
 Residue(AA('P', 97.05276f0), Mod("", 0.0f0), 97.05276f0)

In [44]:
#Total mass of the residues in `test`
sum(getMass, test)

323.1481f0

In [None]:
sequence = "TIDEK[+8.014199]"

In [4]:
sequence = "TIDEK[+8.014199]"
#Split the sequence into residue strings with the regex, build a Residue from each
#piece, and pass the resulting vector to Frag.
#NOTE(review): the Frag methods defined in this file also take a type::Char and a
#charge::Int32, so this one-argument call has no matching method.
Frag(map(v -> Residue(sequence[v]), findall(r"([A-Z]\[(.*)\]|[A-Z])", sequence)))

LoadError: MethodError: no method matching Frag(::Vector{Residue})
[0mClosest candidates are:
[0m  Frag(::Vector{Residue}, [91m::Char[39m, [91m::Int32[39m) at In[2]:182
[0m  Frag(::Vector{Residue}, [91m::Char[39m, [91m::Int32[39m, [91m::Int32[39m) at In[2]:172

In [23]:
test_str = "TIDEK[+8.014199]"
#Each match is either a letter followed by one bracketed tag, or a bare letter.
#The tag is matched with [^\]]* instead of .* so that a match stops at the first
#closing bracket; the greedy form merges two modified residues into one match.
a = findall(r"([A-Z]\[([^\]]*)\]|[A-Z])", test_str)

5-element Vector{UnitRange{Int64}}:
 1:1
 2:2
 3:3
 4:4
 5:16

In [60]:
#Print the integers 1 through 10 back to back
foreach(print, 1:10)

12345678910

In [53]:
'y'∈("yp")

true

In [51]:
#Summed masses of P, E, P, T, I, D, E plus two protons and water, divided by 2.
#Uses the PROTON constant; a lowercase `proton` is not defined in this file.
(AA_to_mass['P'] + AA_to_mass['E'] + AA_to_mass['P'] +
AA_to_mass['T'] + AA_to_mass['I'] + AA_to_mass['D'] +
AA_to_mass['E'] + PROTON*2 + H2O)/2

400.68723f0

In [34]:
#Difference between a reference value and the summed P, E, P masses plus one proton.
#Uses the PROTON constant; a lowercase `proton` is not defined in this file.
324.155397 - (AA_to_mass['P'] + AA_to_mass['E'] + AA_to_mass['P'] + PROTON)

3.2009765618568053e-5

In [37]:
const H2O = Float32(18.010565)

18.010565f0

In [41]:
#Difference between a reference value and the summed T, I, D, E masses plus water
#and one proton. Uses the PROTON constant; `proton` is not defined in this file.
477.219119 - (AA_to_mass['T'] + AA_to_mass['I'] + AA_to_mass['D'] +  AA_to_mass['E'] + H2O + PROTON)




2.789062477859261e-6

In [46]:
#Difference between a reference value and half of the summed T, I, D, E masses plus
#water and two protons. Uses the PROTON constant; `proton` is not defined in this file.
239.113198 - (AA_to_mass['T'] + AA_to_mass['I'] + AA_to_mass['D'] +  AA_to_mass['E'] + H2O + PROTON + PROTON)/2


8.302734386234079e-6

In [24]:
#Substring of test_str for every index range in `a`
map(ind -> test_str[ind], a)

5-element Vector{String}:
 "T"
 "I"
 "D"
 "E"
 "K[+8.014199]"

In [None]:
Mod("K", Float32(100.0))

In [None]:
Residue("K", Float32(8.014199))

In [30]:
typeof(AA("K"[1]))

AA

In [31]:
Residue(AA("K"[1]), Mod("K")) 

Entered Mod


Residue(AA('K', 128.09496f0), Mod("K", 0.0f0), 128.09496f0)

In [32]:
Residue('A', convert(Float32, 10.0))

Residue(AA('A', 71.03711f0), Mod("A[+10.0]", 10.0f0), 10.0f0)

In [13]:
join([1, 2])

"12"

In [14]:
Mod("K")

Entered Mod


Mod("K", 0.0f0)

In [15]:
Residue('K')

Residue(AA('K', 128.09496f0), Mod("", 0.0f0), 128.09496f0)

In [21]:
getName(Mod("L"))

Entered Mod


"L"

In [22]:
getMass(Mod("L"))

Entered Mod


0.0f0

In [20]:
Residue(AA("L"[1]), Mod("L"))

Entered Mod


LoadError: StackOverflowError:

In [17]:
AA("L"[1])

AA('L', 113.08406f0)

In [16]:
Residue("L")

Entered Mod


LoadError: StackOverflowError:

In [3]:
Residue('K', convert(Float32, 10.0))

LoadError: StackOverflowError:

In [None]:
#Unmodified residue from a one-letter symbol, e.g. Residue('A')
function Residue(aa::Char)
    return Residue(AA(aa))
end

#Residue from an amino acid, a modification name and a modification mass;
#the value passed on is the amino acid mass plus the modification mass
function Residue(aa::AA, mod_string::String, mod_mass::Float32)
    total = getMass(aa) + mod_mass
    return Residue(aa, total, mod_string)
end

#Residue from an amino acid and a Mod, forwarding the Mod's name and mass
function Residue(aa::AA, mod::Mod)
    return Residue(aa, getName(mod), getMass(mod))
end

#Residue from a string: the first character selects the amino acid and the
#whole string is parsed as a modification using mods_dict
function Residue(residue::String, mods_dict::Dict{String, Float32})
    return Residue(AA(residue[1]), Mod(residue, mods_dict))
end

#Same as above, using default_mods as the dictionary
function Residue(residue::String)
    return Residue(residue, default_mods)
end

#Residue from a string and an explicit modification mass. The modification
#name is the first character followed by the mass in brackets, e.g. "K[+8.0]".
function Residue(residue::String, mod_mass::Float32)
    #The original indexed `join` with square brackets instead of calling it
    mod_name = string(residue[1], "[+", mod_mass, "]")
    return Residue(AA(residue[1]), mod_name, mod_mass)
end

In [None]:
#Residue from a string: the first character selects the amino acid and the
#whole string is parsed as a modification using mods_dict
Residue(residue::String, mods_dict::Dict{String, Float32}) =
    Residue(AA(residue[1]), Mod(residue, mods_dict))

In [None]:
Residue("K", convert(Float32, 8.0))

In [None]:
float(8.0)

In [None]:
#export(AA)

In [None]:
#Named modification masses, keyed by the name written inside the brackets of a
#modification string such as "C[Carb]".
const default_mods = 
Dict{String, Float32}(
    "Carb" => 57.021464
)

In [None]:
"""
    AA(aa::Char)

Amino acid identified by its one-letter symbol `aa`, paired with the mass
stored for that symbol in `AA_to_mass`.

Throws an `ErrorException` when `aa` is not a key of `AA_to_mass`.
"""
struct AA
    aa::Char
    mass::Float32
    #Constructor for amino acid. Restrict inputs
    #to valid amino acid symbols and assign correct mass
    function AA(aa::Char)
        #Explicit membership check instead of catching the KeyError of a failed lookup
        haskey(AA_to_mass, aa) ||
            throw(ErrorException("The character $aa cannot be interpreted as an amino acid!"))
        return new(aa, AA_to_mass[aa])
    end
end

#Getter methods
#Mass stored on the amino acid
function getMass(aa::AA)
    return aa.mass
end

#One-letter symbol stored on the amino acid
function getAA(aa::AA)
    return aa.aa
end

In [None]:
AA('Z')

In [None]:
AA('A')

In [None]:
"""
    Mod(mod::String, mods_dict::Dict{String, Float32})

Mass modification parsed from a string made of one upper-case letter followed
by a bracketed tag. The whole string `mod` becomes the name.

- Tag starting with "+" ("K[+8.014199]"): the text after the "+" is parsed
  as the mass.
- Any other tag ("C[Carb]"): the tag is looked up in `mods_dict`.
- No bracketed tag ("K"): the mass is zero.

Prints "Entered Mod" on every call. Throws an `ErrorException` naming the
input string when the mass cannot be parsed or the tag is not in `mods_dict`.
"""
struct Mod
    name::String
    mass::Float32

    function Mod(mod::String, mods_dict::Dict{String, Float32})
        m = match(r"^[A-Z]\[(.*)\]$", mod)
        println("Entered Mod")

        #No bracketed tag: keep the string as the name with a zero mass
        isnothing(m) && return new(mod, 0.0f0)

        tag = m[1]
        #Both lookups yield `nothing` on failure, so no exception has to be caught
        mass = if startswith(tag, "+")
            tryparse(Float32, tag[2:end])   #"+8.014199" -> 8.014199
        else
            get(mods_dict, tag, nothing)    #"Carb" -> 57.021464
        end

        #Report the input string; the original interpolated the RegexMatch object
        isnothing(mass) && throw(ErrorException("$mod could not be parsed as given"))
        return new(mod, mass)
    end
end

#Mass stored on the modification
function getMass(mod::Mod)
    return mod.mass
end

#Name stored on the modification
function getName(mod::Mod)
    return mod.name
end

#Parse a modification string using default_mods as the dictionary
function Mod(mod::String)
    return Mod(mod, default_mods)
end

#Empty modification, parsed from a blank string with an empty dictionary
function Mod()
    return Mod("", Dict{String, Float32}())
end

In [None]:
Mod("K")

In [None]:
Mod()

In [None]:
join(["A","_",string(8.0)])

In [None]:
Mod("C", default_mods)

In [None]:
#An amino acid with a stored mass and a modification given as a plain string.
struct Residue
    #The amino acid
    amino_acid::AA
    #Stored mass
    mass::Float32
    #Modification string
    mod::String
end

#Residue from a one-letter symbol: the amino acid mass is stored as the mass
#and the symbol itself, as a string, is stored as the modification
function Residue(aa::Char)
    amino = AA(aa)
    mass = getMass(amino)
    return Residue(amino, mass, string(aa))
end

In [None]:
regext =  r"^[A-Z]\[(.*)\]$"

In [None]:
m = match(regext, "L")
mk = match(regext, "K[+8.014199]")
mc = match(regext, "C[carb]")
mk = match(regext, "K[+8.014199]")

In [None]:
#True when the regex did not match; `isnothing` is the idiomatic test for `nothing`
isnothing(m)

In [None]:
mk

In [None]:
parse(Float32, mk[1][2:end])

In [None]:
startswith(mc[1], "+")

In [None]:
startswith(mk[1], "+")

In [None]:
join(["[","ten", "t"])

In [None]:
Residue('A')

In [None]:
test = AA('A')

In [None]:
getMass(test)

In [None]:
test = AA('A')

In [None]:
test.mass