In [1]:
#############  Amino Acid Sequences
############## The AASequence class handles amino acid sequences in OpenMS



from pyopenms import *


seq = AASequence.fromString("DFPIANGER")     # create AASequence object from string representation


prefix = seq.getPrefix(4)                    # extract prefix of length 4


suffix = seq.getSuffix(5)                    # extract suffix of length 5


concat = seq + seq                          # concatenate two sequences


print("Sequence:", seq)
print("Prefix:", prefix)
print("Suffix:", suffix)
print("Concatenated:", concat)

ModuleNotFoundError: No module named 'pyopenms'

In [2]:
!pip install pyopenms



In [7]:
from pyopenms import *

seq = AASequence.fromString("SOHAILA")  

prefix = seq.getPrefix(3)                   

print("Sequence:", seq)
print("Prefix:", prefix)

Sequence: SOHAILA
Prefix: SOH


In [9]:
suffix = seq.getSuffix(4)
print(suffix)

AILA


In [15]:
concat = seq + seq
print(concat)

SOHAILASOHAILA


In [19]:
mfull = seq.getMonoWeight()
print("Monoisotopic mass of peptide [M] is", mfull)

Monoisotopic mass of peptide [M] is 847.4915910735


In [20]:
seq = AASequence.fromString("SOHAILA") 
print("The peptide", str(seq), "consists of the following amino acids:")
for aa in seq:
    print(aa.getName(), ":", aa.getMonoWeight())

The peptide SOHAILA consists of the following amino acids:
Serine : 105.0425942233
Pyrrolysine : 255.15829266990002
Histidine : 155.0694772871
Alanine : 89.0476792233
Isoleucine : 131.0946294147
Leucine : 131.0946294147
Alanine : 89.0476792233


In [21]:
seq = AASequence.fromString("SOHAILA")
seq_formula = seq.getFormula()
print("Peptide", seq, "has molecular formula", seq_formula)

Peptide SOHAILA has molecular formula C39H65N11O10


In [22]:
suffix = seq.getSuffix(2)                                       


print("y3 ion sequence:", suffix)

y3_formula = suffix.getFormula(Residue.ResidueType.YIon, 2)        

suffix.getMonoWeight(Residue.ResidueType.YIon, 2) / 2.0            

suffix.getMonoWeight(Residue.ResidueType.XIon, 2) / 2.0            



print("y3 mz:", suffix.getMonoWeight(Residue.ResidueType.YIon, 2) / 2.0 )

print("y3 molecular formula:", y3_formula)

y3 ion sequence: LA
y3 mz: 102.07314825387101
y3 molecular formula: C9H18N2O3


In [23]:
seq = AASequence.fromString("ESSAM(Oxidation)LA")
print(seq.toUnmodifiedString())
print(seq.toString())
print(seq.toUniModString())
print(seq.toBracketString())
print(seq.toBracketString(False))

ESSAMLA
ESSAM(Oxidation)LA
ESSAM(UniMod:35)LA
ESSAM[147]LA
ESSAM[147.03540001709996]LA


In [25]:
print(AASequence.fromString("ESSAM(UniMod:35)LA"))

print(AASequence.fromString("ESSAM[+16]LA"))

print(AASequence.fromString("ESSAM[+15.99]LA"))

print(AASequence.fromString("ESSAM[147]LA"))

print(AASequence.fromString("ESSAM[147.035405]GER"))

ESSAM(Oxidation)LA
ESSAM(Oxidation)LA
ESSAM(Oxidation)LA
ESSAM(Oxidation)LA
ESSAM(Oxidation)GER


In [26]:
bsa = FASTAEntry() 

bsa.sequence = "BBBBBBBBBBBBBGGGGGGGGGGGSSSSSSS"

bsa.description = "BSA (partial sequence)"

bsa.identifier = "BSA"

alb = FASTAEntry()

alb.sequence = "NNNNNNNNNNNNNNNNNNNNNNNNNNNN"

alb.description = "ALB (partial sequence)"

alb.identifier = "ALB"


entries = [bsa, alb]

f = FASTAFile()

f.store("example.fasta", entries)

In [27]:
entries = []

f = FASTAFile()

f.load("example.fasta", entries)

print( len(entries) )

for e in entries:
    print (e.identifier, e.sequence)

2
BSA BBBBBBBBBBBBBGGGGGGGGGGGSSSSSSS
ALB NNNNNNNNNNNNNNNNNNNNNNNNNNNN
