In [1]:
from urllib.request import urlretrieve
from pyopenms import *
# Base URL of the repository that hosts the example input files.
gh = "https://raw.githubusercontent.com/OpenMS/pyopenms-extra/master"

# Download the spectra (mzML) and the protein database (FASTA) into the
# current working directory under the local names used by the cells below.
for remote_path, local_name in (
    ("/src/data/SimpleSearchEngine_1.mzML", "searchfile.mzML"),
    ("/src/data/SimpleSearchEngine_1.fasta", "search.fasta"),
):
    urlretrieve(gh + remote_path, local_name)

# The two lists are handed to search() empty and are read back afterwards,
# i.e. the search fills them in place.
protein_ids, peptide_ids = [], []
SimpleSearchEngineAlgorithm().search("searchfile.mzML", "search.fasta", protein_ids, peptide_ids)

In [2]:
# Print one report per identified spectrum, followed by its peptide hits.
for peptide_id in peptide_ids:
    # Values stored on the peptide identification itself
    print("=" * 35)
    print("Peptide ID m/z:", peptide_id.getMZ())
    print("Peptide ID rt:", peptide_id.getRT())
    print("Peptide scan index:", peptide_id.getMetaValue("scan_index"))
    # NOTE(review): this line reads the same "scan_index" meta value as the
    # line above, so "scan name" and "scan index" always print the same
    # value. Confirm which meta key (if any) is meant to hold the scan name.
    print("Peptide scan name:", peptide_id.getMetaValue("scan_index"))
    print("Peptide ID score type:", peptide_id.getScoreType())

    # Peptide hits attached to this identification
    for hit in peptide_id.getHits():
        sequence = hit.getSequence()
        charge = hit.getCharge()

        print(" - Peptide hit rank:", hit.getRank())
        print(" - Peptide hit charge:", charge)
        print(" - Peptide hit sequence:", sequence)

        # m/z of the hit: monoisotopic weight at the hit's charge, divided
        # by that charge.
        theoretical_mz = sequence.getMonoWeight(Residue.ResidueType.Full, charge) / charge
        print(" - Peptide hit monoisotopic m/z:", theoretical_mz)

        # Relative deviation between computed and observed m/z, in parts per million.
        ppm_error = abs(theoretical_mz - peptide_id.getMZ()) / theoretical_mz * 1e6
        print(" - Peptide ppm error:", ppm_error)
        print(" - Peptide hit score:", hit.getScore())

Peptide ID m/z: 520.262817382812
Peptide ID rt: 2655.095703125
Peptide scan index: 0
Peptide scan name: 0
Peptide ID score type: hyperscore
 - Peptide hit rank: 1
 - Peptide hit charge: 3
 - Peptide hit sequence: DFASSGGYVLHLHR
 - Peptide hit monoisotopic m/z: 520.2635538953376
 - Peptide ppm error: 1.4156527399679193
 - Peptide hit score: 16.84163475036621
Peptide ID m/z: 1063.20983886719
Peptide ID rt: 4587.6689453125
Peptide scan index: 1
Peptide scan name: 1
Peptide ID score type: hyperscore
 - Peptide hit rank: 1
 - Peptide hit charge: 3
 - Peptide hit sequence: IALSRPNVEVVALNDPFITNDYAAYM(Oxidation)FK
 - Peptide hit monoisotopic m/z: 1063.2092724146712
 - Peptide ppm error: 0.5327761274224441
 - Peptide hit score: 42.21834182739258
Peptide ID m/z: 775.38720703125
Peptide ID rt: 4923.77734375
Peptide scan index: 2
Peptide scan name: 2
Peptide ID score type: hyperscore
 - Peptide hit rank: 1
 - Peptide hit charge: 3
 - Peptide hit sequence: RPGADSDIGGFGGLFDLAQAGFR
 - Peptide hit mon

In [3]:
# --- Theoretical fragment spectrum of one identified peptide ---------------
tsg = TheoreticalSpectrumGenerator()
tsg_params = Param()
# With "add_metainfo" enabled, the ion names are read back below from the
# spectrum's first string data array.
tsg_params.setValue("add_metainfo", "true")
tsg.setParameters(tsg_params)

peptide = AASequence.fromString("RPGADSDIGGFGGLFDLAQAGFR")
thspec = MSSpectrum()
# The two integer arguments are the minimum and maximum fragment charge
# (both 1 here) — see the TheoreticalSpectrumGenerator documentation.
tsg.getSpectrum(thspec, peptide, 1, 1)

# Walk the ion annotations and the peaks in parallel and print name + m/z.
ion_names = thspec.getStringDataArrays()[0]
for ion, peak in zip(ion_names, thspec):
    print(ion, peak.getMZ())

# --- Experimental spectrum for comparison ----------------------------------
MIN_INTENSITY = 1500  # keep only peaks strictly more intense than this
MIN_MZ = 300          # keep only peaks strictly above this m/z

e = MSExperiment()
MzMLFile().load("searchfile.mzML", e)
spectrum = e[2]  # third spectrum in the file (index 2)
print("Spectrum native id", spectrum.getNativeID())

mz_values, intensities = spectrum.get_peaks()
peaks = [
    (peak_mz, peak_intensity)
    for peak_mz, peak_intensity in zip(mz_values, intensities)
    if peak_intensity > MIN_INTENSITY and peak_mz > MIN_MZ
]
for peak_mz, peak_intensity in peaks:
    print(peak_mz, "mz", peak_intensity, "int")

b'y1+' 175.118952913371
b'b2+' 254.161152072871
b'b3+' 311.18261616857103
b'y2+' 322.187367200471
b'y3+' 379.208831296171
b'b4+' 382.219730328071
b'y4+' 450.245945455671
b'b5+' 497.24667448757106
b'y5+' 578.304523710871
b'b6+' 584.2787036470711
b'y6+' 649.341637870371
b'b7+' 699.3056478065711
b'y7+' 762.425702221271
b'b8+' 812.3897121574711
b'b9+' 869.4111762531711
b'y8+' 877.452646380771
b'b10+' 926.4326403488711
b'y9+' 1024.5210606678709
b'b11+' 1073.501054635971
b'b12+' 1130.522518731671
b'y10+' 1137.6051250187709
b'b13+' 1187.5439828273709
b'y11+' 1194.6265891144708
b'y12+' 1251.6480532101707
b'b14+' 1300.628047178271
b'y13+' 1398.7164674972707
b'b15+' 1447.696461465371
b'y14+' 1455.7379315929707
b'y15+' 1512.7593956886706
b'b16+' 1562.723405624871
b'y16+' 1625.8434600395706
b'b17+' 1675.807469975771
b'y17+' 1740.8704041990707
b'b18+' 1746.8445841352711
b'y18+' 1827.9024333585708
b'b19+' 1874.9031623904712
b'y19+' 1942.9293775180709
b'b20+' 1945.9402765499713
b'b21+' 2002.961740645

In [4]:
# Run the search again, this time with a narrower precursor mass tolerance.
salgo = SimpleSearchEngineAlgorithm()

# Start from the algorithm's defaults and show them before changing anything.
search_params = salgo.getDefaults()
print(search_params.items())

# Tighten the precursor tolerance to 4.0; the tolerance unit parameter is
# left at whatever the defaults specify.
search_params[b'precursor:mass_tolerance'] = 4.0
salgo.setParameters(search_params)

# Fresh result lists: search() fills them in place.
protein_ids, peptide_ids = [], []
salgo.search("searchfile.mzML", "search.fasta", protein_ids, peptide_ids)
print("Found", len(peptide_ids), "peptides")

[(b'enzyme', 'Trypsin'), (b'decoys', 'false'), (b'precursor:mass_tolerance', 10.0), (b'precursor:mass_tolerance_unit', 'ppm'), (b'precursor:min_charge', 2), (b'precursor:max_charge', 5), (b'precursor:isotopes', [0, 1]), (b'fragment:mass_tolerance', 10.0), (b'fragment:mass_tolerance_unit', 'ppm'), (b'modifications:fixed', [b'Carbamidomethyl (C)']), (b'modifications:variable', [b'Oxidation (M)']), (b'modifications:variable_max_per_peptide', 2), (b'annotate:PSM', [b'ALL']), (b'peptide:min_size', 7), (b'peptide:max_size', 40), (b'peptide:missed_cleavages', 1), (b'peptide:motif', ''), (b'report:top_hits', 1)]
Found 3 peptides
