In [2]:
import re
from collections import namedtuple, defaultdict

try:
    from enum import Enum
except ImportError:
    # Python 2 doesn't have a builtin Enum type
    Enum = object

from six import add_metaclass

from pyteomics import parser

In [40]:
import importlib

from pyteomics import proforma

# Reload so that changes made to the pyteomics.proforma source since the first
# import are picked up without restarting the kernel.
importlib.reload(proforma)

# Short aliases used by the cells below; bound after the reload so they point
# at the freshly loaded functions.
tokenize_proforma, format_proforma = proforma.parse_proforma, proforma.to_proforma

In [41]:
# A brace-delimited glycan modification, written as a monosaccharide
# composition, placed in front of the peptide sequence.
glycan_composition_example = "{Glycan:Hex 1 HexNAc 2 NeuAc 1}STYGIAN"
seq, props = proforma.parse_proforma(glycan_composition_example)

In [43]:
# Same kind of glycan, now carrying the group tag #g1, with three residues
# (S, T, S) that carry a bare #g1 tag.
tagged_glycan_example = "{Glycan:Hex1HexNAc2NeuAc1#g1}S[#g1]T[#g1]YGIANS[#g1]EQ"
p = proforma.ProForma.parse(tagged_glycan_example)

In [48]:
# Display the sequence-level properties of the parsed ProForma object `p`.
# NOTE(review): the recorded output shows 'group_ids': [] and the glycan as
# GlycanModification(..., None, None) although the input string uses #g1 —
# confirm whether dropping the group tag here is intended.
p.properties

{'n_term': None,
 'c_term': None,
 'unlocalized_modifications': [],
 'labile_modifications': [GlycanModification('Hex1HexNAc2NeuAc1', None, None)],
 'fixed_modifications': [],
 'intervals': [],
 'isotopes': [],
 'group_ids': []}

1

{'mass': 841.2962353162999,
 'composition': Composition({'C': 29, 'H': 51, 'O': 22, 'N': 3}),
 'name': 'Hex 1 HexNAc 2 NeuAc 1',
 'monosaccharides': BasicComposition({'Hex': 1, 'HexNAc': 2, 'NeuAc': 1})}

In [120]:
# One string exercising most of the syntax at once: a fixed-modification rule
# (<[Carbamidomethyl]@C>), an isotope label (<13C>), an unlocalized modification
# ([Hydroxyl]?), a labile modification ({HexNAc}), an N-terminal modification
# ([Hex]-), a residue modification (T[U:Oxidation]) and a tagged interval
# ((EPP)[+18.15]).
full_syntax_example = "<[Carbamidomethyl]@C><13C>[Hydroxyl]?{HexNAc}[Hex]-ST[U:Oxidation](EPP)[+18.15]ING"
seq, fields = tokenize_proforma(full_syntax_example)
(seq, fields)

([('S', None),
  ('T', UnimodModification('Oxidation', None, None)),
  ('E', None),
  ('P', None),
  ('P', None),
  ('I', None),
  ('N', None),
  ('G', None)],
 {'n_term': GenericModification('Hex', None, None),
  'c_term': None,
  'unlocalized_modifications': [GenericModification('Hydroxyl', None, None)],
  'labile_modifications': [GenericModification('HexNAc', None, None)],
  'fixed_modifications': [ModificationRule(GenericModification('Carbamidomethyl', None, None), ['C'])],
  'intervals': [TaggedInterval(2, 5, +18.15)],
  'isotopes': [StableIsotope(13C)],
  'group_ids': []})

In [121]:
# Round trip: serialise the tokens (`seq`) and properties (`fields`) from the
# last tokenize_proforma call back into a ProForma string. In the recorded
# output the `U:` prefix comes back as `UNIMOD:`; the rest matches the input.
format_proforma(seq, **fields)

'<[Carbamidomethyl]@C><13C>[Hydroxyl]?{HexNAc}[Hex]-ST[UNIMOD:Oxidation](EPP)[+18.15]ING'

In [119]:
# Scratch check of a number-trimming idiom: strip trailing '0' characters, then
# a trailing '.'. This value ends in '1', so neither strip removes anything and
# the string is returned unchanged.
'1010100.00001'.rstrip('0').rstrip('.')

'1010100.00001'

In [67]:
# A tagged interval whose mass shift is written as an integer (+18).
integer_interval_example = "S(EPP)[+18]ING"
seq, fields = tokenize_proforma(integer_interval_example)
(seq, fields)

([('S', None),
  ('E', None),
  ('P', None),
  ('P', None),
  ('I', None),
  ('N', None),
  ('G', None)],
 {'n_term': None,
  'c_term': None,
  'unlocalized_modifications': [],
  'labile_modifications': [],
  'fixed_modifications': [],
  'intervals': [TaggedInterval(1, 4, 18.0000)],
  'isotopes': [],
  'group_ids': []})

In [69]:
# Unlocalized Phospho declared up front with group tag #s1; three residues
# carry #s1 markers with scores 0.01, 0.09 and 0.90.
unlocalized_phospho_example = "[Phospho#s1]?EM[Oxidation]EVT[#s1(0.01)]S[#s1(0.09)]ES[#s1(0.90)]PEK"
tokenize_proforma(unlocalized_phospho_example)

([('E', None),
  ('M', GenericModification('Oxidation', None, None)),
  ('E', None),
  ('V', None),
  ('T', LocalizationMarker(0.01, None, '#s1')),
  ('S', LocalizationMarker(0.09, None, '#s1')),
  ('E', None),
  ('S', LocalizationMarker(0.9, None, '#s1')),
  ('P', None),
  ('E', None),
  ('K', None)],
 {'n_term': None,
  'c_term': None,
  'unlocalized_modifications': [GenericModification('Phospho', [], '#s1')],
  'labile_modifications': [],
  'fixed_modifications': [],
  'intervals': [],
  'isotopes': [],
  'group_ids': ['#s1']})

In [234]:
# Phospho written on the last tagged S together with its #g1 score (0.90); the
# other two tagged residues carry only #g1 markers with their scores.
localized_phospho_example = "EM[Oxidation]EVT[#g1(0.01)]S[#g1(0.09)]ES[Phospho#g1(0.90)]PEK"
tokenize_proforma(localized_phospho_example)

([('E', None),
  ('M', GenericModification('Oxidation', None, None)),
  ('E', None),
  ('V', None),
  ('T', LocalizationMarker(0.01, None, '#g1')),
  ('S', LocalizationMarker(0.09, None, '#g1')),
  ('E', None),
  ('S',
   GenericModification('Phospho', [LocalizationMarker(0.9, None, '#g1')], '#g1')),
  ('P', None),
  ('E', None),
  ('K', None)],
 {'n_term': None,
  'c_term': None,
  'unlocalized_modifications': [],
  'labile_modifications': [],
  'fixed_modifications': [],
  'intervals': [],
  'isotopes': [],
  'group_ids': ['#g1']})

In [235]:
# Same layout as the Phospho example, with a glycan composition
# (Glycan:HexNAc 1) as the modification attached to group #g1.
localized_glycan_example = "EMEVT[#g1(0.01)]S[#g1(0.09)]ES[Glycan:HexNAc 1#g1(0.90)]PEK"
tokenize_proforma(localized_glycan_example)

([('E', None),
  ('M', None),
  ('E', None),
  ('V', None),
  ('T', LocalizationMarker(0.01, None, '#g1')),
  ('S', LocalizationMarker(0.09, None, '#g1')),
  ('E', None),
  ('S',
   GlycanModification('HexNAc 1', [LocalizationMarker(0.9, None, '#g1')], '#g1')),
  ('P', None),
  ('E', None),
  ('K', None)],
 {'n_term': None,
  'c_term': None,
  'unlocalized_modifications': [],
  'labile_modifications': [],
  'fixed_modifications': [],
  'intervals': [],
  'isotopes': [],
  'group_ids': ['#g1']})