pymzml · MKoesters · Jul 1, 2022 · Jun 27, 2022
diff --git a/pymzml/file_classes/standardMzml.py b/pymzml/file_classes/standardMzml.py
@@ -203,7 +203,9 @@ def _binary_search(self, target_index):
                     matches = re.finditer(regex_patterns.SPECTRUM_OPEN_PATTERN, chunk)
                     for _match_number, match in enumerate(matches):
                         if match is not None:
-                            scan = int(re.search(b"[0-9]*$", match.group("id")).group())
+                            spec_info = match.groups()
+                            spec_info = dict(zip(spec_info[0::2], spec_info[1::2]))
+                            scan = int(re.search(b"[0-9]*$", spec_info[b"id"]).group())
                             # print(">>", _match_number, scan)
                             if jump_direction == 'forwards':
                                 if scan > target_index:
@@ -493,8 +495,10 @@ def _interpol_search(self, target_index, chunk_size=8, fallback_cutoff=100):
             if spec_start is not None:
                 spec_start_offset = file_pointer + spec_start.start()
                 seeker.seek(spec_start_offset)
+                spec_info = self.spec_open.search(data).groups()
+                spec_info = dict(zip(spec_info[0::2], spec_info[1::2]))
                 current_index = int(
-                    re.search(b"[0-9]*$", spec_start.group("id")).group()
+                    re.search(b"[0-9]*$", spec_info[b"id"]).group()
                 )
 
                 self.offset_dict[current_index] = (spec_start_offset,)
@@ -519,9 +523,10 @@ def _interpol_search(self, target_index, chunk_size=8, fallback_cutoff=100):
                         current_position = seeker.tell()
                         data = seeker.read(chunk_size)
                         if self.spec_open.search(data):
-                            spec_start = self.spec_open.search(data)
+                            spec_info = self.spec_open.search(data).groups()
+                            spec_info = dict(zip(spec_info[0::2], spec_info[1::2]))
                             current_index = int(
-                                re.search(b"[0-9]*$", spec_start.group("id")).group()
+                                re.search(b"[0-9]*$", spec_info[b"id"]).group()
                             )
                     seeker.seek(current_position)
                     spectrum = self._search_linear(seeker, target_index)
@@ -685,8 +690,10 @@ def _search_linear(self, seeker, index, chunk_size=8):
             if spec_start:
                 spec_start_offset = file_pointer + spec_start.start()
                 seeker.seek(spec_start_offset)
+                spec_info = spec_start.groups()
+                spec_info = dict(zip(spec_info[0::2], spec_info[1::2]))
                 current_index = int(
-                    re.search(b"[0-9]*$", spec_start.group("id")).group()
+                    re.search(b"[0-9]*$", spec_info[b"id"]).group()
                 )
                 # print(current_index)
                 spec_end = self.spec_close.search(data[spec_start.start() :])

diff --git a/pymzml/regex_patterns.py b/pymzml/regex_patterns.py
@@ -38,10 +38,7 @@
 """
 Regex to catch moby dick chapter number used in the index gezip writer example.
 """
-
-SPECTRUM_OPEN_PATTERN = re.compile(
-    b'<*spectrum[^>]*index="(?P<index>[0-9]+)" id="(?P<id>[^"]+)" defaultArrayLength="[0-9]+">'
-)
+SPECTRUM_OPEN_PATTERN = re.compile(rb'<*spectrum[^>]*?\b(index|id)="([^"]*)"[^>]*?\b(index|id)="([^"]*)"')  # either attribute order; never reads past the tag's '>'
 """
 Regex to catch specturm open xml tag with encoded array length
 """

diff --git a/tests/regex_test.py b/tests/regex_test.py
@@ -9,6 +9,7 @@
 import pymzml.regex_patterns as rp
 import unittest
 from collections import OrderedDict as odict
+import re
 
 
 class RegexTest(unittest.TestCase):
@@ -62,6 +63,15 @@ def test_spectrum_tag_patter(self):
         for tag in self.spec_tags.values():
             self.assertRegex(tag.decode("utf-8"), rp.SPECTRUM_TAG_PATTERN)
 
+    def test_index_and_id_order_does_not_matter(self):
+        """Either attribute order must parse to the same, fully specified index/id mapping."""
+        spec_id = b"controllerType=0 controllerNumber=1 scan=1"
+        id_first = b'<spectrum id="' + spec_id + b'" index="0" defaultArrayLength="1100">'
+        index_first = b'<spectrum index="0" id="' + spec_id + b'" defaultArrayLength="917">'
+        for tag in (id_first, index_first):
+            groups = re.search(rp.SPECTRUM_OPEN_PATTERN, tag).groups()
+            self.assertEqual(dict(zip(groups[0::2], groups[1::2])), {b"index": b"0", b"id": spec_id})
+
 
 if __name__ == "__main__":
     unittest.main(verbosity=3)