In [None]:
import re
from dataclasses import dataclass, field
from typing import Optional, Sequence, List

In [None]:
# MPEP citation pattern, e.g. "MPEP 2145", "MPEP § 701.32(a)(1)" or
# "MPEP § 701.32.a.1".
#
# Named groups:
#   chapter    - digits right after "MPEP" (and the optional "§").
#   section    - digits after the first dot; None when the citation has no dot.
#   subsection - either parenthesised parts "(a)(1)..." or dotted parts
#                ".a.1..."; '' when a section is present without a subsection,
#                None when there is no section at all.
#
# Each dotted part is a run of digits or a single letter that ends at a word
# boundary, so a sentence-ending period (or a following word such as ".Next")
# is never swallowed into the citation.
# NOTE(review): `\.\s*` tolerates whitespace after the first dot, so
# "MPEP 2145. 35 ..." would read 35 as the section - confirm that leniency is
# intended.
MPEP_REGEX = (
    r'(\bMPEP\s+)(?:§\s+)?(?P<chapter>\d+)'
    r'(?:\.\s*(?P<section>[\d]+)'
    r'(?P<subsection>(?:\([\da-zA-Z]+\))+|(?:\.(?:\d+|[a-zA-Z])\b)+|)'
    r')?'
)

# Compiled once so every cell below can reuse it.
mpep_pattern = re.compile(MPEP_REGEX)

In [None]:
# Demo: one sentence holding a bare-chapter citation and a full "§" citation.
text = "The relevant MPEP citations are MPEP 2145 and MPEP § 701.32(a)(1)."
for found in mpep_pattern.finditer(text):
    # Show the matched span first, then the named groups captured from it.
    full_match = found.group()
    named_groups = found.groupdict()
    print("Full Match:", full_match)
    print(named_groups)


Full Match: MPEP 2145
{'chapter': '2145', 'section': None, 'subsection': None}
Full Match: MPEP § 701.32(a)(1)
{'chapter': '701', 'section': '32', 'subsection': '(a)(1)'}


In [None]:
# Demo: compare the parenthesised "(a)(1)" form with the dotted ".a.1" form.
text = "The relevant MPEP citation is MPEP § 701.32(a)(1) or MPEP § 701.32.a.1."

for hit in re.finditer(MPEP_REGEX, text):
    # Whole citation as it appears in the text.
    mpep_citation = hit.group(0)
    print(mpep_citation)
    # Pull the three named groups out in a single call.
    chapter, section, subsection = hit.group('chapter', 'section', 'subsection')
    print(f"Chapter: {chapter}, Section: {section}, Subsection: {subsection}")

MPEP § 701.32(a)(1)
Chapter: 701, Section: 32, Subsection: (a)(1)
MPEP § 701.32
Chapter: 701, Section: 32, Subsection: 


In [None]:
# Demo: a citation that carries only a chapter number.
text = "The relevant MPEP citation is MPEP 2145."

for chapter_only in mpep_pattern.finditer(text):
    groups_by_name = chapter_only.groupdict()
    print("Full Match:", chapter_only.group())
    print("Named Groups:", groups_by_name)

Full Match: MPEP 2145
Named Groups: {'chapter': '2145', 'section': None, 'subsection': None}


In [None]:
@dataclass(eq=True)
class MPEPCitation:
    """An MPEP citation described by the named groups of a regex match.

    ``metadata`` is the mapping produced by ``match.groupdict()``: it is
    expected to hold a ``"chapter"`` entry and may hold ``"section"`` and
    ``"subsection"`` entries, any of which can be ``None`` when that part was
    absent from the matched text.

    Instances compare equal when their ``metadata`` is equal and are hashable,
    so they can be de-duplicated with ``set`` or used as dict keys.
    """
    metadata: Optional[dict] = field(default=None)

    def __hash__(self) -> int:
        """Hash the metadata items so equal citations hash equally.

        A dataclass-generated hash would hash the ``metadata`` dict itself,
        which raises ``TypeError`` because dicts are unhashable.
        """
        items = self.metadata.items() if self.metadata else ()
        return hash(frozenset(items))

    def formatted(self) -> str:
        """Return the citation as ``"MPEP <chapter>"`` or ``"MPEP <chapter> § <section>"``.

        Returns:
            The formatted citation, or ``''`` when ``metadata`` is ``None`` or
            empty. The subsection is not included in the output.

        Raises:
            KeyError: if ``metadata`` is non-empty but has no ``"chapter"`` key.
        """
        m = self.metadata
        if not m:
            return ''
        parts = [f'MPEP {m["chapter"]}']
        # groupdict() always contains the "section" key, with None when the
        # group did not participate - so test the value, not key membership.
        section = m.get("section")
        if section:
            parts.append(f'§ {section}')
        return ' '.join(parts)

def extract_mpep_citation(text: str) -> "List[MPEPCitation]":
    """Extract MPEP citations from the given text.

    Args:
        text: Text to scan with the module-level ``mpep_pattern``.

    Returns:
        One ``MPEPCitation`` per match, in order of appearance, each built
        from that match's ``groupdict()``. Returns an empty list when ``text``
        is empty or ``None``.
    """
    # Nothing to scan; also avoids the TypeError finditer raises on None.
    if not text:
        return []
    # The function only returns results; callers decide what to display.
    return [
        MPEPCitation(metadata=match.groupdict())
        for match in mpep_pattern.finditer(text)
    ]

In [None]:
# Example usage:
text = "The relevant MPEP citations are MPEP 2145 and MPEP § 701.32(a)(1)."
citations = extract_mpep_citation(text)
for citation in citations:
    print(citation.formatted())


<callable_iterator object at 0x1127cd150>
MPEPCitation(metadata={'chapter': '2145', 'section': None, 'subsection': None})
MPEPCitation(metadata={'chapter': '701', 'section': '32', 'subsection': '(a)(1)'})
MPEP 2145 § None
MPEP 701 § 32


In [None]:
# Show the raw list of citation objects collected by the usage cell.
print(repr(citations))

[MPEPCitation(metadata={'chapter': '2145', 'section': None, 'subsection': None}), MPEPCitation(metadata={'chapter': '701', 'section': '32', 'subsection': '(a)(1)'})]


In [None]:
# Same extraction, this time through the function exported by find_mpep.
from find_mpep import extract_mpep_citation

text = "The relevant MPEP citation are MPEP 2145 and MPEP § 701.32(a)(1) or MPEP § 701.32.a.1."
result = extract_mpep_citation(text)

# Print the citations one per line.
for found_citation in result:
    print(found_citation)

<callable_iterator object at 0x111f2da80>
MPEPCitation(metadata={'chapter': '2145', 'section': None, 'subsection': None})
MPEPCitation(metadata={'chapter': '701', 'section': '32', 'subsection': '(a)(1)'})
MPEPCitation(metadata={'chapter': '701', 'section': '32', 'subsection': ''})
MPEPCitation(metadata={'chapter': '2145', 'section': None, 'subsection': None})
MPEPCitation(metadata={'chapter': '701', 'section': '32', 'subsection': '(a)(1)'})
MPEPCitation(metadata={'chapter': '701', 'section': '32', 'subsection': ''})


In [None]:
# Extraction through find_mpep again, printing the returned list as a whole
# rather than item by item.
from find_mpep import extract_mpep_citation

text = "The relevant MPEP citation are MPEP 2145 and MPEP § 701.32(a)(1) or MPEP § 701.32.a.1."
result = extract_mpep_citation(text)
print(result)

<callable_iterator object at 0x111f2db10>
MPEPCitation(metadata={'chapter': '2145', 'section': None, 'subsection': None})
MPEPCitation(metadata={'chapter': '701', 'section': '32', 'subsection': '(a)(1)'})
MPEPCitation(metadata={'chapter': '701', 'section': '32', 'subsection': ''})
[MPEPCitation(metadata={'chapter': '2145', 'section': None, 'subsection': None}), MPEPCitation(metadata={'chapter': '701', 'section': '32', 'subsection': '(a)(1)'}), MPEPCitation(metadata={'chapter': '701', 'section': '32', 'subsection': ''})]
