# Names

In [1]:
# Sample input: one heading line followed by five "Family, Title Given" entries.
# Each literal below is exactly one line of the text, newline included.
text = (
    "\n"
    "    Here are the full name of some people:\n"
    "    Smith, Mr. Robert\n"
    "    Johnson, Ms Mary\n"
    "    Wilson, Mrs. Barbara\n"
    "    Taylor, Dr Karen\n"
    "    Lewis, Mr. John\n"
    "\n"
)

In [2]:
from pregex.core.classes import AnyButWhitespace
from pregex.core.quantifiers import OneOrMore, Optional
from pregex.core.operators import Either

# Both name parts are any unbroken run of non-whitespace characters.
family_name = OneOrMore(AnyButWhitespace())
given_name = OneOrMore(AnyButWhitespace())

# The accepted titles, and the period that may or may not follow them.
title = Either("Mrs", "Mr", "Ms", "Dr")
title_dot = Optional(".")

# Layout of one entry: "<family name>, <title>[.] <given name>"
pre = family_name + ", " + title + title_dot + " " + given_name

pre.get_matches(text)

['Smith, Mr. Robert',
 'Johnson, Ms Mary',
 'Wilson, Mrs. Barbara',
 'Taylor, Dr Karen',
 'Lewis, Mr. John']

In [3]:
# Display the regular-expression string that the composed pattern produces.
pre.get_pattern()

'\\S+, (?:Mrs|Mr|Ms|Dr)\\.? \\S+'

# Date

In [4]:
# Sample input: the same kind of date written with three different field orders.
# Each literal below is exactly one line of the text, newline included.
text = (
    "\n"
    "    04-13-2021\n"
    "    2021-04-13\n"
    "    2021-13-04\n"
)

In [5]:
from pregex.core.classes import AnyDigit
from pregex.core.quantifiers import Exactly

# Fixed-width digit runs used to assemble the date pattern.
two_digits = Exactly(AnyDigit(), 2)
four_digits = Exactly(AnyDigit(), 4)

# Two digits, two digits, then four digits, separated by hyphens
# (e.g. "04-13-2021"). Only the shape is checked, not the field values.
pre = two_digits + "-" + two_digits + "-" + four_digits

pre.get_matches(text)

['04-13-2021']

In [6]:
# Display the regular-expression string that the composed pattern produces.
pre.get_pattern()

'\\d{2}-\\d{2}-\\d{4}'

# Email

In [7]:
# Sample input: one plain address, one with a doubled "@", one with a trailing dot.
# Each literal below is exactly one line of the text, newline included.
text = (
    "\n"
    "    example@python.com\n"
    "    example@@python.com\n"
    "    example@python.com.\n"
)

In [8]:
from pregex.core.classes import AnyButFrom
from pregex.core.quantifiers import OneOrMore, AtLeast
from pregex.core.assertions import MatchAtLineEnd

# Single-character classes shared by the pieces below.
# NOTE(review): only the space character is excluded, not other whitespace.
not_at_or_space = AnyButFrom("@", " ")
not_at_space_or_dot = AnyButFrom("@", " ", ".")

# Part before the "@": one or more characters that are neither "@" nor space.
non_at_sign_space = OneOrMore(not_at_or_space)
# Part between the "@" and the dot: additionally excludes ".".
non_at_sign_space_dot = OneOrMore(not_at_space_or_dot)
# Part after the dot: at least two such characters, ending at the end of a line.
domain = MatchAtLineEnd(AtLeast(not_at_space_or_dot, 2))

pre = non_at_sign_space + "@" + non_at_sign_space_dot + "." + domain

pre.get_matches(text)

['example@python.com']

In [9]:
# Display the regular-expression string that the composed pattern produces.
pre.get_pattern()

'[^ @]+@[^ .@]+\\.[^ .@]{2,}$'