# Regexps

URLs:
- [Tutorial](https://regexone.com/)
- [Debugger](https://regex101.com/)
- [Reference](https://www.regular-expressions.info/)
- [Exercises](https://regex.sketchengine.co.uk/)


In [1]:
# Minimal tour of Python's `re` module: compile a pattern, locate it, replace it.

import re

txt: str = "Hola a todosssss."
reg: str = r"s{2,}"  # a run of two or more consecutive "s"
pat: re.Pattern = re.compile(reg)

# 1. Search for the pattern
match_list: list[re.Match] = [*pat.finditer(txt)]

# The sample text contains exactly one run of repeated "s".
assert len(match_list) == 1
match: re.Match = match_list[0]

# Accessors available on a match object (shown for reference only).
match.start()
match.end()
match.span()    # (start, end)
match.group(0)  # Group 0 is the whole match. It always exists, even when the pattern defines no groups.

# 2. Substitutions: collapse the run of "s" into a single one
new_txt: str = re.sub(pat, "s", txt)
new_txt


'Hola a todos.'

In [2]:
# Example of pretty-printing multiple matches. Also multiple compilation options.

import re

txt: str        = "GATAGATAGATA"
reg: str        = r"GATA"
# Compilation flags are combined with bitwise OR. None of them changes the
# result for this particular pattern and text; they only illustrate the syntax.
pat: re.Pattern = re.compile(reg, re.IGNORECASE | re.MULTILINE | re.DOTALL)

# 1. Find every non-overlapping match of the pattern
match_list: list[re.Match] = list(pat.finditer(txt))
print(f"Number of matches: {len(match_list)}")

match: re.Match
# The counter is named `idx` (not `id`) so the builtin id() is not shadowed
# for the rest of the notebook session.
for idx, match in enumerate(match_list):
    print(f"Match {idx}:", match.span(), match.group(0))


Number of matches: 3
Match 0: (0, 4) GATA
Match 1: (4, 8) GATA
Match 2: (8, 12) GATA


In [3]:
# Example with multiple matches, each with multiple groups

import re

txt: str        = "GATAGATCGATT"
reg: str        = r"(G.)(T.)"   # two capturing groups: "G" + any char, then "T" + any char
pat: re.Pattern = re.compile(reg)

# 1. Search for the pattern with groups
match_list: list[re.Match] = list(pat.finditer(txt))
print(f"Number of matches: {len(match_list)}\n")

match: re.Match
# The counter is named `idx` (not `id`) so the builtin id() is not shadowed
# for the rest of the notebook session.
for idx, match in enumerate(match_list):
    print(f"Match {idx}: {match.group(0)}")
    # match.groups() returns every capturing group (1..N) as a tuple, so this
    # line keeps working if groups are added to or removed from the pattern.
    print(f"Groups: {', '.join(match.groups())}")
    print()

Number of matches: 3

Match 0: GATA
Groups: GA, TA

Match 1: GATC
Groups: GA, TC

Match 2: GATT
Groups: GA, TT

