# Sample Template

Here is some sample text, followed by a first block of code.

In [None]:
%config InteractiveShell.ast_node_interactivity='last_expr_or_assign'  # always print last expr.

In [None]:
from dataclasses import dataclass


@dataclass
class Foo:
    """Two-field record whose ``__repr__`` announces each call on stdout."""

    x: int
    y: int = 0

    def __repr__(self) -> str:
        """Print a trace marker, then return ``Foo(x=..., y=...)``."""
        # @dataclass leaves an explicitly defined __repr__ in place.
        print("repr")
        text = f"Foo(x={self.x}, y={self.y})"
        return text


@dataclass(repr=False)
class Bar(Foo):
    """Foo with an extra field ``z``.

    ``repr=False`` stops @dataclass from generating a ``__repr__`` for Bar, so
    the one inherited from Foo is used: it prints "repr" and does not show ``z``.
    """

    z: int = 0

In [None]:
import pandas as pd

# Build an Index straight from a dict; iterating a dict yields its keys.
d = {"a": 1, "b": 2}
idx = pd.Index(d)

In [None]:
# Iterating a Series yields its values (not the index labels).
s = pd.Series(["a", "b", "c"])
list(s)

In [None]:
from typing import Protocol, Iterable, Any


from typing import TypeVar, runtime_checkable

V_co = TypeVar("V_co", covariant=True)


class _SupportsKwargsMeta(type(Protocol)):
    """Metaclass that adds an "all keys are strings" test to ``isinstance``.

    ``isinstance(obj, cls)`` looks up ``__instancecheck__`` on the *metaclass*
    of ``cls``, so the extra check has to live here rather than on the
    protocol class itself.
    """

    def __instancecheck__(cls, obj: object) -> bool:
        # Structural part: ``keys`` and ``__getitem__`` must be present.
        if not super().__instancecheck__(obj):
            return False
        # Value-level part: ``**obj`` only works when every key is a str.
        # ``all`` is vacuously true for an empty key set, which unpacks fine.
        return all(isinstance(key, str) for key in obj.keys())


@runtime_checkable
class SupportsKwargs(Protocol[V_co], metaclass=_SupportsKwargsMeta):
    """Protocol for objects that support **kwargs.

    An object qualifies when it provides ``keys()`` and ``__getitem__`` and
    all of its keys are strings. ``isinstance`` checks call ``obj.keys()``.
    """

    def keys(self) -> Iterable[str]: ...
    def __getitem__(self, key: str, /) -> V_co: ...


class Foo:
    """Minimal object usable with ``**``: string keys plus item lookup."""

    def keys(self) -> list[str]:
        return ["some", "strings"]

    def __getitem__(self, key: str) -> int:
        # The value for each key is simply the key's length.
        return len(key)


class Bar:
    """Counter-example: has ``keys``/``__getitem__`` but the keys are ints."""

    def keys(self) -> list[int]:
        return [1, 2]

    def __getitem__(self, key: int) -> int:
        # Each key maps to itself.
        return key


def unpack_kwargs(**kwargs):
    """Print the received keyword arguments as a dict."""
    print(kwargs)


def unpack_obj(obj: SupportsKwargs) -> None:
    """Pass ``obj`` to ``unpack_kwargs`` using ``**`` unpacking."""
    unpack_kwargs(**obj)


# ``**obj`` reads obj.keys() and obj[key]; every key must be a str.
unpack_obj(Foo())  # prints {'some': 4, 'strings': 7}
unpack_obj(Bar())  # errors with "TypeError: keywords must be strings"

In [None]:
# Runtime check of Foo against the protocol (fixed the misspelled names).
isinstance(Foo(), SupportsKwargs)

In [None]:
class Foo:
    """Object that can be ``**``-unpacked: it has ``keys`` and ``__getitem__``."""

    def keys(self):
        return ["a", "b", "c"]

    def __getitem__(self, key):
        # Each key maps to its own length.
        return len(key)

In [None]:
def foo(**kwargs):
    """Print the received keyword arguments as a dict."""
    print(kwargs)


# ``**Foo()`` builds the keyword arguments from Foo().keys() and Foo()[key].
foo(**Foo())

In [None]:
import re

In [None]:
def ignore_subgroups(pattern: str | re.Pattern, /) -> str:
    """Ignore all named groups in the given pattern."""
    return re.sub(r"\(\?P<[^>]+>", r"(?:", pattern)


# https://peps.python.org/pep-0440/#appendix-b-parsing-version-strings-with-regular-expressions
# The IGNORECASE and VERBOSE flags are scoped inline via ``(?ix: ...)``, so the
# pattern can be embedded in larger patterns without passing flags to re.compile.
VERSION = r"""(?ix:                                       # case-insensitive, verbose
    v?(?:
        (?:(?P<epoch>[0-9]+)!)?                           # epoch
        (?P<release>[0-9]+(?:\.[0-9]+)*)                  # release segment
        (?P<pre>                                          # pre-release
            [-_.]?
            (?P<pre_l>(?:a|b|c|rc|alpha|beta|pre|preview))
            [-_.]?
            (?P<pre_n>[0-9]+)?
        )?
        (?P<post>                                         # post release
            (?:-(?P<post_n1>[0-9]+))
            |
            (?:
                [-_.]?
                (?P<post_l>post|rev|r)
                [-_.]?
                (?P<post_n2>[0-9]+)?
            )
        )?
        (?P<dev>                                          # dev release
            [-_.]?
            (?P<dev_l>dev)
            [-_.]?
            (?P<dev_n>[0-9]+)?
        )?
    )
    (?:\+(?P<local>[a-z0-9]+(?:[-_.][a-z0-9]+)*))?        # local version
)"""
# NOTE(review): the next line is a bare string expression; it is evaluated and
# discarded, so it has no effect on VERSION or on anything below.
r"^([1-9][0-9]*!)?(0|[1-9][0-9]*)(\.(0|[1-9][0-9]*))*((a|b|rc)(0|[1-9][0-9]*))?(\.post(0|[1-9][0-9]*))?(\.dev(0|[1-9][0-9]*))?$"
# Turn the named sub-groups into non-capturing ones and wrap the result in a
# single ``version`` group.
VERSION_GROUP = rf"""(?P<version>{ignore_subgroups(VERSION)})"""
VERSION_REGEX: re.Pattern = re.compile(VERSION_GROUP)

# Number of capturing groups in the compiled pattern.
VERSION_REGEX.groups

In [None]:
# Name pattern delimited by word boundaries so it can match inside larger text.
NAME = re.compile(r"""\b[a-zA-Z0-9](?:[a-zA-Z0-9._-]*[a-zA-Z0-9])?\b""")
# Interpolate the pattern source (``.pattern``), not the compiled object.
NAME_GROUP = rf"""(?P<name>{NAME.pattern})"""
NAME_REGEX = re.compile(NAME_GROUP)
# Sanity check: ``name`` must be the only capturing group.
assert NAME_REGEX.groups == 1, f"NAME_REGEX has {NAME_REGEX.groups} groups."

In [None]:
import pyarrow as pa

# Contrast: comparing a null Arrow scalar yields a null result, whereas the
# same comparison on the converted Python value (None) raises.
# NOTE(review): ``pa.compute`` is only reachable if the ``pyarrow.compute``
# submodule has been imported - confirm for the installed pyarrow version.
pa_x = pa.scalar(None, type=pa.int64())
pa_y = pa.compute.greater(pa_x, None)
result = pa_y.as_py()  # None

py_x = pa_x.as_py()
py_y = (
    py_x > 0
)  # TypeError: '>' not supported between instances of 'NoneType' and 'int'

In [None]:
# A scoped-flags group ``(?ix:...)`` is non-capturing, so ``.groups`` is 0 here.
re.compile(r"(?ix:\w+)").groups

In [None]:
# List the attributes of the compiled version pattern (fixed the misspelled name).
dir(VERSION_REGEX)

In [None]:
# https://peps.python.org/pep-0508/#names
# NOTE: we modify this regex a bit to allow to match inside context
NAME = re.compile(r"""\b[a-zA-Z0-9](?:[a-zA-Z0-9._-]*[a-zA-Z0-9])?\b""")
# Interpolate the pattern *source*: formatting the compiled object itself would
# embed its repr ("re.compile('...')") into the regex.
NAME_GROUP = rf"""(?P<name>{NAME.pattern})"""
NAME_REGEX = re.compile(NAME_GROUP)
assert NAME_REGEX.groups == 1

# NOTE: to get a list of extras, run NAME_REGEX.findall on the text matched by
# EXTRAS_REGEX.
# Whitespace is allowed on both sides of each comma, e.g. "[ a , b ]".
EXTRAS = rf"""\[\s*(?:{NAME.pattern})(?:\s*,\s*(?:{NAME.pattern}))*\s*\]"""
EXTRAS_GROUP = rf"""(?P<extras>{EXTRAS})"""
EXTRAS_REGEX = re.compile(EXTRAS_GROUP)
assert EXTRAS_REGEX.groups == 1

# Opening quote, project name, optional extras, then a ">=" lower bound.
# EXTRAS is wrapped in a group so that ``?`` makes the whole bracketed list
# optional rather than only its closing ``\]``.
PROJECT_DEP = rf"""["']{NAME_GROUP}(?:{EXTRAS})?\s*>=\s*{VERSION_GROUP}"""
PROJECT_DEP_GROUP = rf"""(?P<DEPENDENCY>{PROJECT_DEP})"""
PROJECT_DEP_REGEX = re.compile(PROJECT_DEP_GROUP)

In [None]:
NAME = re.compile(r"""\b[a-zA-Z0-9](?:[a-zA-Z0-9._-]*[a-zA-Z0-9])?\b""")
# Keep NAME_GROUP as pattern *source* (a str) so it can be interpolated into
# larger patterns; NAME_REGEX is the compiled form.
NAME_GROUP = rf"""(?P<name>{NAME.pattern})"""
NAME_REGEX = re.compile(NAME_GROUP)

In [None]:
# Opening quote, project name, optional extras, then a ">=" lower bound.
# EXTRAS is wrapped in a group so that ``?`` makes the whole bracketed list
# optional rather than only its closing ``\]``.
PROJECT_DEP = rf"""["']{NAME_GROUP}(?:{EXTRAS})?\s*>=\s*{VERSION_GROUP}"""
PROJECT_DEP_GROUP = rf"""(?P<DEPENDENCY>{PROJECT_DEP})"""
PROJECT_DEP_REGEX = re.compile(PROJECT_DEP_GROUP)

In [None]:
import re


def is_canonical(version):
    """Return True when *version* matches the anchored version pattern.

    The pattern accepts an optional epoch, a dotted release, and optional
    a/b/rc, ``.post`` and ``.dev`` parts, with no leading zeros in numbers.
    """
    canonical = r"^([1-9][0-9]*!)?(0|[1-9][0-9]*)(\.(0|[1-9][0-9]*))*((a|b|rc)(0|[1-9][0-9]*))?(\.post(0|[1-9][0-9]*))?(\.dev(0|[1-9][0-9]*))?$"
    # A match object is always truthy, so bool() is True exactly on a match.
    return bool(re.match(canonical, version))

In [None]:
# Version pattern written for re.VERBOSE: whitespace and ``#`` comments inside
# the pattern are ignored. It is compiled case-insensitively below.
VERSION_PATTERN = r"""
    v?
    (?:
        (?:(?P<epoch>[0-9]+)!)?                           # epoch
        (?P<release>[0-9]+(?:\.[0-9]+)*)                  # release segment
        (?P<pre>                                          # pre-release
            [-_\.]?
            (?P<pre_l>(a|b|c|rc|alpha|beta|pre|preview))
            [-_\.]?
            (?P<pre_n>[0-9]+)?
        )?
        (?P<post>                                         # post release
            (?:-(?P<post_n1>[0-9]+))
            |
            (?:
                [-_\.]?
                (?P<post_l>post|rev|r)
                [-_\.]?
                (?P<post_n2>[0-9]+)?
            )
        )?
        (?P<dev>                                          # dev release
            [-_\.]?
            (?P<dev_l>dev)
            [-_\.]?
            (?P<dev_n>[0-9]+)?
        )?
    )
    (?:\+(?P<local>[a-z0-9]+(?:[-_\.][a-z0-9]+)*))?       # local version
"""
# The pattern has no ``^``/``$`` anchors of its own.
_regex = re.compile(VERSION_PATTERN, re.VERBOSE | re.IGNORECASE)


def is_canonical2(version):
    """Return True if the whole of *version* matches ``_regex``.

    ``fullmatch`` is used because ``_regex`` carries no anchors: a plain
    ``match`` would also accept a valid prefix followed by trailing junk.
    """
    return _regex.fullmatch(version) is not None


is_canonical2("0.1.23")

In [None]:
# Version pattern with its IGNORECASE and VERBOSE flags scoped inline via
# ``(?ix: ...)`` instead of being passed to re.compile.
VERSION = r"""(?ix:                                       # case-insensitive, verbose
    v?(?:
        (?:(?P<epoch>[0-9]+)!)?                           # epoch
        (?P<release>[0-9]+(?:\.[0-9]+)*)                  # release segment
        (?P<pre>                                          # pre-release
            [-_\.]?
            (?P<pre_l>(a|b|c|rc|alpha|beta|pre|preview))
            [-_\.]?
            (?P<pre_n>[0-9]+)?
        )?
        (?P<post>                                         # post release
            (?:-(?P<post_n1>[0-9]+))
            |
            (?:
                [-_\.]?
                (?P<post_l>post|rev|r)
                [-_\.]?
                (?P<post_n2>[0-9]+)?
            )
        )?
        (?P<dev>                                          # dev release
            [-_\.]?
            (?P<dev_l>dev)
            [-_\.]?
            (?P<dev_n>[0-9]+)?
        )?
    )
    (?:\+(?P<local>[a-z0-9]+(?:[-_\.][a-z0-9]+)*))?        # local version
)"""
VERSION_GROUP = rf"""(?P<version>{VERSION})"""
# NOTE(review): VERSION_REGEX is compiled from VERSION, not VERSION_GROUP, so it
# has no ``version`` group and VERSION_GROUP is unused in this cell - confirm.
VERSION_REGEX = re.compile(VERSION)


def is_canonical3(version):
    """Return True if the whole of *version* matches ``VERSION_REGEX``.

    ``fullmatch`` is used because the pattern carries no anchors: a plain
    ``match`` would also accept a valid prefix followed by trailing junk.
    """
    return VERSION_REGEX.fullmatch(version) is not None


is_canonical3("aga.aga.aga")

In [None]:
VERSION_REGEX.match("0.1.23")

In [None]:
VERSION_REGEX.match(".1.23")

In [None]:
import re

In [None]:
is_canonical2("0.1.23")

In [None]:
tests = r"""
black[ jupyterlab , d ]
pandas[numpy]
pkg[   ext1 , ext2, ext3, ext4, ext5]
a "witch" and her "broom" is one
"""

In [None]:
# Find every double-quoted substring of ``tests`` (quotes included); with no
# capturing group, findall returns the whole matches.
pattern = r'"[^\"]+"'
matches = re.findall(pattern, tests)

In [None]:
name = r"(\b[\w-]+\b)"
# rf-string: the regex escapes (``\[``, ``\s``, ``\]``) must reach ``re``
# untouched; in a non-raw f-string they are invalid string escape sequences.
extras = rf"\[(?:[,\s*]?{name}[,\s*]?)*\]"

# Bracketed, comma-separated list of names with exactly one capturing group,
# so findall returns one captured name per bracketed list.
pattern = r"\[\s*(?:(?:\b[\w-]+\b\s*,\s*)*(\b[\w-]+\b)(?:\s*,\s*\b[\w-]+\b)*)+\s*\]"
regex = re.compile(pattern, flags=re.VERBOSE)
# matches = list(re.finditer(pattern, tests))
matches = re.findall(pattern, tests)

In [None]:
?re.compile

In [None]:
# https://peps.python.org/pep-0440/#appendix-b-parsing-version-strings-with-regular-expressions
# Raw string: the pattern contains regex escapes such as ``\.`` and ``\+``,
# which are invalid escape sequences in a non-raw string literal.
VERSION_PATTERN = r"""
v?
(?:
    (?:(?P<epoch>[0-9]+)!)?                           # epoch
    (?P<release>[0-9]+(?:\.[0-9]+)*)                  # release segment
    (?P<pre>                                          # pre-release
        [-_\.]?
        (?P<pre_l>(a|b|c|rc|alpha|beta|pre|preview))
        [-_\.]?
        (?P<pre_n>[0-9]+)?
    )?
    (?P<post>                                         # post release
        (?:-(?P<post_n1>[0-9]+))
        |
        (?:
            [-_\.]?
            (?P<post_l>post|rev|r)
            [-_\.]?
            (?P<post_n2>[0-9]+)?
        )
    )?
    (?P<dev>                                          # dev release
        [-_\.]?
        (?P<dev_l>dev)
        [-_\.]?
        (?P<dev_n>[0-9]+)?
    )?
)
(?:\+(?P<local>[a-z0-9]+(?:[-_\.][a-z0-9]+)*))?       # local version
"""

# Wrap the whole pattern in a ``version`` group. VERBOSE is required because
# VERSION_PATTERN contains layout whitespace and ``#`` comments.
VERSION_REGEX: re.Pattern = re.compile(
    f"""(?P<version>{VERSION_PATTERN})""",
    re.VERBOSE | re.IGNORECASE,
)

# https://peps.python.org/pep-0508/#names
# Written as "one alphanumeric, optionally followed by more characters ending
# in an alphanumeric" so that unanchored matching takes the whole name; with a
# single-character alternative listed first, ``match``/``search`` would stop
# after one character.
NAME_PATTERN = r"[A-Z0-9](?:[A-Z0-9._-]*[A-Z0-9])?"
NAME_REGEX = re.compile(
    f"(?P<name>{NAME_PATTERN})",
    re.IGNORECASE,
)

# rf-string so that NAME_PATTERN is actually interpolated; in a plain raw
# string ``{NAME_PATTERN}`` would stay in the regex as literal text.
# NOTE(review): the leading ``\n`` means a match must start with a newline
# directly before ``[`` - confirm this is intended.
EXTRAS_PATTERN = rf"\n\[\s*(?:{NAME_PATTERN}\s*,\s*)*{NAME_PATTERN}\s*\]"
EXTRAS_REGEX = re.compile(
    f"(?P<extras>{EXTRAS_PATTERN})",
    re.IGNORECASE,
)

In [None]:
EXTRAS_PATTERN.format(NAME_PATTERN=NAME_PATTERN)

In [None]:
# rf-string: NAME_PATTERN is interpolated while the regex escapes (``\[``,
# ``\s``, ``\]``) reach ``re`` untouched; in a non-raw f-string they are
# invalid string escape sequences. ``\n`` is now the regex escape for a newline.
EXTRAS_PATTERN = rf"\n\[\s*(?:{NAME_PATTERN}\s*,\s*)*{NAME_PATTERN}\s*\]"

In [None]:
rf" \{{"

In [None]:
r" \{"

In [None]:
EXTRAS_PATTERN

In [None]:
\[\s*  (  (\b[\w-]+\b)  (\s*,\s*)?  )*  \s*\]