# Sample Template

Here is some sample text, followed by a first block of code.

In [None]:
import argparse
import ast
from collections.abc import Iterator
from pathlib import Path

In [None]:
# Parse the module under inspection into an AST.
# The encoding is passed explicitly: without it, open() falls back to the
# locale's preferred encoding, which need not match the source file's encoding.
with open("../src/tsdm/backend/pyarrow.py", "r", encoding="utf-8") as file:
    tree = ast.parse(file.read())

In [None]:
def get_attributes(tree: ast.AST) -> Iterator[ast.Attribute]:
    """Yield each ``ast.Attribute`` node encountered while walking ``tree``."""
    yield from (
        candidate
        for candidate in ast.walk(tree)
        if isinstance(candidate, ast.Attribute)
    )


def get_type_hints(tree: ast.AST) -> Iterator[ast.AST]:
    """Get all nodes that are type hints.

    Args:
        tree: Root of the AST to search.

    Yields:
        The ``annotation`` of every node that carries a non-None one, and the
        return annotation (``returns``) of every function definition that
        declares one.
    """
    for node in ast.walk(tree):
        ann = getattr(node, "annotation", None)
        if ann is not None:
            yield ann
        # Return annotations are stored under `returns`, not `annotation`,
        # so they have to be picked up separately.
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            if node.returns is not None:
                yield node.returns


def get_imported_symbols(tree: ast.AST) -> dict[str, str]:
    """Get all imported symbols.

    Args:
        tree: Root of the AST to search; every ``import`` and
            ``from ... import`` statement found by ``ast.walk`` is considered.

    Returns:
        Mapping from the name bound by the import (the alias if one is given,
        otherwise the imported name) to the qualified name it refers to.
        Relative imports keep their leading dots, e.g. ``from ..pkg import f``
        maps ``"f"`` to ``"..pkg.f"`` and ``from . import e`` maps ``"e"`` to
        ``".e"``.
    """
    imported_symbols: dict[str, str] = {}

    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            for alias in node.names:
                imported_symbols[alias.asname or alias.name] = alias.name
        elif isinstance(node, ast.ImportFrom):
            # `module` is None for `from . import x`; `level` counts the
            # leading dots of a relative import (0 for absolute imports).
            prefix = "." * (node.level or 0) + (node.module or "")
            # Only insert a separating dot after an actual module name.
            separator = "." if prefix and not prefix.endswith(".") else ""
            for alias in node.names:
                full_name = f"{prefix}{separator}{alias.name}"
                imported_symbols[alias.asname or alias.name] = full_name

    return imported_symbols


def get_attrs_shadow_imported(tree: ast.AST) -> Iterator[ast.Attribute]:
    """Yield attributes inside type hints whose last name matches an imported symbol."""
    known_names = get_imported_symbols(tree)

    for hint in get_type_hints(tree):
        yield from (
            candidate
            for candidate in get_attributes(hint)
            if candidate.attr in known_names
        )

In [None]:
# Materialize every attribute node flagged for the parsed module.
nodes = list(get_attrs_shadow_imported(tree))

In [None]:
# Show the instance attributes of the `ctx` object three `.value` hops below the first hit.
vars(nodes[0].value.value.value.ctx)

In [None]:
# NOTE(review): `.attr` is presumably a plain str here, for which vars() raises TypeError — confirm.
vars(nodes[0].value.value.attr)

In [None]:
# Stop at the first ImportFrom node; `node` stays bound to it after the loop
# (or to the last node walked if the tree contains none).
for node in ast.walk(tree):
    if isinstance(node, ast.ImportFrom):
        break

In [None]:
# NOTE(review): unfinished cell — the `if` below has no body, so this cell does not parse.
for node in ast.walk(tree):
    if isinstance(node, ast.AnnAssign) and isinstance(node.annotation, ast.Name):

In [None]:
# NOTE(review): unfinished stub — no parameter list or body yet.
def get_type_hints

In [None]:
# NOTE(review): unfinished stub — signature only, the body is still missing.
def get_type_hints(node: ast.FunctionDef):

In [None]:
def extract_type_hints(tree: ast.AST):
    """Return the first function definition found in ``tree``.

    Despite its name, this returns the function node itself; the parameter
    annotations can then be read from its ``args``.

    Args:
        tree: Root of the AST to search.

    Returns:
        The first ``ast.FunctionDef`` in ``ast.walk`` order, or ``None`` if the
        tree contains no function definition.
    """
    for node in ast.walk(tree):
        # Only function definitions carry `.args`; an annotated assignment
        # (`ast.AnnAssign`) has no parameters to inspect.
        if isinstance(node, ast.FunctionDef):
            return node
    return None


# Take the first positional-only parameter of the returned node and display its annotation.
# NOTE(review): this needs `node` to expose `.args.posonlyargs` with at least one entry,
# and the accesses below need the annotation to have `.value` and `.attr` — confirm
# against what extract_type_hints actually returns.
node = extract_type_hints(tree)
ARG = node.args.posonlyargs[0]
ANN = ARG.annotation
display(type(ANN), ANN)
display(vars(ANN))
display(vars(ANN.value))
display(ANN.attr)

In [None]:
def extract_type_hints(tree):
    """Yield the ``annotation`` attribute of each walked node where it is set and not None."""
    found = (getattr(candidate, "annotation", None) for candidate in ast.walk(tree))
    yield from (hint for hint in found if hint is not None)

In [None]:
def get_type_hint(arg: ast.arg) -> Iterator[ast.AST]:
    """Yield the annotation of ``arg`` if it has one, otherwise yield nothing."""
    if arg.annotation is not None:
        yield arg.annotation


def get_type_hints(args: ast.arguments) -> Iterator[ast.AST]:
    """Yield the annotations of all parameters described by ``args``.

    Args:
        args: The ``arguments`` node of a function definition.

    Yields:
        Annotation nodes in the order positional-only, regular, ``*vararg``,
        keyword-only, ``**kwarg``; parameters without annotation are skipped.
    """
    for arg in args.posonlyargs:
        yield from get_type_hint(arg)
    for arg in args.args:
        yield from get_type_hint(arg)
    if args.vararg is not None:
        yield from get_type_hint(args.vararg)
    for arg in args.kwonlyargs:
        yield from get_type_hint(arg)
    if args.kwarg is not None:
        # Read `args.kwarg` itself; the loop variable `arg` would be whatever
        # the loops above left behind (or unbound if none of them ran).
        yield from get_type_hint(args.kwarg)

In [None]:
# Collect the parameter annotations of whatever `node` is currently bound to.
list(get_type_hints(node.args))

In [None]:
def extract_type_hints(tree: ast.AST):
    """Return the first function definition found in ``tree``.

    Despite its name, this returns the function node itself; the parameter
    annotations can then be read from its ``args``.

    Args:
        tree: Root of the AST to search.

    Returns:
        The first ``ast.FunctionDef`` in ``ast.walk`` order, or ``None`` if the
        tree contains no function definition.
    """
    for node in ast.walk(tree):
        if isinstance(node, ast.FunctionDef):
            return node
    # Make the not-found case explicit instead of falling off the end.
    return None


# Take the first positional-only parameter of the returned function node and
# display the parameter, its annotation and the annotation's parts.
# NOTE(review): assumes the function has at least one positional-only parameter
# whose annotation has `.value` and `.attr` (e.g. a dotted name) — confirm.
node = extract_type_hints(tree)
ARG = node.args.posonlyargs[0]
display(vars(ARG))
ANN = ARG.annotation
display(type(ANN), ANN)
display(vars(ANN))
display(vars(ANN.value))
display(ANN.attr)

In [None]:
# Echo the parameter node selected earlier.
ARG

In [None]:
# Type of `node.args`.
type(node.args)

In [None]:
# Type of the first positional-only parameter.
type(node.args.posonlyargs[0])

In [None]:
# Textual dump of that parameter's annotation.
ast.dump(node.args.posonlyargs[0].annotation)

In [None]:
# Scratch list of the `node.args` fields that this notebook reads parameters from.
# NOTE(review): "vararg" lacks the trailing comma the other lines have; every line
# is its own expression statement, so nothing is concatenated.
"posonlyargs",
"args",
"vararg"
"kwonlyargs",
"kwarg",

In [None]:
def get_type_hints(node: ast.FunctionDef):
    """Skeleton that checks each parameter category of ``node`` without acting on it.

    Every branch is still a placeholder, so the function always returns ``None``.
    """
    signature = node.args
    if signature.posonlyargs:
        pass  # placeholder: positional-only parameters
    if signature.args:
        pass  # placeholder: regular parameters
    if signature.vararg is not None:
        pass  # placeholder: *vararg
    if signature.kwonlyargs:
        pass  # placeholder: keyword-only parameters
    if signature.kwarg is not None:
        pass  # placeholder: **kwarg

In [None]:
# Instance attributes of `node.args`.
vars(node.args)

In [None]:
# All attribute names available on `node.args`.
dir(node.args)

In [None]:
# Echo the `ast.arg` class.
ast.arg

In [None]:
# Instance attributes of `node` itself.
vars(node)

In [None]:
from typing import Any, Protocol, runtime_checkable


@runtime_checkable
class Top(Protocol):
    """Represents the top type.

    The protocol declares no members and is runtime-checkable, so it can be
    used with ``isinstance`` / ``issubclass``.
    """


# Sanity checks: `object` counts as a subclass of `Top`, and `Top` is a subclass of `object`.
assert issubclass(object, Top)
assert issubclass(Top, object)

In [None]:
# NOTE(review): issubclass() needs a class as first argument; whether `typing.Any`
# qualifies presumably depends on the Python version — confirm.
issubclass(Any, Top)

In [None]:
from typing import _SpecialForm

In [None]:
# Check that even the Ellipsis singleton is an instance of `object`.
isinstance(..., object)

In [None]:
# Notebook setup: IPython magics for output display, SVG inline figures and autoreload.
%config InteractiveShell.ast_node_interactivity='last_expr_or_assign'  # always print last expr.
%config InlineBackend.figure_format = 'svg'
%load_ext autoreload
%autoreload 2
%matplotlib inline

import logging

# Configure the root logger to emit records of level INFO and above.
logging.basicConfig(level=logging.INFO)

In [None]:
import tsdm

In [None]:
# Instantiate the PhysioNet2019 dataset object from tsdm.
ds = tsdm.datasets.PhysioNet2019()

In [None]:
# Display the dataset's `metadata` attribute.
ds.metadata

In [None]:
import pandas as pd
import pyarrow as pa

# arrow dictionary type (int32 indices, string values) and its pandas wrapper
pa_dict = pa.dictionary(pa.int32(), pa.string())
pd_dict = pd.ArrowDtype(pa_dict)

# create the frame in pandas; "bar" is cast to the pandas "category" dtype
data = {"foo": [1, 2], "bar": ["a", "b"]}
schema = {"foo": int, "bar": "category"}

# write to parquet (a file literally named "foo") and read it back with the pyarrow dtype backend
df = pd.DataFrame(data).astype(schema)
df.to_parquet("foo")
df_loaded = pd.read_parquet("foo", dtype_backend="pyarrow")


# check whether the reloaded column carries the arrow dictionary dtype built above
assert df_loaded.bar.dtype == pd_dict

In [None]:
# Compare the reloaded column's dtype with the dtype of the column that was written.
df_loaded.bar.dtype == df.bar.dtype

In [None]:
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq  # `import pyarrow` alone does not load the parquet submodule

# arrow dictionary type (int32 indices, string values) and its pandas wrapper
pa_dict = pa.dictionary(pa.int32(), pa.string())
pd_dict = pd.ArrowDtype(pa_dict)

# create table in pyarrow
data = {"foo": [1, 2], "bar": ["a", "b"]}
schema = pa.schema({"foo": pa.int32(), "bar": pa_dict})
table = pa.table(data, schema=schema)

# serialize in arrow and loading to pandas works
pq.write_table(table, "demo.parquet")
df = pd.read_parquet("demo.parquet", dtype_backend="pyarrow")

assert df.bar.dtype == pd_dict  # ✔ the dtype is dictionary[int32,string]

# # saving and re-loading doesn't
# df.to_parquet("demo2.parquet")
# pd.read_parquet("demo2.parquet", dtype_backend="pyarrow")

In [None]:
# Installed pyarrow version.
pa.__version__

In [None]:
# Log-scaled density histograms (20 bins) of the time series columns.
ds.timeseries.hist(figsize=(20, 12), density=True, log=True, bins=20);

In [None]:
import pandas as pd
import pyarrow as pa

In [None]:
# Cast the "wd" column to an arrow dictionary dtype (int32 indices, string values).
# NOTE(review): assumes `ds.timeseries` has a "wd" column — confirm for the loaded dataset.
ts = ds.timeseries.astype({"wd": pd.ArrowDtype(pa.dictionary(pa.int32(), pa.string()))})

In [None]:
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq  # `import pyarrow` alone does not load the parquet submodule

# arrow dictionary type (int32 indices, string values) and its pandas wrapper
pa_dict = pa.dictionary(pa.int32(), pa.string())
pd_dict = pd.ArrowDtype(pa_dict)

# create table in pyarrow
data = {"foo": [1, 2], "bar": ["a", "b"]}
schema = pa.schema({"foo": pa.int32(), "bar": pa_dict})
table = pa.table(data, schema=schema)

# serialize in arrow and loading to pandas works
pq.write_table(table, "demo.parquet")
df = pd.read_parquet("demo.parquet", dtype_backend="pyarrow")

assert df.bar.dtype == pd_dict  # ✔ the dtype is dictionary[int32,string]

# saving and re-loading doesn't
df.to_parquet("demo2.parquet")
pd.read_parquet("demo2.parquet", dtype_backend="pyarrow")