# sktime Series Annotators Catalog


This notebook catalogs sktime series annotators (anomaly detection, change points, segmentation). It includes:
- task notation for point/interval labels,
- a small synthetic example,
- a dynamic registry query for annotators in your local sktime install.


## Annotation setup and notation
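Given a series $y_{1:T}$, an annotator $g$ produces a label $a_t$ for every time point (or a set of labelled intervals):

$$
a_{1:T} = g(y_{1:T})
$$

with $a_t$ marking whether time $t$ is an anomaly, belongs to a particular regime, or is a change point.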

In [1]:
import numpy as np
import plotly.graph_objects as go

# Synthetic series: a slow sinusoid plus Gaussian noise, drawn from a seeded
# generator so the figure is identical on every run.
rng = np.random.default_rng(14)
t = np.arange(180)
y = 0.5 * np.sin(t / 12) + 0.25 * rng.normal(size=t.size)

# Two level shifts serve as the known change points: the level rises by 1.1
# from index 60 onwards and then drops by 1.4 from index 120 onwards.
y[60:] += 1.1
y[120:] -= 1.4
change_points = [60, 120]

# Three isolated spikes serve as the known point anomalies.
anoms = np.array([25, 95, 160])
y[anoms] += np.array([3.2, -2.5, 3.7])

fig = go.Figure()
fig.add_trace(go.Scatter(x=t, y=y, mode="lines", name="series"))
fig.add_trace(
    go.Scatter(
        x=anoms,
        y=y[anoms],
        mode="markers",
        marker=dict(color="crimson", size=9),
        name="anomalies",
    )
)
for cp in change_points:
    # Vertical lines do not appear in the legend, so label them directly;
    # otherwise the reader cannot tell what the dashed lines stand for.
    fig.add_vline(
        x=cp,
        line_dash="dash",
        line_color="gray",
        annotation_text="change point",
        annotation_position="top right",
    )

fig.update_layout(
    title="Series annotation example",
    xaxis_title="t",
    yaxis_title="y",
    height=300,
    # Left/bottom margins are a little wider than the others to leave room
    # for the axis titles.
    margin=dict(l=50, r=20, t=50, b=40),
)
fig.show()


## Dynamic registry query


In [2]:
import pandas as pd
import plotly.express as px


def _resolve_all_estimators():
    try:
        from sktime.registry import all_estimators
        return all_estimators
    except Exception:
        from sktime.utils.discovery import all_estimators
        return all_estimators


def _safe_tags(cls):
    if hasattr(cls, "get_class_tags"):
        try:
            return cls.get_class_tags()
        except Exception:
            return {}
    return {}


def _matches_scitype(value, target):
    if value is None:
        return False
    if isinstance(value, (list, tuple, set)):
        return target in value
    return value == target


def _top_tag_keys(tag_dicts, limit=8):
    counts = {}
    for tags in tag_dicts:
        for key, value in tags.items():
            if key in {"scitype", "estimator_type", "task", "learning_type"}:
                continue
            if isinstance(value, (bool, int, float, str)):
                counts[key] = counts.get(key, 0) + 1
    return [
        key
        for key, _ in sorted(counts.items(), key=lambda kv: (-kv[1], kv[0]))[:limit]
    ]


# Probe for sktime once. Later cells branch on SKTIME_AVAILABLE so the notebook
# still runs top to bottom when sktime is missing.
try:
    import sktime  # noqa: F401

    all_estimators = _resolve_all_estimators()
except Exception as exc:
    # ``exc`` is unbound when the handler exits, so keep the error under a
    # separate name for the message below.
    SKTIME_AVAILABLE, _err = False, exc
else:
    SKTIME_AVAILABLE = True

if not SKTIME_AVAILABLE:
    print("sktime not installed. Install with `pip install sktime` to populate the catalog.")
    print("Error:", _err)


sktime not installed. Install with `pip install sktime` to populate the catalog.
Error: No module named 'sktime'


In [3]:
if SKTIME_AVAILABLE:
    # sktime stores the kind of an estimator in its "object_type" tag; the two
    # other key names are kept as fallbacks.
    # NOTE(review): newer sktime releases appear to label these estimators
    # "detector" rather than "series-annotator" - confirm against the installed
    # version. Listing an unused label is harmless.
    targets = {"series-annotator", "annotator", "detector"}
    type_tag_keys = ("object_type", "scitype", "estimator_type")
    base_columns = ["name", "class", "module", "module_family"]

    estimators = all_estimators()
    records = []
    tag_dicts = []
    for name, cls in estimators:
        tags = _safe_tags(cls)
        # First non-empty type tag wins; None when the class carries none.
        scitype = next((tags[key] for key in type_tag_keys if tags.get(key)), None)
        if not any(_matches_scitype(scitype, target) for target in targets):
            continue
        records.append(
            {
                "name": name,
                "class": cls.__name__,
                "module": cls.__module__,
                # First three dotted components of the module path,
                # used to group estimators in the plots.
                "module_family": ".".join(cls.__module__.split(".")[:3]),
            }
        )
        tag_dicts.append(tags)

    tag_keys = _top_tag_keys(tag_dicts, limit=8)
    rows = [
        {**record, **{key: tags.get(key) for key in tag_keys}}
        for record, tags in zip(records, tag_dicts)
    ]

    # Naming the columns explicitly keeps "name" present even when nothing
    # matched, so the sort cannot raise KeyError on an empty catalog.
    df = pd.DataFrame(rows, columns=base_columns + tag_keys).sort_values("name")
    if df.empty:
        print("No series annotators were found in the installed sktime registry.")

# A bare expression is rendered only when it is the last top-level statement of
# the cell; inside the ``if`` body above it would be silently discarded.
df.head(20) if SKTIME_AVAILABLE else None


In [4]:
if SKTIME_AVAILABLE and not df.empty:
    # Number of annotators per module family. Naming the index and the value
    # column explicitly gives the same two columns on every pandas version;
    # renaming the output of reset_index() instead produces two "count"
    # columns on pandas >= 2.0.
    counts = (
        df["module_family"]
        .value_counts()
        .rename_axis("module_family")
        .reset_index(name="count")
    )
    fig = px.bar(
        counts,
        x="module_family",
        y="count",
        title="Annotators by module family",
    )
    fig.show()

    fig = px.treemap(
        df,
        path=["module_family", "name"],
        title="Annotators catalog (module family -> estimator)",
    )
    fig.show()

    # A tag column counts as boolean when every non-missing value is a real
    # bool; an equality-based test would also accept 0/1 numeric tags.
    meta_columns = {"name", "class", "module", "module_family"}
    bool_tags = []
    for key in df.columns:
        if key in meta_columns:
            continue
        series = df[key].dropna()
        if not series.empty and series.map(pd.api.types.is_bool).all():
            bool_tags.append(key)

    if bool_tags:
        summary = pd.DataFrame(
            {
                "tag": bool_tags,
                # Share of True among the annotators that define the tag.
                # Missing values are dropped and the rest cast to bool so the
                # mean does not depend on how an object column is averaged.
                "share_true": [
                    df[tag].dropna().astype(bool).mean() for tag in bool_tags
                ],
            }
        )
        fig = px.bar(
            summary,
            x="tag",
            y="share_true",
            title="Share of annotators with tag=True",
        )
        fig.show()


## How to use this catalog
- Look for module families focused on anomaly detection or change point detection.
- Use tag columns to confirm whether labels are pointwise or interval-based.
- Combine annotators with transformers (detrending, smoothing) for more stable signals.