# sktime Time Series Transformers Catalog


This notebook catalogs sktime transformers for time series. It includes:
- notation for transformations,
- a visual example of smoothing and differencing,
- a dynamic registry query to list all transformers in your local sktime install.


## Transformer setup and notation

Transformers apply a mapping $\Phi$ to a series $X$ to produce a new representation $Z$:

$$Z = \Phi(X).$$

Transformers can smooth, decompose, extract features, or change representation for downstream models.


In [None]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go

# Synthetic demo signal: a slow sine wave plus seeded Gaussian noise.
rng = np.random.default_rng(12)
t = np.arange(90)
series = np.sin(t / 8) + 0.3 * rng.normal(size=t.size)

# Two simple transformations of the same signal.
series_s = pd.Series(series)
# 7-point rolling mean; min_periods=1 lets the first points average
# whatever observations are available instead of yielding NaN.
rolling = series_s.rolling(window=7, min_periods=1).mean()
# First difference; the undefined first value is replaced by 0.
diff = series_s.diff().fillna(0)

# Overlay the original and both transformed series on one set of axes.
fig = go.Figure(
    data=[
        go.Scatter(x=t, y=series_s, mode="lines", name="original"),
        go.Scatter(x=t, y=rolling, mode="lines", name="rolling mean"),
        go.Scatter(x=t, y=diff, mode="lines", name="first difference"),
    ]
)
fig.update_layout(
    title="Example transformations on a time series",
    xaxis_title="time",
    yaxis_title="value",
)
fig.show()


## Dynamic registry query


In [None]:
import pandas as pd
import plotly.express as px


def _resolve_all_estimators():
    try:
        from sktime.registry import all_estimators
        return all_estimators
    except Exception:
        from sktime.utils.discovery import all_estimators
        return all_estimators


def _safe_tags(cls):
    if hasattr(cls, "get_class_tags"):
        try:
            return cls.get_class_tags()
        except Exception:
            return {}
    return {}


def _matches_scitype(value, target):
    if value is None:
        return False
    if isinstance(value, (list, tuple, set)):
        return target in value
    return value == target


def _top_tag_keys(tag_dicts, limit=8):
    counts = {}
    for tags in tag_dicts:
        for key, value in tags.items():
            if key in {"scitype", "estimator_type", "task", "learning_type"}:
                continue
            if isinstance(value, (bool, int, float, str)):
                counts[key] = counts.get(key, 0) + 1
    return [
        key
        for key, _ in sorted(counts.items(), key=lambda kv: (-kv[1], kv[0]))[:limit]
    ]


# Probe for sktime once and record the outcome in SKTIME_AVAILABLE so the
# catalog code can be skipped when the registry lookup cannot be obtained.
try:
    import sktime  # noqa: F401

    all_estimators = _resolve_all_estimators()
except Exception as exc:
    # Keep the exception under its own name: `exc` is unbound as soon as the
    # handler exits.
    SKTIME_AVAILABLE, _err = False, exc
    print("sktime not installed. Install with `pip install sktime` to populate the catalog.")
    print("Error:", _err)
else:
    SKTIME_AVAILABLE = True


In [None]:
if SKTIME_AVAILABLE:
    # Build one row per registered transformer: identity columns plus the
    # most common scalar-valued tags.
    target = "transformer"
    estimators = all_estimators()
    records = []
    tag_dicts = []
    for name, cls in estimators:
        tags = _safe_tags(cls)
        # sktime's tag reference lists the estimator type under "object_type";
        # the two original keys are kept as fallbacks.
        scitype = (
            tags.get("object_type")
            or tags.get("scitype")
            or tags.get("estimator_type")
        )
        if not _matches_scitype(scitype, target):
            continue
        module = cls.__module__
        records.append(
            {
                "name": name,
                "class": cls.__name__,
                "module": module,
                # First three dotted components of the module path.
                "module_family": ".".join(module.split(".")[:3]),
            }
        )
        tag_dicts.append(tags)

    tag_keys = _top_tag_keys(tag_dicts, limit=8)
    rows = [
        {**record, **{key: tags.get(key) for key in tag_keys}}
        for record, tags in zip(records, tag_dicts)
    ]

    # Passing the columns explicitly keeps "name" present even when nothing
    # matched, so sort_values does not raise KeyError on an empty frame.
    # dict.fromkeys drops a duplicate should a tag key equal a base column.
    columns = list(
        dict.fromkeys(["name", "class", "module", "module_family", *tag_keys])
    )
    df = pd.DataFrame(rows, columns=columns).sort_values("name")
    # A bare expression nested inside an `if` is not rendered by the notebook,
    # so the preview is printed explicitly.
    print(df.head(20).to_string())


In [None]:
if SKTIME_AVAILABLE and not df.empty:
    # Count estimators per module family. rename_axis + reset_index(name=...)
    # yields the columns "module_family" and "count" on both pandas 1.x and
    # 2.x; renaming the "index" column only produced them on 1.x.
    counts = (
        df["module_family"]
        .value_counts()
        .rename_axis("module_family")
        .reset_index(name="count")
    )
    fig = px.bar(
        counts,
        x="module_family",
        y="count",
        title="Transformers by module family",
    )
    fig.show()

    fig = px.treemap(
        df,
        path=["module_family", "name"],
        title="Transformers catalog (module family -> estimator)",
    )
    fig.show()

    # Find tag columns whose non-null values are all True/False and record
    # the share of True among those non-null values.
    identity_columns = {"name", "class", "module", "module_family"}
    share_true = {}
    for key in df.columns:
        if key in identity_columns:
            continue
        # Named `tag_values` so the `series` array defined by the demo cell
        # is not overwritten.
        tag_values = df[key].dropna()
        if not tag_values.empty and tag_values.isin([True, False]).all():
            # Cast to bool first so the mean does not depend on how pandas
            # reduces an object-dtype column.
            share_true[key] = tag_values.astype(bool).mean()
    bool_tags = list(share_true)

    if bool_tags:
        summary = pd.DataFrame(
            {
                "tag": bool_tags,
                "share_true": [share_true[tag] for tag in bool_tags],
            }
        )
        fig = px.bar(
            summary,
            x="tag",
            y="share_true",
            title="Share of estimators with tag=True",
        )
        fig.show()


## How to use this catalog

- Inspect module families to see feature extraction, smoothing, and decomposition tools.
- Tag columns can reveal whether a transformer is stateless or requires fitting.
- Combine transformers with classifiers/regressors to build end-to-end pipelines.
