# Anomaly Impact Alert - быстрый демо-ноутбук

Добро пожаловать! Здесь за 10–15 минут покажем, как из обычного временного ряда получить:

- сигналы аномалий
- прогноз того, что должно было быть
- вклад сегментов (platform / country / …) в общую дельту
- готовое сообщение для бота/дашборда

Что сделаем в ноутбуке

- Сгенерируем (или загрузим) пример датасета с колонками time_at, metric_name, metric_value, platform, country
- Прогоним ансамбль детекторов (CI/MAD, Z-score, STL/SESD, IsolationForest, LOF, CUSUM)
- Построим ансамблевый прогноз (Prophet + STL/ETS + Naive) и сравним факт vs. forecast
- Посчитаем вклады сегментов в общую дельту за день T

- Сформируем сообщение для бота в стиле: заголовок → график → цифры → топ-вклады

In [None]:
!pip install anomaly-impact-alert # Install the library

Collecting anomaly-impact-alert
  Downloading anomaly_impact_alert-0.3.0.tar.gz (611 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/611.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━[0m [32m286.7/611.1 kB[0m [31m8.4 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m611.1/611.1 kB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting python-telegram-bot>=20.0 (from anomaly-impact-alert)
  Downloading python_telegram_bot-22.5-py3-none-any.whl.metadata (17 kB)
Downloading python_telegram_bot-22.5-py3-none-any.whl (730 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m731.0/731.0 kB[0m [31m18.1 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected p

In [None]:
import logging
import sys
import warnings
from datetime import date, datetime, timedelta
from typing import Final

import numpy as np
import pandas as pd
from tqdm import tqdm

from anomaly_impact_alert import (
    AlertConfig,
    AnomalyParams,
    BFConfig,
    ImpactConfig,
    analyze_latest_point,
    attach_impact_text,
    columns_true,
    forecast_values_for_targets_better,
    send_alert_for_date,
)

# Keep the demo output readable: hide warnings and quiet the loggers of the
# forecasting backends.
warnings.filterwarnings("ignore")
logging.getLogger("cmdstanpy").disabled = True
logging.getLogger("prophet").setLevel(logging.WARNING)

# Pandas display settings: show every column, do not cap the cell width, and
# print floats with thousands separators and two decimals.
pd.options.display.max_columns = None
pd.options.display.width = 0
pd.options.display.max_colwidth = None
pd.options.display.float_format = "{:,.2f}".format
pd.options.display.large_repr = "truncate"

## Dataset

In [None]:
# Synthetic daily series for a notional "amount" metric.
np.random.seed(4242)

dates = pd.date_range("2025-06-01", "2025-11-10", freq="D")

# Amount added to the base level on every day of the given (year, month).
monthly_slopes = {
    (2025, 6): +2.0,
    (2025, 7): -1.0,
    (2025, 8): +4.0,
    (2025, 9): -2.0,
    (2025, 10): +3.0,
    (2025, 11): -1.5,
}

# Weekly seasonality: multiplier per weekday (0 = Monday ... 6 = Sunday).
weekday_multipliers = {
    0: 0.95,
    1: 1.00,
    2: 1.05,
    3: 1.10,
    4: 1.18,
    5: 0.90,
    6: 0.82,
}

# Running base level: start at 1,000,000 and add the day's slope before the
# level is used, then scale each day by its weekday multiplier.
daily_slopes = [monthly_slopes.get((day.year, day.month), 0.0) for day in dates]
daily_levels = np.cumsum(np.concatenate(([1000000.0], daily_slopes)))[1:]
daily_weekday_factors = np.array(
    [weekday_multipliers[day.weekday()] for day in dates], dtype=float
)
base = daily_levels * daily_weekday_factors

# Injected anomalies: multiplicative factor applied on the given dates.
anom_sep = {
    "2025-09-05": 0.65,
    "2025-09-17": 0.40,
    "2025-09-26": 1.12,
}
anom_oct = {
    "2025-10-23": 0.75,
    "2025-10-24": 1.1,
}

# Days without an injected anomaly keep a neutral factor of 1.0.
factors = (
    pd.Series(dates.strftime("%Y-%m-%d"))
    .map({**anom_sep, **anom_oct})
    .fillna(1.0)
    .to_numpy(dtype=float)
)

smooth_noise = 1.0 + np.random.normal(0, 0.01, size=len(dates))
signal = base * factors * smooth_noise

# Fixed shares used to split the daily total into segments.
platforms = ["web", "android", "ios"]
p_weights = np.array([0.62, 0.28, 0.10])

countries = ["russia", "belarus", "kazakhstan"]
c_weights = np.array([0.74, 0.11, 0.15])

# Long format: per day one row per platform, one per country, and one
# overall row marked "total" in both segment columns.
rows = []
for day, day_total in zip(dates, signal.tolist()):
    rows.extend(
        [day, "amount", day_total * share, platform_name, "total"]
        for platform_name, share in zip(platforms, p_weights)
    )
    rows.extend(
        [day, "amount", day_total * share, "total", country_name]
        for country_name, share in zip(countries, c_weights)
    )
    rows.append([day, "amount", day_total, "total", "total"])

df = (
    pd.DataFrame(
        rows,
        columns=["time_at", "metric_name", "metric_value", "platform", "country"],
    )
    .sort_values(["time_at", "platform", "country"])
    .reset_index(drop=True)
)

In [None]:
# Print a summary of the generated frame (row count, column dtypes, memory usage).
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1141 entries, 0 to 1140
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   time_at       1141 non-null   datetime64[ns]
 1   metric_name   1141 non-null   object        
 2   metric_value  1141 non-null   float64       
 3   platform      1141 non-null   object        
 4   country       1141 non-null   object        
dtypes: datetime64[ns](1), float64(1), object(3)
memory usage: 44.7+ KB


In [None]:
# Sample of the data for a single day, sorted by country and then by value (both descending)
df[df['time_at'] == '2025-09-01'].sort_values(by=['country', 'metric_value'], ascending=False)

Unnamed: 0,time_at,metric_name,metric_value,platform,country
649,2025-09-01,amount,961433.54,total,total
650,2025-09-01,amount,596088.79,web,total
644,2025-09-01,amount,269201.39,android,total
645,2025-09-01,amount,96143.35,ios,total
648,2025-09-01,amount,711460.82,total,russia
647,2025-09-01,amount,144215.03,total,kazakhstan
646,2025-09-01,amount,105757.69,total,belarus


In [None]:
# Select the series to analyze: the overall rows, i.e. those marked "total"
# in both segment columns.
is_overall_row = (df["platform"] == "total") & (df["country"] == "total")
df_amount_total = df.loc[is_overall_row]

## Аномалии

In [None]:
p = AnomalyParams(
    # --- Basics ---
    granularity="daily",

    # --- CI / Z-score on MAD (history and thresholds) ---
    ci_k=1.845,                  # confidence-interval width (smaller → more sensitive)
    z_threshold=1.845,           # threshold for |z| on MAD (smaller → more sensitive)
    rolling_window_hourly=24,   # for hourly data, if you run hourly
    rolling_window_daily=28,    # for daily data (≈4 weeks of history)

    # --- STL (seasonality) ---
    stl_period_hourly=24*7,     # weekly seasonality for hourly data
    stl_period_daily=7,         # weekly seasonality for daily data
    stl_std_multiplier=2,     # threshold on STL residuals (smaller → more sensitive)

    # --- SESD / Seasonal ESD (latest point) ---
    sesd_alpha=0.25,            # significance level (larger → more sensitive)
    seasonality_hourly=24*7,    # season length for hourly data
    seasonality_daily=7,        # season length for daily data
    sesd_window_hourly=24,      # reference window (hourly)
    sesd_window_daily=7,        # reference window (daily)
    sesd_ppd_hourly=24,         # points per day (hourly)
    sesd_ppd_daily=1,           # points per day (daily)
    sesd_hybrid=True,           # hybrid MAD-z (more robust to outliers)

    # --- LOF / Isolation Forest (global outliers) ---
    contamination_threshold=0.21,  # outlier share for IForest (smaller → stricter)
    lof_contamination=0.14,        # outlier share for LOF
    lof_neighbors_hourly=10,       # LOF neighbours (hourly)
    lof_neighbors_daily=15,        # LOF neighbours (daily)

    # --- CUSUM (mean shift) ---
    cusum_k=0.5,                   # "size" of the expected shift (smaller → more sensitive)
    cusum_h=5,                     # alarm threshold (smaller → more sensitive)
    # NOTE(review): 56 points is ~8 weeks, but the loop below passes only a
    # 31-day window per call — confirm how the library handles a shorter history.
    cusum_reference_window=56,     # baseline window

    # --- Method switches ---
    enable_sesd=True,
    enable_stl=True,
    enable_iforest=True,
    enable_lof=True,
    enable_cusum=True,
)



# Period to scan. The end is clamped to the last observed date: for a day
# beyond the data the trailing window still ends on the last observation, so
# that same point would be analyzed again and appear as a duplicate row.
start_date = datetime(2025, 9, 1)
end_date = min(datetime(2025, 11, 11), df_amount_total['time_at'].max())

daily_dates = [start_date + timedelta(days=i) for i in range((end_date - start_date).days + 1)]

# Collect per-day results and concatenate once; concatenating inside the loop
# re-copies the accumulated frame on every iteration.
daily_results = []

for current_day in tqdm(daily_dates):

    # Trailing window: the current day plus the 30 days before it.
    window_start = current_day - pd.Timedelta(days=30)
    pre_df = df_amount_total[(df_amount_total['time_at'] <= current_day) & (df_amount_total['time_at'] >= window_start)]

    result_pre = analyze_latest_point(
        df_two_cols=pre_df[["time_at", "metric_value"]],
        metric_name="amount",
        granularity="daily",
        params=p,
    )

    daily_results.append(result_pre)

# pd.concat raises on an empty list, so fall back to an empty frame.
df_anomaly = pd.concat(daily_results) if daily_results else pd.DataFrame()

100%|██████████| 72/72 [00:26<00:00,  2.76it/s]


In [None]:
# Show the first rows of the computed anomaly table
df_anomaly.head()

Unnamed: 0,time_at,metric_value,ci_mean,ci_std,ci_upper,ci_lower,ci_alert,z_score,z_alert,iforest_alert,lof_alert,stl_resid,stl_alert,sesd_alert,cusum_alert,anomaly_final,metric_name,granularity
0,2025-09-01,961433.54,958368.83,2098.23,962240.06,954497.59,0.0,1.46,0.0,0,0,373.91,0,0,0,0,amount,daily
0,2025-09-02,999866.26,1002308.96,4625.2,1010842.44,993775.47,0.0,-0.53,0.0,0,0,-161.68,0,0,0,0,amount,daily
0,2025-09-03,1036873.02,1048010.52,3598.26,1054649.31,1041371.73,1.0,-3.1,1.0,0,0,-3297.23,0,0,0,0,amount,daily
0,2025-09-04,1106324.83,1106401.07,2826.98,1111616.85,1101185.29,0.0,-0.03,0.0,0,0,7013.95,0,0,0,0,amount,daily
0,2025-09-05,765004.25,1173403.88,17094.95,1204944.07,1141863.69,1.0,-23.89,1.0,1,1,-94989.87,1,0,0,1,amount,daily


### График аномалий

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

x_tick_format = '%Y-%m-%d'

# Metric line plus the upper/lower confidence bounds.
fig = make_subplots(specs=[[{"secondary_y": False}]])
fig.add_trace(go.Scatter(x=df_anomaly['time_at'], y=df_anomaly['metric_value'],
                         mode='lines', name='Метрика', line=dict(color='black')))

fig.add_trace(go.Scatter(x=df_anomaly['time_at'], y=df_anomaly['ci_upper'],
                         mode='lines', name='CI Верх', line=dict(dash='dot', color='red')))
fig.add_trace(go.Scatter(x=df_anomaly['time_at'], y=df_anomaly['ci_lower'],
                         mode='lines', name='CI Низ', line=dict(dash='dot', color='blue')))

# Flag column -> (Plotly marker symbol, color). The symbols must be Plotly
# symbol names; a distinct shape per detector keeps overlapping flags on the
# same point distinguishable.
markers = {
    'ci_alert': ('triangle-up', 'darkorange'),
    'z_alert': ('square', 'green'),
    'iforest_alert': ('diamond', 'purple'),
    'lof_alert': ('x', 'brown'),
    'stl_alert': ('square-dot', 'blue'),
    'sesd_alert': ('hexagon', 'magenta'),
    'cusum_alert': ('hexagon2', 'black'),
    'anomaly_final': ('star', 'red')
}

# One marker trace per detector, containing only the points it flagged.
for method, (symbol, color) in markers.items():
    sub = df_anomaly[df_anomaly[method] == 1]
    fig.add_trace(go.Scatter(
        x=sub['time_at'],
        y=sub['metric_value'],
        mode='markers',
        name=method,
        marker=dict(symbol=symbol, size=9, color=color),
        text=[method] * len(sub)
    ))

fig.update_layout(
    title='Обнаружение аномалий',
    xaxis_title='Дата',
    yaxis_title='Значение метрики',
    template='plotly_white',
    xaxis=dict(
        tickformat=x_tick_format,
        tickangle=45
    ),
    width=1800,
    height=450
)

# Size the y-axis from both the metric and the upper bound: positive anomalies
# rise above ci_upper and would be clipped if only the bound were used.
y_axis_top = df_anomaly[['metric_value', 'ci_upper']].max().max() * 1.05
fig.update_yaxes(range=[0, y_axis_top])

fig.show()

In [None]:
# Optionally save the chart to an HTML file so it can be shared
fig.write_html("anomaly_plot.html")

## Декомпозиция метрик

In [None]:
# Impact analysis, one pass per segment column: first country, then platform.
# Each pass adds its own text column to the frame returned by the previous
# pass, so the final frame carries both.
impact_settings = dict(
    time_col="time_at",
    group_col="group_col",
    metric_col="metric_value",
    window_days=5,     # mean over the 5 preceding points
    top_k=3,
    exclude_groups=("total", "-"),
    parallel=True,
)

# (segment column, column that must stay at "total", output column)
impact_passes = (
    ("country", "platform", "impact_text_country"),
    ("platform", "country", "impact_text_platform"),
)

df_final_impact_anomalys = df_anomaly
for segment_col, fixed_col, impact_output_col in impact_passes:
    cfg = ImpactConfig(**impact_settings)

    # Rows split by this segment only; the segment column is renamed to the
    # generic name that the config expects.
    segment_rows = (df[fixed_col] == 'total') & (df[segment_col] != 'total')
    df_impact = df.loc[segment_rows, ['time_at', segment_col, 'metric_value']]
    df_impact = df_impact.rename(columns={segment_col: "group_col"})

    df_final_impact_anomalys = attach_impact_text(
        df_anomaly=df_final_impact_anomalys,
        df_impact=df_impact,
        config=cfg,
        time_col_anom="time_at",
        anomaly_flag_col="anomaly_final",
        output_col=impact_output_col,
    )

In [None]:
# Show only the rows where anomaly_final == 1, including the two impact-text columns.
df_final_impact_anomalys[df_final_impact_anomalys['anomaly_final'] == 1]

Unnamed: 0,time_at,metric_value,ci_mean,ci_std,ci_upper,ci_lower,ci_alert,z_score,z_alert,iforest_alert,lof_alert,stl_resid,stl_alert,sesd_alert,cusum_alert,anomaly_final,metric_name,granularity,impact_text_country,impact_text_platform
0,2025-09-05,765004.25,1173403.88,17094.95,1204944.07,1141863.69,1.0,-23.89,1.0,1,1,-94989.87,1,0,0,1,amount,daily,"1. russia: -162,130 (-22.3%), вклад: -74.0%\n2. kazakhstan: -32,864 (-22.3%), вклад: -15.0%\n3. belarus: -24,100 (-22.3%), вклад: -11.0%","1. web: -135,839 (-22.3%), вклад: -62.0%\n2. android: -61,347 (-22.3%), вклад: -28.0%\n3. ios: -21,910 (-22.3%), вклад: -10.0%"
0,2025-09-17,421458.46,1050437.51,10339.55,1069513.98,1031361.05,1.0,-60.83,1.0,1,1,-150088.29,1,0,0,1,amount,daily,"1. russia: -413,172 (-57.0%), вклад: -74.0%\n2. kazakhstan: -83,751 (-57.0%), вклад: -15.0%\n3. belarus: -61,418 (-57.0%), вклад: -11.0%","1. web: -346,172 (-57.0%), вклад: -62.0%\n2. android: -156,336 (-57.0%), вклад: -28.0%\n3. ios: -55,834 (-57.0%), вклад: -10.0%"
0,2025-09-26,1308404.62,1184878.01,4010.27,1192276.95,1177479.07,1.0,30.8,1.0,1,1,-62427.73,0,1,0,1,amount,daily,"1. russia: 241,698 (33.3%), вклад: 74.0%\n2. kazakhstan: 48,993 (33.3%), вклад: 15.0%\n3. belarus: 35,928 (33.3%), вклад: 11.0%","1. web: 202,504 (33.3%), вклад: 62.0%\n2. android: 91,453 (33.3%), вклад: 28.0%\n3. ios: 32,662 (33.3%), вклад: 10.0%"
0,2025-10-23,811418.67,1104907.49,7288.11,1118354.05,1091460.93,1.0,-40.27,1.0,1,0,-61001.48,1,0,0,1,amount,daily,"1. russia: -93,300 (-13.4%), вклад: -74.0%\n2. kazakhstan: -18,912 (-13.4%), вклад: -15.0%\n3. belarus: -13,869 (-13.4%), вклад: -11.0%","1. web: -78,170 (-13.4%), вклад: -62.0%\n2. android: -35,303 (-13.4%), вклад: -28.0%\n3. ios: -12,608 (-13.4%), вклад: -10.0%"
0,2025-10-24,1327196.54,1190726.31,6176.19,1202121.38,1179331.25,1.0,22.1,1.0,1,1,60364.0,0,1,0,1,amount,daily,"1. russia: 300,059 (44.0%), вклад: 74.0%\n2. kazakhstan: 60,823 (44.0%), вклад: 15.0%\n3. belarus: 44,603 (44.0%), вклад: 11.0%","1. web: 251,401 (44.0%), вклад: 62.0%\n2. android: 113,536 (44.0%), вклад: 28.0%\n3. ios: 40,549 (44.0%), вклад: 10.0%"


## Прогноз

In [None]:
# Forecasts are computed only for these dates.
targets = pd.date_range("2025-10-21", "2025-10-24", freq="D")

general_settings = dict(
    time_col="time_at",
    value_col="metric_value",
    granularity="daily",
    targets=targets,
    winsorize=True,
    winsor_k=4.0,
)
prophet_settings = dict(
    holidays_country="RU",
    seasonality_mode="multiplicative",
    interval_width=0.8,
    uncertainty_samples=0,
    changepoint_prior_scale=0.1,
    seasonality_prior_scale=10.0,
    yearly=True,
    weekly=True,
    add_monthly_for_daily=True,
)
ets_settings = dict(
    ets_trend="add",
    ets_seasonal_daily="add",
    ets_seasonal_hourly="add",
)
# Minimum history per model and backtest window sizes.
window_settings = dict(
    min_points_prophet=28,
    min_points_ets=28,
    backtest_window_daily=28,
    backtest_window_hourly=24*7,
)
# Scale handling.
scale_settings = dict(
    log_y="auto",
    log_cv_threshold=0.3,
    seed=42,
)

cfg = BFConfig(
    **general_settings,
    **prophet_settings,
    **ets_settings,
    **window_settings,
    **scale_settings,
)

# Forecast the overall series (rows marked "total" in both segment columns).
overall_history = df.loc[(df['platform'] == 'total') & (df['country'] == 'total')]
fc_df = forecast_values_for_targets_better(overall_history, cfg=cfg)

# Left join keeps every anomaly row, whether or not it has a forecast.
df_final_impact_anomalys_fc = pd.merge(
    df_final_impact_anomalys, fc_df, on="time_at", how="left"
)

In [None]:
# Last three rows of the frame produced by the left merge with the forecast table.
df_final_impact_anomalys_fc.tail(3)

Unnamed: 0,time_at,metric_value,ci_mean,ci_std,ci_upper,ci_lower,ci_alert,z_score,z_alert,iforest_alert,lof_alert,stl_resid,stl_alert,sesd_alert,cusum_alert,anomaly_final,metric_name,granularity,impact_text_country,impact_text_platform,forecast,forecast_prophet,forecast_ets,forecast_naive,w_prophet,w_ets,w_naive
69,2025-11-09,822586.49,820818.33,8794.91,837044.93,804591.72,0.0,0.2,0.0,0,0,5630.63,0,0,0,0,amount,daily,,,,,,,,,
70,2025-11-10,954922.06,951849.76,6448.45,963747.16,939952.36,0.0,0.48,0.0,0,0,-384.05,0,0,0,0,amount,daily,,,,,,,,,
71,2025-11-10,954922.06,951849.76,6448.45,963747.16,939952.36,0.0,0.48,0.0,0,0,-1003.38,0,0,0,0,amount,daily,,,,,,,,,


In [None]:
# Rows where anomaly_final == 1, now alongside the merged forecast columns.
df_final_impact_anomalys_fc[df_final_impact_anomalys_fc['anomaly_final'] == 1]

Unnamed: 0,time_at,metric_value,ci_mean,ci_std,ci_upper,ci_lower,ci_alert,z_score,z_alert,iforest_alert,lof_alert,stl_resid,stl_alert,sesd_alert,cusum_alert,anomaly_final,metric_name,granularity,impact_text_country,impact_text_platform,forecast,forecast_prophet,forecast_ets,forecast_naive,w_prophet,w_ets,w_naive
4,2025-09-05,765004.25,1173403.88,17094.95,1204944.07,1141863.69,1.0,-23.89,1.0,1,1,-94989.87,1,0,0,1,amount,daily,"1. russia: -162,130 (-22.3%), вклад: -74.0%\n2. kazakhstan: -32,864 (-22.3%), вклад: -15.0%\n3. belarus: -24,100 (-22.3%), вклад: -11.0%","1. web: -135,839 (-22.3%), вклад: -62.0%\n2. android: -61,347 (-22.3%), вклад: -28.0%\n3. ios: -21,910 (-22.3%), вклад: -10.0%",,,,,,,
16,2025-09-17,421458.46,1050437.51,10339.55,1069513.98,1031361.05,1.0,-60.83,1.0,1,1,-150088.29,1,0,0,1,amount,daily,"1. russia: -413,172 (-57.0%), вклад: -74.0%\n2. kazakhstan: -83,751 (-57.0%), вклад: -15.0%\n3. belarus: -61,418 (-57.0%), вклад: -11.0%","1. web: -346,172 (-57.0%), вклад: -62.0%\n2. android: -156,336 (-57.0%), вклад: -28.0%\n3. ios: -55,834 (-57.0%), вклад: -10.0%",,,,,,,
25,2025-09-26,1308404.62,1184878.01,4010.27,1192276.95,1177479.07,1.0,30.8,1.0,1,1,-62427.73,0,1,0,1,amount,daily,"1. russia: 241,698 (33.3%), вклад: 74.0%\n2. kazakhstan: 48,993 (33.3%), вклад: 15.0%\n3. belarus: 35,928 (33.3%), вклад: 11.0%","1. web: 202,504 (33.3%), вклад: 62.0%\n2. android: 91,453 (33.3%), вклад: 28.0%\n3. ios: 32,662 (33.3%), вклад: 10.0%",,,,,,,
52,2025-10-23,811418.67,1104907.49,7288.11,1118354.05,1091460.93,1.0,-40.27,1.0,1,0,-61001.48,1,0,0,1,amount,daily,"1. russia: -93,300 (-13.4%), вклад: -74.0%\n2. kazakhstan: -18,912 (-13.4%), вклад: -15.0%\n3. belarus: -13,869 (-13.4%), вклад: -11.0%","1. web: -78,170 (-13.4%), вклад: -62.0%\n2. android: -35,303 (-13.4%), вклад: -28.0%\n3. ios: -12,608 (-13.4%), вклад: -10.0%",1103416.06,,1101283.6,1105570.72,0.0,0.5,0.5
53,2025-10-24,1327196.54,1190726.31,6176.19,1202121.38,1179331.25,1.0,22.1,1.0,1,1,60364.0,0,1,0,1,amount,daily,"1. russia: 300,059 (44.0%), вклад: 74.0%\n2. kazakhstan: 60,823 (44.0%), вклад: 15.0%\n3. belarus: 44,603 (44.0%), вклад: 11.0%","1. web: 251,401 (44.0%), вклад: 62.0%\n2. android: 113,536 (44.0%), вклад: 28.0%\n3. ios: 40,549 (44.0%), вклад: 10.0%",1185775.5,,1176678.71,1195001.56,0.0,0.5,0.5


## Сообщение в чат

In [None]:
from anomaly_impact_alert.alert_bot_telegram import AlertConfig, send_alert_for_date

# Day the alert is sent for; the frame is cut at this day.
alert_day = '2025-10-23'

# (section title, column holding the impact text)
# NOTE(review): "impact_text_age_group" is not produced anywhere in this
# notebook (only the country and platform columns are) — confirm the library
# tolerates a missing column, or drop this entry.
impact_sections = [
    ("Изменения за счёт Age Group:", "impact_text_age_group"),
    ("Изменения за счёт Стран:", "impact_text_country"),
    ("Изменения за счёт Платформ:", "impact_text_platform"),
]
dashboard_links = [("Дашборд по аномалиям", "https://mydashboard.com")]

cfg = AlertConfig(
    plot_window_points=60,
    slice1_name="Страна",
    slice1_value="Total",
    slice2_name="Платформа",
    slice2_value="Total",
    impact_blocks=impact_sections,
    links=dashboard_links,
)

is_up_to_alert_day = df_final_impact_anomalys_fc['time_at'] <= alert_day
history_up_to_alert_day = df_final_impact_anomalys_fc[is_up_to_alert_day]

# "TG_TOKEN" and "CHAT_ID" are placeholders: substitute the real bot token
# and chat id before running.
resp = send_alert_for_date(
    df_final=history_up_to_alert_day,
    now=pd.Timestamp(alert_day),
    metric_name="Amount",
    token="TG_TOKEN",
    chat_id="CHAT_ID",
    cfg=cfg,
    also_return=True,
)

# print(resp)  # optional