Skip to content

Optimised report loading for anomaly operator #897

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jul 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ads/opctl/operator/lowcode/anomaly/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,4 @@ class OutputColumns(str, metaclass=ExtendedEnumMeta):


TODS_DEFAULT_MODEL = "ocsvm"
SUBSAMPLE_THRESHOLD = 1000
30 changes: 23 additions & 7 deletions ads/opctl/operator/lowcode/anomaly/model/base_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

from ads.common.object_storage_details import ObjectStorageDetails
from ads.opctl import logger
from ads.opctl.operator.lowcode.anomaly.const import OutputColumns, SupportedMetrics
from ads.opctl.operator.lowcode.anomaly.const import OutputColumns, SupportedMetrics, SUBSAMPLE_THRESHOLD
from ads.opctl.operator.lowcode.anomaly.utils import _build_metrics_df, default_signer
from ads.opctl.operator.lowcode.common.utils import (
disable_print,
Expand Down Expand Up @@ -79,7 +79,7 @@ def generate_report(self):
anomaly_output, test_data, elapsed_time
)
table_blocks = [
rc.DataTable(df, label=col, index=True)
rc.DataTable(df.head(SUBSAMPLE_THRESHOLD) if self.spec.subsample_report_data and len(df) > SUBSAMPLE_THRESHOLD else df, label=col, index=True)
for col, df in self.datasets.full_data_dict.items()
]
data_table = rc.Select(blocks=table_blocks)
Expand All @@ -94,20 +94,36 @@ def generate_report(self):
anomaly_col = anomaly_output.get_anomalies_by_cat(category=target)[
OutputColumns.ANOMALY_COL
]
anomaly_indices = [i for i, index in enumerate(anomaly_col) if index == 1]
downsampled_time_col = time_col
selected_indices = list(range(len(time_col)))
if self.spec.subsample_report_data:
non_anomaly_indices = [i for i in range(len(time_col)) if i not in anomaly_indices]
# Downsample non-anomalous data if it exceeds the threshold (1000)
if len(non_anomaly_indices) > SUBSAMPLE_THRESHOLD:
downsampled_non_anomaly_indices = non_anomaly_indices[::len(non_anomaly_indices)//SUBSAMPLE_THRESHOLD]
selected_indices = anomaly_indices + downsampled_non_anomaly_indices
selected_indices.sort()
downsampled_time_col = time_col[selected_indices]

columns = set(df.columns).difference({date_column})
for col in columns:
y = df[col].reset_index(drop=True)

downsampled_y = y[selected_indices]

fig, ax = plt.subplots(figsize=(8, 3), layout="constrained")
ax.grid()
ax.plot(time_col, y, color="black")
for i, index in enumerate(anomaly_col):
if index == 1:
ax.scatter(time_col[i], y[i], color="red", marker="o")
ax.plot(downsampled_time_col, downsampled_y, color="black")
# Plot anomalies
for i in anomaly_indices:
ax.scatter(time_col[i], y[i], color="red", marker="o")
plt.xlabel(date_column)
plt.ylabel(col)
plt.title(f"`{col}` with reference to anomalies")
figure_blocks.append(rc.Widget(ax))
blocks.append(rc.Group(*figure_blocks, label=target))

blocks.append(rc.Group(*figure_blocks, label=target))
plots = rc.Select(blocks)

report_sections = []
Expand Down
1 change: 1 addition & 0 deletions ads/opctl/operator/lowcode/anomaly/operator_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ class AnomalyOperatorSpec(DataClassSerializable):
model: str = None
model_kwargs: Dict = field(default_factory=dict)
contamination: float = None
subsample_report_data: bool = None

def __post_init__(self):
"""Adjusts the specification details."""
Expand Down
4 changes: 4 additions & 0 deletions ads/opctl/operator/lowcode/anomaly/schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -377,4 +377,8 @@ spec:
type: dict
required: false

subsample_report_data:
type: boolean
required: false

type: dict