Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 26 additions & 1 deletion ads/opctl/operator/lowcode/anomaly/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,32 @@ class SupportedMetrics(str, metaclass=ExtendedEnumMeta):
UNSUPERVISED_UNIFY95 = "unsupervised_unify95"
UNSUPERVISED_UNIFY95_LOG_LOSS = "unsupervised_unify95_log_loss"
UNSUPERVISED_N1_EXPERTS = "unsupervised_n-1_experts"

RECALL = "Recall"
PRECISION = "Precision"
ACCURACY = "Accuracy"
F1_SCORE = "f1_score"
FP = "False Positive"
FN = "False Negative"
TP = "True Positive"
TN = "True Negative"
ROC_AUC = "ROC_AUC"
PRC_AUC = "PRC_AUC"
MCC = "MCC"
MEAN_RECALL = "Mean Recall"
MEAN_PRECISION = "Mean Precision"
MEAN_ACCURACY = "Mean Accuracy"
MEAN_F1_SCORE = "Mean f1_score"
MEAN_ROC_AUC = "Mean ROC_AUC"
MEAN_PRC_AUC = "Mean PRC_AUC"
MEAN_MCC = "Mean MCC"
MEDIAN_RECALL = "Median Recall"
MEDIAN_PRECISION = "Median Precision"
MEDIAN_ACCURACY = "Median Accuracy"
MEDIAN_F1_SCORE = "Median f1_score"
MEDIAN_ROC_AUC = "Median ROC_AUC"
MEDIAN_PRC_AUC = "Median PRC_AUC"
MEDIAN_MCC = "Median MCC"
ELAPSED_TIME = "Elapsed Time"

class OutputColumns(str, metaclass=ExtendedEnumMeta):
ANOMALY_COL = "anomaly"
Expand Down
28 changes: 23 additions & 5 deletions ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,9 @@ def _load_data(self, spec):


class AnomalyOutput:
def __init__(self):
def __init__(self, date_column):
self.category_map = dict()
self.date_column = date_column

def add_output(self, category: str, anomalies: pd.DataFrame, scores: pd.DataFrame):
self.category_map[category] = (anomalies, scores)
Expand All @@ -83,15 +84,29 @@ def get_scores_by_cat(self, category: str):

def get_inliers_by_cat(self, category: str, data: pd.DataFrame):
anomaly = self.get_anomalies_by_cat(category)
scores = self.get_scores_by_cat(category)
inlier_indices = anomaly.index[anomaly[OutputColumns.ANOMALY_COL] == 0]

return data.iloc[inlier_indices]
inliers = data.iloc[inlier_indices]
if scores is not None and not scores.empty:
inliers = pd.merge(
inliers,
scores,
on=self.date_column,
how='inner')
return inliers

def get_outliers_by_cat(self, category: str, data: pd.DataFrame):
anomaly = self.get_anomalies_by_cat(category)
scores = self.get_scores_by_cat(category)
outliers_indices = anomaly.index[anomaly[OutputColumns.ANOMALY_COL] == 1]

return data.iloc[outliers_indices]
outliers = data.iloc[outliers_indices]
if scores is not None and not scores.empty:
outliers = pd.merge(
outliers,
scores,
on=self.date_column,
how='inner')
return outliers

def get_inliers(self, full_data_dict):
inliers = pd.DataFrame()
Expand Down Expand Up @@ -128,3 +143,6 @@ def get_scores(self, target_category_columns):
score[target_category_columns[0]] = category
scores = pd.concat([scores, score], axis=0, ignore_index=True)
return scores

def get_num_anomalies_by_cat(self, category: str):
return (self.category_map[category][0][OutputColumns.ANOMALY_COL] == 1).sum()
32 changes: 28 additions & 4 deletions ads/opctl/operator/lowcode/anomaly/model/automlx.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import pandas as pd

from ads.common.decorator.runtime_dependency import runtime_dependency
from .anomaly_dataset import AnomalyOutput

from .base_model import AnomalyOperatorBaseModel
from ads.opctl.operator.lowcode.anomaly.const import OutputColumns
Expand All @@ -22,11 +23,34 @@ class AutoMLXOperatorModel(AnomalyOperatorBaseModel):
),
)
def _build_model(self) -> pd.DataFrame:
est = automl.Pipeline(task='anomaly_detection')


date_column = self.spec.datetime_column.name
dataset = self.datasets
est.fit(dataset.data, y=None)
y_pred = est.predict(dataset.data)
dataset.data[OutputColumns.ANOMALY_COL] = y_pred

full_data_dict = dataset.full_data_dict

anomaly_output = AnomalyOutput(date_column=date_column)

# Iterate over the full_data_dict items
for target, df in full_data_dict.items():
est = automl.Pipeline(task='anomaly_detection')
est.fit(df, y=None)
y_pred = est.predict(df)
scores = est.predict_proba(df)

anomaly = pd.DataFrame({
date_column: df[date_column],
OutputColumns.ANOMALY_COL: y_pred
})
score = pd.DataFrame({
date_column: df[date_column],
OutputColumns.SCORE_COL: [item[1] for item in scores]
})
anomaly_output.add_output(target, anomaly, score)

return anomaly_output


def _generate_report(self):
import datapane as dp
Expand Down
9 changes: 2 additions & 7 deletions ads/opctl/operator/lowcode/anomaly/model/autots.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,7 @@ def _build_model(self) -> AnomalyOutput:

full_data_dict = dataset.full_data_dict

target_category_column = (
self.spec.target_category_columns[0]
if self.spec.target_category_columns is not None
else None
)

anomaly_output = AnomalyOutput()
anomaly_output = AnomalyOutput(date_column=date_column)

# Iterate over the full_data_dict items
for target, df in full_data_dict.items():
Expand All @@ -70,6 +64,7 @@ def _build_model(self) -> AnomalyOutput:
columns={score.columns.values[0]: OutputColumns.SCORE_COL},
inplace=True,
)
score = 1-score
score = score.reset_index(drop=False)

col = anomaly.columns.values[0]
Expand Down
Loading