In [1]:
pip install pandas

Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install --upgrade azure-ai-anomalydetector




In [3]:
import os
import time
from datetime import datetime

from azure.ai.anomalydetector import AnomalyDetectorClient
from azure.ai.anomalydetector.models import DetectionRequest, ModelInfo
from azure.core.credentials import AzureKeyCredential
from azure.core.exceptions import HttpResponseError

In [4]:
anomaly_detector_endpoint = "https://adsensor.cognitiveservices.azure.com/"
subscription_key = "01031723c0da4ecca2234367f9a5ccb2"

In [5]:
class MultivariateSample():

    def __init__(self, subscription_key, anomaly_detector_endpoint, data_source=None):
        self.sub_key = subscription_key
        self.end_point = anomaly_detector_endpoint

        # Create an Anomaly Detector client

        # <client>
        self.ad_client = AnomalyDetectorClient(AzureKeyCredential(self.sub_key), self.end_point)
        # </client>

        self.data_source = "https://adstorageacct.blob.core.windows.net/sampledata/weather_data.zip?sp=r&st=2021-06-22T20:07:54Z&se=2021-06-30T04:07:54Z&spr=https&sv=2020-02-10&sr=b&sig=a%2FlRQGIP57yFXzJrT52JGXpnk9%2FSFuFB3KQ8YYhdJoQ%3D"
 
    def train(self, start_time, end_time, max_tryout=500):

        try:
            print("entering traing.....")

            # Number of models available now
            model_list = list(self.ad_client.list_multivariate_model(skip=0, top=10000))
            #model_list = list(self.ad_client.list_multivariate_model(skip=0, top=5))

            print("before print model list.....")
            print("{:d} available models before training.".format(len(model_list)))

            # Use sample data to train the model
            print("Training new model...")
            data_feed = ModelInfo(start_time=start_time, end_time=end_time, source=self.data_source)
            response_header = \
            self.ad_client.train_multivariate_model(data_feed, cls=lambda *args: [args[i] for i in range(len(args))])[-1]
            trained_model_id = response_header['Location'].split("/")[-1]

            # Model list after training
            new_model_list = list(self.ad_client.list_multivariate_model(skip=0, top=10000))

            # Wait until the model is ready. It usually takes several minutes
            model_status = None
            tryout_count = 0
            while (tryout_count < max_tryout and model_status != "READY"):
                model_status = self.ad_client.get_multivariate_model(trained_model_id).model_info.status
                tryout_count += 1
                time.sleep(2)

            assert model_status == "READY"

            print("Done.", "\n--------------------")
            print("{:d} available models after training.".format(len(new_model_list)))
            
            # Return the latest model id
            return trained_model_id

        except Exception as e:
            print("Error code: ", e)
    

    def detect(self, model_id, start_time, end_time, max_tryout=500):

        # Detect anomaly in the same data source (but a different interval)
        try:
            detection_req = DetectionRequest(source=self.data_source, start_time=start_time, end_time=end_time)
            response_header = self.ad_client.detect_anomaly(model_id, detection_req,
                                                            cls=lambda *args: [args[i] for i in range(len(args))])[-1]
            result_id = response_header['Location'].split("/")[-1]

            # Get results (may need a few seconds)
            r = self.ad_client.get_detection_result(result_id)
            tryout_count = 0
            while r.summary.status != "READY" and tryout_count < max_tryout:
                time.sleep(1)
                r = self.ad_client.get_detection_result(result_id)
                tryout_count += 1

            if r.summary.status != "READY":
                print("Request timeout after %d tryouts.".format(max_tryout))
                return None

        except HttpResponseError as e:
            print('Error code: {}'.format(e.error.code), 'Error message: {}'.format(e.error.message))
        except Exception as e:
            raise e

        return r
    
    
    
    def export_model(self, model_id, model_path="/model.zip"):

        # Export the model
        model_stream_generator = self.ad_client.export_model(model_id)
        with open(model_path, "wb") as f_obj:
            while True:
                try:
                    f_obj.write(next(model_stream_generator))
                except StopIteration:
                    break
                except Exception as e:
                    raise e
                    
                    
                    
    def delete_model(self, model_id):

        # Delete the mdoel
        self.ad_client.delete_multivariate_model(model_id)
        model_list_after_delete = list(self.ad_client.list_multivariate_model(skip=0, top=10000))
        print("{:d} available models after deletion.".format(len(model_list_after_delete)))


In [6]:
if __name__ == '__main__':
    subscription_key = "01031723c0da4ecca2234367f9a5ccb2"
    anomaly_detector_endpoint = "https://adsensor.cognitiveservices.azure.com/"
        
    data_source = "https://adstorageacct.blob.core.windows.net/sampledata/weather_data.zip?sp=r&st=2021-06-22T20:07:54Z&se=2021-06-30T04:07:54Z&spr=https&sv=2020-02-10&sr=b&sig=a%2FlRQGIP57yFXzJrT52JGXpnk9%2FSFuFB3KQ8YYhdJoQ%3D"

    # Create a new sample and client
    sample = MultivariateSample(subscription_key, anomaly_detector_endpoint, data_source=None)


    # Train a new model
    model_id = sample.train(datetime(2021, 6, 22, 16, 5, 0), datetime(2021, 6, 24, 12, 21, 0))
    #model_id = sample.train(dt1, dt2)
    print("Model ID: ", model_id)

    # Reference
    result = sample.detect(model_id, datetime(2021, 6, 24, 12, 21, 0), datetime(2021, 6, 24, 18, 4, 0))
    print("Result ID:\t", result.result_id)
    print("Result summary:\t", result.summary)
    print("Result length:\t", len(result.results))

    print("Exporting model....")
    # Export model
    sample.export_model(model_id, "model.zip")

    # Delete model
    #sample.delete_model(model_id)

entering traing.....


Datetime with no tzinfo will be considered UTC.
Datetime with no tzinfo will be considered UTC.


before print model list.....
0 available models before training.
Training new model...


Datetime with no tzinfo will be considered UTC.
Datetime with no tzinfo will be considered UTC.


Done. 
--------------------
1 available models after training.
Model ID:  fc52f8ae-d39d-11eb-91b1-526a6ad7ffd7
Result ID:	 2a86ee88-d39e-11eb-91b1-526a6ad7ffd7
Result summary:	 {'additional_properties': {}, 'status': 'READY', 'errors': [], 'variable_states': [<azure.ai.anomalydetector.models._models_py3.VariableState object at 0x0000026EE6B4E0D0>, <azure.ai.anomalydetector.models._models_py3.VariableState object at 0x0000026EE6BE10A0>], 'setup_info': <azure.ai.anomalydetector.models._models_py3.DetectionRequest object at 0x0000026EE6B4E1F0>}
Result length:	 344
Exporting model....


In [7]:
import requests
import json

In [8]:
ENDPOINT = "adsensor.cognitiveservices.azure.com/anomalydetector/v1.1-preview"
HEADERS = {
    "Ocp-Apim-Subscription-Key": "01031723c0da4ecca2234367f9a5ccb2"
}

API_MODEL = "https://{endpoint}/multivariate/models"
API_MODEL_STATUS = "https://{endpoint}/multivariate/models/{model_id}"
API_MODEL_INFERENCE = "https://{endpoint}/multivariate/models/{model_id}/detect"
API_RESULTS = "https://{endpoint}/multivariate/results/{result_id}"
API_EXPORT = "https://{endpoint}/multivariate/models/{model_id}/export"
API_DELETE = "https://{endpoint}/multivariate/models/{model_id}"

SOURCE_BLOB_SAS = "https://adstorageacct.blob.core.windows.net/sampledata/weather_data.zip?sp=r&st=2021-06-22T20:07:54Z&se=2021-06-30T04:07:54Z&spr=https&sv=2020-02-10&sr=b&sig=a%2FlRQGIP57yFXzJrT52JGXpnk9%2FSFuFB3KQ8YYhdJoQ%3D"

In [9]:
#mid = "25d16144-d398-11eb-8b76-526a6ad7ffd7"
#res = requests.get(API_DELETE.format(endpoint=ENDPOINT, model_id = mid), headers=HEADERS)
#assert res.status_code == 200, f"Error occured. Error message: {res.content}"
#print(res.content)

In [10]:
res = requests.get(API_MODEL.format(endpoint=ENDPOINT), headers=HEADERS)
assert res.status_code == 200, f"Error occured. Error message: {res.content}"
print(res.content)

b'{"models": [{"modelId": "fc52f8ae-d39d-11eb-91b1-526a6ad7ffd7", "createdTime": "2021-06-22T21:08:26Z", "lastUpdatedTime": "2021-06-22T21:09:41Z", "status": "READY", "displayName": "", "variablesCount": 2}], "currentCount": 1, "maxCount": 300, "nextLink": ""}\n'


In [22]:
SLIDING_WINDOW = 200
data = {
    'slidingWindow': SLIDING_WINDOW,
    'alignPolicy': {
        'alignMode': 'Outer',
        'fillNAMethod': 'Linear', 
        'paddingValue': 0
    },
    'source': SOURCE_BLOB_SAS,
    'startTime': '2021-06-22T16:05:00Z', 
    'endTime': '2021-06-24T12:21:00Z', 
    'displayName': 'SampleRequest'
}

res = requests.post(API_MODEL.format(endpoint=ENDPOINT), data=json.dumps(data), headers=HEADERS)
assert res.status_code == 201, f"Error occured. Error message: {res.content}"
print(res.content)
location = res.headers['Location']
print(location)
model_id = location[location.rindex('/')+1:]
print(model_id)

b'"Success"\n'
https://adsensor.cognitiveservices.azure.com:443/anomalydetector/v1.1-preview/multivariate/models/eaf1d31c-d39f-11eb-9a02-2e45e2422c72
eaf1d31c-d39f-11eb-9a02-2e45e2422c72


In [23]:
res = requests.get(API_MODEL_STATUS.format(endpoint=ENDPOINT, model_id = model_id), headers=HEADERS)
assert res.status_code == 200, f"Error occured. Error message: {res.content}"
res_content = json.loads(res.content)
print(json.dumps(res_content))
print(res_content['modelInfo']['status'])

{"modelId": "eaf1d31c-d39f-11eb-9a02-2e45e2422c72", "createdTime": "2021-06-22T21:22:16Z", "lastUpdatedTime": "2021-06-22T21:22:21Z", "modelInfo": {"slidingWindow": 200, "alignPolicy": {"alignMode": "Outer", "fillNAMethod": "Linear", "paddingValue": 0}, "source": "https://adstorageacct.blob.core.windows.net/sampledata/weather_data.zip?sp=r&st=2021-06-22T20:07:54Z&se=2021-06-30T04:07:54Z&spr=https&sv=2020-02-10&sr=b&sig=a%2FlRQGIP57yFXzJrT52JGXpnk9%2FSFuFB3KQ8YYhdJoQ%3D", "startTime": "2021-06-22T16:05:00Z", "endTime": "2021-06-24T12:21:00Z", "displayName": "SampleRequest", "status": "RUNNING", "errors": [], "diagnosticsInfo": {"modelState": {"epochIds": [], "trainLosses": [], "validationLosses": [], "latenciesInSeconds": []}, "variableStates": [{"variable": "temperature", "filledNARatio": 0.0, "effectiveCount": 2657, "startTime": "2021-06-22T16:05:00Z", "endTime": "2021-06-24T12:21:00Z", "errors": []}, {"variable": "humidity", "filledNARatio": 0.0, "effectiveCount": 2657, "startTime": 

In [26]:
data = {
    'source': SOURCE_BLOB_SAS,
    'startTime': '2021-06-22 16:05:00Z',
    'endTime': '2021-06-24 12:21:00Z'
}

res = requests.post(API_MODEL_INFERENCE.format(endpoint=ENDPOINT, model_id=model_id), 
                    data=json.dumps(data), headers=HEADERS)
assert res.status_code == 201, f"Error occured. Error message: {res.content}"
print(res.content)
result_id = res.headers['location'].split("/")[-1]
print(f"result id = {result_id}")

b'"Success"\n'
result id = 20d0acce-d3a0-11eb-9a02-2e45e2422c72


In [27]:
from bokeh.io import output_file, show, output_notebook, save
from bokeh.layouts import gridplot
from bokeh.plotting import figure
from matplotlib import pyplot
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.palettes import Dark2_5 as palette
import pandas as pd
import numpy as np
import os
import itertools  
import shutil
import uuid
import zipfile
from urllib.request import urlretrieve
%matplotlib inline
output_notebook()

def unzip_file(zip_src, dst_dir):
    r = zipfile.is_zipfile(zip_src)
    if r:
        fz = zipfile.ZipFile(zip_src, 'r')
        print(fz)
        for file in fz.namelist():
            fz.extract(file, dst_dir)
    else:
        print('This is not zip')
        
def load_data(local_data_path, start, end):
    new_dir = os.path.join('.', str(uuid.uuid1()))
    shutil.rmtree(new_dir, ignore_errors=True)
    os.mkdir(new_dir)
    unzip_file(local_data_path, new_dir)
    files = os.listdir(new_dir)
    frames = []
    for file in files:
        if file[-4:] != '.csv':
            continue
        frame = pd.read_csv('{}\\{}'.format(new_dir, file))
        var = file[:file.find('.csv')]
        frame = frame.rename(columns={'value': var})
        frame = frame[frame['timestamp'] >= start]
        frame = frame[frame['timestamp'] <= end]
        frame['timestamp'] = pd.to_datetime(frame['timestamp'])
        frame.set_index(['timestamp'], inplace=True)
        frames.append(frame)
    shutil.rmtree(new_dir, ignore_errors=True)
    return frames


def plot_lines_multi(x, y, p, color, name, t_str="hover,save,pan,box_zoom,reset,wheel_zoom", t_loc='above'):
    '''...
    '''
    p.line(x, y, color=color, legend_label=name)

def draw(data_source, local_data_path, result_id, sensitivity, start, end):
    urlretrieve(data_source, local_data_path)
    print(local_data_path, result_id, sensitivity, start, end)
    series = load_data(local_data_path, start, end)
    p_list = []
    colors = itertools.cycle(palette)
    # p_value = figure(background_fill_color="#fafafa", x_axis_type="datetime")
    for var, color in zip(series, colors):
        name = var.columns.values[0]
        p_value = figure(background_fill_color="#fafafa", x_axis_type="datetime")
        plot_lines_multi(var.index, var[name], p_value, color, name)
        p_list.append(p_value)
    header = HEADERS
    raw_result = json.loads(requests.get(API_RESULTS.format(endpoint=ENDPOINT, result_id=result_id), headers=header).content)
    if raw_result['summary']['status'] != 'READY':
        print("result not ready")
        return
    filter_item = list(filter(lambda x: 'value' in x and 'isAnomaly' in x['value'] and x['timestamp'] >= start and x['timestamp'] <= end, raw_result['results']))
    timestamps = [item['timestamp'] for item in filter_item]
    isAnomaly = [item['value']['isAnomaly'] for item in filter_item]
    RawScore = [item['value']['score'] for item in filter_item]
    Severity = [item['value']['severity'] for item in filter_item]
    result = pd.DataFrame({'Timestamp': timestamps, 'isAnomaly': isAnomaly, 'RawScore': RawScore, 'Severity': Severity})
    result.loc[(result.Severity <= (1 - sensitivity)) & (result.isAnomaly == True), 'isAnomaly'] = False
    result['Timestamp'] = pd.to_datetime(result['Timestamp'])
    result.set_index(['Timestamp'], inplace=True)
    result = result.reindex(['isAnomaly', 'RawScore', 'Severity'], axis=1)
    colors = ['red', 'blue', 'black']
    for col, color in zip(result.columns, colors):
        p = figure(background_fill_color="#fafafa", x_axis_type="datetime")
        p.line(result.index, result[col], color=color, alpha=0.8, legend_label=col)
        p_list.append(p)
    grid = gridplot([[x] for x in p_list], sizing_mode='scale_width', plot_height=50)
    
    
    show(grid)
    result = result.sort_values(by=['RawScore'], ascending=False)
    
    #   TESTING
    print("Result size:  ", result.size)
    
    
    top_anomaly = list(result[result.isAnomaly].index.strftime('%Y-%m-%dT%H:%M:%SZ'))[0]
    print("Top Anomaly Timestamp is : {0}".format(top_anomaly))
    
    
      #   TESTING
    top_anomaly_list = list(result[result.isAnomaly])[0]
    print("Anomaly list:  ", top_anomaly_list)    
    
    return series, raw_result, top_anomaly

def show_contribution(series, raw_result, anomaly_timestamp):
    anomaly_result = [x for x in raw_result['results'] if 'contributors' in x['value'] and x['timestamp'] == top_anomaly][0]
    contributors = [x['variable'] for x in anomaly_result['value']['contributors']]
    series_index = pd.DataFrame({'index': list(range(0, len(series))), 'name': [x.columns[0] for x in series]})
    series_index = series_index.set_index('name')
    sorted_series = [series[i][series[i].index <= top_anomaly].tail(SLIDING_WINDOW) for i in series_index.reindex(contributors)['index'].values]
    p_list = []
    colors = itertools.cycle(palette)
    for var, color in zip(sorted_series, colors):
        name = var.columns.values[0]
        p_value = figure(background_fill_color="#fafafa", x_axis_type="datetime")
        plot_lines_multi(var.index, var[name], p_value, color, name)
        p_list.append(p_value)
    grid = gridplot([[x] for x in p_list], sizing_mode='scale_width', plot_height=50)
    show(grid)

In [28]:
print(f"result id = {result_id}")
data_source = SOURCE_BLOB_SAS
local_data_path = "weather_data.zip"
severity = 0.6
start_date = "2021-06-22 16:05:00"
end_date = "2021-06-24 12:21:00"
series, raw_result, top_anomaly = draw(data_source, local_data_path, result_id, severity, start_date, end_date)

result id = 20d0acce-d3a0-11eb-9a02-2e45e2422c72
weather_data.zip 20d0acce-d3a0-11eb-9a02-2e45e2422c72 0.6 2021-06-22 16:05:00 2021-06-24 12:21:00
<zipfile.ZipFile filename='weather_data.zip' mode='r'>


Result size:   5145
Top Anomaly Timestamp is : 2021-06-23T07:13:00Z
Anomaly list:   isAnomaly
