# In [None]:
#!/usr/bin/env python
# coding: utf-8

from azure.cognitiveservices.anomalydetector import AnomalyDetectorClient
from azure.cognitiveservices.anomalydetector.models import Request, Point, Granularity,     APIErrorException
from msrest.authentication import CognitiveServicesCredentials
import pandas as pd
import numpy as np
import os
import time
import pickle
import glob
from datetime import datetime, timezone, timedelta
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import SimpleImputer
import dateutil.parser


# Azure Anomaly Detector credentials. Both values are placeholders and must be
# replaced with a real key / endpoint before the script can talk to the service.
# NOTE(review): avoid committing real credentials into this file.
SUBSCRIPTION_KEY = 'USE YOUR OWN KEY'
ANOMALY_DETECTOR_ENDPOINT = 'USE YOUR OWN END POINT'

# Module-level client, built at import time and used by get_azure_detection().
client = AnomalyDetectorClient(ANOMALY_DETECTOR_ENDPOINT, CognitiveServicesCredentials(SUBSCRIPTION_KEY))


# Signal names of the two groups collected in the NASA list further down.
# NOTE(review): presumably spacecraft telemetry channel ids -- confirm.
SMAP = ('P-1', 'S-1', 'E-1', 'E-2', 'E-3', 'E-4', 'E-5', 'E-6', 'E-7','E-8', 'E-9', 'E-10', 'E-11', 'E-12', 'E-13', 'A-1', 'D-1', 'P-3','D-2', 'D-3', 'D-4', 'A-2', 'A-3', 'A-4', 'G-1', 'G-2', 'D-5','D-6', 'D-7', 'F-1', 'P-4', 'G-3', 'T-1', 'T-2', 'D-8', 'D-9','F-2', 'G-4', 'T-3', 'D-11', 'D-12', 'B-1', 'G-6', 'G-7', 'P-7', 'R-1', 'A-5', 'A-6', 'A-7', 'D-13', 'A-8', 'A-9', 'F-3')
MSL = ('M-6', 'M-1', 'M-2', 'S-2', 'P-10', 'T-4', 'T-5', 'F-7', 'M-3', 'M-4', 'M-5', 'P-15', 'C-1', 'C-2', 'T-12', 'T-13', 'F-4', 'F-5', 'D-14','T-9', 'P-14', 'T-8', 'P-11', 'D-15', 'D-16', 'M-7', 'F-8')

# Signal names of the four groups collected in the YAHOO list further down.
# NOTE(review): the names suggest the Yahoo benchmark sets A1-A4 -- confirm.
YAHOOA1_SIGNALS = ('real_59', 'real_65', 'real_64', 'real_58', 'real_66', 'real_67', 'real_63', 'real_62', 'real_60', 'real_48', 'real_49', 'real_61', 'real_12', 'real_5', 'real_4', 'real_13', 'real_39', 'real_11', 'real_6', 'real_7', 'real_10', 'real_38', 'real_14', 'real_28', 'real_3', 'real_2', 'real_29', 'real_15', 'real_17', 'real_1', 'real_16', 'real_33', 'real_27', 'real_26', 'real_32', 'real_18', 'real_24', 'real_30', 'real_31', 'real_25', 'real_19', 'real_21', 'real_35', 'real_34', 'real_20', 'real_36', 'real_22', 'real_9', 'real_8', 'real_23', 'real_37', 'real_50', 'real_44', 'real_45', 'real_51', 'real_47', 'real_53', 'real_52', 'real_46', 'real_42', 'real_56', 'real_57', 'real_43', 'real_55', 'real_41', 'real_40', 'real_54')
YAHOOA2_SIGNALS = ('synthetic_85','synthetic_91','synthetic_46','synthetic_52','synthetic_53','synthetic_47','synthetic_90','synthetic_84','synthetic_92','synthetic_86','synthetic_51','synthetic_45','synthetic_79','synthetic_78','synthetic_100','synthetic_44','synthetic_50','synthetic_87','synthetic_93','synthetic_97','synthetic_83','synthetic_68','synthetic_54','synthetic_40','synthetic_41','synthetic_55','synthetic_69','synthetic_82','synthetic_96','synthetic_80','synthetic_94','synthetic_43','synthetic_57','synthetic_56','synthetic_42','synthetic_95','synthetic_81','synthetic_25','synthetic_31','synthetic_19','synthetic_18','synthetic_30','synthetic_24','synthetic_32','synthetic_26','synthetic_27','synthetic_33','synthetic_8','synthetic_37','synthetic_23','synthetic_22','synthetic_36','synthetic_9','synthetic_20','synthetic_34','synthetic_35','synthetic_21','synthetic_7','synthetic_10','synthetic_38','synthetic_39','synthetic_11','synthetic_6','synthetic_4','synthetic_13','synthetic_12','synthetic_5','synthetic_1','synthetic_16','synthetic_17','synthetic_2','synthetic_29','synthetic_15','synthetic_14','synthetic_28','synthetic_3','synthetic_98','synthetic_67','synthetic_73','synthetic_72','synthetic_66','synthetic_99','synthetic_70','synthetic_64','synthetic_58','synthetic_59','synthetic_65','synthetic_71','synthetic_49','synthetic_75','synthetic_61','synthetic_60','synthetic_74','synthetic_48','synthetic_89','synthetic_62','synthetic_76','synthetic_77','synthetic_63','synthetic_88')
YAHOOA3_SIGNALS = ('A3Benchmark-TS12', 'A3Benchmark-TS13', 'A3Benchmark-TS11', 'A3Benchmark-TS39', 'A3Benchmark-TS38', 'A3Benchmark-TS10', 'A3Benchmark-TS28', 'A3Benchmark-TS14', 'A3Benchmark-TS15', 'A3Benchmark-TS29', 'A3Benchmark-TS17', 'A3Benchmark-TS16', 'A3Benchmark-TS65', 'A3Benchmark-TS71', 'A3Benchmark-TS59', 'A3Benchmark-TS5', 'A3Benchmark-TS4', 'A3Benchmark-TS58', 'A3Benchmark-TS70', 'A3Benchmark-TS64', 'A3Benchmark-TS72', 'A3Benchmark-TS66', 'A3Benchmark-TS6', 'A3Benchmark-TS99', 'A3Benchmark-TS98', 'A3Benchmark-TS7', 'A3Benchmark-TS67', 'A3Benchmark-TS73', 'A3Benchmark-TS77', 'A3Benchmark-TS63', 'A3Benchmark-TS88', 'A3Benchmark-TS3', 'A3Benchmark-TS2', 'A3Benchmark-TS89', 'A3Benchmark-TS62', 'A3Benchmark-TS76', 'A3Benchmark-TS48', 'A3Benchmark-TS60', 'A3Benchmark-TS74', 'A3Benchmark-TS1', 'A3Benchmark-TS75', 'A3Benchmark-TS61', 'A3Benchmark-TS49', 'A3Benchmark-TS44', 'A3Benchmark-TS50', 'A3Benchmark-TS78', 'A3Benchmark-TS87', 'A3Benchmark-TS93', 'A3Benchmark-TS92', 'A3Benchmark-TS86', 'A3Benchmark-TS79', 'A3Benchmark-TS51', 'A3Benchmark-TS45', 'A3Benchmark-TS53', 'A3Benchmark-TS47', 'A3Benchmark-TS90', 'A3Benchmark-TS84', 'A3Benchmark-TS85', 'A3Benchmark-TS91', 'A3Benchmark-TS46', 'A3Benchmark-TS52', 'A3Benchmark-TS56', 'A3Benchmark-TS42', 'A3Benchmark-TS95', 'A3Benchmark-TS81', 'A3Benchmark-TS80', 'A3Benchmark-TS94', 'A3Benchmark-TS43', 'A3Benchmark-TS57', 'A3Benchmark-TS69', 'A3Benchmark-TS41', 'A3Benchmark-TS55', 'A3Benchmark-TS9', 'A3Benchmark-TS82', 'A3Benchmark-TS96', 'A3Benchmark-TS97', 'A3Benchmark-TS83', 'A3Benchmark-TS8', 'A3Benchmark-TS54', 'A3Benchmark-TS40', 'A3Benchmark-TS68', 'A3Benchmark-TS100', 'A3Benchmark-TS27', 'A3Benchmark-TS33', 'A3Benchmark-TS32', 'A3Benchmark-TS26', 'A3Benchmark-TS30', 'A3Benchmark-TS24', 'A3Benchmark-TS18', 'A3Benchmark-TS19', 'A3Benchmark-TS25', 'A3Benchmark-TS31', 'A3Benchmark-TS35', 'A3Benchmark-TS21', 'A3Benchmark-TS20', 'A3Benchmark-TS34', 'A3Benchmark-TS22', 'A3Benchmark-TS36', 'A3Benchmark-TS37', 
'A3Benchmark-TS23')
YAHOOA4_SIGNALS = ('A4Benchmark-TS99', 'A4Benchmark-TS72', 'A4Benchmark-TS66', 'A4Benchmark-TS67', 'A4Benchmark-TS73', 'A4Benchmark-TS98', 'A4Benchmark-TS65', 'A4Benchmark-TS71', 'A4Benchmark-TS59', 'A4Benchmark-TS58', 'A4Benchmark-TS70', 'A4Benchmark-TS64', 'A4Benchmark-TS48', 'A4Benchmark-TS60', 'A4Benchmark-TS74', 'A4Benchmark-TS75', 'A4Benchmark-TS61', 'A4Benchmark-TS49', 'A4Benchmark-TS88', 'A4Benchmark-TS77', 'A4Benchmark-TS63', 'A4Benchmark-TS62', 'A4Benchmark-TS76', 'A4Benchmark-TS89', 'A4Benchmark-TS11', 'A4Benchmark-TS39', 'A4Benchmark-TS38', 'A4Benchmark-TS10', 'A4Benchmark-TS8', 'A4Benchmark-TS12', 'A4Benchmark-TS13', 'A4Benchmark-TS9', 'A4Benchmark-TS100', 'A4Benchmark-TS17', 'A4Benchmark-TS16', 'A4Benchmark-TS28', 'A4Benchmark-TS14', 'A4Benchmark-TS15', 'A4Benchmark-TS29', 'A4Benchmark-TS2', 'A4Benchmark-TS30', 'A4Benchmark-TS24', 'A4Benchmark-TS18', 'A4Benchmark-TS19', 'A4Benchmark-TS25', 'A4Benchmark-TS31', 'A4Benchmark-TS3', 'A4Benchmark-TS1', 'A4Benchmark-TS27', 'A4Benchmark-TS33', 'A4Benchmark-TS32', 'A4Benchmark-TS26', 'A4Benchmark-TS4', 'A4Benchmark-TS22', 'A4Benchmark-TS36', 'A4Benchmark-TS37', 'A4Benchmark-TS23', 'A4Benchmark-TS5', 'A4Benchmark-TS7', 'A4Benchmark-TS35', 'A4Benchmark-TS21', 'A4Benchmark-TS20', 'A4Benchmark-TS34', 'A4Benchmark-TS6', 'A4Benchmark-TS90', 'A4Benchmark-TS84', 'A4Benchmark-TS53', 'A4Benchmark-TS47', 'A4Benchmark-TS46', 'A4Benchmark-TS52', 'A4Benchmark-TS85', 'A4Benchmark-TS91', 'A4Benchmark-TS87', 'A4Benchmark-TS93', 'A4Benchmark-TS44', 'A4Benchmark-TS50', 'A4Benchmark-TS78', 'A4Benchmark-TS79', 'A4Benchmark-TS51', 'A4Benchmark-TS45', 'A4Benchmark-TS92', 'A4Benchmark-TS86', 'A4Benchmark-TS82', 'A4Benchmark-TS96', 'A4Benchmark-TS69', 'A4Benchmark-TS41', 'A4Benchmark-TS55', 'A4Benchmark-TS54', 'A4Benchmark-TS40', 'A4Benchmark-TS68', 'A4Benchmark-TS97', 'A4Benchmark-TS83', 'A4Benchmark-TS95', 'A4Benchmark-TS81', 'A4Benchmark-TS56', 'A4Benchmark-TS42', 'A4Benchmark-TS43', 'A4Benchmark-TS57', 'A4Benchmark-TS80', 
'A4Benchmark-TS94')


# Two further signal-name tuples. Neither is included in the NASA / YAHOO / NAB /
# ALL group lists defined below, so the main loop never iterates over them.
# NOTE(review): confirm whether leaving them out of ALL is intentional.
ECG_SIGNALS = ('ECG123','ECG122','ECG108','ECG121','ECG109','ECG119','ECG118','ECG124','ECG222','ECG223','ECG209','ECG221','ECG220','ECG234','ECG208','ECG230','ECG219','ECG231','ECG233','ECG232','ECG203','ECG217','ECG202','ECG228','ECG214','ECG200','ECG201','ECG215','ECG205','ECG210','ECG212','ECG213','ECG207','ECG116','ECG102','ECG103','ECG117','ECG101','ECG115','ECG114','ECG100','ECG104','ECG111','ECG105','ECG113','ECG107','ECG106','ECG112')
SYNTH_SIGNALS = ('synthetic_custom3','synthetic_custom4','synthetic_custom25','synthetic_custom26','synthetic_custom27','synthetic_custom28','synthetic_custom29','synthetic_custom30','synthetic_custom5','synthetic_custom6','synthetic_custom7','synthetic_custom8','synthetic_custom9','synthetic_custom10','synthetic_custom11','synthetic_custom12','synthetic_custom13','synthetic_custom14','synthetic_custom15','synthetic_custom16','synthetic_custom17','synthetic_custom18','synthetic_custom19','synthetic_custom20','synthetic_custom21','synthetic_custom22','synthetic_custom23','synthetic_custom24')

# Signal names of the six groups collected in the NAB list further down.
# With the default settings in the main block, each entry is used as the base
# name of the CSV that gets downloaded and of the pickle that gets written.
# NOTE(review): presumably the Numenta Anomaly Benchmark categories -- confirm.
artificialWithAnomaly = ('art_daily_flatmiddle', 'art_daily_jumpsdown', 'art_daily_jumpsup', 'art_daily_nojump', 'art_increase_spike_density', 'art_load_balancer_spikes')
realAdExchange = ('exchange-2_cpc_results', 'exchange-2_cpm_results', 'exchange-3_cpc_results', 'exchange-3_cpm_results', 'exchange-4_cpm_results')
realAWSCloudwatch = ('ec2_cpu_utilization_5f5533', 'ec2_cpu_utilization_24ae8d', 'ec2_cpu_utilization_53ea38', 'ec2_cpu_utilization_77c1ca', 'ec2_cpu_utilization_825cc2', 'ec2_cpu_utilization_ac20cd', 'ec2_cpu_utilization_c6585a', 'ec2_cpu_utilization_fe7f93', 'ec2_disk_write_bytes_1ef3de', 'ec2_disk_write_bytes_c0d644', 'ec2_network_in_5abac7', 'ec2_network_in_257a54', 'elb_request_count_8c0756', 'grok_asg_anomaly', 'iio_us-east-1_i-a2eb1cd9_NetworkIn', 'rds_cpu_utilization_cc0c53', 'rds_cpu_utilization_e47b3b')
realKnownCause = ('ambient_temperature_system_failure', 'cpu_utilization_asg_misconfiguration', 'ec2_request_latency_system_failure', 'machine_temperature_system_failure', 'nyc_taxi', 'rogue_agent_key_hold', 'rogue_agent_key_updown')
realTraffic = ('occupancy_6005', 'occupancy_t4013', 'speed_6005', 'speed_7578', 'speed_t4013', 'TravelTime_387', 'TravelTime_451')
realTweets = ('Twitter_volume_AAPL', 'Twitter_volume_AMZN', 'Twitter_volume_CRM', 'Twitter_volume_CVS', 'Twitter_volume_FB', 'Twitter_volume_GOOG', 'Twitter_volume_IBM', 'Twitter_volume_KO', 'Twitter_volume_PFE', 'Twitter_volume_UPS')

# Each collection is a pair of parallel lists: X holds the signal-name tuples
# and X_NAME holds the matching group labels. The main block pairs them by
# position (data[i] with data_name[i]), so both lists must keep the same order.
NASA = [SMAP, MSL]
NASA_NAME = ['SMAP', 'MSL']

YAHOO = [YAHOOA1_SIGNALS, YAHOOA2_SIGNALS, YAHOOA3_SIGNALS, YAHOOA4_SIGNALS]
YAHOO_NAME = ['YAHOOA1', 'YAHOOA2', 'YAHOOA3', 'YAHOOA4']

NAB = [artificialWithAnomaly, realAdExchange, realAWSCloudwatch, realKnownCause, realTraffic, realTweets]
NAB_NAME = ['artificialWithAnomaly', 'realAdExchange', 'realAWSCloudwatch', 'realKnownCause', 'realTraffic', 'realTweets']

# Concatenation of the three collections above, in NASA, YAHOO, NAB order.
# ECG_SIGNALS and SYNTH_SIGNALS are not part of it.
ALL = NASA + YAHOO + NAB
ALL_NAME = NASA_NAME + YAHOO_NAME + NAB_NAME


def time_aggregate(X, agg):
    """Average a time series over consecutive fixed-width time windows.

    The frame is sorted by its ``timestamp`` column and cut into windows of
    width ``agg`` starting at the earliest timestamp. Every window produces
    one output row whose value is the NaN-ignoring mean of all data falling
    inside it; a window that contains no rows produces ``NaN``.

    Args:
        X: DataFrame with a ``timestamp`` column plus the numeric column(s)
            to average. Timestamps are assumed to be integer-valued, because
            the inclusive window end is computed as ``start + agg - 1``
            -- TODO confirm against the input files.
        agg: Window width in the same unit as the timestamps (a duration,
            not a number of rows). Must move the window forward.

    Returns:
        DataFrame with columns ``timestamp`` (window start) and ``value``
        (window mean). It has no rows when ``X`` has no rows.

    Raises:
        ValueError: If adding ``agg`` to a timestamp does not increase it
            (zero, negative or NaN width), which would otherwise make the
            windowing loop run forever.
    """
    time_column = 'timestamp'
    X = X.sort_values(time_column).set_index(time_column)

    if len(X) == 0:
        # Nothing to aggregate; previously this raised IndexError below.
        return pd.DataFrame({'timestamp': np.asarray([]), 'value': np.asarray([])})

    timestamps = X.index.values
    start_ts = timestamps[0]
    max_ts = timestamps[-1]

    # Type-agnostic sanity check: the window start has to advance.
    if not start_ts + agg > start_ts:
        raise ValueError('agg must be a positive duration, got {!r}'.format(agg))

    values = []
    index = []
    while start_ts <= max_ts:
        end_ts = start_ts + agg
        # .loc slices by label and includes both end points, hence the "- 1".
        subset = X.loc[start_ts:end_ts - 1]
        # An empty window averages to NaN anyway; skipping np.nanmean avoids
        # its "Mean of empty slice" RuntimeWarning for every gap in the data.
        values.append(np.nanmean(subset) if subset.size else np.nan)
        index.append(start_ts)
        start_ts = end_ts

    return pd.DataFrame({'timestamp': np.asarray(index), 'value': np.asarray(values)})

def generate_timestamps(X, interval=3600):
    """Replace the ``timestamp`` column with evenly spaced synthetic values.

    Rows are ordered by their existing ``timestamp`` first; the n-th row
    (counting from zero) then receives ``n * interval``. The caller's frame
    is not modified because ``sort_values`` returns a new DataFrame.

    Args:
        X: DataFrame containing a ``timestamp`` column.
        interval: Spacing between consecutive generated timestamps. Defaults
            to 3600, the value that was previously hard-coded.

    Returns:
        A sorted copy of ``X`` whose ``timestamp`` column reads
        ``0, interval, 2 * interval, ...``. Row index labels are kept as
        they were before sorting.
    """
    time_column = 'timestamp'
    X = X.sort_values(time_column)
    X[time_column] = [interval * i for i in range(len(X))]
    return X

def min_max_scale(X):
    """Rescale ``X`` with scikit-learn's ``MinMaxScaler`` onto [-1, 1].

    Args:
        X: Array-like accepted by ``MinMaxScaler.fit_transform``; the caller
            in this file passes a single-column 2-D array.

    Returns:
        The transformed data produced by ``fit_transform``.
    """
    target_range = (-1, 1)
    return MinMaxScaler(feature_range=target_range).fit_transform(X)

def impute(X):
    """Fill missing entries of ``X`` using a default-configured ``SimpleImputer``.

    Args:
        X: Array-like accepted by ``SimpleImputer.fit_transform``; the caller
            in this file passes a single-column 2-D array.

    Returns:
        The imputed data produced by ``fit_transform``.
    """
    filler = SimpleImputer()
    filled = filler.fit_transform(X)
    return filled

def convert_date(x):
    """Format a Unix timestamp as an ISO-8601 string in UTC.

    Args:
        x: Seconds since the Unix epoch.

    Returns:
        A string such as ``1970-01-01T00:00:00.000000Z``.
    """
    # The trailing "Z" declares UTC, so the conversion itself must happen in
    # UTC. A bare fromtimestamp(x) uses the machine's local zone, which both
    # mislabels the result and lets DST shifts break the even spacing of a
    # series.
    return datetime.fromtimestamp(x, tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ")


def get_azure_detection(series, gran, cust_inter, signal_name=None):
    """Run Azure Anomaly Detector batch detection over a whole series.

    Args:
        series: Sequence of ``Point`` objects to send to the service.
        gran: ``Granularity`` value forwarded as ``Request.granularity``.
        cust_inter: Value forwarded as ``Request.custom_interval``.
        signal_name: Optional label used only in the progress messages. When
            omitted, the module-level ``signal`` variable set by the script's
            main loop is used if it exists.

    Returns:
        A boolean numpy array built from ``response.is_anomaly``. If the
        request fails, the error is printed and an all-``False`` array with
        one entry per input point is returned, so a failed request cannot be
        told apart from a clean series by the return value alone.
    """
    if signal_name is None:
        # Keep working for callers that rely on the script-level loop variable.
        signal_name = globals().get('signal', '<unnamed>')

    request = Request(series=series, granularity=gran, custom_interval=cust_inter)

    print('Detecting anomalies in {} the entire time series.'.format(signal_name))

    response = None
    try:
        response = client.entire_detect(request)
    except APIErrorException as e:
        print('Error code: {}'.format(e.error.code),
              'Error message: {}'.format(e.error.message))
    except Exception as e:
        # Deliberately best-effort: report and carry on with the next signal.
        print(e)

    if response is None:
        print('No anomalies were detected in time series {}.'.format(signal_name))
        return np.zeros(len(series), dtype=bool)

    anom = np.array(response.is_anomaly)
    count = int(np.count_nonzero(anom))
    if count:
        # The old message appended an undefined name `j`, raising NameError.
        print('{} anomalies were detected in time series {}.'.format(count, signal_name))
    else:
        print('No anomalies were detected in the time series.')

    return anom

if __name__ == "__main__":

    ## settings

    data = NAB            # list of signal-name tuples to process
    data_name = NAB_NAME  # matching group labels, used as output sub-directories
    agg = 600             # aggregation window passed to time_aggregate()
    # NOTE(review): 600 (seconds, given convert_date) appears chosen to match
    # the 10-minute custom interval requested below -- keep them in sync.
    gran = Granularity.minutely
    cust_inter = 10

    url = "https://d3-ai-orion.s3.amazonaws.com/"

    for group_name, dataset in zip(data_name, data):
        # Create the output folder up front so a missing directory cannot
        # discard a detection result after the service call has been made.
        output_dir = os.path.join('Azure', group_name)
        os.makedirs(output_dir, exist_ok=True)

        # NOTE: the loop variable must remain a module-level name called
        # `signal`; get_azure_detection() reads it for its log messages.
        for signal in dataset:
            df = pd.read_csv(url + signal + '.csv')
            df = time_aggregate(df, agg)
            df['timestamp'] = df['timestamp'].apply(convert_date)
            # Scale first, then fill the gaps left by empty aggregation windows.
            df['value'] = min_max_scale(np.array(df['value']).reshape(-1, 1))
            df['value'] = impute(np.array(df['value']).reshape(-1, 1))

            series = [
                Point(timestamp=stamp, value=value)
                for stamp, value in zip(df['timestamp'], df['value'])
            ]
            all_anomaly = get_azure_detection(series, gran, cust_inter)

            with open(os.path.join(output_dir, signal + '.pkl'), 'wb') as out_file:
                pickle.dump(all_anomaly, out_file)

            time.sleep(10) # for request to not timeout
            print()

