In [None]:
from common.data_frame_columns import TIMESTAMP
from common.data_frame_columns import PM10, PM2_5, PM1
from common.date_time_helper import convert_to_datetime
from common.endpoints_urls import endpoints_config
from data_management.data_crawler import DataManager
from data_management.labeled_data_generator import LabeledDataGenerator, DataLabel

In [None]:
# Training and test periods as [start, end] pairs of day.month.year strings.
date_strings = ['01.01.2021 00:00', '31.12.2023 23:59']
test_dates_string = ['01.01.2024 00:00', '30.04.2024 23:59']

# Parsed counterparts of the two periods above, still ordered [start, end].
training_dates = [convert_to_datetime(text) for text in date_strings]
test_dates = [convert_to_datetime(text) for text in test_dates_string]

# Data for every configured endpoint; update=False is passed through as-is
# (presumably "do not re-download" - TODO confirm against DataManager).
datas = DataManager(True).get_all_endpoints_data(endpoints_config, update=False)

column = PM10
L = LabeledDataGenerator(column)

# Labeled samples are generated from the first entry of `datas` only, over the
# test period. NOTE(review): the meaning of the trailing 50 and 60 arguments is
# defined by LabeledDataGenerator.generate_labeled_data - confirm there.
test_data = L.generate_labeled_data(datas[:1], *test_dates, 50, 60)
# Alternative data sets over the training period (currently disabled):
# single_data = L.generate_labeled_data(datas[:1], training_dates[0], training_dates[1], 40)
# multi_data = L.generate_labeled_data(datas, training_dates[0], training_dates[1], 40)

In [None]:
from collections import Counter
from detectors.z_score_detector import ZScoreDetector
import pandas as pd

# Data column handed to every detector call in this cell.
column = PM10
# Detector thresholds swept by each experiment: 1.0, 1.5, then the integers 2..12.
thresholds = [1.0, 1.5, *range(2, 13)]


def debug_print(threshold, correct_anomaly, anomaly, normal, wrong_predicted, correct_normal=None):
    """Print a one-line accuracy summary for a single detector threshold.

    Args:
        threshold: Detector threshold the counts were collected for.
        correct_anomaly: Number of anomalous samples classified correctly.
        anomaly: Total number of anomalous samples.
        normal: Total number of normal samples.
        wrong_predicted: Labels of misclassified anomalous samples. Only the
            disabled per-label breakdown at the end of this function uses it.
        correct_normal: Number of normal samples classified correctly. When
            omitted, the module-level ``correct_normal`` counter is read, so
            five-argument calls keep working.

    A percentage whose denominator is zero is printed as ``nan`` instead of
    raising ``ZeroDivisionError``.
    """
    if correct_normal is None:
        # Backward compatibility: callers that do not pass the counter rely on
        # the module-level variable of the same name.
        correct_normal = globals()['correct_normal']

    def percent(part, whole):
        # An empty class has no defined rate; report nan rather than crash.
        return round(part / whole * 100, 2) if whole else float('nan')

    print(
        f'    [Threshold: {threshold}] Anomaly: {correct_anomaly}/{anomaly} ({percent(correct_anomaly, anomaly)}) | Normal {percent(correct_normal, normal)} | {percent(correct_anomaly + correct_normal, anomaly + normal)}%')

    # counted_values = Counter(wrong_predicted)
    # # Print the counts
    # for value, count in counted_values.items():
    #     print(f"{value}: {count}")


# One entry per detector variant; each entry is a list of
# [series name, threshold, accuracy in percent] rows consumed by the plot cell.
final_results = []

# Kept from the original cell. The runs below build a fresh detector for every
# call, exactly as before, so this instance is not used by them.
zscore = ZScoreDetector()

# A sample is classified as anomalous when a detector reports more than this
# many outliers; every other sample is classified as normal. Previously a
# sample with exactly this many outliers matched neither rule and was always
# scored as a misclassification.
outlier_limit = 5

# (series name, callable running one detector variant on one labeled sample).
# `column` and `datas` are looked up when the callable runs.
detectors = [
    ('MAD_sensor_level',
     lambda frame, start, end, limit: ZScoreDetector().detect_by_mad(
         frame, column, start, end, limit)),
    ('AVG_sensor_level',
     lambda frame, start, end, limit: ZScoreDetector().detect_by_avg(
         frame, column, start, end, limit)),
    ('MAD_network_level',
     lambda frame, start, end, limit: ZScoreDetector().detect_by_mad_network_level(
         datas, frame, column, start, end, limit)),
    ('AVG_network_level',
     lambda frame, start, end, limit: ZScoreDetector().detect_by_avg_network_level(
         datas, frame, column, start, end, limit)),
]

for name, detect in detectors:
    wrong_predicted = []
    mid_results = []
    print(f'{name}:')
    for threshold in thresholds:
        # These counters stay at module level on purpose: debug_print is called
        # without correct_normal and picks it up from module scope.
        correct_anomaly = 0
        correct_normal = 0
        anomaly = 0
        normal = 0
        for dataframe, label in test_data:
            if dataframe.empty:
                continue
            start_time = dataframe[TIMESTAMP].min()
            end_time = dataframe[TIMESTAMP].max()
            outliers = detect(dataframe, start_time, end_time, threshold)
            predicted_anomaly = len(outliers) > outlier_limit
            if label.value > 0:
                anomaly += 1
                if predicted_anomaly:
                    correct_anomaly += 1
                else:
                    wrong_predicted.append(label)
            elif label.value == 0:
                normal += 1
                if not predicted_anomaly:
                    correct_normal += 1

        debug_print(threshold, correct_anomaly, anomaly, normal, wrong_predicted)
        total = anomaly + normal
        # Accuracy is undefined when no sample was evaluated; nan leaves a gap
        # in the plot instead of raising ZeroDivisionError here.
        accuracy = round((correct_anomaly + correct_normal) / total * 100, 2) if total else float('nan')
        mid_results.append([name, threshold, accuracy])

    final_results.append(mid_results)





In [None]:
from matplotlib import pyplot as plt


def draw_results(final_results, title):
    """Plot accuracy against threshold, one line per result series.

    Args:
        final_results: Iterable of series. Each series is a non-empty sequence
            of rows indexed as ``row[0]`` = series name, ``row[1]`` = threshold
            (x value) and ``row[2]`` = accuracy in percent (y value). The name
            of the first row labels the whole series.
        title: Figure title; also echoed to stdout before plotting.
    """
    print(f"Plotting {title}")
    plt.figure(figsize=(10, 8))

    for series in final_results:
        series_label = series[0][0]
        x_values = [row[1] for row in series]
        y_values = [row[2] for row in series]
        plt.plot(x_values, y_values, label=series_label)

    plt.xlabel('Threshold')
    plt.ylabel('Accuracy [%]')
    plt.legend()
    plt.title(title)
    plt.grid(True)
    # Fixed percentage axis; the upper bound sits just above 100.
    plt.ylim(0, 101)
    plt.show()


# Plot the accuracy curves collected in final_results.
draw_results(final_results=final_results, title="Accuracy (ZScore)")


In [None]:
from detectors.pseudo_periodic import PseudoPeriodicDetector
import pandas as pd

# Data column handed to every pseudo-periodic detector call in this cell.
column = PM10
# First entry of `datas`; each labeled sample is overlaid onto a copy of it.
sensor_data = datas[0]
# Detector thresholds to sweep: 1.0, 1.5, then the integers 2..12.
thresholds = [1.0, 1.5, *range(2, 13)]
# Window values 1..12 iterated by the sweeps below.
windows = list(range(1, 13))
# Exclusive upper bound of the history sweep, i.e. range(1, history_max).
history_max = 15
final_results = []

#single sensor detection


def debug_print(threshold, correct_anomaly, anomaly, normal, history, window, zscore, level, correct_normal=None):
    """Print a LaTeX table row for a configuration whose accuracy exceeds 70 %.

    The row lists ``zscore``, ``level``, ``threshold``, ``history``, ``window``
    and the accuracy, ends with the LaTeX row terminator ``\\\\`` and is
    followed by a ``\\hline`` line. Nothing is printed when the accuracy is
    70 % or lower, or when no sample was evaluated.

    Args:
        threshold: Detector threshold of the evaluated configuration.
        correct_anomaly: Number of anomalous samples classified correctly.
        anomaly: Total number of anomalous samples.
        normal: Total number of normal samples.
        history: History value of the configuration (reported only).
        window: Window value of the configuration (reported only).
        zscore: Name of the scoring variant shown in the row.
        level: Name of the detection level shown in the row.
        correct_normal: Number of normal samples classified correctly. When
            omitted, the module-level ``correct_normal`` counter is read, so
            eight-argument calls keep working.
    """
    if correct_normal is None:
        # Backward compatibility: callers that do not pass the counter rely on
        # the module-level variable of the same name.
        correct_normal = globals()['correct_normal']

    total = anomaly + normal
    if not total:
        # Accuracy is undefined without samples; avoid ZeroDivisionError.
        return

    acc = round((correct_anomaly + correct_normal) / total * 100, 2)
    if acc > 70.:
        # Four backslashes in the literal emit the two-backslash LaTeX row
        # terminator; the previous literal printed only a single backslash.
        print(f' {zscore} & {level} & {threshold} & {history} & {window} & {acc} \\\\ ')
        # Raw string: '\h' is not a valid escape sequence in a normal literal.
        print(r'\hline')



# A sample is classified as anomalous when a detector reports more than this
# many outliers; every other sample is classified as normal. Previously a
# sample with exactly this many outliers matched neither rule and was always
# scored as a misclassification.
outlier_limit = 5

# (printed heading, scoring variant, level, callable running one detector
# variant on one prepared frame). `column` and `datas` are looked up when the
# callable runs; a fresh PseudoPeriodicDetector is built for every call.
periodic_runs = [
    ('PERIODIC_MAD sensor level', "MAD", "Sensor",
     lambda frame, start, end, limit, depth: PseudoPeriodicDetector().detect_by_periodic_mad(
         frame, column, start, end,
         time_step_in_hours=4, threshold=limit, max_depth_in_days=depth)),
    ('PERIODIC_AVG sensor level', "AVG", "Sensor",
     lambda frame, start, end, limit, depth: PseudoPeriodicDetector().detect_by_periodic_avg(
         frame, column, start, end,
         time_step_in_hours=4, threshold=limit, max_depth_in_days=depth)),
    ('PERIODIC_MAD network level', "MAD", "Network",
     lambda frame, start, end, limit, depth: PseudoPeriodicDetector().detect_by_periodic_mad_network_level(
         datas, frame, column, start, end,
         time_step_in_hours=4, threshold=limit, max_depth_in_days=depth)),
    ('PERIODIC_AVG network level', "AVG", "Network",
     lambda frame, start, end, limit, depth: PseudoPeriodicDetector().detect_by_periodic_avg_network_level(
         datas, frame, column, start, end,
         time_step_in_hours=4, threshold=limit, max_depth_in_days=depth)),
]

for name, zscore, lvl, detect in periodic_runs:
    print(f"{name}:")
    # NOTE(review): `window` only ends up in the printed table row; every
    # detector call uses time_step_in_hours=4, so all window values repeat the
    # same computation. Confirm whether the window should be passed through.
    for window in windows:
        for history in range(1, history_max):
            for threshold in thresholds:
                # These counters stay at module level on purpose: debug_print
                # is called without correct_normal and picks it up from
                # module scope.
                correct_anomaly = 0
                correct_normal = 0
                anomaly = 0
                normal = 0
                for dataframe, label in test_data:
                    if dataframe.empty:
                        continue
                    start_time = dataframe[TIMESTAMP].min()
                    end_time = dataframe[TIMESTAMP].max()
                    # Overlay the labeled sample onto a copy of the reference
                    # series so sensor_data itself is never modified.
                    destroyed = sensor_data.copy()
                    destroyed.name = sensor_data.name
                    destroyed.update(dataframe)
                    outliers = detect(destroyed, start_time, end_time, threshold, history)
                    predicted_anomaly = len(outliers) > outlier_limit
                    if label.value > 0:
                        anomaly += 1
                        if predicted_anomaly:
                            correct_anomaly += 1
                    elif label.value == 0:
                        normal += 1
                        if not predicted_anomaly:
                            correct_normal += 1

                debug_print(threshold, correct_anomaly, anomaly, normal, history, window, zscore, lvl)
            # Blank line between history values, as in the original output.
            print()


