In [None]:
# Visualize anomalies detected by all versions of RePad2 and MIURA
# Plots true anomalies in red on top of the main dataset plot
# Top plot is the main dataset, and the following subplots are the detected anomalies
# Detections are shown as binary series, where 1 indicates an anomaly

import pandas as pd
import matplotlib.pyplot as plt
from influxdb_client import InfluxDBClient
from pandas.plotting import register_matplotlib_converters

register_matplotlib_converters()

# --- InfluxDB connection settings ---
influxdb_url = "http://localhost:8086"
token = "random_token"
org = "ORG"

# --- Buckets ---
bucket = "system_state"
bucket_ano = "anomalies"  # Bucket containing the anomaly markers

# --- Measurements ---
dataset = "TEMP_org"  # Name of the measurements in the bucket
dataset_name = "TEMP"  # Lower-cased and embedded in the detection measurement names below
dataset_labels = "labels_TEMP_seq"  # Name of the measurements in the bucket containing the labels
dataset_results = "repad2-temp-result-minmax"  # Could be any of the detection results (made by EVAL-versions)

# Start of the time range used by the queries
start_time = "1997-04-10T00:00:00Z"

# Detection measurements, one per (detector, scaler) combination.
# The raw dataset measurement (f"{dataset}") is deliberately not part of this list.
measurements = [
    f"{detector}-{dataset_name.lower()}-detection-{scaler}"
    for detector in ("repad2", "miura")
    for scaler in ("minmax", "robust", "standard")
]

# Human-readable legend names, in the same order as `measurements`
names = [
    f"{detector}-{scaler}"
    for detector in ("RePad2", "MIURA")
    for scaler in ("MinMax", "Robust", "Standard")
]

# Instantiate the InfluxDB client
client = InfluxDBClient(url=influxdb_url, token=token, org=org)
query_api = client.query_api()

# Create a figure with one row for the main dataset plus one row per detection measurement
fig, axs = plt.subplots(
    len(measurements) + 1,
    1,
    figsize=(12, 5),
    sharex=True,
    gridspec_kw={'wspace': 0.15, 'hspace': 0.15},
    dpi=300,
)

# Main dataset plot (top row)
dataset_query = f'''
from(bucket: "{bucket}")
    |> range(start: time(v: "{start_time}"))
    |> filter(fn: (r) => r["_measurement"] == "{dataset}")
    |> pivot(rowKey:["_time"], columnKey: ["_field"], valueColumn: "_value")
'''
dataset_df = query_api.query_data_frame(query=dataset_query)
axs[0].plot(dataset_df['_time'], dataset_df['value'], color='black', label=f'{dataset}')

# One binary step plot per detection measurement (rows 1..n)
for ax, measurement, name in zip(axs[1:], measurements, names):
    detection_query = f'''
        from(bucket: "{bucket_ano}")
            |> range(start: time(v: "{start_time}"))
            |> filter(fn: (r) => r["_measurement"] == "{measurement}")
            |> pivot(rowKey:["_time"], columnKey: ["_field"], valueColumn: "_value")
        '''

    marker_df = query_api.query_data_frame(query=detection_query)

    # A detector without any detections yields an empty frame that has no
    # '_time' column; treat that as "no anomalies" instead of raising KeyError.
    marker_times = [] if marker_df.empty else marker_df['_time']

    # 1 where the dataset timestamp was flagged, 0 elsewhere. Using isin()
    # rather than label-based assignment means marker timestamps that are
    # absent from the dataset index are ignored instead of raising KeyError.
    is_anomaly = dataset_df['_time'].isin(marker_times)
    binary_series = pd.Series(is_anomaly.astype(int).to_numpy(), index=dataset_df['_time'])
    ax.step(binary_series.index, binary_series, where='post', label=name, color='black')

# Create a frameless, tick-less overlay axis for drawing the labels.
# Its x-range is copied from the top subplot and its y-range is fixed to [0, 1].
overlay_ax = fig.add_subplot(111, label="overlay", frame_on=False)
overlay_ax.set_xticks([])
overlay_ax.set_yticks([])
overlay_ax.set_xlim(axs[0].get_xlim())
overlay_ax.set_ylim(0, 1)

# Labels query (true anomalies)
labels_query = f'''
from(bucket: "{bucket}")
    |> range(start: time(v: "{start_time}"))
    |> filter(fn: (r) => r["_measurement"] == "{dataset_labels}")
    |> pivot(rowKey:["_time"], columnKey: ["_field"], valueColumn: "_value")
'''
labels_df = query_api.query_data_frame(query=labels_query)

# Result query for grouping the anomalies
result_query = f'''
from(bucket: "{bucket_ano}")
    |> range(start: time(v: "{start_time}"))
    |> filter(fn: (r) => r["_measurement"] == "{dataset_results}")
    |> pivot(rowKey:["_time"], columnKey: ["_field"], valueColumn: "_value")
'''
result_df = query_api.query_data_frame(query=result_query)

# No further queries are issued in this cell, so release the client's resources.
client.close()

if labels_df.empty or result_df.empty:
    # An empty query result has no '_time' column, so merging on it (or reading
    # 'T' afterwards) would raise KeyError; there is simply nothing to draw.
    merged_labels = pd.DataFrame(columns=['_time', 'T', 'group'])
else:
    # Keep only the result rows whose timestamp also appears in the labels
    merged_labels = pd.merge(result_df, labels_df, on='_time', how='inner')
    # Create groups based on neighbouring T-values: a new group starts
    # whenever T does not increase by exactly 1 from the previous row.
    merged_labels['group'] = (merged_labels['T'].diff() != 1).cumsum()

# Draw each group as one red span on the overlay axis
if not merged_labels.empty:
    for _, group_df in merged_labels.groupby('group'):
        start = group_df['_time'].iloc[0]
        end = group_df['_time'].iloc[-1]
        overlay_ax.axvspan(start, end, color='red', alpha=0.45, lw=2)

# Configure plot settings: every subplot gets a grid and a text-only legend
for axis in axs:
    legend = axis.legend(loc='lower right', fontsize='small', handlelength=0, handletextpad=0)
    # Hide the line sample so that only the series name remains in the legend
    legend.legend_handles[0].set_visible(False)
    axis.grid(True)

# Only the bottom subplot carries the x-axis label
axs[-1].set_xlabel('Timestamp')

# NOTE(review): plt.grid() acts on pyplot's current axes, which is presumably
# the overlay axis created last rather than one of the subplots - confirm.
plt.grid(True)

# Tighten the layout so that labels and legends do not overlap
plt.tight_layout()

# Save the plot as a PDF file named after the labels measurement
plt.savefig(dataset_labels + '.pdf', format='pdf', bbox_inches='tight')

plt.show()


In [None]:
# An older, alternative version of the plotting script above
import pandas as pd
import matplotlib.pyplot as plt
from influxdb_client import InfluxDBClient
from pandas.plotting import register_matplotlib_converters

register_matplotlib_converters()

# --- InfluxDB connection settings ---
influxdb_url = "http://localhost:8086"
token = "random_token"
org = "ORG"

# --- Buckets and measurements ---
bucket = "system_state"
bucket_ano = "anomalies"
dataset = "CC2"
dataset_labels = "labels_CC2"

# Start of the time range used by the queries
start_time = "1997-04-10T00:00:00Z"

# Detection measurements, one per (detector, scaler) combination
measurements = [
    f"{detector}-cc2-detection-{scaler}"
    for detector in ("repad2", "miura")
    for scaler in ("minmax", "robust", "standard")
]

# Instantiate the InfluxDB client
client = InfluxDBClient(url=influxdb_url, token=token, org=org)
query_api = client.query_api()

# Main dataset query
dataset_query = f'''
from(bucket: "{bucket}")
    |> range(start: time(v: "{start_time}"))
    |> filter(fn: (r) => r["_measurement"] == "{dataset}")
    |> pivot(rowKey:["_time"], columnKey: ["_field"], valueColumn: "_value")
'''
dataset_df = query_api.query_data_frame(query=dataset_query)

# Create a figure with one row for the main dataset plus one row per detection measurement
fig, axs = plt.subplots(len(measurements) + 1, 1, figsize=(12, 8), sharex=True)
axs[0].plot(dataset_df['_time'], dataset_df['value'], label='Value', color='black')

# Overlay subplot for labels, placed behind the other axes (zorder=-1)
#fig.subplots_adjust(hspace=0)  # Adjust horizontal space to minimize gaps between subplots
overlay_ax = fig.add_subplot(111, label="overlay", zorder=-1)
overlay_ax.set_frame_on(False)  # Turn off frame to not cover other plots
overlay_ax.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)

# Plot each measurement as a binary series (rows 1..n)
for i, measurement in enumerate(measurements, start=1):
    detection_query = f'''
    from(bucket: "{bucket_ano}")
        |> range(start: time(v: "{start_time}"))
        |> filter(fn: (r) => r["_measurement"] == "{measurement}")
        |> pivot(rowKey:["_time"], columnKey: ["_field"], valueColumn: "_value")
    '''
    marker_df = query_api.query_data_frame(query=detection_query)

    # A detector without any detections yields an empty frame that has no
    # '_time' column; treat that as "no anomalies" instead of raising KeyError.
    marker_times = [] if marker_df.empty else marker_df['_time']

    # 1 where the dataset timestamp was flagged, 0 elsewhere. Using isin()
    # rather than label-based assignment means marker timestamps that are
    # absent from the dataset index are ignored instead of raising KeyError.
    is_anomaly = dataset_df['_time'].isin(marker_times)
    binary_series = pd.Series(is_anomaly.astype(int).to_numpy(), index=dataset_df['_time'])
    axs[i].step(binary_series.index, binary_series, where='post', label=measurement, color='black')

# Labels query and draw lines across all subplots
labels_query = f'''
from(bucket: "{bucket}")
    |> range(start: time(v: "{start_time}"))
    |> filter(fn: (r) => r["_measurement"] == "{dataset_labels}")
    |> pivot(rowKey:["_time"], columnKey: ["_field"], valueColumn: "_value")
'''
labels_df = query_api.query_data_frame(query=labels_query)

# No further queries are issued in this cell, so release the client's resources.
client.close()

# Plot labels as dotted red vertical lines across all subplots
if not labels_df.empty:
    for ax in axs:
        for position, timestamp in enumerate(labels_df['_time']):
            ax.axvline(
                x=timestamp,
                color='red',
                linestyle=':',
                linewidth=2,
                clip_on=False,
                # Only the first line on each axis carries the legend entry.
                # This avoids re-collecting the axis' legend labels for every
                # single timestamp, which made the loop quadratic.
                label='Label' if position == 0 else "",
            )

# Configure plot settings: legend and grid on every subplot
for axis in axs:
    axis.legend()
    axis.grid(True)

# NOTE(review): the remaining settings go to pyplot's current axes, which is
# presumably the overlay axis created last rather than one of the subplots.
# The axis labels then act as labels shared by the whole figure, while the
# tick rotation, margins and grid may never reach the data subplots - confirm.
current_ax = plt.gca()
current_ax.set_xlabel('Timestamp')
current_ax.set_ylabel('Value')
plt.xticks(rotation=45, fontsize=8)
current_ax.margins(x=0)
current_ax.grid(True)

plt.tight_layout()
plt.show()
