In [None]:
from influxdb_client import InfluxDBClient
import pandas as pd

# --- Connection settings for the InfluxDB instance ---
influxdb_url = "http://localhost:8086"
token = "random_token"
org = "ORG"

# --- Data locations ---
# `bucket` is queried for the label measurement; `bucket_ano` is queried for
# both the dataset and the detection measurements.
bucket = "system_state"
bucket_ano = "anomalies"
labels = "labels_CC2-seq"
dataset = "base_result-CC2-MinMax"
detections = "base_detection-CC2-MinMax"

# --- Evaluation parameters ---
# A detection counts as a hit when its T lies within +/- margin of a label's T.
margin = 7
# Lower bound of the time range used by every query.
start_time = "1997-04-10T00:00:00Z"

# --- Client handles ---
client = InfluxDBClient(
    url=influxdb_url,
    token=token,
    org=org,
)
query_api = client.query_api()

# Construct and fetch data
def _flux_query(source_bucket, measurement):
    """Return the Flux query that loads one measurement as wide rows.

    The query reads ``source_bucket`` from ``start_time`` onwards, keeps only
    records of ``measurement`` and pivots the fields into columns keyed by
    ``_time``.
    """
    return f'''
        from(bucket: "{source_bucket}")
        |> range(start: time(v: "{start_time}"))
        |> filter(fn: (r) => r["_measurement"] == "{measurement}")
        |> pivot(rowKey:["_time"], columnKey: ["_field"], valueColumn: "_value")
    '''


def _as_single_frame(frames):
    """Collapse a ``query_data_frame`` result into one DataFrame.

    ``query_data_frame`` is documented to return either a DataFrame or a list
    of DataFrames; a list would break the ``.empty`` check and the column
    access performed on the results further down.
    """
    if isinstance(frames, list):
        # ignore_index keeps row labels unique after stacking the tables.
        return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    return frames


queries = {
    "labels": _flux_query(bucket, labels),
    "dataset": _flux_query(bucket_ano, dataset),
    "detections": _flux_query(bucket_ano, detections),
}

# Query data and store in dictionary
try:
    results = {
        key: _as_single_frame(query_api.query_data_frame(query=flux))
        for key, flux in queries.items()
    }
finally:
    # All data is fetched in one go, so release the HTTP resources right away.
    # NOTE(review): re-create the client if later code needs to query again.
    client.close()

# Score the detections against the labelled anomaly groups and print
# precision / recall / F1.
# Check if any dataset is empty
if any(df.empty for df in results.values()):
    print("One or more datasets are empty. Check data and queries.")
else:
    # Prepare datasets: give every frame a common 'timestamp' join key.
    for df in results.values():
        df['timestamp'] = pd.to_datetime(df['_time'])

    # Align T values with labels and sort by T (Time).
    # The inner join keeps only timestamps present in both frames.
    # NOTE(review): assumes 'T' exists only in the dataset frame; if the label
    # frame also had it, merge would suffix both columns -- TODO confirm.
    full_labels = pd.merge(results['dataset'], results['labels'], on='timestamp', how='inner')
    full_labels = full_labels.sort_values(by='T')

    # Create groups based on neighbouring T-values: a new group starts
    # whenever the step from the previous T is not exactly 1.
    full_labels['group'] = (full_labels['T'].diff() != 1).cumsum()

    # Track detection and label matches
    detection_matches = {index: False for index in results['detections'].index}
    label_matches = {index: False for index in full_labels.index}

    # Detection T values as a column vector so that each group can be compared
    # against all detections in one broadcast instead of nested iterrows().
    detection_index = results['detections'].index
    detection_t = results['detections']['T'].to_numpy()[:, None]

    # For each anomaly group, find the detections within the margin of any of
    # its label rows.
    for _, group in full_labels.groupby('group'):
        group_t = group['T'].to_numpy()
        # within[i, j]: detection i lies in [T_j - margin, T_j + margin].
        within = (detection_t >= group_t - margin) & (detection_t <= group_t + margin)
        hit_mask = within.any(axis=1)
        if hit_mask.any():
            for idx in detection_index[hit_mask]:
                detection_matches[idx] = True
            # One hit is enough to mark every label row of the group as found.
            label_matches.update(dict.fromkeys(group.index, True))

    # Calculate true positives, false positives, and false negatives.
    # NOTE(review): TP/FP are counted in detections while FN is counted in
    # label rows, so recall mixes the two units -- confirm this is intended.
    true_positives = sum(detection_matches.values())
    false_positives = sum(not match for match in detection_matches.values())
    false_negatives = sum(not match for match in label_matches.values())

    # Metrics calculations (each falls back to 0 when its denominator is 0)
    precision = true_positives / (true_positives + false_positives) if true_positives + false_positives > 0 else 0
    recall = true_positives / (true_positives + false_negatives) if true_positives + false_negatives > 0 else 0
    f1_score = 2 * (precision * recall) / (precision + recall) if precision + recall > 0 else 0

    print(f"True Positives: {true_positives}, False Positives: {false_positives}, False Negatives: {false_negatives}")
    print(f"Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1_score:.4f}")
