In [1]:
# Import libaries

# Standard libaries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns; sns.set()   
import configparser
import os
from pathlib import Path
import argparse
import logging
import datetime
import inspect
from sklearn.utils import resample
import re
from datetime import datetime
from datetime import timedelta
import plotly.express as px
import plotly.graph_objs as go
import math
import pickle

# Additional
import matplotlib.dates as mdates
import joblib
import time # to claculate the runtime of models
from pathlib import Path 
import pymannkendall as mk # Kendall tau trend package

# Internal Packages
from analyse_df import analyse_df
from rename_columns import rename_columns
import plot_settings
from glob import glob

from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score

In [2]:
# Get the current directory
current_dir = os.getcwd()

# Define the paths to the 'data' and 'export' directories
data_path = os.path.join(current_dir, 'data')
export_path = os.path.join(current_dir, 'export')

# Define path of single-node-model files
single_node_path = os.path.join(export_path, 'single_node', 'unscaled', '90_seconds_intervals')

# Define path of network model files
network_path = os.path.join(export_path, 'network')

# Get a list of all CSV files in the single-node-model directory
file_path_test_single_node = glob(os.path.join(single_node_path, 'df_test_elba_rocket_*'))

# Get a list of all CSV files in the network model directory
file_path_test_network = glob(os.path.join(network_path, 'df_test_elba_rocket_network*'))

In [3]:
# Import single-node files
df_list = []
list_sensornodes= []

# Import data for single node model
for file in file_path_test_single_node:
    # Extract sensornode and scaler name from the filename
    filename = os.path.splitext(os.path.basename(file))[0]
    sensornode_name, scaler_name = filename.split('_')[-2:]
    
    # Read the CSV file into a DataFrame
    df = pd.read_csv(file, index_col=0)
    
    # Keep only 'fire_label' and 'model_prediction' columns
    df = df[['fire_label', 'model_prediction']]
    
    # Group by the index and aggregate by keeping the most frequent value in each group
    df_grouped = df.groupby(df.index).agg(lambda x: x.mode()[0])
    print(len(df_grouped))
    
    # Append the grouped DataFrame to the list
    df_list.append(df_grouped)
    
    list_sensornodes.append(sensornode_name)

# Concatenate all DataFrames in the list into a single DataFrame
df_prediction_single_node_model = pd.concat(df_list, ignore_index=True)

1761
1761
1761
1761
1761
1761
1761
1761
1761


In [4]:
# Import network files
df_list = []

# Import data for single node model
for file in file_path_test_network:
    # Extract sensornode and scaler name from the filename
    #filename = os.path.splitext(os.path.basename(file))[0]
    #sensornode_name, scaler_name = filename.split('_')[-2:]
    
    # Read the CSV file into a DataFrame
    df = pd.read_csv(file, index_col=0)
    
    # Keep only 'fire_label' and 'model_prediction' columns
    df = df[['fire_label', 'model_prediction']]
    
    # Group by the index and aggregate by keeping the most frequent value in each group
    df_grouped = df.groupby(df.index).agg(lambda x: x.mode()[0])
    print(len(df_grouped))
    
    # Append the grouped DataFrame to the list
    df_list.append(df_grouped)

# Concatenate all DataFrames in the list into a single DataFrame
df_prediction_network_model = pd.concat(df_list, ignore_index=True)

1761


In [5]:
df_prediction_network_model

Unnamed: 0,fire_label,model_prediction
0,NoFire,NoFire
1,NoFire,NoFire
2,NoFire,NoFire
3,NoFire,NoFire
4,NoFire,NoFire
...,...,...
1756,NoFire,NoFire
1757,NoFire,NoFire
1758,NoFire,NoFire
1759,NoFire,NoFire


## Single node model

In [6]:
# derive confusion matrix for single node model (mean over all node positions)

# Extract the ground truth and predictions
y_true = df_prediction_single_node_model['fire_label']
y_pred = df_prediction_single_node_model['model_prediction']

# Generate the confusion matrix
conf_matrix = confusion_matrix(y_true, y_pred)

In [7]:
# calculate scores for single-node-model
# FPR, recall (TPR), precision (PPV), F1-score + confusion matrix für beide models (TP, FP, TN, FN)

# Extract the values from the confusion matrix
TN, FP, FN, TP = conf_matrix.ravel()

# devide by the number of sensor nodes to get the counts for equal number of intervals then the network model
# Divide the values by the defined number
TN /= len(list_sensornodes)
FP /= len(list_sensornodes)
FN /= len(list_sensornodes)
TP /= len(list_sensornodes)

# Calculate metrics manually
FPR = FP / (FP + TN)  # False Positive Rate
TPR = TP / (TP + FN)  # True Positive Rate (Recall)
PPV = TP / (TP + FP)  # Precision (Positive Predictive Value)
F1 = 2 * PPV * TPR / (PPV + TPR)  # F1-score

# Create a DataFrame to store the metrics
metrics_df_single_node = pd.DataFrame({
    'Metric': ['True Positives (TP)', 'False Positives (FP)', 'True Negatives (TN)', 'False Negatives (FN)', 
               'FPR', 'Recall (TPR)', 'Precision (PPV)', 'F1-score'],
    'Value': [TP, FP, TN, FN, FPR, TPR, PPV, F1]
})

# Convert the DataFrame to a LaTeX table
latex_table = metrics_df_single_node.to_latex(index=False, float_format="%.2f")
print(latex_table)

\begin{tabular}{lr}
\toprule
              Metric &   Value \\
\midrule
 True Positives (TP) & 1645.11 \\
False Positives (FP) &    2.44 \\
 True Negatives (TN) &   94.56 \\
False Negatives (FN) &   18.89 \\
                 FPR &    0.03 \\
        Recall (TPR) &    0.99 \\
     Precision (PPV) &    1.00 \\
            F1-score &    0.99 \\
\bottomrule
\end{tabular}



  latex_table = metrics_df_single_node.to_latex(index=False, float_format="%.2f")


## Network model

In [8]:
# derive confusion matrix for network model

# Extract the ground truth and predictions
y_true = df_prediction_network_model['fire_label']
y_pred = df_prediction_network_model['model_prediction']

# Generate the confusion matrix
conf_matrix = confusion_matrix(y_true, y_pred)

In [9]:
# calculate scores for network model
# FPR, recall (TPR), precision (PPV), F1-score + confusion matrix für beide models (TP, FP, TN, FN)

# Extract the values from the confusion matrix
TN, FP, FN, TP = conf_matrix.ravel()

# Calculate metrics manually
FPR = FP / (FP + TN)  # False Positive Rate
TPR = TP / (TP + FN)  # True Positive Rate (Recall)
PPV = TP / (TP + FP)  # Precision (Positive Predictive Value)
F1 = 2 * PPV * TPR / (PPV + TPR)  # F1-score

# Create a DataFrame to store the metrics
metrics_df_network = pd.DataFrame({
    'Metric': ['True Positives (TP)', 'False Positives (FP)', 'True Negatives (TN)', 'False Negatives (FN)', 
               'FPR', 'Recall (TPR)', 'Precision (PPV)', 'F1-score'],
    'Value': [TP, FP, TN, FN, FPR, TPR, PPV, F1]
})

# Convert the DataFrame to a LaTeX table
latex_table = metrics_df_network.to_latex(index=False, float_format="%.2f")
print(latex_table)

\begin{tabular}{lr}
\toprule
              Metric &   Value \\
\midrule
 True Positives (TP) & 1664.00 \\
False Positives (FP) &    1.00 \\
 True Negatives (TN) &   96.00 \\
False Negatives (FN) &    0.00 \\
                 FPR &    0.01 \\
        Recall (TPR) &    1.00 \\
     Precision (PPV) &    1.00 \\
            F1-score &    1.00 \\
\bottomrule
\end{tabular}



  latex_table = metrics_df_network.to_latex(index=False, float_format="%.2f")


In [10]:
# Set 'Metric' as index for both DataFrames
metrics_df_network.set_index('Metric', inplace=True)
metrics_df_single_node.set_index('Metric', inplace=True)

# Combine the DataFrames side by side
metrics_df_combined = pd.concat([metrics_df_network, metrics_df_single_node], axis=1)
metrics_df_combined.columns = ['Network Model', 'Single Node Model']

# Optionally, format the DataFrame for LaTeX output
latex_table = metrics_df_combined.to_latex(index=True, float_format="%.2f")

# Print the LaTeX table
print(latex_table)

\begin{tabular}{lrr}
\toprule
{} &  Network Model &  Single Node Model \\
Metric               &                &                    \\
\midrule
True Positives (TP)  &        1664.00 &            1645.11 \\
False Positives (FP) &           1.00 &               2.44 \\
True Negatives (TN)  &          96.00 &              94.56 \\
False Negatives (FN) &           0.00 &              18.89 \\
FPR                  &           0.01 &               0.03 \\
Recall (TPR)         &           1.00 &               0.99 \\
Precision (PPV)      &           1.00 &               1.00 \\
F1-score             &           1.00 &               0.99 \\
\bottomrule
\end{tabular}



  latex_table = metrics_df_combined.to_latex(index=True, float_format="%.2f")
