# Detecting Early Fire Indicator Patterns in Multivariate-Time Series Based on a Multi-Sensor Node Network

## Libraries

In [1]:
# Standard libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns; sns.set()   
import configparser
import os
from pathlib import Path
import argparse
import logging
import datetime
import inspect
from sklearn.utils import resample
import re
from datetime import datetime
from datetime import timedelta
import plotly.express as px
import plotly.graph_objs as go
import math

# skLearn
import sklearn
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn import preprocessing
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis    
from sklearn import metrics
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.ensemble import IsolationForest
from sklearn.svm import OneClassSVM
from sklearn.neighbors import LocalOutlierFactor
from sklearn.metrics import multilabel_confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer, confusion_matrix
from sklearn.neighbors import NearestNeighbors
from sklearn import tree
from sklearn.neural_network import MLPClassifier
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import xgboost as xgb
from sklearn.preprocessing import FunctionTransformer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import pickle

#Sktime
from sktime.classification.kernel_based import RocketClassifier
from sktime.datatypes import check_raise
from sktime.datatypes import mtype
from sktime.datatypes import check_is_mtype
from sktime.transformations.panel.padder import PaddingTransformer
from sktime.transformations.series.summarize import SummaryTransformer
from sktime.datatypes import convert_to
from sktime.datatypes import convert
from sktime.transformations.panel.rocket import MiniRocketMultivariate

# Additional
import matplotlib.dates as mdates
import joblib
import time # to claculate the runtime of models
from pathlib import Path 
import pymannkendall as mk # Kendall tau trend package

# Internal Packages
from analyse_df import analyse_df
from rename_columns import rename_columns
import plot_settings

# SHAP Explanation
import shap

Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)


In [2]:
# Resolve the working directory of the notebook and the 'data' folder below it
current_dir = os.getcwd()
data_path = os.path.join(current_dir, "data")

In [3]:
# Folder below the working directory that receives exported files
directory_export = os.path.join(current_dir, 'export')

# Create the folder if it is missing. exist_ok=True turns the call into a
# no-op when the folder already exists and removes the race between a
# separate exists() check and the creation.
os.makedirs(directory_export, exist_ok=True)

## Variables

In [4]:
# Define variables (notation used in this notebook)

# X_t # multivariate time series including all sensor node positions
# X_t_j # multivariate time series for one sensor node position j
# X_t_j_m # univariate time series for one sensor node position j and one measurement m

# I_w # non-overlapping interval with interval length w
w = 15 # interval length w (unit is not stated here — TODO confirm)
l = 10 # length of one subsequence
# S_l # non-overlapping subsequence within interval I with subsequence length l (should be the smallest possible interval, 10 s)

In [5]:
# Define Features

# Feature columns without the '_Trend' suffix
FEATURES_STATIC = [
    'CO_Room',
    'H2_Room',
    'VOC_Room_RAW',
    'PM05_Room',
    'PM10_Room',
]

# '_Trend' counterparts of the static feature columns
# (currently not part of FEATURES, see below)
FEATURES_TREND = [
    'CO_Room_Trend',
    'H2_Room_Trend',
    'VOC_Room_RAW_Trend',
    'PM05_Room_Trend',
    'PM10_Room_Trend',
]

# Combine to one FEATURES list.
# A new list is built on purpose: a plain `FEATURES = FEATURES_STATIC` would
# make both names point to the same object, so appending to FEATURES later
# would silently change FEATURES_STATIC as well.
# Append `+ FEATURES_TREND` to also use the trend features.
FEATURES = list(FEATURES_STATIC)  # + FEATURES_TREND

## Data Preparation

In [6]:
# Load the dataset from the data folder.
# The first CSV column is used as the DataFrame index.
file_name = 'elba_dataset_pp_3.csv'
data_file_path = os.path.join(data_path, file_name)
X_t = pd.read_csv(filepath_or_buffer=data_file_path, index_col=0)

In [7]:
# Cast the 'Date' column to int64; all other columns keep their dtype
X_t = X_t.astype({'Date': 'int64'})

In [8]:
# Replace the string name of each sensor node by its number.
# The number is read from the last two characters of the name —
# adjust the slice if the names do not end in a two-character number!
X_t['Sensor_ID'] = X_t['Sensor_ID'].str.slice(start=-2).astype(int)

In [9]:
# Remove the label and motion columns that are not relevant here
columns_to_drop = [
    'scenario_label',
    'progress_label',
    'anomaly_label',
    'ternary_label',
    'Motion_Room',
    'Motion_Room_Trend',
]
X_t = X_t.drop(columns=columns_to_drop)

In [10]:
# Rename the 'Date' column to 'timepoints'
X_t = X_t.rename({'Date': 'timepoints'}, axis='columns')

In [11]:
# Transform X_t in mtype="pd-multiindex" format for sktime
# doc.: https://github.com/sktime/sktime/blob/main/examples/AA_datatypes_and_datasets.ipynb

# Index the rows by (interval, sensor node, original timestamp).
# Plain reassignment instead of inplace=True keeps the data flow explicit.
X_t = X_t.set_index(['Interval_label', 'Sensor_ID', 'timepoints'])

# Collapse rows that share the same index triple by summing them; the result is sorted by index.
# NOTE(review): the sum is applied to every column, including 'fire_label' —
# duplicated rows would therefore change readings and label. Confirm that
# duplicates cannot occur in the input.
X_t = X_t.groupby(level=[0, 1, 2], sort=True).sum()

# Build a relative time axis per (interval, sensor node): 0, 10, 20, ...
# (vectorised multiplication instead of a row-wise apply/lambda)
X_t['timepoints'] = X_t.groupby(['Interval_label', 'Sensor_ID']).cumcount() * 10

# Append the relative time axis as innermost index level and remove the
# original timestamp level (position 2)
X_t = X_t.set_index('timepoints', append=True)
X_t = X_t.droplevel(2)

# Filter feature columns based on FEATURES
# data_columns = [col for col in X_t.columns if col != 'fire_label'] # Backup
data_columns = FEATURES

# Pivot the DataFrame to have Sensor_ID levels as columns
X_t_pivot = X_t.pivot_table(index=['Interval_label', 'timepoints'], columns='Sensor_ID', values=data_columns)

# Flatten the column MultiIndex to "<feature>_<sensor id>"
X_t_pivot.columns = [f"{col[0]}_{col[1]}" for col in X_t_pivot.columns]

# Look up one label per interval: keep the first row of every Interval_label
# and reduce it to a frame indexed by Interval_label with only 'fire_label'
label_column_df = (
    X_t[['fire_label']]
    .reset_index()
    .drop_duplicates(subset=['Interval_label'])
    .set_index('Interval_label')
    .drop(columns=['Sensor_ID', 'timepoints'])
)

# Add label via list
# Interval label of every row of the pivoted frame
list_interval_indexes = list(X_t_pivot.index.get_level_values(0))

# Create a dictionary {Interval_label: fire_label} for quick lookup
dict_indexes = label_column_df['fire_label'].to_dict()

# Map every row's interval label to the fire label of that interval
fire_labels_list = [dict_indexes[idx] for idx in list_interval_indexes]

# Add label list to df
X_t_pivot['fire_label'] = fire_labels_list

In [12]:
# Sensor node IDs 8 to 16 (inclusive) as strings
list_sensornodes = [str(node_id) for node_id in range(8, 17)]

In [13]:
# Build, train and export one ROCKET classifier for every sensor node

# Loop-invariant settings (hoisted out of the loop)
split_interval = 1300  # Interval_label <= split_interval -> train set, otherwise hold-out (test) set
RANDOM_STATE = 42      # fixed seed so that the exported models are reproducible across runs

for sensornode in list_sensornodes:

    # Select the columns whose part after the last "_" equals the current
    # sensor node, plus the label column
    locked_columns = [col for col in X_t_pivot.columns if col.split("_")[-1] == sensornode] + ["fire_label"]

    # Create a new DataFrame with only the locked columns
    data_temp = X_t_pivot[locked_columns]

    # Rename the columns by cutting everything after the last "_"
    # (removes the sensor node number). Note that this also turns
    # 'fire_label' into 'fire', which is the label name used below.
    data_temp.columns = [col.rsplit('_', 1)[0] if '_' in col else col for col in data_temp.columns]

    # Derive train and hold-out (test) set by interval label
    interval_labels = data_temp.index.get_level_values('Interval_label')
    df_train = data_temp.loc[interval_labels <= split_interval]
    df_test = data_temp.loc[interval_labels > split_interval]

    # Derive X_train, X_test etc.
    feature_columns = data_temp.columns.difference(['fire'])
    X_train = df_train[feature_columns]
    X_test = df_test[feature_columns]
    # One label per interval: the first 'fire' value of each Interval_label
    y_train = df_train['fire'].groupby('Interval_label').first().to_numpy()
    y_test = df_test['fire'].groupby('Interval_label').first().to_numpy()

    # Scaling: static for train and test set, not per subsequence
    scaler = MinMaxScaler()
    data_cols = X_train.columns
    # Fit the scaler on the training data only ...
    X_train_scaled = X_train.copy()
    X_train_scaled[data_cols] = scaler.fit_transform(X_train_scaled[data_cols])
    # ... and apply the SAME fitted scaling to the test data. Re-fitting on the
    # test set would leak hold-out statistics and scale both sets differently.
    X_test_scaled = X_test.copy()
    X_test_scaled[data_cols] = scaler.transform(X_test_scaled[data_cols])

    # Build model. It is trained on the unscaled X_train; the scaled copies
    # above are not used by this fit (swap in X_train_scaled to change that).
    clf = RocketClassifier(num_kernels=500, random_state=RANDOM_STATE)
    clf.fit(X_train, y_train)  # X_train_scaled

    # Save model as pickle in the export directory
    file_name = f'model_rocket_{sensornode}.pkl'
    data_file_path = os.path.join(directory_export, file_name)

    with open(data_file_path, 'wb') as file:
        pickle.dump(clf, file)