## Emergency Modeling

In [5]:
import pandas as pd
import os
from datetime import timedelta
from influxdb_client import InfluxDBClient, Point, WritePrecision
from influxdb_client.client.write_api import SYNCHRONOUS
from sklearn.preprocessing import LabelEncoder
from base.influx_utils import fetch_all_sensor_data, delete_last_x_hours
from occupancy_model import (
    prepare_data_for_occupancy_model,
    train_occupancy_model,
    map_sensor_to_room,
    calculate_times_in_each_room
)
from base.minio_utils import save_model_to_minio, load_model_from_minio

In [12]:
# Show every column and never truncate cell contents when rendering frames.
pd.set_option(
    'display.max_columns', None,
    'display.max_colwidth', None,
)

In [13]:
# Fetch raw sensor data (called with start_hours=6, interval_hours=162) and
# turn it into the frame used by the occupancy-model helpers below.
# NOTE(review): the meaning of start_hours / interval_hours is defined in
# base.influx_utils, not in this notebook — confirm 6 / 162 is the intended window.
sensor_data = fetch_all_sensor_data(start_hours=6, interval_hours=162)
sensor_data_df = prepare_data_for_occupancy_model(sensor_data)

In [16]:
# Preview the first 10 rows of the prepared sensor frame.
sensor_data_df.head(10)

Unnamed: 0,sensor,bucket,timestamp,value,type,sensor_encoded
0,kitchen_PIR,1_5_9,2025-01-03 13:42:51.271,kitchen,sensor,1
1,kitchen_PIR,1_5_9,2025-01-03 13:44:26.684,kitchen,sensor,1
2,kitchen_PIR,1_5_9,2025-01-03 13:47:28.058,kitchen,sensor,1
3,kitchen_PIR,1_5_9,2025-01-03 13:48:28.673,kitchen,sensor,1
4,kitchen_PIR,1_5_9,2025-01-03 13:49:53.355,kitchen,sensor,1
5,kitchen_PIR,1_5_9,2025-01-03 13:51:14.364,kitchen,sensor,1
6,kitchen_PIR,1_5_9,2025-01-03 13:51:36.486,kitchen,sensor,1
7,kitchen_PIR,1_5_9,2025-01-03 13:53:19.286,kitchen,sensor,1
8,kitchen_PIR,1_5_9,2025-01-03 13:53:30.340,kitchen,sensor,1
9,kitchen_PIR,1_5_9,2025-01-03 14:00:02.945,kitchen,sensor,1


In [18]:
# Collapse the prepared sensor events into per-room rows with start/end time and duration.
# Per the original author's note, door events are also excluded after aggregation.
sensor_data_df_aggregated = calculate_times_in_each_room(sensor_data_df)
sensor_data_df_aggregated.head(10)

Unnamed: 0,group_id,room,start_time,end_time,duration,duration_seconds
0,1,kitchen,2025-01-03 13:42:51.271,2025-01-03 17:46:11.561,0 days 04:03:20.290000,14600.29
1,2,bathroom,2025-01-03 17:51:25.015,2025-01-03 18:05:48.730,0 days 00:14:23.715000,863.715
2,3,kitchen,2025-01-03 18:18:39.708,2025-01-03 18:32:01.323,0 days 00:13:21.615000,801.615
3,4,bathroom,2025-01-03 18:32:05.765,2025-01-03 18:32:05.765,0 days 00:00:00,0.0
4,5,kitchen,2025-01-03 18:32:25.884,2025-01-03 18:32:25.884,0 days 00:00:00,0.0
5,6,bathroom,2025-01-03 18:32:31.137,2025-01-03 18:32:31.137,0 days 00:00:00,0.0
6,7,kitchen,2025-01-03 18:32:34.757,2025-01-03 18:35:28.531,0 days 00:02:53.774000,173.774


In [19]:
# Count the non-null values of every column per room in the aggregated frame.
sensor_data_df_aggregated.groupby("room").count()

Unnamed: 0_level_0,group_id,start_time,end_time,duration,duration_seconds
room,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
bathroom,3,3,3,3,3
kitchen,4,4,4,4,4


In [None]:
# Train the occupancy model on the prepared (non-aggregated) sensor frame and
# store the result in MinIO under the "occupancy" model type.
# NOTE(review): stats_df presumably has the same room/mean/std layout as the
# frames loaded back below — confirm against train_occupancy_model.
stats_df = train_occupancy_model(sensor_data_df)
save_model_to_minio(stats_df, "occupancy")

In [None]:
# Load two stored versions of the occupancy model.
# NOTE(review): version 1 appears to be the latest save and version 2 the
# second-to-last (see the load_model docstring of BurglaryDetector) — confirm.
stats1 = load_model_from_minio("occupancy", 1)
stats2 = load_model_from_minio("occupancy", 2)

In [None]:
# Occupancy model loaded as version 1.
stats1

Unnamed: 0,room,mean,std
0,bathroom,906.3316,2345.213762
1,kitchen,213.838955,474.832911
2,livingroom,317.616,1220.240813


In [None]:
# Second-to-last model (loaded as version 2).
stats2

Unnamed: 0,room,mean,std
0,bathroom,208.242158,421.324547
1,kitchen,636.825295,1217.544011
2,livingroom,79.945778,115.666383


## Motion Modeling

In [None]:
from motion_model import train_motion_model
from base.minio_utils import save_model_to_minio, load_model_from_minio

In [None]:
# Train two motion models that differ only in start_hours (6 weeks vs. 5 weeks,
# expressed in hours); both use a one-week interval_hours and a
# time_threshold_seconds of 1800.
# NOTE(review): parameter semantics live in motion_model.train_motion_model — confirm.
motion_model1 = train_motion_model(start_hours=24*7*6, interval_hours=24*7, time_threshold_seconds=1800)
motion_model2 = train_motion_model(start_hours=24*7*5, interval_hours=24*7, time_threshold_seconds=1800)

In [None]:
# Room-to-room transitions produced with start_hours=24*7*6.
motion_model1

Unnamed: 0,from,to,leave_time,enter_time
0,kitchen,livingroombedarea,2024-11-22 19:36:03.029,2024-11-22 19:36:13.902
1,livingroombedarea,livingroomdoor,2024-11-22 19:36:41.381,2024-11-22 19:36:52.000
2,livingroomdoor,kitchen,2024-11-22 19:36:52.000,2024-11-22 19:36:54.404
3,kitchen,bathroom,2024-11-22 19:39:30.877,2024-11-22 19:39:32.858
4,bathroom,kitchen,2024-11-22 19:39:32.858,2024-11-22 19:39:39.640
...,...,...,...,...
544,livingroomdoor,kitchen,2024-11-28 12:25:31.000,2024-11-28 12:35:55.519
545,kitchen,livingroomdoor,2024-11-28 12:40:24.627,2024-11-28 12:44:23.000
546,livingroomdoor,kitchen,2024-11-28 12:44:23.000,2024-11-28 12:48:28.413
547,kitchen,livingroomdoor,2024-11-28 12:56:54.202,2024-11-28 12:57:06.000


In [None]:
# Room-to-room transitions produced with start_hours=24*7*5.
motion_model2

Unnamed: 0,from,to,leave_time,enter_time
0,livingroomdoor,kitchen,2024-11-29 15:51:17.000,2024-11-29 16:03:53.625
1,kitchen,livingroomdoor,2024-11-29 16:06:44.833,2024-11-29 16:06:54.000
2,livingroomdoor,kitchen,2024-11-29 16:06:54.000,2024-11-29 16:07:01.214
3,kitchen,livingroomdoor,2024-11-29 16:11:49.475,2024-11-29 16:11:52.000
4,livingroomdoor,kitchen,2024-11-29 16:11:52.000,2024-11-29 16:11:56.395
...,...,...,...,...
318,bathroom,kitchen,2024-12-06 12:44:43.474,2024-12-06 12:44:43.797
319,kitchen,bathroom,2024-12-06 12:54:25.683,2024-12-06 12:55:11.787
320,bathroom,kitchen,2024-12-06 12:55:40.744,2024-12-06 12:58:44.665
321,kitchen,bathroom,2024-12-06 13:08:29.198,2024-12-06 13:10:52.828


In [None]:
# Store both motion models under the "motion" model type. The order matters:
# motion_model2 is saved first and motion_model1 last, so motion_model1 is the
# most recent save (the reload below shows it comes back as version 1).
save_model_to_minio(motion_model2, "motion")
save_model_to_minio(motion_model1, "motion")

In [None]:
# Reload both stored versions to check the versioning round-trip.
# "old"/"new" refer to save order only: version 2 (old) holds motion_model2 and
# version 1 (new) holds motion_model1, even though motion_model2 covers later dates.
old = load_model_from_minio("motion", 2)
new = load_model_from_minio("motion", 1)

In [None]:
# Version 2 of the stored motion model (saved first).
old

Unnamed: 0,from,to,leave_time,enter_time
0,livingroomdoor,kitchen,2024-11-29 15:51:17.000,2024-11-29 16:03:53.625
1,kitchen,livingroomdoor,2024-11-29 16:06:44.833,2024-11-29 16:06:54.000
2,livingroomdoor,kitchen,2024-11-29 16:06:54.000,2024-11-29 16:07:01.214
3,kitchen,livingroomdoor,2024-11-29 16:11:49.475,2024-11-29 16:11:52.000
4,livingroomdoor,kitchen,2024-11-29 16:11:52.000,2024-11-29 16:11:56.395
...,...,...,...,...
318,bathroom,kitchen,2024-12-06 12:44:43.474,2024-12-06 12:44:43.797
319,kitchen,bathroom,2024-12-06 12:54:25.683,2024-12-06 12:55:11.787
320,bathroom,kitchen,2024-12-06 12:55:40.744,2024-12-06 12:58:44.665
321,kitchen,bathroom,2024-12-06 13:08:29.198,2024-12-06 13:10:52.828


In [None]:
# Version 1 of the stored motion model (saved last).
new

Unnamed: 0,from,to,leave_time,enter_time
0,kitchen,livingroombedarea,2024-11-22 19:36:03.029,2024-11-22 19:36:13.902
1,livingroombedarea,livingroomdoor,2024-11-22 19:36:41.381,2024-11-22 19:36:52.000
2,livingroomdoor,kitchen,2024-11-22 19:36:52.000,2024-11-22 19:36:54.404
3,kitchen,bathroom,2024-11-22 19:39:30.877,2024-11-22 19:39:32.858
4,bathroom,kitchen,2024-11-22 19:39:32.858,2024-11-22 19:39:39.640
...,...,...,...,...
544,livingroomdoor,kitchen,2024-11-28 12:25:31.000,2024-11-28 12:35:55.519
545,kitchen,livingroomdoor,2024-11-28 12:40:24.627,2024-11-28 12:44:23.000
546,livingroomdoor,kitchen,2024-11-28 12:44:23.000,2024-11-28 12:48:28.413
547,kitchen,livingroomdoor,2024-11-28 12:56:54.202,2024-11-28 12:57:06.000


## Burglary Model

In [1]:
from burglary_model import train_burglary_model

In [3]:
# Train the burglary model through the packaged helper, using start_hours of
# 6 weeks, interval_hours of 5 weeks (both in hours) and a 1800 s time threshold.
# NOTE(review): whether the helper also persists the model is not visible here — confirm.
train_burglary_model(start_hours=24*7*6, interval_hours=24*7*5, time_threshold_seconds=1800)

Model training completed.


In [None]:
import base64
import json
import logging
import pickle
from datetime import datetime
from io import BytesIO, StringIO
from typing import Optional

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import IsolationForest
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

from base.minio_utils import load_model_from_minio, save_model_to_minio
from motion_model import train_motion_model

In [None]:
# Build the motion-transition frame used to train the burglary detector:
# start_hours and interval_hours are both 8 weeks (in hours), threshold 1800 s.
# NOTE(review): parameter semantics live in motion_model.train_motion_model — confirm.
motion_model = train_motion_model(start_hours = 24 * 7 * 8, interval_hours= 24 * 7 * 8, time_threshold_seconds =1800)

In [None]:
# Preview the first 10 transitions (columns: from, to, leave_time, enter_time).
motion_model.head(10)

Unnamed: 0,from,to,leave_time,enter_time
0,livingroomdoor,kitchen,2024-11-08 22:53:03,2024-11-08 22:53:47
1,kitchen,bathroom,2024-11-08 23:17:30,2024-11-08 23:17:42
2,bathroom,livingroombedarea,2024-11-08 23:23:00,2024-11-08 23:23:28
3,livingroombedarea,livingroomdoor,2024-11-08 23:23:28,2024-11-08 23:23:57
4,livingroomdoor,kitchen,2024-11-08 23:23:57,2024-11-08 23:24:02
5,kitchen,livingroombedarea,2024-11-08 23:24:20,2024-11-08 23:24:31
6,livingroombedarea,livingroomdoor,2024-11-08 23:38:19,2024-11-08 23:38:33
7,livingroomdoor,kitchen,2024-11-08 23:38:33,2024-11-08 23:38:38
8,kitchen,livingroombedarea,2024-11-08 23:39:13,2024-11-08 23:39:57
9,livingroombedarea,livingroomdoor,2024-11-09 07:10:22,2024-11-09 07:10:55


In [None]:
class BurglaryDetector:
    """
    Isolation-Forest anomaly detector over room-to-room motion transitions.

    Input frames describe transitions with the columns
    ['from', 'to', 'leave_time', 'enter_time']. The detector derives a
    transition duration plus time-of-day features, one-hot encodes the room
    names and fits/applies an IsolationForest inside a scikit-learn Pipeline.
    Trained pipelines can be stored in and restored from MinIO.
    """

    # Columns every input frame must provide.
    _REQUIRED_COLUMNS = ('from', 'to', 'leave_time', 'enter_time')
    # Columns that are one-hot encoded by the preprocessor.
    _CATEGORICAL_FEATURES = ('from', 'to')
    # Fixed label -> colour mapping so anomalies are always drawn in red.
    _LABEL_PALETTE = {'Normal': 'blue', 'Anomaly': 'red'}

    def __init__(self, contamination='auto', random_state=42, model_type: str = 'burglary'):
        """
        Initializes the BurglaryDetector with specified Isolation Forest parameters.

        Parameters:
        - contamination: 'auto' or float, the expected proportion of anomalies in the data set.
        - random_state: int, random seed for reproducibility.
        - model_type: str, identifier for the model type (used in MinIO storage).
        """
        self.contamination = contamination
        self.random_state = random_state
        self.model = None
        self.preprocessor = None
        self.feature_columns = None
        self.model_type = model_type  # e.g., 'burglary'

    def _build_preprocessor(self):
        """Creates the (unfitted) transformer that one-hot encodes the room columns."""
        return ColumnTransformer(
            transformers=[
                ('cat', OneHotEncoder(handle_unknown='ignore'), list(self._CATEGORICAL_FEATURES))
            ],
            remainder='passthrough'  # Keep the numeric engineered columns as they are
        )

    def _feature_engineering(self, df: pd.DataFrame):
        """
        Performs feature engineering on the DataFrame without modifying it.

        Parameters:
        - df: pandas DataFrame with columns ['from', 'to', 'leave_time', 'enter_time'].

        Returns:
        - df_features: pandas DataFrame with engineered features for modeling
          (datetime columns removed; duration and hour/minute/second parts added).
        - df_original: pandas DataFrame with the four input columns as given,
          plus 'duration' and 'leave_hour' for reporting/visualization.

        Raises:
        - KeyError: if one of the required columns is missing.
        """
        missing = [col for col in self._REQUIRED_COLUMNS if col not in df.columns]
        if missing:
            raise KeyError(f"Input DataFrame is missing required columns: {missing}")

        # Keep the untouched input values for reporting.
        df_original = df[list(self._REQUIRED_COLUMNS)].copy()

        leave_time = pd.to_datetime(df['leave_time'])
        enter_time = pd.to_datetime(df['enter_time'])

        # assign() returns a new frame, so the caller's DataFrame is never mutated.
        # The keyword order fixes the column order the fitted pipeline expects.
        df_features = df.assign(
            duration=(enter_time - leave_time).dt.total_seconds(),
            leave_hour=leave_time.dt.hour,
            leave_minute=leave_time.dt.minute,
            leave_second=leave_time.dt.second,
            enter_hour=enter_time.dt.hour,
            enter_minute=enter_time.dt.minute,
            enter_second=enter_time.dt.second,
        ).drop(columns=['leave_time', 'enter_time'])

        # Retain 'duration' and 'leave_hour' for visualization.
        df_original['duration'] = df_features['duration']
        df_original['leave_hour'] = df_features['leave_hour']

        return df_features, df_original

    def train(self, df: pd.DataFrame):
        """
        Trains the Isolation Forest model on the provided DataFrame.

        Parameters:
        - df: pandas DataFrame with columns ['from', 'to', 'leave_time', 'enter_time'].

        Raises:
        - ValueError: if df contains no rows.
        - KeyError: if a required column is missing.
        """
        if df.empty:
            raise ValueError("Cannot train on an empty DataFrame.")

        df_features, _ = self._feature_engineering(df)

        # The preprocessor is built once per training run; detect() must reuse
        # the fitted instance held by the pipeline instead of creating a new one.
        self.preprocessor = self._build_preprocessor()
        self.model = Pipeline(steps=[
            ('preprocessor', self.preprocessor),
            ('classifier', IsolationForest(
                n_estimators=100,
                contamination=self.contamination,
                random_state=self.random_state
            ))
        ])
        self.model.fit(df_features)

        # Store feature columns after preprocessing for reference
        self.feature_columns = self.model.named_steps['preprocessor'].get_feature_names_out()

        print("Model training completed.")

    def detect(self, new_df: pd.DataFrame):
        """
        Detects anomalies in the new motion data.

        Parameters:
        - new_df: pandas DataFrame with columns ['from', 'to', 'leave_time', 'enter_time'] representing the last hour's motions.

        Returns:
        - result_df: pandas DataFrame with original and some engineered features along with 'anomaly' and 'anomaly_label' columns.
        - is_burglary: bool indicating whether an anomaly (potential burglary) was detected.

        Raises:
        - RuntimeError: if no model has been trained or loaded yet.
        - KeyError: if a required column is missing.
        """
        if self.model is None:
            raise RuntimeError("The model has not been trained yet. Call the train() method first.")

        df_features, df_original = self._feature_engineering(new_df)

        if df_features.empty:
            # No motion in the window: nothing to score, so nothing is anomalous.
            df_original = df_original.assign(
                anomaly=pd.Series(dtype='int64'),
                anomaly_label=pd.Series(dtype='object'),
            )
            is_burglary = False
        else:
            # IsolationForest yields 1 for inliers and -1 for outliers.
            predictions = self.model.predict(df_features)
            df_original['anomaly'] = predictions
            df_original['anomaly_label'] = df_original['anomaly'].map({1: 'Normal', -1: 'Anomaly'})
            # Plain bool (not numpy.bool_) so the flag serializes and compares cleanly.
            is_burglary = bool((predictions == -1).any())

        print(f"Anomaly detected: {'Yes' if is_burglary else 'No'}")
        return df_original, is_burglary

    def visualize_anomalies(self, df_processed: pd.DataFrame):
        """
        Visualizes anomalies using a scatter plot of leave hour against duration.

        Parameters:
        - df_processed: pandas DataFrame after prediction with 'anomaly_label' column.

        Raises:
        - ValueError: if 'leave_hour', 'duration' or 'anomaly_label' is missing.
        """
        # Check if required columns are present
        if not {'leave_hour', 'duration', 'anomaly_label'}.issubset(df_processed.columns):
            raise ValueError("The DataFrame must contain 'leave_hour', 'duration', and 'anomaly_label' columns for visualization.")

        fig, ax = plt.subplots(figsize=(10, 6))
        # A dict palette pins the colour per label; a positional list would
        # colour by order of appearance and could draw anomalies in blue.
        sns.scatterplot(
            data=df_processed,
            x='leave_hour',
            y='duration',
            hue='anomaly_label',
            palette=self._LABEL_PALETTE,
            ax=ax,
        )
        ax.set_title('Anomaly Detection based on Leave Hour and Duration')
        ax.set_xlabel('Leave Hour')
        ax.set_ylabel('Duration (seconds)')
        ax.legend(title='Status')
        plt.show()

    def save_model(self):
        """
        Saves the trained model to MinIO using the provided save_model_to_minio function.
        The model is serialized using pickle and encoded in base64 to be stored as a JSON-compatible string.

        Raises:
        - RuntimeError: if no model has been trained or loaded yet.
        """
        if self.model is None:
            raise RuntimeError("No model to save. Train the model before saving.")

        # pickle -> bytes -> base64 text, so the model fits into a DataFrame cell.
        serialized_model = pickle.dumps(self.model)
        encoded_model = base64.b64encode(serialized_model).decode('utf-8')

        model_df = pd.DataFrame({
            'model_type': [self.model_type],
            'model_data': [encoded_model]
        })

        save_model_to_minio(model_df, self.model_type)

        print("Model saved to MinIO successfully.")

    def load_model(self, version: int = 1):
        """
        Loads the trained model from MinIO using the provided load_model_from_minio function.
        The model is deserialized from a base64-encoded string.

        Parameters:
        - version: An integer specifying which version to load (1 for latest, 2 for second-to-last).
                   Defaults to 1.

        Raises:
        - RuntimeError: if MinIO returns nothing for the requested version.
        """
        model_df = load_model_from_minio(self.model_type, version=version)

        if model_df is None or model_df.empty:
            raise RuntimeError(f"Failed to load model version {version} from MinIO.")

        encoded_model = model_df['model_data'].iloc[0]
        serialized_model = base64.b64decode(encoded_model.encode('utf-8'))

        # SECURITY: unpickling executes arbitrary code. Only load from a MinIO
        # bucket whose writers are trusted.
        self.model = pickle.loads(serialized_model)

        # Restore the bookkeeping attributes that train() sets, so a loaded
        # detector looks the same as a freshly trained one.
        named_steps = getattr(self.model, 'named_steps', None)
        if named_steps is not None and 'preprocessor' in named_steps:
            self.preprocessor = named_steps['preprocessor']
            self.feature_columns = self.preprocessor.get_feature_names_out()

        print(f"Model version {version} loaded from MinIO successfully.")

In [None]:
# Training data: the motion-transition frame built above. It must provide the
# columns ['from', 'to', 'leave_time', 'enter_time'].
df = motion_model
# Initialize the detector
detector = BurglaryDetector(contamination=0.01, model_type='burglary')  # Adjust contamination as needed
# Train the model
detector.train(df)
# Save the trained model to MinIO. Saving is best-effort: a storage failure is
# logged but does not discard the in-memory model.
try:
    detector.save_model()
except Exception as e:
    # The original referenced an undefined `base_logger`, which raised a
    # NameError inside the handler; use the standard logging module instead.
    logging.getLogger(__name__).error("Error saving model: %s", e)

Model training completed.
Model saved to MinIO successfully.


In [None]:
# Start from a fresh detector so the next cell exercises the model stored in
# MinIO rather than the one still in memory from training.
detector = BurglaryDetector(contamination=0.01, model_type='burglary')  # Adjust contamination as needed
# Load the trained model from MinIO
try:
    detector.load_model(version=1)  # Load the latest model
except Exception as e:
    # The original referenced an undefined `base_logger`, which raised a
    # NameError inside the handler; use the standard logging module instead.
    logging.getLogger(__name__).error("Error loading model: %s", e)

Model version 1 loaded from MinIO successfully.


In [None]:
  # Perform anomaly detection on the new data
try:
    processed_data, is_burglary = detector.detect(motion_model.tail(5))
except Exception as e:
    # The original referenced an undefined `base_logger`, which raised a
    # NameError inside the handler; use the standard logging module instead.
    logging.getLogger(__name__).error("Error during anomaly detection: %s", e)
else:
    # Report only when detection succeeded; otherwise `processed_data` is
    # undefined or left over from an earlier run of this cell.
    if not processed_data.empty:
        print("\nAnomaly Detection Results:")
        print(processed_data[['from', 'to', 'leave_time', 'enter_time', 'duration', 'leave_hour', 'anomaly_label']])


Anomaly detected: No

Anomaly Detection Results:
          from        to              leave_time              enter_time  \
2714  bathroom   kitchen 2025-01-03 11:19:50.113 2025-01-03 11:20:04.632   
2715   kitchen  bathroom 2025-01-03 11:31:46.201 2025-01-03 11:33:54.773   
2716  bathroom   kitchen 2025-01-03 11:33:54.773 2025-01-03 11:35:58.009   
2717   kitchen  bathroom 2025-01-03 12:18:42.269 2025-01-03 12:21:18.244   
2718  bathroom   kitchen 2025-01-03 12:21:32.604 2025-01-03 12:27:02.348   

      duration  leave_hour anomaly_label  
2714    14.519          11        Normal  
2715   128.572          11        Normal  
2716   123.236          11        Normal  
2717   155.975          12        Normal  
2718   329.744          12        Normal  
