# Smoke Sensor Anomaly Detection

## Import Necessary Libraries

In [1]:
## Import necessary libraries
import pandas as pd
import numpy as np
import random 
from urllib.parse import quote
from datetime import timedelta, datetime

## Import necessary API
import sys
sys.path.append('../../../../')
from api.v2.util.data_load import data_load
from api.v2.model.ResNet1d import ResNet1D, ResidualBlock
from api.v2.util.set_minmax import set_minmax_value
from api.v2.Preprocessing.MinMaxScaler import MinMaxScaler

## Import libraries for the model
import torch
from torch.utils.data import Dataset, DataLoader
from tqdm.notebook import trange
from sklearn.metrics import f1_score, classification_report

## Create the directory that receives the training results (no error if it already exists)
import os
os.makedirs('./result', exist_ok=True)

## Run on the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

## Set random seed
def set_seed(seed_val):
    """Seed Python's, NumPy's and PyTorch's (CPU and CUDA) random generators.

    Args:
        seed_val: Integer seed handed unchanged to every generator.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    # Same order as always: stdlib, NumPy, torch CPU, torch CUDA.
    for seed_fn in seeders:
        seed_fn(seed_val)

# Seed every random generator handled by set_seed with a fixed value
seed_val = 77
set_seed(seed_val)

cuda


## Base Parameter Setting
* Set parameters based on the information identified during EDA (Exploratory Data Analysis).

In [2]:
# Smoke sensor data parameters

# Address of the MachBase Neo server
URL = 'http://127.0.0.1:5654'
# Tag table to read from
table = 'smoke'
# Tags to fetch -> the available tag names can be listed with 'show_column(URL, table)'
tags = [
    'CNT', 'Fire Alarm', 'Humidity[%]', 'PM1.0', 'Pressure[hPa]',
    'Raw Ethanol', 'Raw H2', 'TVOC[ppb]', 'Temperature[C]', 'eCO2[ppm]',
]
# Put every tag in single quotes and join them with commas
tags_ = ",".join(["'" + tag + "'" for tag in tags])
# URL-encode the quoted tag list (':' and '/' are left untouched)
name = quote(tags_, safe=":/")
# Resample option -> D(day), H(hour), T(minute), S(second); None is passed through to data_load as-is
resample_freq = None
# First timestamp of the data range
start_time = '2025-01-16 00:00:00'
# Last timestamp of the data range
end_time = '2025-01-16 17:23:49'
# Time format -> 'default' or quote('2006-01-02 15:04:05.000000') (resolved down to the nanosecond)
timeformat = 'default'

## Model Configuration
* The classifier is a ResNet1D model (a residual network built from 1-D convolutions).

In [3]:
# Model configuration parameters

# Residual block type and the number of such blocks in each layer
block = ResidualBlock
layers = [2, 2, 2, 2]
# Number of output classes
num_classes = 2

# Learning rate used by the optimizer
lr = 0.01

# Build the network, then move it to the selected device
model = ResNet1D(block, layers, num_classes)
model = model.to(device)

# Loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)

# Show the model architecture
print(model)

ResNet1D(
  (conv1): Conv1d(1, 64, kernel_size=(7,), stride=(2,), padding=(3,))
  (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool1d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): ResidualBlock(
      (conv1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
      (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
      (bn2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): ResidualBlock(
      (conv1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
      (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv1d(64, 64, kernel_size=(3,), stride=(1,), paddi

## Dataset & Loader Setup

In [4]:
class Smoke_Dataset(Dataset):
    """Map-style dataset over a frame whose last column is the class label.

    Every column except the last one is an input feature; the last column
    is the target.

    Args:
        df: pandas DataFrame with numeric feature columns followed by one
            label column.
    """

    def __init__(self, df):
        # Kept in their original form so existing users of these attributes
        # keep working.
        self.freq_data = df.iloc[:, :-1]
        self.label = df.iloc[:, -1:].values

        # Convert the features to a float32 tensor once instead of building a
        # tensor from a pandas Series on every __getitem__ call. Besides being
        # much faster, this no longer depends on positional integer lookup on
        # a Series, which pandas deprecates for non-integer column labels.
        self._features = torch.as_tensor(
            self.freq_data.to_numpy(dtype="float32", copy=True)
        )

    def __len__(self):
        """Return the number of rows (samples)."""
        return len(self.freq_data)

    def __getitem__(self, index):
        """Return ``(features, label)`` for row ``index``.

        ``features`` is a float32 tensor of shape ``(1, n_features)`` (one
        input channel); ``label`` is a NumPy array of shape ``(1,)``.
        """
        input_time_data = self._features[index].unsqueeze(0)
        label = self.label[index]

        return input_time_data, label

## Model Training

In [5]:
# Model training function
def train(epochs, start_time_train, end_time_train, unit, Fetch_size, URL, table, name, timeformat, resample_freq, scaler, Min, Max, batch_size):
    """Train the module-level ``model`` on data fetched window by window.

    The training range is cut into consecutive windows of ``Fetch_size``
    ``unit``; the last window is shortened so it ends at ``end_time_train``.
    Each window is loaded with ``data_load``, scaled, and fed to the model in
    mini-batches. After every epoch the macro F1 score over all training
    predictions is computed; when it improves, the classification report is
    appended to ``./result/Smoke_Sensor_Buffered.txt`` and the model is saved
    to ``./result/Smoke_Sensor_Buffered.pt``.

    Relies on the module-level ``model``, ``optimizer``, ``criterion`` and
    ``device``.

    Args:
        epochs: Iterable of epoch indices that also offers
            ``set_postfix_str`` (the notebook passes a tqdm ``trange``).
        start_time_train: Start of the training range, "%Y-%m-%d %H:%M:%S".
        end_time_train: End of the training range, same format.
        unit: ``timedelta`` keyword ('days', 'hours', 'minutes', 'seconds').
        Fetch_size: Number of ``unit`` covered by one window.
        URL, table, name, timeformat, resample_freq: Forwarded unchanged to
            ``data_load``.
        scaler: Object whose ``fit_transform(values, min, max)`` scales the
            feature matrix.
        Min, Max: Frames with the per-column bounds. Column ``1`` is dropped
            before use -- presumably the 'Fire Alarm' label column; verify
            against ``set_minmax_value``.
        batch_size: Mini-batch size for the ``DataLoader``.

    Returns:
        The trained model (the module-level ``model`` object).

    Raises:
        ValueError: If the window length is not positive, or an epoch ends
            without a single training sample.
    """
    time_format = "%Y-%m-%d %H:%M:%S"
    range_start = datetime.strptime(start_time_train, time_format)
    range_end = datetime.strptime(end_time_train, time_format)

    step = timedelta(**{unit: Fetch_size})
    if step <= timedelta(0):
        # A non-positive step would never advance through the range.
        raise ValueError("Fetch_size must describe a positive time window")

    # The windows are identical for every epoch, so build them once. The last
    # window is clamped to the end of the range; this also covers a range that
    # is shorter than a single window.
    windows = []
    cursor = range_start
    while cursor < range_end:
        upper = min(cursor + step, range_end)
        windows.append((cursor.strftime(time_format), upper.strftime(time_format)))
        cursor = upper

    # The scaling bounds do not change between windows.
    min_values = Min.drop(columns=[1]).values
    max_values = Max.drop(columns=[1]).values

    # Per-epoch history of loss and accuracy
    train_loss = []
    train_acc = []

    # Best macro F1 score seen so far
    best_f1 = 0

    for epoch in epochs:

        # Set the model to training mode
        model.train()
        correct = 0
        total = 0
        preds_ = []
        targets_ = []

        # Accumulated loss and number of optimizer steps in this epoch
        running_loss = 0.0
        total_step = 0

        for start_time, end_time in windows:

            # Load the data of this window
            data = data_load(URL, table, name, start_time, end_time, timeformat, resample_freq)

            # Move the 'Fire Alarm' column to the last position
            data = data.reindex(columns=[col for col in data.columns if col != 'Fire Alarm'] + ['Fire Alarm'])

            # Convert the 'Fire Alarm' column to integer type
            data['Fire Alarm'] = data['Fire Alarm'].astype(int)

            # Scale the feature columns with the precomputed bounds
            data_scaled = scaler.fit_transform(data.iloc[:, :-1].values, min_values, max_values)

            # Rebuild a frame of scaled features followed by the label
            frame = pd.DataFrame(data_scaled)
            frame['Fire Alarm'] = data['Fire Alarm'].values

            # Drop rows containing NaN values
            frame = frame.dropna()

            # Skip empty windows before a DataLoader is built for them
            if len(frame) == 0:
                continue

            train_dataloader = DataLoader(Smoke_Dataset(frame), batch_size, shuffle=True)

            for inputs, target in train_dataloader:

                # Count optimizer steps
                total_step += 1

                inputs = inputs.to(device).float()
                # Flatten (batch, 1) -> (batch,). Unlike squeeze(), this keeps
                # the batch dimension when the last batch holds one sample.
                target = target.to(device).long().reshape(-1)

                optimizer.zero_grad()

                # Forward pass
                outputs = model(inputs)

                # Loss and parameter update
                loss = criterion(outputs, target)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()

                # Predicted class = index of the largest output
                _, pred = torch.max(outputs, dim=1)
                correct += torch.sum(pred == target).item()
                total += target.size(0)

                preds_.append(pred)
                targets_.append(target)

        if total == 0:
            raise ValueError(
                "No training samples were loaded between "
                f"{start_time_train} and {end_time_train}"
            )

        train_acc.append(100 * correct / total)
        train_loss.append(running_loss / total_step)
        # NOTE(review): the printed loss is the mean over all epochs so far,
        # not only this epoch -- kept as is; confirm this is intended.
        print(f'\ntrain loss: {np.mean(train_loss)}, train acc: {(100 * correct / total):.4f}')

        # Combine predictions and labels collected from all batches
        preds_ = torch.cat(preds_).detach().cpu().numpy()
        targets_ = torch.cat(targets_).detach().cpu().numpy()

        f1score = f1_score(targets_, preds_, average='macro')
        if best_f1 < f1score:
            best_f1 = f1score
            # Append the classification report of the new best epoch
            with open("./result/Smoke_Sensor_Buffered.txt", "a") as text_file:
                print('epoch=====', epoch, file=text_file)
                print(classification_report(targets_, preds_, digits=4), file=text_file)
            # Save the best model (the whole model object, not only its weights)
            print('model save')
            torch.save(model, './result/Smoke_Sensor_Buffered.pt')
        epochs.set_postfix_str(f"epoch = {epoch},  f1_score = {f1score}, best_f1 = {best_f1}")

    return model

In [6]:
# Progress-bar iterator over the 5 training epochs
epochs = trange(5, desc='training')
# Size of one fetch window = Fetch_size x unit ('days', 'hours', 'minutes', 'seconds')
Fetch_size = 5
unit = 'hours'
# Mini-batch size
batch_size = 32
# Training period
start_time_train = '2025-01-16 00:00:00'
end_time_train = '2025-01-16 10:00:00'
# Scaler applied to the feature columns
scaler = MinMaxScaler()
# Min / Max values looked up for the training period
Min, Max = set_minmax_value(URL, table, name, start_time_train, end_time_train)

################################################################# Training #################################################################
train(
    epochs,
    start_time_train,
    end_time_train,
    unit,
    Fetch_size,
    URL,
    table,
    name,
    timeformat,
    resample_freq,
    scaler,
    Min,
    Max,
    batch_size,
)

training:   0%|          | 0/5 [00:00<?, ?it/s]

  return F.conv1d(input, weight, bias, self.stride,
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass



train loss: 0.047222008734354184, train acc: 98.8834
model save

train loss: 0.03865609434235436, train acc: 99.2084
model save

train loss: 0.029725569302551946, train acc: 99.6111
model save

train loss: 0.026102612174553393, train acc: 99.5889

train loss: 0.022838966754839134, train acc: 99.7278
model save


ResNet1D(
  (conv1): Conv1d(1, 64, kernel_size=(7,), stride=(2,), padding=(3,))
  (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool1d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): ResidualBlock(
      (conv1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
      (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
      (bn2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): ResidualBlock(
      (conv1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
      (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv1d(64, 64, kernel_size=(3,), stride=(1,), paddi

## Model Test

In [7]:
def test(model, start_time_test, end_time_test, unit, Fetch_size, URL, table, name, timeformat, resample_freq, scaler, Min, Max, batch_size):
    """Run ``model`` over the test range and return labels and predictions.

    The test range is cut into consecutive windows of ``Fetch_size`` ``unit``;
    the last window is shortened so it ends at ``end_time_test``. Each window
    is loaded with ``data_load``, scaled, and passed through the model in
    evaluation mode with gradients disabled.

    Relies on the module-level ``device``.

    Args:
        model: Trained network to evaluate.
        start_time_test: Start of the test range, "%Y-%m-%d %H:%M:%S".
        end_time_test: End of the test range, same format.
        unit: ``timedelta`` keyword ('days', 'hours', 'minutes', 'seconds').
        Fetch_size: Number of ``unit`` covered by one window.
        URL, table, name, timeformat, resample_freq: Forwarded unchanged to
            ``data_load``.
        scaler: Object whose ``fit_transform(values, min, max)`` scales the
            feature matrix.
        Min, Max: Frames with the per-column bounds. Column ``1`` is dropped
            before use -- presumably the 'Fire Alarm' label column; verify
            against ``set_minmax_value``.
        batch_size: Mini-batch size for the ``DataLoader``.

    Returns:
        Tuple ``(real_values, real_pred_values)`` of 1-D NumPy arrays with the
        true labels and the predicted labels, in the order the rows were
        loaded.

    Raises:
        ValueError: If the window length is not positive, or no test sample
            was loaded at all.
    """
    time_format = "%Y-%m-%d %H:%M:%S"
    range_start = datetime.strptime(start_time_test, time_format)
    range_end = datetime.strptime(end_time_test, time_format)

    step = timedelta(**{unit: Fetch_size})
    if step <= timedelta(0):
        # A non-positive step would never advance through the range.
        raise ValueError("Fetch_size must describe a positive time window")

    # The scaling bounds do not change between windows.
    min_values = Min.drop(columns=[1]).values
    max_values = Max.drop(columns=[1]).values

    with torch.no_grad():

        model.eval()

        # Predictions and labels collected batch by batch
        output_test = []
        output_target = []

        cursor = range_start
        while cursor < range_end:

            # Clamp the window to the end of the range; this also covers a
            # range that is shorter than a single window.
            upper = min(cursor + step, range_end)
            start_time = cursor.strftime(time_format)
            end_time = upper.strftime(time_format)
            cursor = upper

            # Load the data of this window
            data = data_load(URL, table, name, start_time, end_time, timeformat, resample_freq)

            # Move the 'Fire Alarm' column to the last position
            data = data.reindex(columns=[col for col in data.columns if col != 'Fire Alarm'] + ['Fire Alarm'])

            # Convert the 'Fire Alarm' column to integer type
            data['Fire Alarm'] = data['Fire Alarm'].astype(int)

            # Scale the feature columns with the precomputed bounds
            data_scaled = scaler.fit_transform(data.iloc[:, :-1].values, min_values, max_values)

            # Rebuild a frame of scaled features followed by the label
            frame = pd.DataFrame(data_scaled)
            frame['Fire Alarm'] = data['Fire Alarm'].values

            # Drop rows containing NaN values
            frame = frame.dropna()

            # Skip empty windows before a DataLoader is built for them
            if len(frame) == 0:
                continue

            # No shuffling during evaluation: it does not affect the metrics
            # and would only scramble the order of the returned arrays.
            test_dataloader = DataLoader(Smoke_Dataset(frame), batch_size, shuffle=False)

            for inputs, target in test_dataloader:

                inputs = inputs.to(device).float()
                # Flatten (batch, 1) -> (batch,) while keeping the batch
                # dimension even for a batch of one sample.
                target = target.to(device).long().reshape(-1)

                # Forward pass
                outputs = model(inputs)

                # Predicted class = index of the largest output
                _, pred = torch.max(outputs, dim=1)

                output_test.append(pred)
                output_target.append(target)

        if not output_test:
            raise ValueError(
                "No test samples were loaded between "
                f"{start_time_test} and {end_time_test}"
            )

        # Combine tensors into one
        combined_tensor_target = torch.cat(output_target, dim=0)
        combined_tensor_pred = torch.cat(output_test, dim=0)

        # Change to NumPy format
        real_values = combined_tensor_target.cpu().numpy()
        real_pred_values = combined_tensor_pred.cpu().numpy()

    return real_values, real_pred_values

In [8]:
# Load the best model written by the training step.
# map_location=device lets a checkpoint saved on a GPU be restored on a CPU-only host.
# weights_only=False is needed because the file holds the whole pickled model object
# (torch.save(model, ...)), not just a state_dict, and recent PyTorch releases default
# to weights_only=True. Unpickling can execute code, so only load files you created.
model_ = torch.load('./result/Smoke_Sensor_Buffered.pt', map_location=device, weights_only=False)

# Unit of time ('days', 'hours', 'minutes', 'seconds')
unit = 'hours'
Fetch_size = 5
# Set Test Time
start_time_test = '2025-01-16 10:00:00'
end_time_test = '2025-01-16 17:23:49'

##################################################################################Test############################################################################################################
real_values, real_pred_values = test(model_, start_time_test, end_time_test, unit, Fetch_size, URL, table, name, timeformat, resample_freq, scaler, Min, Max, batch_size)

  return F.conv1d(input, weight, bias, self.stride,


## Model Performance Evaluation

In [9]:
# Print the classification report of the test predictions against the true labels
print(classification_report(real_values, real_pred_values))

              precision    recall  f1-score   support

           0       0.93      0.73      0.82     11518
           1       0.82      0.96      0.89     15113

    accuracy                           0.86     26631
   macro avg       0.88      0.85      0.85     26631
weighted avg       0.87      0.86      0.86     26631

