In [1]:
import os
import numpy as np
import pandas as pd

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

In [2]:
BASE_PATH = r"C:\Users\Manaswini\Downloads\data"

FEATURES = [
    "Accelerometer1RMS",
    "Accelerometer2RMS",
    "Current",
    "Pressure",
    "Temperature",
    "Thermocouple",
    "Voltage",
    "Volume Flow RateRMS"
]

TIME_STEPS = 50

In [3]:
def create_sequences_supervised(data, labels, seq_len):
    X, y = [], []
    for i in range(len(data) - seq_len + 1):
        X.append(data[i:i+seq_len])
        y.append(labels[i + seq_len - 1])  
    return np.array(X), np.array(y)

In [5]:
normal_path = os.path.join(BASE_PATH, "anomaly-free", "anomaly-free.csv")

normal_df = pd.read_csv(normal_path, sep=";")
normal_df['datetime'] = pd.to_datetime(normal_df['datetime'])
normal_df.set_index('datetime', inplace=True)

normal_df = normal_df[FEATURES]

normal_labels = np.zeros(len(normal_df))

print(normal_df.shape)
print(normal_labels.shape)

(9405, 8)
(9405,)


In [6]:
fault_dfs = []
fault_labels = []

fault_folders = {
    "Valve1": range(16),        
    "Valve2": range(4),         
    "Other": range(1, 14)       
}

for folder, file_range in fault_folders.items():
    folder_path = os.path.join(BASE_PATH, folder)
    
    for i in file_range:
        file_path = os.path.join(folder_path, f"{i}.csv")
        
        df = pd.read_csv(file_path, sep=";")
        df['datetime'] = pd.to_datetime(df['datetime'])
        df.set_index('datetime', inplace=True)
        df = df[FEATURES]
        
        fault_dfs.append(df)
        fault_labels.append(np.ones(len(df)))  

fault_df = pd.concat(fault_dfs, axis=0)
fault_labels = np.concatenate(fault_labels)

print(fault_df.shape)
print(fault_labels.shape)

(36496, 8)
(36496,)


In [7]:
X_all = pd.concat([normal_df, fault_df], axis=0)
y_all = np.concatenate([normal_labels, fault_labels])

print(X_all.shape)
print(y_all.shape)

print((y_all == 0).sum())
print((y_all == 1).sum())

(45901, 8)
(45901,)
9405
36496


In [8]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_all)

def create_sequences_supervised(X, y, time_steps):
    Xs, ys = [], []
    for i in range(len(X) - time_steps):
        Xs.append(X[i:i+time_steps])
        ys.append(y[i+time_steps])
    return np.array(Xs), np.array(ys)

X_seq, y_seq = create_sequences_supervised(X_scaled, y_all, TIME_STEPS)

print(X_seq.shape)
print(y_seq.shape)

(45851, 50, 8)
(45851,)


In [10]:
from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(
    X_seq,
    y_seq,
    test_size=0.2,
    random_state=42,
    stratify=y_seq
)

print(X_train.shape, y_train.shape)
print(X_val.shape, y_val.shape)

(36680, 50, 8) (36680,)
(9171, 50, 8) (9171,)


In [11]:
y_train = y_train.astype(int)
y_val = y_val.astype(int)

print(np.bincount(y_train))
print(np.bincount(y_val))

[ 7484 29196]
[1871 7300]


In [12]:
from tensorflow.keras.optimizers import Adam

model = Sequential([
    LSTM(64, input_shape=(TIME_STEPS, X_train.shape[2]), return_sequences=True),
    Dropout(0.3),

    LSTM(32),
    Dropout(0.3),

    Dense(1, activation="sigmoid")  # binary classification
])

model.compile(
    optimizer=Adam(learning_rate=1e-3),
    loss="binary_crossentropy",
    metrics=["accuracy"]
)

model.summary()


  super().__init__(**kwargs)


In [13]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

callbacks = [
    EarlyStopping(
        monitor="val_loss",
        patience=5,
        restore_best_weights=True
    ),
    ModelCheckpoint(
        "supervised_lstm_best.keras",
        monitor="val_loss",
        save_best_only=True
    )
]

history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=30,
    batch_size=64,
    callbacks=callbacks,
    verbose=1
)

Epoch 1/30
[1m574/574[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 34ms/step - accuracy: 0.9636 - loss: 0.0814 - val_accuracy: 0.9846 - val_loss: 0.0332
Epoch 2/30
[1m574/574[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 33ms/step - accuracy: 0.9836 - loss: 0.0373 - val_accuracy: 0.9840 - val_loss: 0.0389
Epoch 3/30
[1m574/574[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 33ms/step - accuracy: 0.9835 - loss: 0.0346 - val_accuracy: 0.9845 - val_loss: 0.0329
Epoch 4/30
[1m574/574[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 34ms/step - accuracy: 0.9835 - loss: 0.0367 - val_accuracy: 0.9874 - val_loss: 0.0226
Epoch 5/30
[1m574/574[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 33ms/step - accuracy: 0.9787 - loss: 0.0519 - val_accuracy: 0.9868 - val_loss: 0.0396
Epoch 6/30
[1m574/574[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 37ms/step - accuracy: 0.9845 - loss: 0.0411 - val_accuracy: 0.9822 - val_loss: 0.0357
Epoch 7/30
[1m5

In [16]:
def create_sequences(X, y, time_steps):
    Xs, ys = [], []
    for i in range(len(X) - time_steps):
        Xs.append(X[i:i+time_steps])
        ys.append(y[i+time_steps])
    return np.array(Xs), np.array(ys)

In [17]:
results_df = pd.DataFrame(columns=["Folder", "File", "Accuracy", "Precision", "Recall", "F1"])

test_folders = {
    "Valve1": 16,
    "Valve2": 4,
    "Other": list(range(1, 15))
}

for folder, files in test_folders.items():
    folder_path = os.path.join(BASE_PATH, folder)
    
    file_indices = files if isinstance(files, list) else range(files)
    
    for i in file_indices:
        file_path = os.path.join(folder_path, f"{i}.csv")
        
        df = pd.read_csv(file_path, sep=";")
        df['datetime'] = pd.to_datetime(df['datetime'])
        df.set_index('datetime', inplace=True)
        
        X = scaler.transform(df[FEATURES])
        y_true = np.ones(len(X)) 

        X_seq, y_seq = create_sequences(X, y_true, TIME_STEPS)
        
        y_pred_prob = model.predict(X_seq, verbose=0).ravel()
        y_pred = (y_pred_prob > 0.5).astype(int)
        
        acc = accuracy_score(y_seq, y_pred)
        prec = precision_score(y_seq, y_pred, zero_division=0)
        rec = recall_score(y_seq, y_pred)
        f1 = f1_score(y_seq, y_pred)
        
        results_df = pd.concat([
            results_df,
            pd.DataFrame([{
                "Folder": folder,
                "File": f"{i}.csv",
                "Accuracy": acc,
                "Precision": prec,
                "Recall": rec,
                "F1": f1
            }])
        ], ignore_index=True)
        
        print(f"{folder} file {i}.csv | Acc: {acc:.3f}, Prec: {prec:.3f}, Rec: {rec:.3f}, F1: {f1:.3f}")



  results_df = pd.concat([


Valve1 file 0.csv | Acc: 1.000, Prec: 1.000, Rec: 1.000, F1: 1.000
Valve1 file 1.csv | Acc: 1.000, Prec: 1.000, Rec: 1.000, F1: 1.000
Valve1 file 2.csv | Acc: 1.000, Prec: 1.000, Rec: 1.000, F1: 1.000
Valve1 file 3.csv | Acc: 1.000, Prec: 1.000, Rec: 1.000, F1: 1.000
Valve1 file 4.csv | Acc: 1.000, Prec: 1.000, Rec: 1.000, F1: 1.000
Valve1 file 5.csv | Acc: 1.000, Prec: 1.000, Rec: 1.000, F1: 1.000
Valve1 file 6.csv | Acc: 1.000, Prec: 1.000, Rec: 1.000, F1: 1.000
Valve1 file 7.csv | Acc: 1.000, Prec: 1.000, Rec: 1.000, F1: 1.000
Valve1 file 8.csv | Acc: 1.000, Prec: 1.000, Rec: 1.000, F1: 1.000
Valve1 file 9.csv | Acc: 1.000, Prec: 1.000, Rec: 1.000, F1: 1.000
Valve1 file 10.csv | Acc: 1.000, Prec: 1.000, Rec: 1.000, F1: 1.000
Valve1 file 11.csv | Acc: 1.000, Prec: 1.000, Rec: 1.000, F1: 1.000
Valve1 file 12.csv | Acc: 1.000, Prec: 1.000, Rec: 1.000, F1: 1.000
Valve1 file 13.csv | Acc: 1.000, Prec: 1.000, Rec: 1.000, F1: 1.000
Valve1 file 14.csv | Acc: 1.000, Prec: 1.000, Rec: 1.000, 

In [20]:
SAVE_PATH = r"C:\Users\Manaswini\Downloads\anomaly\result_lstm_s"
results_df.to_csv(os.path.join(SAVE_PATH, "supervised_test_metrics.csv"), index=False)

In [21]:
import joblib
model.save(os.path.join(SAVE_PATH, "supervised_lstm_model.keras"))
joblib.dump(scaler, os.path.join(SAVE_PATH, "scaler.pkl"))

joblib.dump(FEATURES, os.path.join(SAVE_PATH, "features.pkl"))
joblib.dump(TIME_STEPS, os.path.join(SAVE_PATH, "timesteps.pkl"))

['C:\\Users\\Manaswini\\Downloads\\anomaly\\result_lstm_s\\timesteps.pkl']