In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf

# Location of the MetroPT3 (AirCompressor) CSV on the local machine.
RAW_CSV_PATH = r"D:\Xempla\archive\MetroPT3(AirCompressor).csv"

# Load the raw export and preview the first rows as the cell output.
data = pd.read_csv(RAW_CSV_PATH)
data.head()

In [None]:
# Convert the timestamp column to datetimes (done in place on `data`) so it
# can be compared against the date bounds below.
data['timestamp'] = pd.to_datetime(data['timestamp'])

# Half-open selection window: start_date is included, end_date is excluded.
start_date = "2020-02-01"
end_date = "2020-04-18"

timestamps = data['timestamp']
in_window = (timestamps >= start_date) & (timestamps < end_date)

# Keep only the rows inside the window and renumber them from zero.
normal_data = data.loc[in_window].reset_index(drop=True)

print(normal_data.shape)
print(normal_data.head())

In [None]:
# Columns of the dataset that are used as model inputs.
features = ['TP2', 'TP3', 'H1', 'DV_pressure', 'Reservoirs', 'Oil_temperature', 'Motor_current']

# Restrict the selected window to those columns only.
df = normal_data.loc[:, features]
print(df)

In [None]:
# Show the first rows followed by the descriptive statistics of the window.
for summary in (normal_data.head(), normal_data.describe()):
    print(summary)

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn per-column scaling statistics from the selected feature frame, then
# apply them to that same frame.
scaler = StandardScaler()
scaler.fit(df)
scaled_data = scaler.transform(df)

import pandas as pd
# Wrap the scaled array back into a DataFrame carrying the original column names.
df_scaled = pd.DataFrame(scaled_data, columns=df.columns)

print(df_scaled.head())

In [None]:
def create_sequence(data, seq_len):
    """Cut `data` into overlapping windows of `seq_len` consecutive rows.

    Window `i` holds rows `i` .. `i + seq_len - 1`; consecutive windows are
    shifted by one row. `len(data) - seq_len` windows are produced, i.e. the
    window that would start at row `len(data) - seq_len` is not emitted.

    Args:
        data: array-like whose first axis is the row/time axis.
        seq_len: number of rows per window; must be >= 1.

    Returns:
        np.ndarray of shape `(n_windows, seq_len, *data.shape[1:])`. When the
        input is too short to yield any window the result is an empty array
        with that same trailing shape, so callers can still read
        `.shape[1]` and `.shape[2]`.

    Raises:
        ValueError: if `seq_len` is smaller than 1.
    """
    if seq_len < 1:
        raise ValueError(f"seq_len must be >= 1, got {seq_len}")

    values = np.asarray(data)
    n_windows = len(values) - seq_len

    if n_windows <= 0:
        # np.array([]) would collapse to shape (0,); keep the window and
        # feature dimensions so downstream shape lookups do not fail.
        return np.empty((0, seq_len) + values.shape[1:], dtype=values.dtype)

    return np.stack([values[start:start + seq_len] for start in range(n_windows)])

# Number of consecutive rows in each training window.
sequence_length = 60
separator = '=' * 20

# Plain NumPy view of the scaled feature frame.
data_array = df_scaled.to_numpy()
print(data_array)
print(separator)

# Slice the scaled rows into overlapping windows.
X = create_sequence(data_array, sequence_length)

print(f"Shape of X: {X.shape}")
print(separator)

In [None]:
# Inspect the windowed array returned by create_sequence.
print(X)

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Bidirectional, RepeatVector, TimeDistributed, Dense, LSTM
from tensorflow.keras.optimizers import Adam

# Seed the Python, NumPy and TensorFlow random generators before any layer is
# created so that the initial weights are the same on every fresh run of the
# notebook. Note that this also resets NumPy's global random state.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Window length and number of channels are taken from the training tensor.
time_steps = X.shape[1]
n_features = X.shape[2]

# Encoder: a bidirectional LSTM that returns only its final output
# (return_sequences=False), i.e. one vector per input window.
inputs = Input(shape=(time_steps, n_features))
encoded = Bidirectional(LSTM(64, activation='tanh', return_sequences=False))(inputs)
# Repeat that vector once per time step so the decoder receives a sequence.
bottleneck = RepeatVector(time_steps)(encoded)

# Decoder: a bidirectional LSTM emitting one output per time step, followed by
# a per-time-step Dense layer that maps back to n_features values.
decoded = Bidirectional(LSTM(64, activation='tanh', return_sequences=True))(bottleneck)
output = TimeDistributed(Dense(n_features))(decoded)

# Reconstruction model trained with mean squared error.
autoencoder = Model(inputs, output)
autoencoder.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

autoencoder.summary()

In [None]:
# Quick sanity numbers for the training tensor: shape and value range.
for label, value in (
    ("X shape:", X.shape),
    ("Min:", X.min()),
    ("Max:", X.max()),
    ("Mean:", X.mean()),
):
    print(label, value)


In [None]:
import matplotlib.pyplot as plt
# Draw the first training window; each column of X[0] becomes one line.
fig, ax = plt.subplots()
ax.plot(X[0])  # One sequence
ax.set_title("Sample Input Sequence")
ax.set_xlabel("Timesteps")
ax.set_ylabel("Scaled Sensor Values")
plt.show()


In [None]:
# Training settings collected in one place.
training_options = {
    "epochs": 50,
    "batch_size": 128,
    "validation_split": 0.1,
    "shuffle": True,
}

# The windows are passed as both input and target: the model is trained to
# reproduce what it is given.
history = autoencoder.fit(X, X, **training_options)

In [None]:
import matplotlib.pyplot as plt

# Plot the per-epoch training and validation loss recorded by fit().
fig, ax = plt.subplots()
for history_key, legend_label in (('loss', 'Train Loss'), ('val_loss', 'Val Loss')):
    ax.plot(history.history[history_key], label=legend_label)
ax.legend()
ax.set_title("Training Loss")
ax.set_xlabel("Epochs")
ax.set_ylabel("MSE Loss")
plt.show()

In [None]:
# File the trained autoencoder is written to (relative to the working directory).
MODEL_PATH = "autoencoder_model.keras"
autoencoder.save(MODEL_PATH)


In [None]:
import joblib
# Persist the fitted scaler so the same scaling can be reapplied later.
SCALER_PATH = "scaler.pkl"
joblib.dump(scaler, SCALER_PATH)

In [None]:
from tensorflow.keras.models import load_model
import joblib

# Reload the model and scaler from the files written by the save cells.
# joblib.load unpickles its input, so only load a scaler file you produced yourself.
SAVED_MODEL_FILE = "autoencoder_model.keras"
SAVED_SCALER_FILE = 'scaler.pkl'

autoencoder = load_model(SAVED_MODEL_FILE)
scaler = joblib.load(SAVED_SCALER_FILE)

In [None]:
# Take the model input columns from the complete dataset and scale them with
# the already-fitted scaler (transform only, no refit).
data_features = data.loc[:, features]
data_scaled = scaler.transform(data_features)

In [None]:
# Window the complete scaled dataset with the same length that was used to
# build the training tensor, instead of repeating the literal 60, so the two
# cannot drift apart when sequence_length is changed.
x_full = create_sequence(data_scaled, seq_len=sequence_length)
print("X_full shape:", x_full.shape)

In [None]:
# Reconstruct every window of the full dataset.
PREDICT_BATCH_SIZE = 128
preds = autoencoder.predict(x_full, batch_size=PREDICT_BATCH_SIZE)

# One error value per window: squared difference averaged over the time and
# feature axes. The squared array is not bound to a name so it can be freed
# as soon as the mean has been taken.
mse = np.square(x_full - preds).mean(axis=(1, 2))


In [None]:
# Percentile of the reconstruction errors used as the cut-off:
# windows above it are the top 5% as anomalies.
ANOMALY_PERCENTILE = 95
threshold = np.percentile(mse, ANOMALY_PERCENTILE)

print(threshold)

In [None]:
import matplotlib.pyplot as plt

# Histogram of the per-window reconstruction errors, with the cut-off drawn
# as a dashed vertical line.
fig, ax = plt.subplots()
ax.hist(mse, bins=100)
ax.axvline(threshold, color='r', linestyle='--')
ax.set_title("Reconstruction Loss Distribution")
ax.set_xlabel("Loss")
ax.set_ylabel("Frequency")
plt.show()


In [None]:
# Reuse the frame loaded at the top of the notebook rather than reading the
# CSV a second time from a relative, backslash-separated path that only
# resolves on Windows with a particular working directory. `data` already has
# its timestamp column converted to datetimes.
# NOTE(review): assumes the relative path referred to the same CSV as the
# initial load (the file name is identical) - confirm.
full_data = data[['timestamp'] + features]

# Rows recorded on 2020-04-18 (both bounds inclusive).
anomaly_df = full_data[
    (full_data["timestamp"] >= "2020-04-18 00:00:00")
    & (full_data["timestamp"] <= "2020-04-18 23:59:59")
]


In [None]:
# Rows recorded on 2020-04-18 (both bounds inclusive).
window_start = "2020-04-18 00:00:00"
window_end = "2020-04-18 23:59:59"
test_df = data[(data["timestamp"] >= window_start) & (data["timestamp"] <= window_end)]

# Preprocess: scale with the fitted scaler, then window with the same length
# used for training (previously a repeated literal 60, which would silently
# diverge if sequence_length were changed).
test_scaled = scaler.transform(test_df[features])
test_seq = create_sequence(test_scaled, seq_len=sequence_length)

# Predict loss: per-window mean squared reconstruction error.
reconstructions = autoencoder.predict(test_seq)
losses = np.mean((test_seq - reconstructions) ** 2, axis=(1, 2))


In [None]:
import matplotlib.pyplot as plt

# Reconstruction error of each test window against the percentile cut-off.
fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(losses, label="Reconstruction Loss")
ax.axhline(threshold, color="red", linestyle="--", label="Threshold")
ax.legend()
ax.set_title("Anomaly Detection in Failure Window")
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import IsolationForest

def evaluate_reconstruction(model, X_test, threshold=None, plot=True):
    """
    Evaluates reconstruction error and detects anomalies.

    Args:
        model: trained autoencoder exposing `predict(X, verbose=0)`.
        X_test: test data, shape (samples, time_steps, features)
        threshold: if given (any value other than None, including 0), use this
            fixed MSE threshold: sequences whose loss is strictly greater are
            anomalies. Otherwise an IsolationForest (contamination=0.01,
            random_state=42) is fitted on the losses themselves.
        plot: whether to show plots

    Returns:
        losses: reconstruction errors, one MSE value per sequence
        labels: 1 (normal), -1 (anomaly)
    """
    # 1. Predict
    reconstructions = model.predict(X_test, verbose=0)

    # 2. Compute reconstruction losses (MSE per sequence)
    losses = np.mean((X_test - reconstructions) ** 2, axis=(1, 2))

    # 3. Detect anomalies
    # A single flag drives both labelling and plotting, so a threshold of 0
    # is treated the same way in both places.
    use_fixed_threshold = threshold is not None
    if use_fixed_threshold:
        labels = np.where(losses > threshold, -1, 1)  # -1: anomaly
    else:
        iso_forest = IsolationForest(contamination=0.01, random_state=42)
        labels = iso_forest.fit_predict(losses.reshape(-1, 1))

    # 4. Plot
    if plot:
        plt.figure(figsize=(10, 4))
        plt.plot(losses, label="Reconstruction Loss")
        if use_fixed_threshold:
            plt.axhline(threshold, color="red", linestyle="--", label="Threshold")
        # Mark the sequences flagged as anomalous so the figure actually shows
        # the labels its title refers to.
        anomaly_idx = np.flatnonzero(labels == -1)
        if anomaly_idx.size:
            plt.scatter(
                anomaly_idx,
                losses[anomaly_idx],
                color="red",
                s=12,
                zorder=3,
                label="Anomaly",
            )
        plt.title("Reconstruction Losses with Anomaly Labels")
        plt.xlabel("Sequence Index")
        plt.ylabel("Loss")
        plt.legend()
        plt.show()

    return losses, labels


In [None]:
# Fixed MSE cut-off passed to the evaluator.
# NOTE(review): this value is independent of the percentile-based `threshold`
# computed earlier in the notebook - confirm which one is intended here.
MANUAL_MSE_THRESHOLD = 0.005
losses, labels = evaluate_reconstruction(
    autoencoder,
    test_seq,
    threshold=MANUAL_MSE_THRESHOLD,
)
