# LSTM-Autoencoder + Supervised LSTM Failure Prediction (Per Room)

In [None]:
import sqlite3
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, f1_score
from keras.models import Model, Sequential
from keras.layers import LSTM, Dense, RepeatVector, TimeDistributed, Input
import matplotlib.pyplot as plt
import time

# Number of consecutive rows in each model input window; also used as the
# look-ahead (in rows) when the future-failure label is built.
SEQUENCE_LENGTH = 30
# Sensor columns fed to both models.
feature_cols = ['temperature', 'airquality', 'daylight', 'light']

# Load the last 120 days of readings. The connection is closed explicitly so
# the database handle is not left open for the lifetime of the kernel.
_load_conn = sqlite3.connect("room_data.db")
try:
    df = pd.read_sql_query("SELECT * FROM sensor_data_history WHERE timestamp >= datetime('now', '-120 days')", _load_conn)
finally:
    _load_conn.close()
df['timestamp'] = pd.to_datetime(df['timestamp'])

def compute_aqi(value):
    """Map an air-quality reading to the upper limit of its band.

    Bands are capped at 500, 1000 and 1500; any reading that is not less
    than or equal to 1500 is assigned 2000.
    """
    for band_limit in (500, 1000, 1500):
        if value <= band_limit:
            return band_limit
    return 2000

def create_sequences(X, y, sequence_length):
    """Slice ``X`` into overlapping windows paired with the value of ``y`` that follows.

    Window ``k`` holds rows ``k`` .. ``k + sequence_length - 1`` of ``X``
    (positional), and its target is ``y`` at position ``k + sequence_length``.

    Args:
        X: Object supporting ``len`` and ``.iloc`` slicing with ``.values``.
        y: Object supporting positional ``.iloc`` lookup.
        sequence_length: Number of rows per window.

    Returns:
        Tuple ``(windows, targets)`` of NumPy arrays. Both are empty when
        ``X`` has no more than ``sequence_length`` rows.
    """
    window_count = len(X) - sequence_length
    windows = [
        X.iloc[start:start + sequence_length].values
        for start in range(window_count)
    ]
    targets = [y.iloc[start + sequence_length] for start in range(window_count)]
    return np.array(windows), np.array(targets)

def process_room(room_id):
    """Train and evaluate the failure-prediction models for one room.

    Filters the module-level ``df`` to ``room_id``, builds a future-failure
    label, trains a supervised LSTM classifier and an LSTM autoencoder on the
    first 75% of the rows (chronological split), flags test windows whose
    reconstruction error is above the 95th percentile, stores those windows in
    the ``anomalies`` table of ``room_data.db``, then prints classification
    reports and shows a comparison plot.

    Args:
        room_id: Value matched against the ``room_id`` column of ``df``.

    Returns:
        None. Returns early (after printing a message) when the room has too
        few rows or scaling fails.
    """
    print(f"\n Processing room {room_id}")
    room_df = df[df['room_id'] == room_id].copy().sort_values('timestamp').dropna()

    room_df['OAQI'] = room_df['airquality'].apply(compute_aqi)
    # NOTE(review): compute_aqi never returns less than 500, so `OAQI > 100`
    # is true for every row and the label is constant. The intended threshold
    # is not visible here -- confirm before trusting the supervised metrics.
    room_df['failure'] = (room_df['OAQI'] > 100).astype(int)
    # Label each row with the failure flag SEQUENCE_LENGTH rows later; the
    # trailing rows have no future value and are dropped.
    room_df['failure_future'] = room_df['failure'].shift(-SEQUENCE_LENGTH)
    room_df = room_df.dropna()

    if room_df.empty or room_df.shape[0] < SEQUENCE_LENGTH:
        print("Not enough data")
        return

    # NOTE(review): the scaler is fitted on all rows before the train/test
    # split, so test-period statistics influence the training features.
    scaler = StandardScaler()
    try:
        room_df[feature_cols] = scaler.fit_transform(room_df[feature_cols])
    except ValueError as e:
        print(f"Skipping room {room_id} due to preprocessing error: {e}")
        return

    X = room_df[feature_cols]
    y = room_df['failure_future']
    timestamps = room_df['timestamp']
    # NOTE(review): this copy is taken after scaling, so the feature values
    # later written to the anomalies table are standardized values, not raw
    # sensor readings -- confirm this is intended.
    full_df = room_df.copy()

    # shuffle=False keeps chronological order: the test set is the most recent 25%.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)
    _, ts_test = train_test_split(timestamps, test_size=0.25, shuffle=False)

    # A split with exactly SEQUENCE_LENGTH rows yields zero windows, which
    # would make fit/predict fail on empty arrays, so require strictly more.
    if len(X_train) <= SEQUENCE_LENGTH or len(X_test) <= SEQUENCE_LENGTH:
        print("Not enough sequence data after split")
        return

    X_train_seq, y_train_seq = create_sequences(X_train, y_train, SEQUENCE_LENGTH)
    X_test_seq, y_test_seq = create_sequences(X_test, y_test, SEQUENCE_LENGTH)
    # Align timestamps and rows with the window targets (position i + SEQUENCE_LENGTH).
    ts_test_seq = ts_test[SEQUENCE_LENGTH:].reset_index(drop=True)
    df_test_seq = full_df.iloc[-len(ts_test_seq):].reset_index(drop=True)

    # Supervised classifier: one LSTM layer followed by a sigmoid output.
    lstm_model = Sequential()
    lstm_model.add(LSTM(64, input_shape=(SEQUENCE_LENGTH, len(feature_cols))))
    lstm_model.add(Dense(1, activation='sigmoid'))
    lstm_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    lstm_model.fit(X_train_seq, y_train_seq, epochs=10, batch_size=32, verbose=0)

    # Autoencoder: encode each window to a vector, then decode back to a window.
    inputs = Input(shape=(SEQUENCE_LENGTH, len(feature_cols)))
    encoded = LSTM(64)(inputs)
    repeat = RepeatVector(SEQUENCE_LENGTH)(encoded)
    decoded = LSTM(64, return_sequences=True)(repeat)
    output = TimeDistributed(Dense(len(feature_cols)))(decoded)
    ae_model = Model(inputs, output)
    ae_model.compile(optimizer='adam', loss='mse')
    ae_model.fit(X_train_seq, X_train_seq, epochs=10, batch_size=32, verbose=0)

    # Per-window reconstruction error; the threshold is the 95th percentile of
    # the test errors themselves, so roughly 5% of test windows are flagged.
    X_test_pred = ae_model.predict(X_test_seq)
    mse = np.mean(np.power(X_test_seq - X_test_pred, 2), axis=(1,2))
    thresh = np.percentile(mse, 95)
    AE_anomalies = (mse > thresh).astype(int)

    anomalous_indices = np.where(AE_anomalies == 1)[0]
    print("\n Anomalous Hours (LSTM-AE):")
    for idx in anomalous_indices:
        print(ts_test_seq.iloc[idx])

    _store_anomalies(room_id, anomalous_indices, ts_test_seq, df_test_seq)

    lstm_preds = (lstm_model.predict(X_test_seq) > 0.5).astype(int).flatten()
    # Ensemble flags a window when either model does.
    ensemble_preds = ((lstm_preds + AE_anomalies) > 0).astype(int)

    print("Supervised LSTM")
    print(classification_report(y_test_seq, lstm_preds))
    print("LSTM-AE")
    print(classification_report(y_test_seq, AE_anomalies))
    print("Ensemble")
    print(classification_report(y_test_seq, ensemble_preds))

    plt.figure(figsize=(10, 4))
    plt.plot(y_test_seq, label='True')
    plt.plot(lstm_preds, label='LSTM')
    plt.plot(AE_anomalies, label='AE')
    plt.plot(ensemble_preds, label='Ensemble')
    plt.legend()
    plt.title(f"Room {room_id} - Failure Prediction")
    plt.show()


def _store_anomalies(room_id, anomalous_indices, ts_test_seq, df_test_seq, max_attempts=5):
    """Write the flagged test rows to the ``anomalies`` table, skipping existing keys.

    Args:
        room_id: Room identifier stored with every row.
        anomalous_indices: Positions into ``ts_test_seq`` / ``df_test_seq``.
        ts_test_seq: Timestamps aligned with the test windows.
        df_test_seq: Rows aligned with the test windows; the feature columns
            are read from it.
        max_attempts: Tries per row when SQLite raises ``OperationalError``.
    """
    conn = sqlite3.connect("room_data.db", timeout=30)
    try:
        cursor = conn.cursor()
        cursor.execute("""
    CREATE TABLE IF NOT EXISTS anomalies (
      room_id INTEGER,
      timestamp DATETIME,
      temperature REAL,
      airquality REAL,
      daylight REAL,
      light INTEGER,
      source_model TEXT,
      PRIMARY KEY (room_id, timestamp, source_model)
    )
    """)
        for idx in anomalous_indices:
            ts = ts_test_seq.iloc[idx].strftime("%Y-%m-%d %H:%M:%S")
            row = df_test_seq.iloc[idx]
            for _ in range(max_attempts):
                try:
                    # Insert only when this (room, timestamp, model) key is new.
                    cursor.execute('''
                    SELECT COUNT(*) FROM anomalies
                    WHERE room_id = ? AND timestamp = ? AND source_model = ?
                ''', (room_id, ts, 'LSTM-AE'))
                    if cursor.fetchone()[0] == 0:
                        cursor.execute('''
                        INSERT INTO anomalies
                        (room_id, timestamp, temperature, airquality, daylight, light, source_model)
                        VALUES (?, ?, ?, ?, ?, ?, ?)
                    ''', (
                            room_id, ts, row['temperature'], row['airquality'], row['daylight'], row['light'], 'LSTM-AE'
                        ))
                    conn.commit()
                    break
                except sqlite3.OperationalError as e:
                    print(f"Retrying DB write due to: {e}")
                    time.sleep(1)
            else:
                # Every attempt failed: report it instead of dropping the row silently.
                print(f"Giving up on anomaly at {ts} for room {room_id} after {max_attempts} attempts")
    finally:
        # Closing the connection also releases its cursor, even on error.
        conn.close()



In [None]:
# IDs of the rooms to process, in the order they are run.
ROOM_IDS = [
    1, 2, 3, 4, 6, 7, 8, 9,
    20, 21, 27, 28, 29, 30,
    32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
    51, 56, 57, 59, 60,
    87, 88, 89, 90, 91, 92, 93, 94, 95,
    104, 105, 115, 116, 117,
    261, 297, 311,
]

for room_id in ROOM_IDS:
    process_room(room_id)