In [1]:
# --- Import additional layers and tools for experimentation ---
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.layers import Bidirectional, Dropout, Input, Layer
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras import regularizers
import tensorflow.keras.backend as K
import pandas as pd
import numpy as np



In [8]:
from pathlib import Path
import pandas as pd
# Point at the sequence metadata CSV and report whether the file is present.
p_meta = Path(r"D:/PROGONSAI/milestone_2/week_4/Day_16/sequence_metadata_with_RUL.csv")
print("metadata exists:", p_meta.exists())

# Read the table, then show its dimensions and its first five rows.
meta = pd.read_csv(p_meta)
print("metadata shape:", meta.shape)
print(meta.head(5))

# Column names are listed so that any field describing the original sequence
# length or feature count can be spotted.
print("columns:", meta.columns.to_list())


metadata exists: True
metadata shape: (152559, 6)
   engine_id  cycle  max_cycle_x    RUL  max_cycle_y  max_cycle_new
0          1    8.0        321.0  313.0        321.0          321.0
1          1    8.0        321.0  313.0        321.0          321.0
2          1    9.0        321.0  312.0        321.0          321.0
3          1    9.0        321.0  312.0        321.0          321.0
4          1    9.0        321.0  312.0        321.0          321.0
columns: ['engine_id', 'cycle', 'max_cycle_x', 'RUL', 'max_cycle_y', 'max_cycle_new']


In [9]:
# Recovery helper: try factor pairs and show quick sanity checks
from pathlib import Path
import numpy as np
import pandas as pd
import math

# Inputs for the recovery: the float32 sequence dump, and the metadata table
# whose row count is taken as the number of windows.
p = Path(r"D:/PROGONSAI/milestone_2/week_4/Day_16/rolling_window_sequences_float32.npy")
meta = pd.read_csv(Path(r"D:/PROGONSAI/milestone_2/week_4/Day_16/sequence_metadata_with_RUL.csv"))
N = meta.shape[0]
print("N (windows) =", N)
if N == 0:
    # Without rows there is no window count to divide the flat length by.
    raise ValueError("metadata has no rows; cannot infer the per-window element count")

file_size = p.stat().st_size
print("file size (bytes) =", file_size)

# np.fromfile reads raw bytes and does not parse an NPY header. If the file was
# written by np.save, the header bytes would be misread as float32 values.
with p.open("rb") as fh:
    if fh.read(6) == b"\x93NUMPY":
        print("Warning: file starts with the NPY magic string; np.load would read it with its stored shape.")

# Step 1: load raw float32 flat array
arr = np.fromfile(str(p), dtype=np.float32)
print("flat length (elements) =", arr.size)
# arr.size is derived from the file size, so comparing the two cannot reveal a
# misaligned file; the byte remainder is the quantity that can.
if file_size % arr.itemsize != 0:
    print("Warning: file size not divisible by 4 exactly.")
# Floor division below would silently drop leftover elements, so report them.
if arr.size % N != 0:
    print("Warning: element count is not a multiple of N;", arr.size % N, "elements left over.")
per_window = arr.size // N
print("per_window elements =", per_window, "(should be 1980)")

# Step 2: enumerate (window_size, n_features) factorizations of per_window.
# Feature counts 1..200 are tried; a pair is kept only when the feature count
# divides per_window exactly and the resulting window size lies in 1..2000.
pairs = [
    (per_window // n_feat, n_feat)
    for n_feat in range(1, 201)
    if per_window % n_feat == 0 and 1 <= per_window // n_feat <= 2000
]
print("Candidate (window_size, n_features) pairs (first 40):")
print(pairs[:40])

# Step 3: try reshaping for each candidate and run sanity checks
def sanity_check(X):
    """Summarize the value range of a candidate reshape to help spot garbage.

    Args:
        X: Array to inspect; it is indexed as ``X[0]`` to obtain the first
            window.

    Returns:
        dict with the keys ``shape`` and ``dtype`` (taken from ``X``),
        ``global_min`` / ``global_max`` / ``global_mean`` (NaN-ignoring
        statistics over all of ``X``) and ``first_min`` / ``first_max`` /
        ``first_mean`` (the same statistics over ``X[0]``). All six
        statistics are converted to Python floats.
    """
    def _min_max_mean(values):
        # NaN-aware reductions, evaluated in min, max, mean order.
        return (
            float(np.nanmin(values)),
            float(np.nanmax(values)),
            float(np.nanmean(values)),
        )

    summary = {'shape': X.shape, 'dtype': X.dtype}

    # Whole-array statistics come first, then the first window (index 0).
    whole_min, whole_max, whole_mean = _min_max_mean(X)
    summary['global_min'] = whole_min
    summary['global_max'] = whole_max
    summary['global_mean'] = whole_mean

    head_min, head_max, head_mean = _min_max_mean(X[0])
    summary['first_min'] = head_min
    summary['first_max'] = head_max
    summary['first_mean'] = head_mean
    return summary

# Reshape the flat array for every candidate pair and print the sanity
# statistics for each one that fits the flat length exactly.
# NOTE(review): X, w, f and stats stay bound at notebook level after this loop.
# Unless pair_to_save is set further down, X is the LAST candidate reshape, and
# later cells that read X.shape pick up that candidate's dimensions.
attempts = []
for w, f in pairs:
    # Only an exact factorization of the flat length can be reshaped.
    if N * w * f != arr.size:
        continue
    try:
        X = arr.reshape((N, w, f))
        stats = sanity_check(X)
        attempts.append((w, f, stats))
        # Summary is printed for every candidate that reaches this point.
        print(f"\nTried reshape -> (N, {w}, {f}) = {X.shape}")
        print(" dtype:", stats['dtype'])
        print(" global min/max/mean:",
              *(stats[key] for key in ('global_min', 'global_max', 'global_mean')))
        print(" first window min/max/mean:",
              *(stats[key] for key in ('first_min', 'first_max', 'first_mean')))
        # Leading six values along axis 1 of window 0 at feature index 0.
        print(" sample of first window, feature 0 (first 6 values):", X[0, :6, 0].tolist())
    except Exception as err:
        print("Failed reshape for", (w, f), "->", err)

if len(attempts) == 0:
    raise RuntimeError("No valid reshape attempts — something unexpected. But arr length/division matched so there should be candidates.")

# Step 4: persist the recovered array once the right layout has been chosen.
# Leave pair_to_save as None to skip saving; otherwise set it to a
# (window_size, n_features) tuple, for example (30, 66).
pair_to_save = None   # <-- set this to the correct (window_size, n_features) AFTER you inspect above attempts

if pair_to_save is None:
    print("\nNo save performed. Inspect the printed candidate pairs above and set `pair_to_save` to the right (window_size, n_features).")
else:
    # Rebind X to the chosen layout, then write it to the working directory as
    # an uncompressed .npy and a compressed .npz (the two files use different
    # name stems).
    w, f = pair_to_save
    X = arr.reshape((N, w, f))
    print("Saving recovered array to rolling_window_sequences_float32_recovered.npy and .npz ...")
    np.save("rolling_window_sequences_float32_recovered.npy", X, allow_pickle=False)
    np.savez_compressed("rolling_window_sequences_recovered.npz", X=X)
    print("Saved shapes:", X.shape)


N (windows) = 152559
file size (bytes) = 1208267280
flat length (elements) = 302066820
per_window elements = 1980 (should be 1980)
Candidate (window_size, n_features) pairs (first 40):
[(1980, 1), (990, 2), (660, 3), (495, 4), (396, 5), (330, 6), (220, 9), (198, 10), (180, 11), (165, 12), (132, 15), (110, 18), (99, 20), (90, 22), (66, 30), (60, 33), (55, 36), (45, 44), (44, 45), (36, 55), (33, 60), (30, 66), (22, 90), (20, 99), (18, 110), (15, 132), (12, 165), (11, 180), (10, 198)]

Tried reshape -> (N, 1980, 1) = (152559, 1980, 1)
 dtype: float32
 global min/max/mean: -6.302419662475586 4.867166042327881 0.006761365570127964
 first window min/max/mean: -3.4811782836914062 2.4503772258758545 0.260800838470459
 sample of first window, feature 0 (first 6 values): [1.0759191513061523, 1.168420672416687, 0.3459184467792511, -1.1964118480682373, -0.9895810484886169, -0.917426347732544]

Tried reshape -> (N, 990, 2) = (152559, 990, 2)
 dtype: float32
 global min/max/mean: -6.302419662475586 

### 1. Deeper Stacked LSTM Model with Dropout and L2 Regularization

A deeper stacked LSTM model places multiple LSTM layers one after another, so the network can learn complex temporal dependencies and hierarchical features: lower layers capture short-term patterns and higher layers capture longer-term ones. To improve generalization and reduce overfitting, dropout is applied after each LSTM layer; it randomly drops neurons during training so the model cannot rely on any single unit, which is important for deep models with many parameters. Additionally, L2 regularization adds a penalty on the squared magnitude of the weights during training, encouraging smaller weights and controlling model complexity (in the code below it is applied to each LSTM's input kernel through `kernel_regularizer`). Together, stacked LSTM layers, dropout, and L2 regularization form a practical approach to building deep, robust, and expressive models for time-series forecasting tasks.

In [10]:
# Model with 2 stacked LSTM layers, dropout and L2 weight regularization
def build_stacked_lstm_model(input_shape, lstm_units=64, dropout_rate=0.3, l2_reg=1e-4):
    """Build an uncompiled two-layer stacked LSTM regressor.

    Args:
        input_shape: Shape of one sample without the batch axis; forwarded to
            ``Input(shape=...)``.
        lstm_units: Number of units in each of the two LSTM layers.
        dropout_rate: Rate of the Dropout layer placed after each LSTM.
        l2_reg: L2 factor applied to each LSTM's input kernel through
            ``kernel_regularizer`` (no recurrent or bias regularizer is set).

    Returns:
        A ``Sequential`` model whose last layer is ``Dense(1)``.
    """
    model = Sequential([
        # The input is declared as its own first entry instead of passing
        # `input_shape=` to the first LSTM, which Keras 3 warns against.
        Input(shape=input_shape),
        # First LSTM returns the full sequence so the second LSTM can consume it.
        LSTM(lstm_units, activation='tanh', return_sequences=True,
             kernel_regularizer=regularizers.l2(l2_reg)),
        Dropout(dropout_rate),
        # Second LSTM returns only its final output, which feeds the Dense head.
        LSTM(lstm_units, activation='tanh', return_sequences=False,
             kernel_regularizer=regularizers.l2(l2_reg)),
        Dropout(dropout_rate),
        Dense(1)
    ])
    return model

# Build the stacked model from axes 1 and 2 of X, compile it with Adam and an
# MSE loss, and print the layer summary.
# NOTE(review): X is not created in this cell; it is whatever an earlier cell
# left bound — confirm it holds the intended (window_size, n_features) layout.
stacked_input_shape = (X.shape[1], X.shape[2])
model_stacked = build_stacked_lstm_model(input_shape=stacked_input_shape)
model_stacked.compile(loss='mse', optimizer=Adam(learning_rate=0.001))
model_stacked.summary()


  super().__init__(**kwargs)


### 2. Bidirectional LSTM Model

A bidirectional LSTM (BiLSTM) enhances the traditional LSTM by processing input sequences in both forward (past to future) and backward (future to past) directions, enabling the model to access context from both preceding and succeeding time steps. This dual approach provides richer sequence understanding and captures dependencies that unidirectional LSTMs might miss, making BiLSTMs especially valuable in tasks like Remaining Useful Life (RUL) prediction and natural language processing. The outputs from both directions are combined—usually by concatenation or summation—to form a comprehensive representation at each time step. While bidirectional LSTMs improve context awareness and accuracy, they require the entire sequence upfront, leading to increased training time and computational complexity, thus being better suited for offline or batch processing rather than real-time streaming.

In [11]:
# Bidirectional LSTM layer to capture forward and backward temporal dependencies
def build_bidirectional_lstm_model(input_shape, lstm_units=64, dropout_rate=0.3):
    """Build an uncompiled bidirectional-LSTM regressor.

    Args:
        input_shape: Shape of one sample without the batch axis; forwarded to
            ``Input(shape=...)``.
        lstm_units: Number of units of the LSTM wrapped by ``Bidirectional``.
        dropout_rate: Rate of the Dropout layer placed before the output layer.

    Returns:
        A ``Sequential`` model whose last layer is ``Dense(1)``.
    """
    model = Sequential([
        # The input is declared as its own first entry instead of passing
        # `input_shape=` to the Bidirectional wrapper, which Keras 3 deprecates.
        Input(shape=input_shape),
        # return_sequences=False: only the final output of the wrapped LSTM is
        # passed on to the layers below.
        Bidirectional(LSTM(lstm_units, activation='tanh', return_sequences=False)),
        Dropout(dropout_rate),
        Dense(1)
    ])
    return model

### 3. Attention Mechanism Layer (Basic Additive Attention)

The attention mechanism enables neural networks to dynamically focus on the most relevant parts of an input sequence. It computes a score for each sequence element using learned weights and biases (in the general formulation these are alignment scores between the current state and each element; the layer defined below scores each LSTM hidden state directly). These scores are normalized via softmax to generate attention weights that highlight important time steps, producing a weighted sum that emphasizes critical temporal information instead of treating all inputs equally. This approach overcomes the limitations of fixed-length summaries in standard RNNs or LSTMs by selectively pooling important features, improving model capacity and accuracy in tasks like language translation and time-series forecasting. Implemented as an attention layer over LSTM hidden states, it assigns importance scores that help the model focus on key sequence parts indicative of outcomes like remaining useful life. It also adds interpretability, because the weights can be visualized to show which time frames influenced a prediction, which makes attention highly useful in predictive maintenance and RUL estimation.

In [12]:
# Simple Attention Layer Definition
class Attention(Layer):
    """Attention pooling that collapses axis 1 of its input into a weighted sum.

    A score is computed for every position along axis 1, the scores are
    softmax-normalized over that axis, and the input is summed along it using
    those weights.

    Assumes the input is rank 3, (batch, steps, features), with a fixed number
    of steps, since the bias is sized from ``input_shape[1]`` — TODO confirm
    against callers.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create the projection weight and the bias from the input shape."""
        feature_dim = input_shape[-1]
        n_steps = input_shape[1]
        # One projection column over the last axis of the input.
        self.W = self.add_weight(
            name='att_weight',
            shape=(feature_dim, 1),
            initializer='random_normal',
            trainable=True,
        )
        # One bias value per position along axis 1.
        self.b = self.add_weight(
            name='att_bias',
            shape=(n_steps, 1),
            initializer='zeros',
            trainable=True,
        )
        super().build(input_shape)

    def call(self, x):
        """Return ``x`` summed over axis 1 with softmax-normalized weights."""
        scores = K.tanh(K.dot(x, self.W) + self.b)
        weights = K.softmax(scores, axis=1)
        return K.sum(x * weights, axis=1)

# Model with attention after LSTM
from tensorflow.keras.models import Model

def build_lstm_attention_model(input_shape, lstm_units=64, dropout_rate=0.3):
    """Build an uncompiled functional model: LSTM -> Attention -> Dropout -> Dense(1).

    Args:
        input_shape: Shape of one sample without the batch axis; forwarded to
            ``Input(shape=...)``.
        lstm_units: Number of units in the LSTM layer.
        dropout_rate: Rate of the Dropout layer applied to the attention output.

    Returns:
        A ``Model`` mapping the input tensor to the ``Dense(1)`` output.
    """
    inputs = Input(shape=input_shape)
    # The LSTM keeps its full output sequence so the attention layer can
    # weight every step.
    hidden = LSTM(lstm_units, return_sequences=True)(inputs)
    hidden = Attention()(hidden)
    hidden = Dropout(dropout_rate)(hidden)
    outputs = Dense(1)(hidden)
    return Model(inputs=inputs, outputs=outputs)

# Build the LSTM + attention model from axes 1 and 2 of X, compile it with
# Adam and an MSE loss, and print the layer summary.
# NOTE(review): X is not created in this cell; it is whatever an earlier cell
# left bound — confirm it holds the intended (window_size, n_features) layout.
attention_input_shape = (X.shape[1], X.shape[2])
model_attention = build_lstm_attention_model(input_shape=attention_input_shape)
model_attention.compile(loss='mse', optimizer=Adam(learning_rate=0.001))
model_attention.summary()



