In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.regularizers import l2
from sklearn.preprocessing import MinMaxScaler
import os
import warnings
warnings.filterwarnings("ignore")

2025-12-11 07:28:41.372009: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1765438121.559506      20 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1765438121.614603      20 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

In [2]:
base_path = "/kaggle/input/cmapss1"
print(os.listdir(base_path))

['train_FD001.txt']


In [3]:
def load_and_process_data(filepath):
    print(f"Loading data from {filepath}...")
    
    cols = ['id', 'cycle', 'setting1', 'setting2', 'setting3'] + [f's{i}' for i in range(1, 22)]
    
    data = pd.read_csv(filepath, sep='\s+', header=None, names=cols)
    print(f"Original Data Shape: {data.shape}")

    # RUL
    rul = data.groupby('id')['cycle'].max().reset_index()
    rul.columns = ['id', 'max']
    data = data.merge(rul, on='id', how='left')
    data['RUL'] = data['max'] - data['cycle']
    data.drop('max', axis=1, inplace=True)
    
    print("RUL Calculation Complete.")
    return data

In [4]:
def process_for_training(data, sequence_length=50):
    print("Starting Normalization and Windowing...")
    # Excluding ID, Cycle,RUL 
    train_cols = [col for col in data.columns if col not in ['id', 'cycle', 'RUL']]
    
    # Normalize0-1
    scaler = MinMaxScaler()
    data[train_cols] = scaler.fit_transform(data[train_cols])
    print("Data Normalized.")

    sequences = []
    labels = []
    
    for engine_id in data['id'].unique():
        engine_data = data[data['id'] == engine_id]
        
        features = engine_data[train_cols].values
        target = engine_data['RUL'].values
        
        if len(engine_data) < sequence_length:
            continue
            
        for i in range(len(engine_data) - sequence_length):
            window_data = features[i : i + sequence_length]
            label_value = target[i + sequence_length]
            
            sequences.append(window_data)
            labels.append(label_value)
            
    return np.array(sequences), np.array(labels)

In [5]:
def create_lstm_model(input_shape):
    """
    Optimized Neural Network Architecture.
    Changes: Increased units, added BatchNormalization, and L2 Regularization.
    """
    model = Sequential([
        # Layer 1: Wider LSTM (128 units) + L2 Regularization
        LSTM(units=128, return_sequences=True, input_shape=input_shape, 
             kernel_regularizer=l2(0.001)),
        BatchNormalization(), 
        Dropout(0.3),         # Increased dropout to fight overfitting
        
        # Layer 2: Deep LSTM (64 units)
        LSTM(units=64, return_sequences=True, kernel_regularizer=l2(0.001)),
        BatchNormalization(),
        Dropout(0.3),
        
        # Layer 3: Final LSTM (32 units) 
        LSTM(units=32, return_sequences=False),
        Dropout(0.2),
        
        # Output Layer
        Dense(units=1)
    ])
    
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), 
        loss='mean_squared_error', 
        metrics=['mae', tf.keras.metrics.RootMeanSquaredError(name='rmse')]
    )
    
    return model

In [6]:
FILE_PATH = '/kaggle/input/cmapss1/train_FD001.txt' 
    
SEQUENCE_LENGTH = 50

df = load_and_process_data(FILE_PATH)

X_train, y_train = process_for_training(df, SEQUENCE_LENGTH)

model = create_lstm_model((X_train.shape[1], X_train.shape[2]))

# ReduceLROnPlateau
lr_scheduler = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    min_lr=0.00001,
    verbose=1
)

# EarlyStopping
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True
)

print("\nStarting Optimized Training...")
history = model.fit(
    X_train, y_train,
    epochs=40,            # Increased epochs because we have EarlyStopping
    batch_size=32,
    validation_split=0.1,
    callbacks=[lr_scheduler, early_stopping],
    verbose=1
)

Loading data from /kaggle/input/cmapss1/train_FD001.txt...
Original Data Shape: (20631, 26)
RUL Calculation Complete.
Starting Normalization and Windowing...
Data Normalized.


I0000 00:00:1765438137.326032      20 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0



Starting Optimized Training...
Epoch 1/40


I0000 00:00:1765438142.797511      61 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m440/440[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 14ms/step - loss: 8734.5459 - mae: 75.3386 - rmse: 93.4196 - val_loss: 9955.7285 - val_mae: 77.7182 - val_rmse: 99.7774 - learning_rate: 0.0010
Epoch 2/40
[1m440/440[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 12ms/step - loss: 6531.4419 - mae: 61.6680 - rmse: 80.8064 - val_loss: 13343.0918 - val_mae: 95.0364 - val_rmse: 115.5114 - learning_rate: 0.0010
Epoch 3/40
[1m440/440[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 12ms/step - loss: 5357.7129 - mae: 53.1248 - rmse: 73.1723 - val_loss: 6695.8828 - val_mae: 58.5845 - val_rmse: 81.8269 - learning_rate: 0.0010
Epoch 4/40
[1m440/440[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 12ms/step - loss: 4163.6494 - mae: 44.7736 - rmse: 64.5163 - val_loss: 5579.0400 - val_mae: 52.6633 - val_rmse: 74.6913 - learning_rate: 0.0010
Epoch 5/40
[1m440/440[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 12ms/step - loss: 3398.1924 - mae: 38.4340 - rm

In [7]:
model.summary()