In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
# Load data
df = pd.read_csv(r"E:\Milan/time_series_dataset24.csv")
df

Unnamed: 0,PIN1,PTC,PDT1,PDT2,PGV1,PGV2,PGV3,PDT3,PDT4,Patm,...,FricTorque,Pin,Pdiff,GV,AGV,ATB1,ATB2,Group,Stage,Head
0,0.002862,0.001300,0.004675,0.004430,0.006501,0.005931,0.006195,0.020413,0.024108,6.285425,...,1.246962,6.118898,6.649683,2.636216,0.297385,-0.163528,0.224599,DPL,1,24
1,0.002862,0.001297,0.004674,0.004431,0.006519,0.005950,0.006197,0.020705,0.022757,6.291337,...,1.250575,6.142219,6.677930,2.635310,0.205243,-0.051941,0.307536,DPL,1,24
2,0.002862,0.001292,0.004673,0.004432,0.006524,0.005949,0.006208,0.021536,0.021985,6.287724,...,1.245649,6.096892,6.639173,2.634754,0.124694,-0.024502,0.252041,DPL,1,24
3,0.002863,0.001287,0.004673,0.004432,0.006537,0.005971,0.006211,0.021012,0.020271,6.283783,...,1.266670,6.125139,6.676617,2.634428,0.108219,-0.042185,0.274605,DPL,1,24
4,0.002865,0.001284,0.004673,0.004431,0.006531,0.005995,0.006229,0.019912,0.018715,6.288381,...,1.247619,6.032514,6.704535,2.634260,0.229651,-0.048283,0.203255,DPL,1,24
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13823995,0.002673,0.001582,0.004092,0.003841,0.007138,0.006577,0.006932,0.021125,-0.006264,6.318599,...,1.142187,5.881754,6.534067,4.384009,0.244296,0.208429,0.197766,FL,9,24
13823996,0.002672,0.001587,0.004089,0.003838,0.007111,0.006554,0.006877,0.022708,-0.005751,6.314329,...,1.134304,5.889965,6.556730,4.383941,0.122864,0.210258,0.222160,FL,9,24
13823997,0.002671,0.001589,0.004089,0.003834,0.007081,0.006527,0.006842,0.027942,-0.002138,6.316629,...,1.127078,5.876498,6.520928,4.383734,0.261382,0.145623,0.202035,FL,9,24
13823998,0.002670,0.001585,0.004087,0.003829,0.007069,0.006505,0.006827,0.026696,-0.002301,6.320242,...,1.135289,5.878797,6.571511,4.383563,0.193038,0.041963,0.208134,FL,9,24


In [3]:
df = df.drop(['PIN1','PTC','WTmp','Head','Patm','rpm','Speed','Flow','Thrust','GenTorque','FricTorque','Pin','Pdiff','GV','AGV','Group','PDT2','PGV1','PGV3','PDT4'], axis= 1)
print(df)

              PDT1      PGV2      PDT3      ATB1      ATB2  Stage
0         0.004675  0.005931  0.020413 -0.163528  0.224599      1
1         0.004674  0.005950  0.020705 -0.051941  0.307536      1
2         0.004673  0.005949  0.021536 -0.024502  0.252041      1
3         0.004673  0.005971  0.021012 -0.042185  0.274605      1
4         0.004673  0.005995  0.019912 -0.048283  0.203255      1
...            ...       ...       ...       ...       ...    ...
13823995  0.004092  0.006577  0.021125  0.208429  0.197766      9
13823996  0.004089  0.006554  0.022708  0.210258  0.222160      9
13823997  0.004089  0.006527  0.027942  0.145623  0.202035      9
13823998  0.004087  0.006505  0.026696  0.041963  0.208134      9
13823999  0.004087  0.006499  0.024260  0.105378  0.202035      9

[13824000 rows x 6 columns]


In [4]:
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import train_test_split
import numpy as np

# --- Load Full Dataset ---
features = ['PDT1', 'PGV2', 'PDT3', 'ATB1', 'ATB2']
X = df[features].values
y = df['Stage'].values

# --- Normalization ---
scaler = RobustScaler()
X_scaled = scaler.fit_transform(X)

# --- Shift Labels to 0-based ---
y = y - 1

# --- Time Series Windowing ---
window_size = 30
stride = 5

X_windows = []
y_windows = []

for i in range(0, len(X_scaled) - window_size, stride):
    X_windows.append(X_scaled[i:i + window_size])
    y_windows.append(y[i + window_size - 1])

X_windows = np.array(X_windows)
y_windows = np.array(y_windows)

print("\n✅ After windowing → Found stages and counts:")
unique, counts = np.unique(y_windows, return_counts=True)
for stage, count in zip(unique + 1, counts):
    print(f"Stage {stage}: {count} samples")

# --- ✅ Subsample (Optional) Only if ALL 9 stages are present ---
if len(unique) < 9:
    print("\n⚠️ Not all stages present. Skipping subsampling to preserve rare classes.")
else:
    print("\n✅ All stages present. Subsampling dataset to reduce size.")
    X_windows, _, y_windows, _ = train_test_split(
        X_windows, y_windows,
        test_size=0.5,
        random_state=42,
        stratify=y_windows
    )

# --- Train/Test Split (Stratify to balance stages) ---
X_train, X_test, y_train, y_test = train_test_split(
    X_windows, y_windows,
    test_size=0.2,
    random_state=42,
    stratify=y_windows
)

# --- Final Stage Counts in Train/Test ---
print("\n✅ Final train/test stage distribution:")
train_stages, train_counts = np.unique(y_train, return_counts=True)
test_stages, test_counts = np.unique(y_test, return_counts=True)

for stage, train_c, test_c in zip(train_stages + 1, train_counts, test_counts):
    print(f"Stage {stage}: Train = {train_c}, Test = {test_c}")

# --- Output Final Shapes ---
print("\n✅ Final dataset shapes:")
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)



✅ After windowing → Found stages and counts:
Stage 1: 307195 samples
Stage 2: 307200 samples
Stage 3: 307200 samples
Stage 4: 307200 samples
Stage 5: 307200 samples
Stage 6: 307200 samples
Stage 7: 307200 samples
Stage 8: 307200 samples
Stage 9: 307199 samples

✅ All stages present. Subsampling dataset to reduce size.

✅ Final train/test stage distribution:
Stage 1: Train = 122877, Test = 30720
Stage 2: Train = 122880, Test = 30720
Stage 3: Train = 122880, Test = 30720
Stage 4: Train = 122880, Test = 30720
Stage 5: Train = 122880, Test = 30720
Stage 6: Train = 122880, Test = 30720
Stage 7: Train = 122880, Test = 30720
Stage 8: Train = 122880, Test = 30720
Stage 9: Train = 122880, Test = 30720

✅ Final dataset shapes:
X_train shape: (1105917, 30, 5)
X_test shape: (276480, 30, 5)
y_train shape: (1105917,)
y_test shape: (276480,)


In [5]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

In [6]:
import tensorflow as tf
from tensorflow.keras import layers, models
from keras_tuner import HyperParameters
from keras_tuner.tuners import RandomSearch
from tensorflow.keras.callbacks import EarlyStopping

def build_model(hp):
    model = models.Sequential()

    # Conv1D Layer 1
    model.add(layers.Conv1D(
        filters=hp.Int('conv1_filters', 64, 256, step=32),
        kernel_size=hp.Choice('conv1_kernel', [3, 12]),
        activation=hp.Choice('conv1_activation', ['relu', 'tanh']),
        padding='same',
        input_shape=(30, 5)
    ))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling1D(pool_size=2))

    # Conv1D Layer 2
    model.add(layers.Conv1D(
        filters=hp.Int('conv2_filters', 64, 512, step=64),
        kernel_size=hp.Choice('conv2_kernel', [3, 8]),
        activation=hp.Choice('conv2_activation', ['relu', 'tanh']),
        padding='same'
    ))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling1D(pool_size=2))

    # Conv1D Layer 3
    model.add(layers.Conv1D(
        filters=hp.Int('conv3_filters', 64, 512, step=64),
        kernel_size=hp.Choice('conv3_kernel', [3, 5]),
        activation=hp.Choice('conv3_activation', ['relu', 'tanh']),
        padding='same'
    ))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling1D(pool_size=2))

    # LSTM Layer 1
    model.add(layers.LSTM(
        units=hp.Int('lstm1_units', 32, 128, step=32),
        return_sequences=True,
        dropout=hp.Float('lstm1_dropout', 0.0, 0.5, step=0.1)
    ))

    # LSTM Layer 2
    model.add(layers.LSTM(
        units=hp.Int('lstm2_units', 32, 128, step=32),
        return_sequences=False,
        dropout=hp.Float('lstm2_dropout', 0.0, 0.5, step=0.1)
    ))

    # Dense Layer 1
    model.add(layers.Dense(
        units=hp.Int('dense1_units', 64, 256, step=64),
        activation=hp.Choice('dense1_activation', ['relu', 'tanh'])
    ))
    model.add(layers.BatchNormalization())

    # Dense Layer 2
    model.add(layers.Dense(
        units=hp.Int('dense2_units', 32, 128, step=32),
        activation=hp.Choice('dense2_activation', ['relu', 'tanh'])
    ))
    model.add(layers.BatchNormalization())

    # Output Layer
    model.add(layers.Dense(9, activation='softmax'))  # 9 classes

    # Tunable Optimizer (Fixed Learning Rate)
    optimizer_choice = hp.Choice('optimizer', ['adam', 'nadam', 'rmsprop'])
    fixed_lr = 1e-3  # Fixed learning rate

    if optimizer_choice == 'adam':
        optimizer = tf.keras.optimizers.Adam(learning_rate=fixed_lr)
    elif optimizer_choice == 'nadam':
        optimizer = tf.keras.optimizers.Nadam(learning_rate=fixed_lr)
    else:
        optimizer = tf.keras.optimizers.RMSprop(learning_rate=fixed_lr)

    model.compile(
        optimizer=optimizer,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    return model

    


In [7]:
# Step 2: Set up the tuner
tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=10,
    executions_per_trial=1,
    directory='my_tuner_results',
    project_name='cnn_rnn_fault_detection'
)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [8]:
#  Train with tuner
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

In [9]:
tuner.search(
    X_train, y_train,
    epochs=15,
    validation_split=0.2,
    batch_size= 128,
    callbacks=[early_stop],
    verbose=2
)


Trial 10 Complete [03h 21m 37s]
val_accuracy: 0.7947365045547485

Best val_accuracy So Far: 0.8114556074142456
Total elapsed time: 1d 06h 45m 40s


In [10]:
tuner.results_summary()

Results summary
Results in my_tuner_results\cnn_rnn_fault_detection
Showing 10 best trials
Objective(name="val_accuracy", direction="max")

Trial 03 summary
Hyperparameters:
conv1_filters: 256
conv1_kernel: 12
conv1_activation: tanh
conv2_filters: 320
conv2_kernel: 3
conv2_activation: relu
conv3_filters: 256
conv3_kernel: 5
conv3_activation: relu
lstm1_units: 96
lstm1_dropout: 0.0
lstm2_units: 128
lstm2_dropout: 0.1
dense1_units: 192
dense1_activation: relu
dense2_units: 96
dense2_activation: tanh
optimizer: nadam
Score: 0.8114556074142456

Trial 09 summary
Hyperparameters:
conv1_filters: 96
conv1_kernel: 3
conv1_activation: relu
conv2_filters: 448
conv2_kernel: 8
conv2_activation: tanh
conv3_filters: 384
conv3_kernel: 3
conv3_activation: relu
lstm1_units: 32
lstm1_dropout: 0.0
lstm2_units: 64
lstm2_dropout: 0.2
dense1_units: 192
dense1_activation: tanh
dense2_units: 32
dense2_activation: relu
optimizer: nadam
Score: 0.7947365045547485

Trial 08 summary
Hyperparameters:
conv1_filters: 

In [11]:
# Step 5: Get the best model
best_model = tuner.get_best_models(num_models=1)[0]
best_hps = tuner.get_best_hyperparameters(1)[0]

print("\n✅ Best Hyperparameters Found:")
for param in best_hps.values:
    print(f"{param}: {best_hps.get(param)}")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



✅ Best Hyperparameters Found:
conv1_filters: 256
conv1_kernel: 12
conv1_activation: tanh
conv2_filters: 320
conv2_kernel: 3
conv2_activation: relu
conv3_filters: 256
conv3_kernel: 5
conv3_activation: relu
lstm1_units: 96
lstm1_dropout: 0.0
lstm2_units: 128
lstm2_dropout: 0.1
dense1_units: 192
dense1_activation: relu
dense2_units: 96
dense2_activation: tanh
optimizer: nadam


  saveable.load_own_variables(weights_store.get(inner_path))
