In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score, f1_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, LSTM, Dense, InputLayer, Activation
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [2]:
def create_sequences(df, features, seq_len):
    X, y = [], []
    data = df[features + ['label']].fillna(0).values.astype(np.float32)
    for i in range(len(data) - seq_len):
        X.append(data[i:i+seq_len, :-1])
        y.append(data[i+seq_len-1, -1])
    return np.array(X), np.array(y)

In [None]:
# 2. Load and Prepare Data
df = pd.read_csv(r'C:\Surya\RVU\mahe-hackathon\model\modbus_packet_features_dpkt_labeled.csv')  # Use your filename
direction_encoded = pd.get_dummies(df['direction'], prefix='direction')
df = pd.concat([df.drop('direction', axis=1), direction_encoded], axis=1)
features = ['length', 'modbus_func_code', 'unit_id', 'iat','transaction_id'] + list(direction_encoded.columns)
SEQ_LEN = 10

X, y = create_sequences(df, features, SEQ_LEN)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=True)

In [6]:
# 3. Build CNN-LSTM Model in Keras
model = Sequential([
    InputLayer(shape=(SEQ_LEN, len(features))),
    Conv1D(filters=32, kernel_size=3, padding='same', activation='relu'),
    LSTM(64),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [7]:
# 4. Callbacks for Early Stopping & Model Saving
callbacks = [
    EarlyStopping(patience=3, restore_best_weights=True, verbose=1),
    ModelCheckpoint('keras_cnn_lstm_model.h5', save_best_only=True, monitor='val_loss', verbose=1)
]

In [11]:
# 5. Train Model
history = model.fit(
    X_train, y_train, 
    epochs=20,
    batch_size=64,
    validation_split=0.1,
    callbacks=callbacks,
    verbose=2
)

Epoch 1/20

Epoch 1: val_loss improved from inf to 0.01186, saving model to keras_cnn_lstm_model.h5




94499/94499 - 286s - 3ms/step - accuracy: 0.9983 - loss: 0.0118 - val_accuracy: 0.9984 - val_loss: 0.0119
Epoch 2/20

Epoch 2: val_loss did not improve from 0.01186
94499/94499 - 285s - 3ms/step - accuracy: 0.9984 - loss: 0.0115 - val_accuracy: 0.9984 - val_loss: 0.0120
Epoch 3/20

Epoch 3: val_loss did not improve from 0.01186
94499/94499 - 281s - 3ms/step - accuracy: 0.9984 - loss: 0.0119 - val_accuracy: 0.9984 - val_loss: 0.0120
Epoch 4/20

Epoch 4: val_loss did not improve from 0.01186
94499/94499 - 281s - 3ms/step - accuracy: 0.9984 - loss: 0.0119 - val_accuracy: 0.9984 - val_loss: 0.0119
Epoch 4: early stopping
Restoring model weights from the end of the best epoch: 1.


In [12]:
# 6. Evaluate Model
y_pred_prob = model.predict(X_test).ravel()
y_pred = (y_pred_prob >= 0.5).astype('int')
acc = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
try:
    auc = roc_auc_score(y_test, y_pred_prob)
except Exception:
    auc = float('nan')
print(f'Test Accuracy: {acc:.4f}')
print(f'Test F1-score: {f1:.4f}')
print(f'Test ROC-AUC: {auc:.4f}')

[1m52500/52500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 1ms/step
Test Accuracy: 0.9984
Test F1-score: 0.9230
Test ROC-AUC: 0.8919
