In [30]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_curve, auc, balanced_accuracy_score
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout



In [3]:
train_path = '../../feature_extracted_data/training_features_01.csv'
train_data = pd.read_csv(train_path)
train_data.shape

(51700, 560)

In [4]:
print(train_data.head())

   AN311_window_1_mean  AN311_window_1_std  AN311_window_1_min  \
0             3.392500            0.139142                 3.2   
1             3.372500            0.099132                 3.2   
2             3.356667            0.121610                 3.2   
3             3.380833            0.130573                 3.2   
4             3.355000            0.125731                 3.1   

   AN311_window_1_max  AN311_window_2_mean  AN311_window_2_std  \
0                 3.7             3.356667            0.121610   
1                 3.7             3.380833            0.130573   
2                 3.6             3.355000            0.125731   
3                 3.6             3.392500            0.152295   
4                 3.5             3.402500            0.145179   

   AN311_window_2_min  AN311_window_2_max  AN311_window_3_mean  \
0                 3.2                 3.6             3.355000   
1                 3.2                 3.6             3.392500   
2       

In [7]:
train_label_path = '../../extracted_data/train_labels_full_with_columns.csv'
train_labels = pd.read_csv(train_label_path)
train_labels.shape

(51700, 3)

In [9]:
train_labels.head()

Unnamed: 0,MM263,MM264,MM256
0,normal,normal,normal
1,normal,normal,normal
2,normal,normal,normal
3,normal,normal,normal
4,normal,normal,normal


In [11]:
test_data = pd.read_csv("../../feature_extracted_data/test_features_01.csv")
test_data.shape

(5076, 560)

In [13]:
test_data.head()

Unnamed: 0,AN311_window_1_mean,AN311_window_1_std,AN311_window_1_min,AN311_window_1_max,AN311_window_2_mean,AN311_window_2_std,AN311_window_2_min,AN311_window_2_max,AN311_window_3_mean,AN311_window_3_std,...,V_window_3_min,V_window_3_max,V_window_4_mean,V_window_4_std,V_window_4_min,V_window_4_max,V_window_5_mean,V_window_5_std,V_window_5_min,V_window_5_max
0,4.073333,0.124989,3.9,4.4,4.061667,0.105026,3.9,4.2,4.158333,0.104549,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,3.345833,0.246271,2.9,3.9,3.205,0.133448,2.8,3.7,3.274167,0.198954,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,4.148333,0.100816,3.9,4.3,4.056667,0.098939,3.9,4.2,4.1,0.125167,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3.1775,0.350823,2.7,4.0,3.215833,0.31649,2.7,3.9,3.261667,0.192,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,3.918333,0.172715,3.6,4.2,3.79,0.159896,3.6,4.2,3.873333,0.185173,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
test_labels = pd.read_csv("../../extracted_data/test_labels_full_with_columns.csv")
print(test_labels.shape)
print(test_labels.head())

(5076, 3)
    MM263   MM264    MM256
1  normal  normal   normal
2  normal  normal   normal
3  normal  normal   normal
4  normal  normal   normal


### Combine with original data & feature extracted

In [17]:
original_train_data = pd.read_csv("../../extracted_data/training_data_full_with_columns.csv")
original_train_data.shape

(51700, 16800)

In [18]:
original_test_data = pd.read_csv("../../extracted_data/test_data_full_with_columns.csv")
original_test_data.shape

(5076, 16800)

In [19]:
merged_train_df = pd.concat([original_train_data, train_data], axis=1)
merged_train_df.shape

(51700, 17360)

In [20]:
merged_test_df = pd.concat([original_test_data, test_data], axis=1)
merged_test_df.shape

(5076, 17360)

In [21]:
# Separate features and target
X_train = merged_train_df
y_train = train_labels['MM263']

X_test = merged_test_df
y_test = test_labels['MM263']

In [22]:
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(51700, 17360)
(51700,)
(5076, 17360)
(5076,)


In [34]:
# Convert DataFrame to numpy array before scaling and reshaping
X_train_array = X_train.to_numpy()
X_test_array = X_test.to_numpy()

# Scaling the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_array)
X_test_scaled = scaler.transform(X_test_array)

# Reshape inputs for LSTM [samples, time steps, features]
X_train_reshaped = X_train_scaled.reshape(X_train_scaled.shape[0], X_train_scaled.shape[1], 1)
X_test_reshaped = X_test_scaled.reshape(X_test_scaled.shape[0], X_test_scaled.shape[1], 1)


### Baseline model

In [36]:
def LSTM_model(dropout ):

    # Build LSTM model
    model = Sequential([
        LSTM(50, input_shape=(X_train_reshaped.shape[1], X_train_reshaped.shape[2]), return_sequences=True),
        Dropout(dropout),
        LSTM(50),
        Dropout(dropout),
        Dense(1, activation='sigmoid')
    ])
    
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['AUC'])
    
    # Train the model
    history = model.fit(X_train_reshaped, y_train, epochs=50, batch_size=64, 
                        validation_data=(X_test_reshaped, y_test), verbose=1)

    return model

In [None]:

model = LSTM_model(0.2)
# Predict probabilities
y_pred_probs = model.predict(X_test_reshaped)

# Convert probabilities to binary predictions
y_pred = (y_pred_probs > 0.5).astype(int)

# Calculate Balanced Accuracy
balanced_acc = balanced_accuracy_score(y_test, y_pred)
print(f'Balanced Accuracy: {balanced_acc:.4f}')

# Calculate AUC
auc_score = roc_auc_score(y_test, y_pred_probs)
print(f'AUC Score: {auc_score:.4f}')


  super().__init__(**kwargs)


### Model without cutter loader features

In [None]:
import pandas as pd

prefixes_to_remove = ['AMP1_IR', 'AMP2_IR', 'DMP3_IR', 'DMP4_IR', 'AMP5_IR', 'F_SIDE', 'V']

# Generate a list of columns to drop
columns_to_drop = [col for col in X_train.columns if any(col.startswith(prefix) for prefix in prefixes_to_remove)]

# Drop these columns from the DataFrame
X_train_new = X_train.drop(columns=columns_to_drop)

print(X_train_new.shape)
print(X_train_new.head())


In [None]:
columns_to_drop = [col for col in X_test.columns if any(col.startswith(prefix) for prefix in prefixes_to_remove)]

# Drop these columns from the DataFrame
X_test_new = X_test.drop(columns=columns_to_drop)

print(X_test_new.shape)
print(X_test_new.head())