In [1]:
import pandas as pd

# Location of the combined feature table, relative to the notebook's working directory.
zip_file_path = '../../extracted_data/combined_featured_data.zip'

# Load the data from the zip file
# (pandas decompresses on the fly; compression='zip' expects the archive to hold a single data file)
data = pd.read_csv(zip_file_path, compression='zip')

In [2]:
# Sanity check: (rows, columns) of the loaded frame, label column included.
data.shape

(51700, 563)

#### Normalize Data

In [4]:
from sklearn.preprocessing import StandardScaler

# Separate the predictors from the target column.
features = data.drop(['MM263'], axis=1)  # Assuming MM263 is the label column
labels = data['MM263']

# Standardise every feature column (zero mean, unit variance).
# NOTE(review): the scaler is fitted on all rows, before the train/test split is made,
# so held-out rows contribute to the scaling statistics (mild data leakage).
# Consider fitting on the training rows only.
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)


In [5]:
# Inspect the standardised feature matrix (NumPy elides the middle of large arrays in its repr).
features_scaled

array([[-1.20156351,  0.88297388, -0.97571639, ..., -0.39475857,
        -0.16299586, -0.16123654],
       [-1.31445384, -0.46731747, -0.97571639, ..., -0.39475857,
        -0.16299586, -0.16123654],
       [-1.40382535,  0.29128542, -0.97571639, ..., -0.39475857,
        -0.16299586, -0.16123654],
       ...,
       [ 0.28482581, -0.80073361,  0.59725092, ..., -0.39475857,
        -0.16299586, -0.16123654],
       [ 0.38830861, -0.56715774,  0.59725092, ..., -0.39475857,
        -0.16299586, -0.16123654],
       [ 0.47297636, -0.76938606,  0.59725092, ..., -0.39475857,
        -0.16299586, -0.16123654]])

#### Reshape data for LSTM

In [7]:
# Keras LSTM layers expect 3-D input: (samples, time steps, features).
# Every row becomes its own length-1 sequence.
# NOTE(review): with a single time step the LSTM sees no temporal context — confirm this is intended.
n_samples, n_features = features_scaled.shape
X = features_scaled.reshape(n_samples, 1, n_features)
y = labels.values

## Train/test split

In [9]:
# Hold out 20% of the rows for testing; random_state pins the shuffle so the split is repeatable.
# NOTE(review): the split is not stratified; the positive class is rare here, so consider stratify=y.

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


### Build

In [11]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, LSTM, Dense


# Baseline binary classifier: one 50-unit LSTM over a (time steps, features) sequence,
# followed by a single sigmoid unit that outputs the probability of the positive class.
# The input shape is declared with an explicit Input layer: passing `input_shape=` to the
# first layer is deprecated in Keras 3 and emits a UserWarning when the model is built.
base_model = Sequential([
    Input(shape=(X_train.shape[1], X_train.shape[2])),
    LSTM(50),
    Dense(1, activation='sigmoid'),  # sigmoid for binary classification
])
base_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])


  super().__init__(**kwargs)


### Train

In [13]:
# Train for 5 epochs in mini-batches of 32, logging one line per epoch.
# The held-out test split doubles as the validation set here, so the val_* figures
# are not independent of the final test score.
history_base_model = base_model.fit(
    X_train,
    y_train,
    epochs=5,
    batch_size=32,
    validation_data=(X_test, y_test),
    verbose=2,
)


Epoch 1/5
1293/1293 - 3s - 2ms/step - accuracy: 0.9926 - loss: 0.0330 - val_accuracy: 0.9946 - val_loss: 0.0215
Epoch 2/5
1293/1293 - 2s - 1ms/step - accuracy: 0.9955 - loss: 0.0169 - val_accuracy: 0.9945 - val_loss: 0.0201
Epoch 3/5
1293/1293 - 2s - 1ms/step - accuracy: 0.9954 - loss: 0.0146 - val_accuracy: 0.9948 - val_loss: 0.0195
Epoch 4/5
1293/1293 - 1s - 1ms/step - accuracy: 0.9963 - loss: 0.0123 - val_accuracy: 0.9941 - val_loss: 0.0173
Epoch 5/5
1293/1293 - 1s - 1ms/step - accuracy: 0.9964 - loss: 0.0109 - val_accuracy: 0.9950 - val_loss: 0.0173


### Evaluate

In [15]:
# evaluate() returns the loss followed by the compiled metrics (here: accuracy) on the test split.
# NOTE(review): the positive class is rare in this data, so accuracy alone says little;
# the per-class precision/recall report is the more informative check.
loss, accuracy = base_model.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {accuracy*100:.2f}%")


Test Accuracy: 99.50%


### Prediction

In [17]:
from sklearn.metrics import classification_report

# The sigmoid output is the probability of class 1; a sample is labelled positive above 0.5.
predicted_probabilities = base_model.predict(X_test)
predictions = (predicted_probabilities > 0.5).astype(int)

# Per-class precision / recall / F1 on the test split.
print(classification_report(y_test, predictions))


[1m324/324[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 499us/step
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     10242
           1       0.91      0.52      0.66        98

    accuracy                           0.99     10340
   macro avg       0.95      0.76      0.83     10340
weighted avg       0.99      0.99      0.99     10340



In [None]:
pip install tensorflow


### Model build without cutter-loader data

In [28]:
# Print every column name of the loaded frame (presumably to pick out the columns dropped below).
column_names = data.columns.tolist()
print(column_names)

['AN311_window_1_mean', 'AN311_window_1_std', 'AN311_window_1_min', 'AN311_window_1_max', 'AN311_window_2_mean', 'AN311_window_2_std', 'AN311_window_2_min', 'AN311_window_2_max', 'AN311_window_3_mean', 'AN311_window_3_std', 'AN311_window_3_min', 'AN311_window_3_max', 'AN311_window_4_mean', 'AN311_window_4_std', 'AN311_window_4_min', 'AN311_window_4_max', 'AN311_window_5_mean', 'AN311_window_5_std', 'AN311_window_5_min', 'AN311_window_5_max', 'AN422_window_1_mean', 'AN422_window_1_std', 'AN422_window_1_min', 'AN422_window_1_max', 'AN422_window_2_mean', 'AN422_window_2_std', 'AN422_window_2_min', 'AN422_window_2_max', 'AN422_window_3_mean', 'AN422_window_3_std', 'AN422_window_3_min', 'AN422_window_3_max', 'AN422_window_4_mean', 'AN422_window_4_std', 'AN422_window_4_min', 'AN422_window_4_max', 'AN422_window_5_mean', 'AN422_window_5_std', 'AN422_window_5_min', 'AN422_window_5_max', 'AN423_window_1_mean', 'AN423_window_1_std', 'AN423_window_1_min', 'AN423_window_1_max', 'AN423_window_2_mean

In [30]:
# Columns excluded from the second experiment. Each excluded signal contributes
# 5 rolling windows x 4 statistics = 20 columns named '<signal>_window_<k>_<stat>'.
# Generating the names keeps the list auditable; the result is the same 140 names,
# in the same order, as the hand-written list it replaces.
excluded_signals = ['AMP1_IR', 'AMP2_IR', 'DMP3_IR', 'DMP4_IR', 'AMP5_IR', 'F_SIDE', 'V']
window_ids = range(1, 6)
window_stats = ['mean', 'std', 'min', 'max']

columns_to_remove = [
    f'{signal}_window_{window}_{stat}'
    for signal in excluded_signals
    for window in window_ids
    for stat in window_stats
]


In [32]:
# Shape before the listed columns are dropped, for comparison with the filtered frame.
data.shape

(51700, 563)

In [34]:
# Ensure all listed columns are in the DataFrame columns to avoid errors
columns_to_remove = [col for col in columns_to_remove if col in data.columns]
data_filtered = data.drop(columns_to_remove, axis=1)
data_filtered.shape

(51700, 423)

### Data normalize and reshape

In [36]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Target column ('MM263', assumed to be the label) versus the remaining predictors.
labels = data_filtered['MM263']
features = data_filtered.drop(columns=['MM263'])

# Standardise each predictor column.
# NOTE(review): as in the first experiment, the scaler is fitted on all rows before the
# split, so test rows influence the scaling statistics.
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

# LSTM input layout is (samples, time steps, features); each row is a length-1 sequence.
n_samples, n_features = features_scaled.shape
X = features_scaled.reshape(n_samples, 1, n_features)
y = labels.values

# Same split parameters as the first experiment.
# NOTE(review): this rebinds X_train / X_test / y_train / y_test, so the full-feature split
# that base_model was trained on is no longer available under those names after this cell.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [38]:
# Import from tensorflow.keras, matching the imports used for base_model.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, LSTM, Dense

# Same architecture as base_model, sized for the reduced feature set:
# one 50-unit LSTM, then a single sigmoid unit giving the positive-class probability.
# The input shape is declared with an explicit Input layer: passing `input_shape=` to the
# first layer is deprecated in Keras 3 and emits a UserWarning when the model is built.
model_without_loader = Sequential([
    Input(shape=(X_train.shape[1], X_train.shape[2])),
    LSTM(50),
    Dense(1, activation='sigmoid'),  # sigmoid for binary classification
])
model_without_loader.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])


  super().__init__(**kwargs)


In [44]:
# Train with the same settings as base_model (5 epochs, batch size 32); the test split
# is again used as validation data.
# NOTE(review): the saved execution counters show this cell ran after the evaluation cell,
# and the logged first epoch already starts at a very low loss, so the model was probably
# fitted more than once. Restart the kernel and run all cells before comparing the two models.
history_filtered_data = model_without_loader.fit(
    X_train,
    y_train,
    epochs=5,
    batch_size=32,
    validation_data=(X_test, y_test),
    verbose=2,
)


Epoch 1/5
1293/1293 - 1s - 860us/step - accuracy: 0.9965 - loss: 0.0107 - val_accuracy: 0.9954 - val_loss: 0.0149
Epoch 2/5
1293/1293 - 1s - 930us/step - accuracy: 0.9967 - loss: 0.0096 - val_accuracy: 0.9948 - val_loss: 0.0179
Epoch 3/5
1293/1293 - 1s - 910us/step - accuracy: 0.9968 - loss: 0.0094 - val_accuracy: 0.9951 - val_loss: 0.0153
Epoch 4/5
1293/1293 - 1s - 899us/step - accuracy: 0.9968 - loss: 0.0088 - val_accuracy: 0.9954 - val_loss: 0.0148
Epoch 5/5
1293/1293 - 1s - 894us/step - accuracy: 0.9971 - loss: 0.0077 - val_accuracy: 0.9955 - val_loss: 0.0129


In [42]:
# evaluate() returns the loss followed by the compiled metrics (here: accuracy) on the test split.
# NOTE(review): this rebinds `loss` and `accuracy` from the base-model evaluation; accuracy alone
# is a weak measure when the positive class is rare.
loss, accuracy = model_without_loader.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {accuracy*100:.2f}%")


Test Accuracy: 99.51%


In [46]:
# Predict probabilities for the positive class
probabilities_model1 = base_model.predict(X_test)[:, 1]  # Adjust depending on your model's output
probabilities_model2 = model_without_loader.predict(X_test)[:, 1]


2024-10-04 03:41:08.420880: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: INVALID_ARGUMENT: Matrix size-incompatible: In[0]: [32,422], In[1]: [562,200]
	 [[{{function_node sequential_1_lstm_1_while_body_58551}}{{node sequential_1/lstm_1/while/lstm_cell_1/MatMul}}]]


InvalidArgumentError: Graph execution error:

Detected at node sequential_1/lstm_1/while/lstm_cell_1/MatMul defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "/opt/anaconda3/lib/python3.12/site-packages/ipykernel_launcher.py", line 17, in <module>

  File "/opt/anaconda3/lib/python3.12/site-packages/traitlets/config/application.py", line 1075, in launch_instance

  File "/opt/anaconda3/lib/python3.12/site-packages/ipykernel/kernelapp.py", line 701, in start

  File "/opt/anaconda3/lib/python3.12/site-packages/tornado/platform/asyncio.py", line 205, in start

  File "/opt/anaconda3/lib/python3.12/asyncio/base_events.py", line 641, in run_forever

  File "/opt/anaconda3/lib/python3.12/asyncio/base_events.py", line 1987, in _run_once

  File "/opt/anaconda3/lib/python3.12/asyncio/events.py", line 88, in _run

  File "/opt/anaconda3/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 534, in dispatch_queue

  File "/opt/anaconda3/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 523, in process_one

  File "/opt/anaconda3/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 429, in dispatch_shell

  File "/opt/anaconda3/lib/python3.12/site-packages/ipykernel/kernelbase.py", line 767, in execute_request

  File "/opt/anaconda3/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 429, in do_execute

  File "/opt/anaconda3/lib/python3.12/site-packages/ipykernel/zmqshell.py", line 549, in run_cell

  File "/opt/anaconda3/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3075, in run_cell

  File "/opt/anaconda3/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3130, in _run_cell

  File "/opt/anaconda3/lib/python3.12/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner

  File "/opt/anaconda3/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3334, in run_cell_async

  File "/opt/anaconda3/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3517, in run_ast_nodes

  File "/opt/anaconda3/lib/python3.12/site-packages/IPython/core/interactiveshell.py", line 3577, in run_code

  File "/var/folders/ph/mqs9kl656hv6_7dcqk4stw900000gn/T/ipykernel_12111/2472787888.py", line 2, in <module>

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 512, in predict

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 208, in one_step_on_data_distributed

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 198, in one_step_on_data

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/backend/tensorflow/trainer.py", line 96, in predict_step

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/layers/layer.py", line 901, in __call__

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/ops/operation.py", line 46, in __call__

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/utils/traceback_utils.py", line 156, in error_handler

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/models/sequential.py", line 212, in call

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/models/functional.py", line 175, in call

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/ops/function.py", line 171, in _run_through_graph

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/models/functional.py", line 560, in call

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/layers/layer.py", line 901, in __call__

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/ops/operation.py", line 46, in __call__

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/utils/traceback_utils.py", line 156, in error_handler

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/layers/rnn/lstm.py", line 570, in call

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/layers/rnn/rnn.py", line 406, in call

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/layers/rnn/lstm.py", line 565, in inner_loop

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/layers/rnn/rnn.py", line 346, in inner_loop

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/backend/tensorflow/rnn.py", line 428, in rnn

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/backend/tensorflow/rnn.py", line 411, in _step

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/layers/rnn/rnn.py", line 338, in step

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/layers/layer.py", line 901, in __call__

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/ops/operation.py", line 46, in __call__

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/utils/traceback_utils.py", line 156, in error_handler

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/layers/rnn/lstm.py", line 264, in call

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/ops/numpy.py", line 3445, in matmul

  File "/opt/anaconda3/lib/python3.12/site-packages/keras/src/backend/tensorflow/numpy.py", line 477, in matmul

Matrix size-incompatible: In[0]: [32,422], In[1]: [562,200]
	 [[{{node sequential_1/lstm_1/while/lstm_cell_1/MatMul}}]] [Op:__inference_one_step_on_data_distributed_58641]