In [1]:
import pandas as pd
import os
import tensorflow as tf
import numpy as np

In [2]:
def read_data(folder):
    """Get X and Y from csv files."""
    X = []
    Y = []
    for filename in os.listdir(folder):
        if filename.startswith('drowsy'):
            ylabel = 1
        else:
            ylabel = 0
        file_path = os.path.join(folder, filename)
        df = pd.read_csv(file_path, header=None)  # assuming no header in CSV files
        X.extend(df.values)
        Y.extend([ylabel] * len(df))
    return X, Y

In [3]:
X = []
Y = []
for i in [1, 2, 3, 4]:
    folder_path = f'Data2\\Fold{i}'
    X_part, Y_part = read_data(folder_path)
    X.extend(X_part)
    Y.extend(Y_part)

X = np.array(X)
Y = np.array(Y)

In [4]:
features_selected = [1, 3, 5, 7]  # Modify this according to your selected features
X_selected = X[:, features_selected]

In [80]:
X_selected_tensor = tf.convert_to_tensor(X_selected, dtype=tf.float32)
Y_tensor = tf.convert_to_tensor(Y, dtype=tf.int32)

# Determine sizes for train, validation, and test sets
train_size = int(0.75 * len(X_selected))
val_size = int(0.02 * len(X_selected))
test_size = len(X_selected) - train_size - val_size

# Split data into train, validation, and test sets
X_train, X_val, X_test = tf.split(X_selected_tensor, [train_size, val_size, test_size])
Y_train, Y_val, Y_test = tf.split(Y_tensor, [train_size, val_size, test_size])

In [81]:
X_train = tf.expand_dims(X_train, axis=-1)
X_val = tf.expand_dims(X_val, axis=-1)
X_test = tf.expand_dims(X_test, axis=-1)

In [46]:
# lstm stacked
model = tf.keras.Sequential([
    tf.keras.layers.LSTM(64, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
    tf.keras.layers.Dropout(0.2),  # Add dropout layer with dropout rate of 0.2
    tf.keras.layers.BatchNormalization(),  # Add batch normalization layer

    tf.keras.layers.LSTM(64, return_sequences=True),
    tf.keras.layers.Dropout(0.2),  # Add dropout layer with dropout rate of 0.2
    tf.keras.layers.BatchNormalization(),  # Add batch normalization layer

    tf.keras.layers.LSTM(64),
    tf.keras.layers.Dropout(0.2),  # Add dropout layer with dropout rate of 0.2
    tf.keras.layers.BatchNormalization(),  # Add batch normalization layer

    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dropout(0.2),  # Add dropout layer with dropout rate of 0.2
    tf.keras.layers.BatchNormalization(),  # Add batch normalization layer

    tf.keras.layers.Dense(1, activation='sigmoid')
])

In [66]:
# lstm
model = tf.keras.Sequential([
    tf.keras.layers.LSTM(64, input_shape=(X_train.shape[1], X_train.shape[2])),
    tf.keras.layers.Dropout(0.2),  # Add dropout layer with dropout rate of 0.2
    tf.keras.layers.BatchNormalization(),  # Add batch normalization layer

    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dropout(0.2),  # Add dropout layer with dropout rate of 0.2
    tf.keras.layers.BatchNormalization(),  # Add batch normalization layer

    tf.keras.layers.Dense(1, activation='sigmoid')
])

  super().__init__(**kwargs)


In [34]:
model = tf.keras.Sequential([
    tf.keras.layers.SimpleRNN(64, unroll = True, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
    tf.keras.layers.Dropout(0.2), 
    tf.keras.layers.BatchNormalization(),  # Add batch normalization layer

    tf.keras.layers.SimpleRNN(64, unroll = True, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
    tf.keras.layers.Dropout(0.2),  
    tf.keras.layers.BatchNormalization(),  # Add batch normalization layer

    tf.keras.layers.SimpleRNN(64, unroll = True, input_shape=(X_train.shape[1], X_train.shape[2])),
    tf.keras.layers.Dropout(0.2),  
    
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dropout(0.2),  
    
    tf.keras.layers.Dense(1, activation='sigmoid')
])

In [None]:
model = tf.keras.Sequential([
    tf.keras.layers.SimpleRNN(64, unroll = True, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
    tf.keras.layers.Dropout(0.2), 
    tf.keras.layers.BatchNormalization(),  # Add batch normalization layer

    tf.keras.layers.SimpleRNN(64, unroll = True, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
    tf.keras.layers.Dropout(0.2),  
    tf.keras.layers.BatchNormalization(),  # Add batch normalization layer

    tf.keras.layers.SimpleRNN(64, unroll = True, input_shape=(X_train.shape[1], X_train.shape[2])),
    tf.keras.layers.Dropout(0.2),  
    
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dropout(0.2),  
    
    tf.keras.layers.Dense(1, activation='sigmoid')
])

In [67]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [68]:
# history = model.fit(X_train, Y_train, epochs=5, batch_size=32, validation_data=(X_val, Y_val))

history = model.fit(X_train, Y_train, epochs = 2, batch_size = 100, validation_data = (X_val, Y_val), verbose = 1)

Epoch 1/2
[1m40184/40184[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m157s[0m 4ms/step - accuracy: 0.6571 - loss: 0.5923 - val_accuracy: 0.6840 - val_loss: 0.7287
Epoch 2/2
[1m40184/40184[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m147s[0m 4ms/step - accuracy: 0.7996 - loss: 0.4096 - val_accuracy: 0.6921 - val_loss: 0.7014


In [79]:
from sklearn.metrics import classification_report

loss, accuracy = model.evaluate(X_test, Y_test)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

# Make predictions on test data
y_pred = model.predict(X_test)
y_pred_binary = (y_pred > 0.5).astype(int)  # Convert probabilities to binary predictions

# Calculate accuracy
accuracy = classification_report(Y_test, y_pred_binary)
print(accuracy)

[1m10046/10046[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 2ms/step - accuracy: 0.7468 - loss: 0.5986
Test Loss: 0.9440352916717529
Test Accuracy: 0.617664098739624
[1m10046/10046[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 2ms/step
              precision    recall  f1-score   support

           0       0.76      0.55      0.64    198203
           1       0.50      0.73      0.59    123263

    accuracy                           0.62    321466
   macro avg       0.63      0.64      0.62    321466
weighted avg       0.66      0.62      0.62    321466



In [39]:
# Calculate confidence interval for accuracy
def calculate_accuracy_ci(correct_predictions, total_samples):
    p_hat = correct_predictions / total_samples
    std_dev = np.sqrt(p_hat * (1 - p_hat) / total_samples)
    margin_of_error = 1.96 * std_dev  # 1.96 is the z-score for 95% confidence interval
    ci_lower = max(0, p_hat - margin_of_error)
    ci_upper = min(1, p_hat + margin_of_error)
    return ci_lower, ci_upper

accuracy_ci_lower, accuracy_ci_upper = calculate_accuracy_ci(np.sum(y_pred_binary == Y_test), len(Y_test))
print("95% Confidence Interval for Accuracy:", (accuracy_ci_lower, accuracy_ci_upper))

95% Confidence Interval for Accuracy: (0, 1)


  std_dev = np.sqrt(p_hat * (1 - p_hat) / total_samples)


In [40]:
model.save("rnn_stacked.keras")