In [1]:
pip install numpy==1.22.4

Note: you may need to restart the kernel to use updated packages.


In [13]:
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import (classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve, auc)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import load_model

In [3]:
# -----------------------------
# STEP 1: Load Preprocessed Data
# -----------------------------
output_dir = r"C:\Users\pshas\OneDrive\Desktop\AcademicResearch\IITBombay\placement\DreptoAIML\Project-1\outputs"
X = np.load(os.path.join(output_dir, "preprocessed_features.npy"))
y = np.load(os.path.join(output_dir, "simulated_labels.npy"))

print("[INFO] Loaded preprocessed data.")
print(f"[INFO] Feature shape: {X.shape}, Label shape: {y.shape}")

[INFO] Loaded preprocessed data.
[INFO] Feature shape: (9120, 180), Label shape: (9120,)


In [4]:
# -----------------------------
# STEP 2: Train-Test Split
# -----------------------------
from sklearn.model_selection import train_test_split

# First split: Temp (train+val) vs Test
X_temp, X_test, y_temp, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Second split: Train vs Val
X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, test_size=0.2, random_state=42)

# Print the shapes to verify
print("Train:", X_train.shape, y_train.shape)
print("Validation:", X_val.shape, y_val.shape)
print("Test:", X_test.shape, y_test.shape)

# -----------------------------
# Save for reuse in model_test.py
# -----------------------------
np.save(os.path.join(output_dir, "X_train.npy"), X_train)
np.save(os.path.join(output_dir, "y_train.npy"), y_train)
np.save(os.path.join(output_dir, "X_val.npy"), X_val)
np.save(os.path.join(output_dir, "y_val.npy"), y_val)
np.save(os.path.join(output_dir, "X_test.npy"), X_test)
np.save(os.path.join(output_dir, "y_test.npy"), y_test)

Train: (5836, 180) (5836,)
Validation: (1460, 180) (1460,)
Test: (1824, 180) (1824,)


In [5]:
# -----------------------------
# STEP 3: Build MLP Model
# -----------------------------
model = Sequential([
    Dense(128, activation='relu', input_shape=(X.shape[1],)),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid')  # Binary classification
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [6]:
# -----------------------------
# STEP 4: Train Model
# -----------------------------
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=16,
    callbacks=[early_stop],
    verbose=1
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100


In [11]:
# -----------------------------
# STEP 5: Evaluate Model
# -----------------------------
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score

# 1. Basic Evaluation
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"[RESULT] Test Accuracy: {accuracy:.4f}")

# 2. Generate predictions
y_pred = (model.predict(X_test) > 0.5).astype("int32")
y_pred_proba = model.predict(X_test)  # Probabilities for ROC-AUC

# 3. Calculate additional metrics
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred_proba)


print(f"[EXTENDED METRICS]")
print(f"Precision: {precision:.4f}")  # True positives / (True positives + False positives)
print(f"Recall: {recall:.4f}")       # True positives / (True positives + False negatives)
print(f"F1-Score: {f1:.4f}")         # Harmonic mean of precision and recall
print(f"ROC-AUC: {roc_auc:.4f}")     # Area under ROC curve (1.0 = perfect)

# 4. Classification report
print("\n[CLASSIFICATION REPORT]")
print(classification_report(y_test, y_pred))

# 5. Confusion matrix

clf_report = classification_report(y_test, y_pred)  # <-- Now properly defined
cm = confusion_matrix(y_test, y_pred)
print("\n[CONFUSION MATRIX]")
print(cm)

[RESULT] Test Accuracy: 0.4973
[EXTENDED METRICS]
Precision: 0.4982
Recall: 0.7741
F1-Score: 0.6063
ROC-AUC: 0.4849

[CLASSIFICATION REPORT]
              precision    recall  f1-score   support

           0       0.49      0.22      0.30       912
           1       0.50      0.77      0.61       912

    accuracy                           0.50      1824
   macro avg       0.50      0.50      0.46      1824
weighted avg       0.50      0.50      0.46      1824


[CONFUSION MATRIX]
[[201 711]
 [206 706]]


In [10]:
# -----------------------------
# STEP 6: Save Model & Plots
# -----------------------------
model.save(os.path.join(output_dir, "mlp_model.h5"))

# Plot loss
plt.figure()
plt.plot(history.history['loss'], label='Train')
plt.plot(history.history['val_loss'], label='Validation')
plt.title("Loss Curve")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.savefig(os.path.join(output_dir, "loss_curve.png"))
plt.close()

# Plot accuracy
plt.figure()
plt.plot(history.history['accuracy'], label='Train')
plt.plot(history.history['val_accuracy'], label='Validation')
plt.title("Accuracy Curve")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()
plt.savefig(os.path.join(output_dir, "accuracy_curve.png"))
plt.close()

# Plot and save confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix')
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.savefig(os.path.join(output_dir, 'test_confusion_matrix.png'))
plt.close()

print(f"[INFO] Model and plots saved to {output_dir}/")

[INFO] Model and plots saved to C:\Users\pshas\OneDrive\Desktop\AcademicResearch\IITBombay\placement\DreptoAIML\Project-1\outputs/


In [14]:
#Test Plots

# Save Numerical Metrics to a .txt file
with open(os.path.join(output_dir, 'test_metrics.txt'), 'w') as f:
    f.write(f"Test Accuracy: {accuracy:.4f}\n")
    f.write(f"Precision: {precision:.4f}\n")
    f.write(f"Recall: {recall:.4f}\n")
    f.write(f"F1-Score: {f1:.4f}\n\n")
    f.write("Classification Report:\n")
    f.write(clf_report)
    f.write("\nConfusion Matrix:\n")
    f.write(np.array2string(cm))

# 6. Plot ROC Curve
fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba)
roc_auc = auc(fpr, tpr)

plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='darkorange', lw=2, 
         label=f'ROC curve (AUC = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc="lower right")

# Save ROC plot
roc_path = os.path.join(output_dir, 'roc_curve.png')
plt.savefig(roc_path, bbox_inches='tight', dpi=300)
plt.close()

print(f"\n[SAVED] ROC curve saved to {roc_path}")


[SAVED] ROC curve saved to C:\Users\pshas\OneDrive\Desktop\AcademicResearch\IITBombay\placement\DreptoAIML\Project-1\outputs\roc_curve.png
