<a href="https://colab.research.google.com/github/olga-yu/ML_models_for_efficient_classroom_usage3/blob/master/ML_Algorithms_smart_campus.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
import numpy as np
import pandas as pd

# Build the hybrid feature matrix: the raw tabular features concatenated with
# a one-hot encoding of the leaf each sample lands in, for every tree of a
# Random Forest fitted on the training split.

# Load the dataset from the CSV file.
dataset = pd.read_csv('processed_motionData2025_3.csv')

# Select features (X) and target variable (y).
X = dataset[["StudentID", "TimePeriod", 'date-year', 'date-month', 'date-day', 'Season', 'Weekday', 'Semester']]
y = dataset['sensor_mo.mean']

# The class_weight mapping below is keyed on the labels 0 and 1, so the target
# must be strictly binary. Fail fast with a clear message instead of erroring
# obscurely inside scikit-learn or silently training on unexpected labels.
unexpected_labels = set(y.unique()) - {0, 1}
if unexpected_labels:
    raise ValueError(
        "Target column 'sensor_mo.mean' must contain only 0 and 1; "
        f"found other values, e.g. {list(unexpected_labels)[:5]}"
    )

# Split dataset into training and testing sets (80/20, fixed seed).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the Random Forest; class_weight up-weights class 1 by a factor of 3.
rf = RandomForestClassifier(n_estimators=100, class_weight={0: 1, 1: 3}, random_state=42)
rf.fit(X_train, y_train)

# rf.apply returns, per sample, the index of the leaf reached in each tree.
X_train_leaves = rf.apply(X_train)
X_test_leaves = rf.apply(X_test)

# One-hot encode the leaf indices. The encoded matrix has one column per
# (tree, leaf) pair, so it is very wide; emitting float32 instead of the
# default float64 halves the size of each dense copy made below.
encoder = OneHotEncoder(dtype=np.float32)
X_train_encoded = encoder.fit_transform(X_train_leaves).toarray()
X_test_encoded = encoder.transform(X_test_leaves).toarray()

# Combine original features with the leaf features. The raw features are cast
# to float32 as well so np.hstack does not upcast the whole result back to
# float64. NOTE(review): Keras' default float type is float32, so the network
# should see the same values as before - confirm if floatx was changed.
X_train_combined = np.hstack([X_train.to_numpy(dtype=np.float32), X_train_encoded])
X_test_combined = np.hstack([X_test.to_numpy(dtype=np.float32), X_test_encoded])

print("✅ Random Forest leaf features added. Shape:", X_train_combined.shape)

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Define the neural network: a small fully connected binary classifier over
# the combined (raw + Random Forest leaf) feature matrix.
# The input width is declared with an explicit Input object rather than the
# `input_shape=` argument on the first Dense layer, which newer Keras versions
# warn against for Sequential models.
model = Sequential([
    tf.keras.Input(shape=(X_train_combined.shape[1],)),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid'),  # Binary classification output
])

# Compile model: Adam optimizer, binary cross-entropy loss, AUC tracked per epoch.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['AUC'])

# Train model.
# NOTE(review): the test split is also passed as validation data. That is
# harmless while nothing is tuned on val_* metrics, but a separate validation
# split is needed before adding early stopping or model selection.
model.fit(X_train_combined, y_train, validation_data=(X_test_combined, y_test), epochs=20, batch_size=32)

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Score the test split with the trained network. The sigmoid output is a
# probability per sample; values strictly above 0.5 are labelled 1.
y_prob_nn = model.predict(X_test_combined).flatten()
y_pred_nn = (y_prob_nn > 0.5).astype(int)

# Accuracy, precision, recall and F1 are computed from the hard labels;
# ROC-AUC is computed from the raw probabilities.
final_accuracy, final_precision, final_recall, final_f1 = (
    metric(y_test, y_pred_nn)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)
final_roc_auc = roc_auc_score(y_test, y_prob_nn)

# Assemble the report and emit it in one call; the printed text is the same
# as printing each line separately.
report_lines = [
    "\n📌 **Final Hybrid Model Performance (RF + NN):**",
    f"✅ Accuracy: {final_accuracy:.2f}",
    f"✅ Precision: {final_precision:.2f}",
    f"✅ Recall: {final_recall:.2f}",
    f"✅ F1-score: {final_f1:.2f}",
    f"🚀 ROC-AUC: {final_roc_auc:.3f}",
]
print("\n".join(report_lines))



✅ Random Forest leaf features added. Shape: (4320, 21536)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 27ms/step - AUC: 0.4982 - loss: 1.8470 - val_AUC: 0.5810 - val_loss: 0.5099
Epoch 2/20
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 23ms/step - AUC: 0.5211 - loss: 0.5502 - val_AUC: 0.5445 - val_loss: 0.5400
Epoch 3/20
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 22ms/step - AUC: 0.5628 - loss: 0.5397 - val_AUC: 0.5493 - val_loss: 0.5145
Epoch 4/20
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 25ms/step - AUC: 0.5692 - loss: 0.5373 - val_AUC: 0.5201 - val_loss: 0.5283
Epoch 5/20
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 24ms/step - AUC: 0.5816 - loss: 0.5235 - val_AUC: 0.5393 - val_loss: 0.5313
Epoch 6/20
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 22ms/step - AUC: 0.6177 - loss: 0.5203 - val_AUC: 0.5447 - val_loss: 0.5358
Epoch 7/20
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 23ms/