<a href="https://colab.research.google.com/github/sharanya-sharma/Stress-Level-Detector/blob/main/Stress_Level_Detector.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef, confusion_matrix
from sklearn.model_selection import GridSearchCV
from imblearn.over_sampling import SMOTE

In [None]:
# Load dataset
df = pd.read_csv("/content/stress_detection_dataset.csv")
df.dropna(inplace=True)

In [None]:
df.head()

Unnamed: 0,Age,Gender,Sleep Hours,Daily Study Hours,Social Interaction Level,Physical Activity Level,Anxiety Level,Depression Level,Self-Esteem,Family Support,Financial Stress,Academic Pressure,Stress Level
0,24,Female,6.5,7.5,High,Medium,Moderate,Severe,High,Low,Medium,Medium,High
1,21,Female,5.8,1.9,Medium,Medium,Severe,Mild,High,Medium,Medium,Low,Low
2,28,Male,6.8,6.2,Medium,Medium,Moderate,Moderate,Medium,Low,Medium,Medium,High
4,22,Male,7.7,1.4,Medium,Low,Severe,Moderate,Low,Medium,Medium,Medium,Medium
5,24,Female,6.7,5.8,Medium,Low,Mild,Severe,Medium,Medium,Medium,High,Medium


In [None]:
# Encode categorical variables
categorical_columns = ["Gender", "Social Interaction Level", "Physical Activity Level", "Anxiety Level", "Depression Level", "Self-Esteem", "Family Support", "Financial Stress", "Academic Pressure", "Stress Level"]
label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

In [None]:
# Define features and target variable
X = df.drop(columns=["Stress Level"])
y = df["Stress Level"]

In [None]:
df.head()

Unnamed: 0,Age,Gender,Sleep Hours,Daily Study Hours,Social Interaction Level,Physical Activity Level,Anxiety Level,Depression Level,Self-Esteem,Family Support,Financial Stress,Academic Pressure,Stress Level
0,24,0,6.5,7.5,0,2,1,2,0,1,2,2,0
1,21,0,5.8,1.9,2,2,2,0,0,2,2,1,1
2,28,1,6.8,6.2,2,2,1,1,2,1,2,2,0
4,22,1,7.7,1.4,2,1,2,1,1,2,2,2,2
5,24,0,6.7,5.8,2,1,0,2,2,2,2,0,2


In [None]:
# Handle class imbalance
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

In [None]:
# Check class distribution after SMOTE
print("Class distribution after SMOTE:")
print(pd.Series(y_resampled).value_counts())

Class distribution after SMOTE:
Stress Level
0    219
1    219
2    219
Name: count, dtype: int64


In [None]:
# Split data
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

In [None]:
# Feature Scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Hyperparameter tuning
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [10, 20, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}
grid_search = GridSearchCV(RandomForestClassifier(random_state=42), param_grid, cv=5, n_jobs=-1, verbose=1, return_train_score=True)
grid_search.fit(X_train_scaled, y_train)

Fitting 5 folds for each of 81 candidates, totalling 405 fits


In [None]:
# Cross-validation scores
cv_results = grid_search.cv_results_
cv_scores = cv_results['mean_test_score']
print(f"Cross-validation Accuracy Scores: {cv_scores}")
print(f"Mean Accuracy: {np.mean(cv_scores):.2f} ± {np.std(cv_scores):.2f}")

Cross-validation Accuracy Scores: [0.92761905 0.93714286 0.94095238 0.92190476 0.93142857 0.93333333
 0.91809524 0.92380952 0.92380952 0.92952381 0.93142857 0.93714286
 0.93904762 0.92952381 0.93333333 0.92761905 0.92571429 0.92380952
 0.91809524 0.92       0.91809524 0.91809524 0.92       0.91809524
 0.92380952 0.91619048 0.92190476 0.93142857 0.93333333 0.93904762
 0.92380952 0.93142857 0.92952381 0.91619048 0.92380952 0.92380952
 0.93142857 0.93714286 0.93714286 0.93714286 0.92571429 0.93333333
 0.92952381 0.92380952 0.92190476 0.91809524 0.92       0.91619048
 0.91809524 0.92       0.91619048 0.92190476 0.91619048 0.92380952
 0.93142857 0.93333333 0.93904762 0.92380952 0.93142857 0.92952381
 0.91619048 0.92380952 0.92380952 0.93142857 0.93714286 0.93714286
 0.93714286 0.92571429 0.93333333 0.92952381 0.92380952 0.92190476
 0.91809524 0.92       0.91619048 0.91809524 0.92       0.91619048
 0.92190476 0.91619048 0.92380952]
Mean Accuracy: 0.93 ± 0.01


In [None]:
best_rf_model = grid_search.best_estimator_

In [None]:
# Train model
best_rf_model.fit(X_train_scaled, y_train)

In [None]:
# Feature importance
feature_importances = pd.Series(best_rf_model.feature_importances_, index=X.columns).sort_values(ascending=False)
important_features = feature_importances[:10].index.tolist()

In [None]:
# Retrain with top features
X_train_selected = X_train[important_features]
X_test_selected = X_test[important_features]
X_train_scaled = scaler.fit_transform(X_train_selected)
X_test_scaled = scaler.transform(X_test_selected)
best_rf_model.fit(X_train_scaled, y_train)

In [None]:
# Predictions
y_pred = best_rf_model.predict(X_test_scaled)

In [None]:
# Evaluate Model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')
f1 = f1_score(y_test, y_pred, average='macro')
mcc = matthews_corrcoef(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

In [None]:
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-Score: {f1:.2f}")
print(f"MCC: {mcc:.2f}")
print("Confusion Matrix:\n", conf_matrix)

Accuracy: 0.98
Precision: 0.99
Recall: 0.98
F1-Score: 0.98
MCC: 0.98
Confusion Matrix:
 [[39  0  1]
 [ 0 47  0]
 [ 0  1 44]]


In [None]:
# Dummy Input for Prediction
def predict_stress_level(dummy_input):
    dummy_df = pd.DataFrame(dummy_input, columns=X.columns)
    dummy_input_selected = dummy_df[important_features]
    dummy_input_scaled = scaler.transform(dummy_input_selected)

    # Predict stress level
    stress_prediction = best_rf_model.predict(dummy_input_scaled)
    stress_probs = best_rf_model.predict_proba(dummy_input_scaled)

    # Ensure correct label mapping
    unique_classes = np.sort(df["Stress Level"].unique())
    stress_label = unique_classes[stress_prediction][0]

    # Map numeric labels to descriptions
    stress_levels = {0: "0-Low", 1: "1-Moderate", 2: "2-High"}
    stress_description = stress_levels.get(stress_label, "Unknown")

    # Display probabilities with class labels
    class_probabilities = dict(zip(unique_classes, stress_probs[0]))
    print("Class Probabilities:", class_probabilities)
    print("Predicted Stress Level:", stress_description)

# Example usage
dummy_input = np.array([[24,0,6.7,5.8,2,1,0,2,2,2,2,0]])
predict_stress_level(dummy_input)

Class Probabilities: {0: 0.03833333333333333, 1: 0.058026284348865004, 2: 0.9036403823178015}
Predicted Stress Level: 2-High
