<a href="https://colab.research.google.com/github/sharanya-sharma/Stress-Level-Detector/blob/main/Stress_Level_Detector.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [113]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef, confusion_matrix
from imblearn.over_sampling import SMOTE
import streamlit as st
import joblib

In [114]:
# Load dataset
df = pd.read_csv("/content/stress_detection_dataset.csv")
df.dropna(inplace=True)

In [116]:
# Load the trained model
joblib.dump(model, 'stress_level_model.pkl')
model = joblib.load('stress_level_model.pkl')

In [118]:
joblib.dump(scaler, 'scaler.pkl')  # Save the scaler used for feature scaling

['scaler.pkl']

In [119]:
df.head()

Unnamed: 0,Age,Gender,Sleep Hours,Daily Study Hours,Social Interaction Level,Physical Activity Level,Anxiety Level,Depression Level,Self-Esteem,Family Support,Financial Stress,Academic Pressure,Stress Level
0,24,Female,6.5,7.5,High,Medium,Moderate,Severe,High,Low,Medium,Medium,High
1,21,Female,5.8,1.9,Medium,Medium,Severe,Mild,High,Medium,Medium,Low,Low
2,28,Male,6.8,6.2,Medium,Medium,Moderate,Moderate,Medium,Low,Medium,Medium,High
4,22,Male,7.7,1.4,Medium,Low,Severe,Moderate,Low,Medium,Medium,Medium,Medium
5,24,Female,6.7,5.8,Medium,Low,Mild,Severe,Medium,Medium,Medium,High,Medium


In [120]:
# Encoding Dictionary
encoding_dict = {
    "Gender": {"Male": 0, "Female": 1, "Other": 2},
    "Social Interaction Level": {"Low": 0, "Medium": 1, "High": 2},
    "Physical Activity Level": {"Low": 0, "Medium": 1, "High": 2},
    "Anxiety Level": {"None": 0, "Mild": 1, "Moderate": 2, "Severe": 3},
    "Depression Level": {"None": 0, "Mild": 1, "Moderate": 2, "Severe": 3},
    "Self-Esteem": {"Low": 0, "Medium": 1, "High": 2},
    "Family Support": {"Low": 0, "Medium": 1, "High": 2},
    "Financial Stress": {"Low": 0, "Medium": 1, "High": 2},
    "Academic Pressure": {"Low": 0, "Medium": 1, "High": 2},
    "Stress Level": {"Low": 0, "Medium": 1, "High": 2},
}

In [121]:
# Explicitly convert categorical columns to integers using the encoding_dict
for col in df.select_dtypes(include=['object']).columns:
    if col in encoding_dict:
        # Map values using the dictionary, and handle errors if an unseen category exists
        try:
            df[col] = df[col].map(encoding_dict[col])
        except KeyError as e:
            print(f"Unseen category found in column '{col}': {e}")
            df[col] = df[col].fillna(-1)

In [122]:
df.head()

Unnamed: 0,Age,Gender,Sleep Hours,Daily Study Hours,Social Interaction Level,Physical Activity Level,Anxiety Level,Depression Level,Self-Esteem,Family Support,Financial Stress,Academic Pressure,Stress Level
0,24,1,6.5,7.5,2,1,2,3,2,0,1,1,2
1,21,1,5.8,1.9,1,1,3,1,2,1,1,0,0
2,28,0,6.8,6.2,1,1,2,2,1,0,1,1,2
4,22,0,7.7,1.4,1,0,3,2,0,1,1,1,1
5,24,1,6.7,5.8,1,0,1,3,1,1,1,2,1


In [123]:
# Define features and target variable
X = df.drop(columns=["Stress Level"])
y = df["Stress Level"]

In [124]:
# Handle class imbalance using SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

In [125]:
# Check class distribution after SMOTE
print("Class distribution after SMOTE:")
print(pd.Series(y_resampled).value_counts())

Class distribution after SMOTE:
Stress Level
2    219
0    219
1    219
Name: count, dtype: int64


In [126]:
# Split data into train-test sets
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

In [127]:
# Feature Scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [128]:
# Hyperparameter tuning using GridSearchCV
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [10, 20, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}
grid_search = GridSearchCV(RandomForestClassifier(random_state=42), param_grid, cv=5, n_jobs=-1, verbose=1, return_train_score=True)
grid_search.fit(X_train_scaled, y_train)

Fitting 5 folds for each of 81 candidates, totalling 405 fits


In [129]:
# Best model selection
model = grid_search.best_estimator_

In [130]:
# Train model on best hyperparameters
model.fit(X_train_scaled, y_train)

In [131]:
# Feature importance selection
feature_importances = pd.Series(model.feature_importances_, index=X.columns).sort_values(ascending=False)
important_features = feature_importances.index.tolist()

In [132]:
# Retrain with all features
X_train_selected = X_train[important_features]
X_test_selected = X_test[important_features]
X_train_scaled = scaler.fit_transform(X_train_selected)
X_test_scaled = scaler.transform(X_test_selected)
model.fit(X_train_scaled, y_train)

In [133]:
# Predictions
y_pred = model.predict(X_test_scaled)

In [134]:
# Model Evaluation
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')
f1 = f1_score(y_test, y_pred, average='macro')
mcc = matthews_corrcoef(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

In [137]:
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-Score: {f1:.2f}")
print(f"MCC: {mcc:.2f}")
print("Confusion Matrix:\n", conf_matrix)

Accuracy: 0.95
Precision: 0.95
Recall: 0.95
F1-Score: 0.95
MCC: 0.92
Confusion Matrix:
 [[43  0  0]
 [ 2 41  2]
 [ 0  3 41]]


In [138]:
# Function to Predict Stress Level
def predict_stress_level(dummy_input):
    # Create a DataFrame from the dummy input and print it for checking
    dummy_df = pd.DataFrame(dummy_input, columns=X.columns)
    print("Dummy Input Data (DataFrame):")
    print(dummy_df)

    dummy_input_selected = dummy_df[important_features]
    dummy_input_scaled = scaler.transform(dummy_input_selected)

    # Predict stress level
    stress_prediction = model.predict(dummy_input_scaled)
    stress_probs = model.predict_proba(dummy_input_scaled)

    # Ensure correct label mapping
    stress_label = stress_prediction[0]  # Directly access the prediction

    # Map numeric labels to descriptions
    stress_levels = {0: "0-Low", 1: "1-Moderate", 2: "2-High"}
    stress_description = stress_levels.get(stress_label, "Unknown")

    # Display probabilities with class labels
    class_probabilities = dict(zip(range(len(stress_probs[0])), stress_probs[0]))
    print("Class Probabilities:", class_probabilities)
    print("Predicted Stress Level:", stress_description)

# Example Usage
dummy_input = np.array([[60, 1, 6.7, 5.8, 1, 2, 1, 3, 1, 2, 2, 2]])
predict_stress_level(dummy_input)

Dummy Input Data (DataFrame):
    Age  Gender  Sleep Hours  Daily Study Hours  Social Interaction Level  \
0  60.0     1.0          6.7                5.8                       1.0   

   Physical Activity Level  Anxiety Level  Depression Level  Self-Esteem  \
0                      2.0            1.0               3.0          1.0   

   Family Support  Financial Stress  Academic Pressure  
0             2.0               2.0                2.0  
Class Probabilities: {0: np.float64(0.09), 1: np.float64(0.43), 2: np.float64(0.48)}
Predicted Stress Level: 2-High
