In [None]:
!pip install --quiet scikit-learn xgboost catboost imbalanced-learn joblib


[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m99.2/99.2 MB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import os
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split, StratifiedKFold, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier

from imblearn.over_sampling import BorderlineSMOTE
import joblib


In [None]:
# DASS.csv has to sit in the working directory (e.g. uploaded to the Colab session).
csv_path = "DASS.csv"
df = pd.read_csv(csv_path)
print(f"‚úÖ Loaded dataset with shape: {df.shape}")
df.head()

‚úÖ Loaded dataset with shape: (1812, 33)


Unnamed: 0,Q1_1,Q1_2,Q1_3,Q1_4,Q1_5,Q1_6,Q3_1_S1,Q3_2_S2,Q3_3_S3,Q3_4_S4,...,Anxiety_Level,Q3_15_D1,Q3_16_D2,Q3_17_D3,Q3_18_D4,Q3_19_D5,Q3_20_D6,Q3_21_D7,Depression_Score,Depression_Level
0,21,2,0,4,4,1,3,3,3,3,...,3,0,3,3,2,3,3,3,17,5
1,18,2,0,4,4,1,3,3,3,3,...,2,0,2,3,0,0,2,0,7,3
2,40,2,1,2,1,1,3,3,1,2,...,5,2,1,1,2,2,1,2,11,4
3,24,1,0,4,2,0,3,3,1,2,...,5,2,1,1,2,2,1,2,11,4
4,50,1,1,2,2,0,3,3,3,2,...,2,1,0,0,1,2,0,0,4,1


In [None]:
# Questionnaire item columns are named Q3_<item number>_<scale letter><position in scale>:
# items 1-7 -> S1..S7, items 8-14 -> A1..A7, items 15-21 -> D1..D7.
stress_features = [f"Q3_{pos}_S{pos}" for pos in range(1, 8)]
anxiety_features = [f"Q3_{pos + 7}_A{pos}" for pos in range(1, 8)]
depression_features = [f"Q3_{pos + 14}_D{pos}" for pos in range(1, 8)]

all_features = [*stress_features, *anxiety_features, *depression_features]

# Row-wise sum over all 21 item columns, stored as an extra feature column.
df["Total_Score"] = df[all_features].sum(axis=1)

# Display name -> label column for each classification target.
targets = dict(
    Stress="Stress_Level",
    Anxiety="Anxiety_Level",
    Depression="Depression_Level",
)


In [None]:
def augment_with_noise(X, y, factor=0.05, random_state=None):
    """Double a dataset by appending a Gaussian-jittered copy of every sample.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Numeric feature matrix (ndarray, DataFrame or nested list).
    y : array-like of shape (n_samples,)
        Labels aligned with the rows of ``X``.
    factor : float, default=0.05
        Standard deviation of the zero-mean Gaussian noise added to the copy.
    random_state : None, int, numpy.random.Generator or numpy.random.RandomState
        Source of randomness. ``None`` (the default) keeps the historical
        behaviour of drawing from NumPy's global RNG, so callers that rely on
        ``np.random.seed`` are unaffected. An int seeds a fresh generator,
        which makes the augmentation reproducible without touching global state.

    Returns
    -------
    X_noisy : numpy.ndarray of shape (2 * n_samples, n_features)
        The original rows followed by their noisy copies.
    y_noisy : numpy.ndarray of shape (2 * n_samples,)
        The labels repeated twice, in the same order as ``X_noisy``.
    """
    # Coerce once so inputs without a ``.shape`` attribute (e.g. lists) work too.
    X_arr = np.asarray(X, dtype=float)
    y_arr = np.asarray(y)

    if random_state is None:
        rng = np.random  # legacy global RNG; honours np.random.seed(...)
    elif isinstance(random_state, (np.random.Generator, np.random.RandomState)):
        rng = random_state
    else:
        rng = np.random.default_rng(random_state)

    noise = rng.normal(0, factor, X_arr.shape)
    X_noisy = np.vstack([X_arr, X_arr + noise])
    y_noisy = np.hstack([y_arr, y_arr])
    return X_noisy, y_noisy


In [None]:
def train_model_fast(target_name, target_column):
    """Tune six classifiers for one target, report test metrics and save the best model.

    The function reads the notebook-level ``df`` and ``all_features``, encodes
    the labels, makes a stratified 80/20 split, oversamples the training part
    with BorderlineSMOTE, doubles it with Gaussian-noise copies, runs a
    3-fold ``RandomizedSearchCV`` (weighted F1) for every candidate pipeline
    and scores each tuned model on the held-out split.

    Parameters
    ----------
    target_name : str
        Name used in log lines, in the result rows and in the output file names.
    target_column : str
        Column of ``df`` that holds the class labels for this target.

    Returns
    -------
    list of dict
        One row per model with the keys ``Target``, ``Model``, ``Accuracy``,
        ``Precision``, ``Recall`` and ``F1`` (metrics in percent, weighted
        averages on the held-out split).

    Side effects
    ------------
    Creates ``models_fast/`` if needed and writes
    ``models_fast/best_<target>_model.joblib`` (the model with the highest
    held-out F1) and ``models_fast/<target>_encoder.joblib`` (the fitted
    ``LabelEncoder``).

    Notes
    -----
    NOTE(review): oversampling and noise augmentation are applied before the
    cross-validation split, so each CV validation fold contains synthetic
    neighbours of training-fold rows; the CV scores that drive the
    hyper-parameter choice are therefore optimistic. The best model is also
    selected by its score on the held-out split, so the reported "best" F1 is
    not an unbiased estimate. Both are left unchanged here because fixing them
    alters the saved artefacts.
    """
    print(f"\nüéØ Training Model for {target_name.upper()} (FAST MODE)\n{'='*60}")

    # Features and labels
    X = df[all_features + ["Total_Score"]].apply(pd.to_numeric, errors='coerce').fillna(0)
    y_raw = df[target_column].astype(str)
    le = LabelEncoder()
    y = le.fit_transform(y_raw)

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, stratify=y, test_size=0.2, random_state=42
    )

    # Apply BorderlineSMOTE
    sm = BorderlineSMOTE(random_state=42)
    X_train_res, y_train_res = sm.fit_resample(X_train, y_train)

    # Apply Noise Injection.
    # augment_with_noise draws from NumPy's global RNG when called this way;
    # pin the seed for the duration of the call so reruns build the same
    # training set, then restore the caller's RNG state.
    rng_state = np.random.get_state()
    np.random.seed(42)
    try:
        X_train_res, y_train_res = augment_with_noise(X_train_res, y_train_res, factor=0.05)
    finally:
        np.random.set_state(rng_state)

    # The augmented training data is a plain ndarray, so predict on an ndarray
    # as well instead of mixing in a DataFrame with column names.
    X_test_arr = X_test.to_numpy()

    # Define models and hyperparameters (reduced for speed)
    base_models = {
        "Decision Tree": (
            Pipeline([('clf', DecisionTreeClassifier(random_state=42, class_weight='balanced'))]),
            {'clf__max_depth': [None, 10, 15]}
        ),
        "Random Forest": (
            Pipeline([('clf', RandomForestClassifier(n_estimators=150, random_state=42, class_weight='balanced'))]),
            {'clf__max_depth': [None, 10, 20]}
        ),
        "SVM": (
            Pipeline([
                ('scaler', StandardScaler()),
                ('clf', SVC(kernel='rbf', probability=True, class_weight='balanced', random_state=42))
            ]),
            {
                "clf__C": [1, 5, 10, 50],
                "clf__gamma": ["scale", 0.01, 0.1]
            }
        ),
        "MLP": (
            Pipeline([
                ('scaler', StandardScaler()),
                ('clf', MLPClassifier(max_iter=2000, random_state=42))
            ]),
            {
                "clf__hidden_layer_sizes": [(100,), (200,), (200, 100)],
                "clf__activation": ["relu", "tanh"],
                "clf__solver": ["adam", "sgd"],
                "clf__learning_rate_init": [0.001, 0.0005],
                "clf__alpha": [0.0001, 0.001]
            }
        ),
        "XGBoost": (
            # `use_label_encoder` is deprecated in XGBoost and no longer needed:
            # the labels are already encoded as 0..n_classes-1 above.
            Pipeline([('clf', XGBClassifier(eval_metric='mlogloss', random_state=42))]),
            {'clf__n_estimators': [100, 200], 'clf__max_depth': [3, 6]}
        ),
        "CatBoost": (
            Pipeline([('clf', CatBoostClassifier(verbose=0, auto_class_weights='Balanced', random_state=42))]),
            {'clf__iterations': [200], 'clf__depth': [4, 6]}
        )
    }

    best_f1 = -1
    best_pipe = None
    best_model_name = ""
    results = []

    # Train each model
    for name, (pipe, grid) in base_models.items():
        print(f"\nüîç Tuning {name}...")
        try:
            rs = RandomizedSearchCV(
                pipe, grid, n_iter=10,  # random subset
                cv=3, scoring='f1_weighted', n_jobs=-1, random_state=42
            )
            rs.fit(X_train_res, y_train_res)
            model = rs.best_estimator_
        except Exception as e:
            # Best effort: if the search fails, fall back to the untuned pipeline.
            print(f"‚ö†Ô∏è RandomizedSearch failed for {name}: {e}")
            model = pipe.fit(X_train_res, y_train_res)

        # Evaluate on the held-out split
        y_pred = model.predict(X_test_arr)
        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred, average='weighted')
        rec = recall_score(y_test, y_pred, average='weighted')
        f1 = f1_score(y_test, y_pred, average='weighted')

        print(f"‚úÖ {name}: Acc={acc:.4f}, Prec={prec:.4f}, Rec={rec:.4f}, F1={f1:.4f}")

        results.append({
            "Target": target_name,
            "Model": name,
            "Accuracy": acc * 100,
            "Precision": prec * 100,
            "Recall": rec * 100,
            "F1": f1 * 100
        })

        # Strict '>' keeps the earlier model when two models tie on F1.
        if f1 > best_f1:
            best_f1 = f1
            best_pipe = model
            best_model_name = name

    # Save best model + encoder
    os.makedirs("models_fast", exist_ok=True)
    joblib.dump(best_pipe, f"models_fast/best_{target_name.lower()}_model.joblib")
    joblib.dump(le, f"models_fast/{target_name.lower()}_encoder.joblib")

    print(f"\nüèÜ Best Model for {target_name}: {best_model_name} (F1: {best_f1:.4f})")
    return results


In [None]:
# Train every target in turn and pool the per-model metric rows into one list.
all_results = []
for target, label in targets.items():
    all_results += train_model_fast(target, label)



üéØ Training Model for STRESS (FAST MODE)

üîç Tuning Decision Tree...
‚úÖ Decision Tree: Acc=0.8209, Prec=0.8317, Rec=0.8209, F1=0.8245

üîç Tuning Random Forest...
‚úÖ Random Forest: Acc=0.9091, Prec=0.9161, Rec=0.9091, F1=0.9101

üîç Tuning SVM...
‚úÖ SVM: Acc=0.9146, Prec=0.9160, Rec=0.9146, F1=0.9144

üîç Tuning MLP...
‚úÖ MLP: Acc=0.9835, Prec=0.9839, Rec=0.9835, F1=0.9835

üîç Tuning XGBoost...
‚úÖ XGBoost: Acc=0.9118, Prec=0.9153, Rec=0.9118, F1=0.9126

üîç Tuning CatBoost...
‚úÖ CatBoost: Acc=0.9201, Prec=0.9254, Rec=0.9201, F1=0.9213

üèÜ Best Model for Stress: MLP (F1: 0.9835)

üéØ Training Model for ANXIETY (FAST MODE)

üîç Tuning Decision Tree...
‚úÖ Decision Tree: Acc=0.7824, Prec=0.7966, Rec=0.7824, F1=0.7868

üîç Tuning Random Forest...
‚úÖ Random Forest: Acc=0.8705, Prec=0.8729, Rec=0.8705, F1=0.8710

üîç Tuning SVM...
‚úÖ SVM: Acc=0.9091, Prec=0.9100, Rec=0.9091, F1=0.9087

üîç Tuning MLP...
‚úÖ MLP: Acc=0.9587, Prec=0.9608, Rec=0.9587, F1=0.9582

üîç T

In [None]:
# One table for all models: metrics rounded to 2 decimals, ordered by target name
# and, within each target, by descending F1.
df_results = (
    pd.DataFrame(all_results)
    .round(2)
    .sort_values(by=["Target", "F1"], ascending=[True, False])
)
df_results.to_csv("models_fast/final_results.csv", index=False)

print("\nüìä Final Results:")
print(df_results.to_string(index=False))



üìä Final Results:
    Target         Model  Accuracy  Precision  Recall    F1
   Anxiety           MLP     95.87      96.08   95.87 95.82
   Anxiety           SVM     90.91      91.00   90.91 90.87
   Anxiety      CatBoost     87.33      87.44   87.33 87.37
   Anxiety Random Forest     87.05      87.29   87.05 87.10
   Anxiety       XGBoost     84.57      84.31   84.57 84.34
   Anxiety Decision Tree     78.24      79.66   78.24 78.68
Depression           MLP     97.52      97.60   97.52 97.52
Depression           SVM     93.94      94.09   93.94 93.97
Depression      CatBoost     90.36      90.72   90.36 90.41
Depression       XGBoost     89.53      89.68   89.53 89.57
Depression Random Forest     87.05      87.61   87.05 87.24
Depression Decision Tree     80.17      79.78   80.17 79.89
    Stress           MLP     98.35      98.39   98.35 98.35
    Stress      CatBoost     92.01      92.54   92.01 92.13
    Stress           SVM     91.46      91.60   91.46 91.44
    Stress       XG

In [None]:
# `os` is already imported in the notebook's import cell, so it is not re-imported here.

# Make sure directory exists
os.makedirs("models_final", exist_ok=True)

# Save full results
df_results.to_csv("models_final/full_results.csv", index=False)

# Show results grouped by Target (Stress, Anxiety, Depression),
# in the order the targets appear in df_results.
for target in df_results["Target"].unique():
    target_rows = df_results[df_results["Target"] == target]
    print(f"\nüìä Evaluation Matrix for {target}")
    print(target_rows.to_string(index=False))



üìä Evaluation Matrix for Anxiety
 Target         Model  Accuracy  Precision  Recall    F1
Anxiety           MLP     95.87      96.08   95.87 95.82
Anxiety           SVM     90.91      91.00   90.91 90.87
Anxiety      CatBoost     87.33      87.44   87.33 87.37
Anxiety Random Forest     87.05      87.29   87.05 87.10
Anxiety       XGBoost     84.57      84.31   84.57 84.34
Anxiety Decision Tree     78.24      79.66   78.24 78.68

üìä Evaluation Matrix for Depression
    Target         Model  Accuracy  Precision  Recall    F1
Depression           MLP     97.52      97.60   97.52 97.52
Depression           SVM     93.94      94.09   93.94 93.97
Depression      CatBoost     90.36      90.72   90.36 90.41
Depression       XGBoost     89.53      89.68   89.53 89.57
Depression Random Forest     87.05      87.61   87.05 87.24
Depression Decision Tree     80.17      79.78   80.17 79.89

üìä Evaluation Matrix for Stress
Target         Model  Accuracy  Precision  Recall    F1
Stress         