In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read StressLevelDataset.csv from the working directory into `df`
# and preview the first rows to confirm it parsed as expected.
csv_path = 'StressLevelDataset.csv'
df = pd.read_csv(csv_path)
df.head()

In [None]:
# Report the frame's (rows, columns) shape and the full list of column names.
n_rows_cols = df.shape
column_names = df.columns.tolist()
print("Shape:", n_rows_cols)
print("Column Names:", column_names)

In [None]:
# DataFrame.info() writes its report (dtypes, non-null counts, memory usage)
# directly to stdout and returns None, so wrapping it in print() would append
# a stray "None" line to the output.
df.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

In [None]:
# Attach a human-readable label for each encoded stress level
# (0 -> "Low", 1 -> "Medium", 2 -> "High"); codes outside the mapping become NaN.
# Note: this adds a non-numeric column to `df`, so numeric-only steps
# (correlation, model fitting) must exclude `stress_level_label`.
stress_map = dict(enumerate(("Low", "Medium", "High")))
df["stress_level_label"] = df["stress_level"].map(stress_map)

In [None]:
# Count missing values per column.
df.isnull().sum()

In [None]:
# Distinct values present in the target column.
df['stress_level'].unique()  # stress_level is the prediction target

In [None]:
#DESCRIPTIVE ANALYSIS

In [None]:
# Bar chart of how many rows fall into each stress_level class, with the
# count written in the middle of every bar.
# NOTE(review): the class names used here (Normal / Eustress / Distress) differ
# from the ones in stress_map (Low / Medium / High) — confirm which is correct.
fig, ax = plt.subplots(figsize=(8, 6))

sns.countplot(x='stress_level', hue='stress_level', data=df, palette='coolwarm', ax=ax)

ax.set_title('Stress Level (0 = Normal, 1 = Eustress, 2= Distress)')
ax.set_xlabel('Stress Type')
ax.set_ylabel('Count')
ax.set_xticks([0, 1, 2])
ax.set_xticklabels(['Normal', 'Eustress', 'Distress'])

for bar in ax.patches:
    height = bar.get_height()
    # Skip placeholder patches (NaN or zero height) that seaborn may add when
    # hue is set: int(NaN) would raise and a "0" label is only noise.
    if not np.isfinite(height) or height <= 0:
        continue
    ax.text(
        bar.get_x() + bar.get_width() / 2,  # horizontal centre of the bar
        height / 2,                         # vertical centre of the bar
        str(int(height)),
        ha='center', va='center', color='black', fontsize=12, fontweight='bold',
    )

plt.show()

In [None]:
#CORRELATION MATRIX
# Restrict to numeric columns: `df` also holds the string column
# `stress_level_label`, which DataFrame.corr() cannot convert to float
# (pandas >= 2.0 raises instead of silently dropping it).
corr_matrix = df.corr(numeric_only=True)

# Annotated heatmap of the pairwise Pearson correlations.
fig, ax = plt.subplots(figsize=(15, 15))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', ax=ax)
ax.set_title('Features Correlation')
plt.show()

In [None]:
# List the column names currently in df.
df.columns

In [None]:
# Pick the features whose correlation with the target is strong in EITHER
# direction. Comparing the signed value against the threshold would silently
# discard strongly negative correlations, and the target's own row
# (correlation 1.0 with itself) is not a feature, so it is excluded.
CORR_THRESHOLD = 0.55

target_corr = corr_matrix['stress_level']
feature_corr = target_corr.drop('stress_level')

selected_features = feature_corr[feature_corr.abs() >= CORR_THRESHOLD].index.tolist()

print("Features with Strong Correlation:")
print(selected_features)

In [None]:
# Columns to visualise; the target itself is last so its own distribution is shown too.
features_to_plot = [
    'anxiety_level',
    'mental_health_history',
    'depression',
    'headache',
    'breathing_problem',
    'noise_level',
    'study_load',
    'future_career_concerns',
    'peer_pressure',
    'extracurricular_activities',
    'bullying',
    'stress_level',
]

# Silences every warning for the rest of the kernel session, not just this cell.
import warnings
warnings.filterwarnings('ignore')

# Enlarged figure holding a 4 rows x 3 columns grid = 12 subplots, one per feature.
fig, axes = plt.subplots(4, 3, figsize=(18, 16))
for ax, feature in zip(axes.ravel(), features_to_plot):
    # Histogram with a KDE overlay, split by stress level.
    sns.histplot(data=df, x=feature, hue='stress_level', kde=True, palette='coolwarm', ax=ax)
    ax.set_title(f'Distribution of {feature}')
    ax.set_xlabel('')
    ax.set_ylabel('')

plt.tight_layout()
plt.show()

In [None]:
#OUTLIER CHECKING
# One box plot per entry of features_to_plot, grouped by stress level, laid out
# on a 4 x 3 grid so points beyond the whiskers stand out per class.
fig, axes = plt.subplots(4, 3, figsize=(18, 16))
for ax, feature in zip(axes.flat, features_to_plot):
    sns.boxplot(data=df, hue='stress_level', x='stress_level', y=feature, palette='coolwarm', ax=ax)
    ax.set_title(f'Outlier Check: {feature}')
    ax.set_xlabel('')
    ax.set_ylabel('')

plt.tight_layout()
plt.show()

In [None]:
# Work on an independent copy so modelling steps cannot alter the frame used for EDA.
df_ML = df.copy()
# Fixed seed so the preview shows the same five rows on every run.
df_ML.sample(5, random_state=42)

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler 
from xgboost import XGBClassifier
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier, AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Separate predictors from the target.
# `stress_level_label` is only the text form of the target: leaving it in X
# would leak the answer, and its string values cannot be fed to the estimators.
# errors='ignore' keeps this cell working when the label column was never added.
X = df_ML.drop(columns=['stress_level', 'stress_level_label'], errors='ignore')
y = df_ML['stress_level']

print("Feature (X) dimension: ", X.shape)
print("Target (y) dimension: ", y.shape)

In [None]:
# Hold out 20% of the rows for testing; stratifying on y keeps the class
# proportions the same in both splits, and the fixed seed makes the split repeatable.
split = train_test_split(X, y, test_size=0.20, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = split

n_train = X_train.shape[0]
n_test = X_test.shape[0]
print(f"Training Set Size: {n_train}")
print(f"Test Set Size: {n_test}")

In [None]:
#MODELING

In [None]:
# Columns to standardise (zero mean, unit variance). This hard-coded list
# replaces any `selected_features` value computed earlier in the notebook.
selected_features = [
    'anxiety_level', 'mental_health_history', 'depression', 'headache', 'breathing_problem', 'noise_level', 
    'study_load', 'future_career_concerns', 'peer_pressure', 'extracurricular_activities', 'bullying'
]

# train_test_split hands back subsets of X; take explicit copies so the column
# assignments below write to independent frames and cannot trigger pandas'
# SettingWithCopyWarning.
X_train = X_train.copy()
X_test = X_test.copy()

# Fit the scaler on the training split only and reuse its statistics for the
# test split, so no information from the test rows leaks into preprocessing.
# NOTE: this cell overwrites X_train / X_test, so re-running it without
# re-running the split scales the data a second time.
# NOTE(review): any columns of X outside selected_features stay unscaled —
# confirm that is intended for the distance/margin-based models (KNN, SVM).
scaler = StandardScaler()
X_train[selected_features] = scaler.fit_transform(X_train[selected_features])
X_test[selected_features] = scaler.transform(X_test[selected_features])

In [None]:
# Candidate classifiers, keyed by display name. The target has three classes
# (0, 1, 2), so every estimator must be configured for multiclass use.
models = {
    # 'liblinear' only supports one-vs-rest and is deprecated for multiclass
    # targets in recent scikit-learn releases; 'lbfgs' fits a true multinomial
    # model. max_iter is raised because not all features are standardised.
    'Logistic Regression': LogisticRegression(random_state=42, solver='lbfgs', max_iter=1000),
    'Random Forest': RandomForestClassifier(random_state=42, n_estimators=100),
    # 'mlogloss' is the multiclass log-loss; 'logloss' is the binary metric.
    'XGBoost': XGBClassifier(random_state=42, eval_metric='mlogloss'),
    # probability=True enables predict_proba, which the ROC-AUC evaluation needs.
    'SVM': SVC(random_state=42, probability=True, kernel='rbf'),
    'Gradient Boosting': GradientBoostingClassifier(random_state=42, n_estimators=100),
    'KNN': KNeighborsClassifier(n_neighbors=5),
    'AdaBoost': AdaBoostClassifier(random_state=42, n_estimators=100)
}

In [None]:
# Fit every candidate on the training split and keep the fitted estimators,
# keyed by the same display names, for the evaluation step.
trained_models = {}

for model_name, estimator in models.items():
    print(f"[{model_name}] The model is being trained...")
    # fit() returns the estimator itself, so the fitted object is stored directly.
    trained_models[model_name] = estimator.fit(X_train, y_train)
    print(f"[{model_name}] model training is completed.")
    print()

print("\nAll models were trained successfully.")

In [None]:
#MODEL EVALUATION

In [None]:
# Score every fitted model on the held-out test split and collect the metrics
# in `results` (model name -> dict of Accuracy, F1 Score, ROC-AUC, Confusion Matrix).
results = {}

# Tick labels for the confusion-matrix plots, in class order 0, 1, 2.
# NOTE(review): these names differ from stress_map (Low / Medium / High) and
# from the count-plot labels — confirm which naming is correct.
class_names = ['No Stress', 'Normal Stress', 'High Stress']

for name, model in trained_models.items():
    print(f"\n--- {name} Evaluating the Model ---")

    y_pred = model.predict(X_test)
    y_proba = model.predict_proba(X_test)  # per-class probabilities, needed for ROC-AUC

    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='macro')  # unweighted mean over classes; alternatives: 'weighted' / 'micro'
    roc_auc = roc_auc_score(y_test, y_proba, multi_class='ovr')  # one-vs-rest multiclass AUC
    cm = confusion_matrix(y_test, y_pred)

    results[name] = {
        'Accuracy': accuracy,
        'F1 Score': f1,
        'ROC-AUC': roc_auc,
        'Confusion Matrix': cm
    }
    print(f"Accuracy: {accuracy:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"ROC-AUC: {roc_auc:.4f}")
    print("Confusion Matrix:\n", cm)

    # Draw on an explicit Axes: ConfusionMatrixDisplay.plot() opens its own
    # figure when no `ax` is given, which would leave the figure created here
    # empty and render a blank extra plot for every model.
    fig, ax = plt.subplots(figsize=(6, 5))
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
    disp.plot(cmap='Blues', values_format='d', ax=ax)
    ax.set_title(f'{name} Confusion Matrix')
    plt.show()

In [None]:
#MODEL COMPARISON BAR PLOT

In [None]:
# Convert the results dict to a DataFrame: one row per model, one column per scalar metric.
# Each per-model dict also holds a confusion-matrix array, so the frame comes
# out with object dtype; after dropping that entry, cast the scores to float so
# sorting, formatting and plotting work on real numeric columns.
metric_columns = ['Accuracy', 'F1 Score', 'ROC-AUC']
metrics_df = pd.DataFrame(results).T[metric_columns].astype(float)
# Best model (by ROC-AUC) first.
metrics_df = metrics_df.sort_values('ROC-AUC', ascending=False)

In [None]:
# Grouped bar chart comparing the three metrics across all models.
metrics_df.plot(kind='bar', figsize=(10, 6), colormap='Set1')
plt.title("Model Comparison: Accuracy, F1 Score, ROC-AUC")
plt.ylabel("Score")

# Zoom the y-axis to 0.7-1.05 when every score fits; otherwise lower the floor
# so that a weak model's bars are not cut off and hidden below the axis.
lowest_score = float(metrics_df.min().min())
y_floor = 0.7 if lowest_score >= 0.7 else max(0.0, lowest_score - 0.05)
plt.ylim(y_floor, 1.05)

plt.xticks(rotation=45)
plt.legend(loc='lower right')
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()