**Import all important Libraries**

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
import joblib
from imblearn.over_sampling import SMOTE

**Load the dataset**

In [2]:
file_path ='/kaggle/input/stress-level-dataset/Stress Dataset _ Tutorial-1.csv'
df = pd.read_csv(file_path)

In [3]:
print("Dataset Loaded Successfully")

Dataset Loaded Successfully


In [4]:
print(df.head())

   Age      Gender Marital_Status   Job_Role  Experience_Years  \
0   56  Non-Binary        Widowed  Developer                 5   
1   46      Female         Single    Analyst                20   
2   32      Female         Single  Developer                10   
3   60      Female       Divorced    Analyst                26   
4   25        Male        Married      Sales                29   

   Monthly_Salary_INR  Working_Hours_per_Week  Commute_Time_Hours  \
0           102703.08                      44                1.72   
1           165583.91                      54                0.62   
2            38005.39                      81                1.22   
3            51127.36                      79                2.03   
4           174597.45                      63                1.45   

   Remote_Work Health_Issues  ... Family_Support_Level Job_Satisfaction  \
0         True           NaN  ...                    2                5   
1         True           NaN  ...     

**Preprocessing**

In [5]:
# Checking for missing values
df.dropna(inplace=True)
print("Missing values dropped")

Missing values dropped


**Encoding categorical columns (if any)**

In [6]:
label_encoders = {}
for column in df.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders[column] = le
print("Categorical features encoded")

Categorical features encoded


**Splitting features and target**

In [7]:
X = df.iloc[:, :-1]  # Assuming last column is target
y = df.iloc[:, -1]

**Normalizing data**

In [8]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
print("Feature scaling applied")

Feature scaling applied


**Handling Class Imbalance**

In [9]:
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_scaled, y)
print("Class imbalance handled with SMOTE")

Class imbalance handled with SMOTE


**Train-Test Split**

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.1, random_state=42)
print("Data split into training and testing sets")

Data split into training and testing sets


**Train Model with Hyperparameter Tuning**

In [11]:
model = XGBClassifier(n_estimators=200, max_depth=5, learning_rate=0.1, random_state=42)
model.fit(X_train, y_train)
print("Model training complete with XGBoost")

Model training complete with XGBoost


**Evaluate Model**

In [12]:
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Test Accuracy: {accuracy * 100:.2f}%')

Test Accuracy: 8.33%


**Save Model**

In [13]:
joblib.dump(model, 'stress_model.pkl')
print("Model saved successfully")

Model saved successfully
