In [11]:
# 📦 Importing Libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.feature_selection import SelectFromModel

# 📂 Load the dataset
# NOTE: absolute, machine-specific path; adjust it when running elsewhere.
DATA_PATH = "C:/Users/kondu/Downloads/Day(3)/day3_updated_mental_health.csv"
df = pd.read_csv(DATA_PATH)

# The target column is mandatory (the other columns dropped further down are
# optional), so fail early with an explicit message instead of a bare KeyError.
if 'BurnoutRisk' not in df.columns:
    raise KeyError("Required target column 'BurnoutRisk' not found in dataset")

# 🧠 Categorical columns to one-hot encode
target_cols = ['Gender', 'Country', 'JobRole', 'Department', 'RemoteWork',
               'HasMentalHealthSupport', 'HasTherapyAccess', 'SalaryRange']

# ✅ Keep only the columns that are actually present in the file
cols_to_encode = [col for col in target_cols if col in df.columns]

# 🧼 One-hot encode; drop_first=True drops one dummy level per column
df = pd.get_dummies(df, columns=cols_to_encode, drop_first=True)

# 🎯 Define Features and Target
# 'BurnoutLevel' and 'EmployeeID' are excluded from the features when present.
X = df.drop(columns=['BurnoutLevel', 'BurnoutRisk', 'EmployeeID'], errors='ignore')
y = df['BurnoutRisk'].astype(int)  # Ensure target is numeric

# 🔀 Train-test split (80/20, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 🌳 🌲 🔍 Build the three classifiers
tree_model = DecisionTreeClassifier(random_state=42)
rf_model = RandomForestClassifier(random_state=42)
# NOTE: k-NN is distance-based and the features are passed in unscaled here.
knn_model = KNeighborsClassifier(n_neighbors=3)

# Insertion order of this dict fixes the training and reporting order.
models = {
    "Decision Tree": tree_model,
    "Random Forest": rf_model,
    "k-NN": knn_model,
}

# Train every model on the same split and keep its test-set predictions.
predictions = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    predictions[name] = model.predict(X_test)

# Preserve the individual prediction variables under their original names.
y_pred_tree = predictions["Decision Tree"]
y_pred_rf = predictions["Random Forest"]
y_pred_knn = predictions["k-NN"]

# 📊 Evaluate All Models
# ROC-AUC uses column 1 of predict_proba, i.e. the second entry of
# model.classes_ (assumes a binary target such as 0/1).
for name, model in models.items():
    print(f"📌 {name} - Accuracy:", accuracy_score(y_test, predictions[name]))
    print(f"📌 {name} - ROC-AUC :", roc_auc_score(y_test, model.predict_proba(X_test)[:, 1]))

# ⭐ Feature Selection using Random Forest
# prefit=True tells SelectFromModel to reuse the already-trained forest, which
# is the documented way to call transform() without calling fit() first.
# Features need an importance of at least the mean, capped at 5 features.
selector = SelectFromModel(rf_model, threshold="mean", max_features=5, prefit=True)
X_selected = selector.transform(X)
print("Selected Features shape:", X_selected.shape)


📌 Decision Tree - Accuracy: 0.5533333333333333
📌 Decision Tree - ROC-AUC : 0.4999812965248943
📌 Random Forest - Accuracy: 0.6633333333333333
📌 Random Forest - ROC-AUC : 0.5000124689834038
📌 k-NN - Accuracy: 0.58
📌 k-NN - ROC-AUC : 0.5112407885385105
Selected Features shape: (3000, 5)


