In [None]:
# 📌 Step 1: Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import (
    mean_absolute_error, mean_squared_error, r2_score,
    accuracy_score, classification_report, confusion_matrix
)
import xgboost as xgb  # Make sure XGBoost is installed

# 📂 Step 2: Load dataset
# The path is absolute and Colab-specific: the CSV has to exist under /content.
df = pd.read_csv('/content/sleep_pattern_analysis_datasett.csv')  # Upload in Colab first

# 🧹 Step 3: Data cleaning
# Person_ID is removed so it is never fed to the models as a feature.
df.drop('Person_ID', axis=1, inplace=True)
# errors='coerce' turns unparseable work-hour entries into NaN so that the
# dropna below discards those rows instead of failing on them.
df['Work Hours (hrs/day)'] = pd.to_numeric(df['Work Hours (hrs/day)'], errors='coerce')
# Drop incomplete rows BEFORE encoding Gender. Encoding first would replace the
# column with integers, so a row whose Gender was missing could no longer be
# detected by dropna (depending on the scikit-learn version the encoder either
# assigns the missing value its own class or raises).
df.dropna(inplace=True)
df['Gender'] = LabelEncoder().fit_transform(df['Gender'])

# 🎯 Step 4: Split features and target
# Every remaining column except 'Sleep Quality' is used as a feature.
X = df.drop('Sleep Quality', axis=1)
y = df['Sleep Quality']

# ⚖️ Step 5: Feature scaling (optional)
# NOTE(review): the scaler is fitted on the full dataset, i.e. before the
# train/test splits performed further down, so test rows contribute to the
# mean/variance used for scaling. Fit on the training split only if this
# pipeline is reused with a scale-sensitive model.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)  # NumPy array; column names are not kept

# --------------------------
# 🧠 Step 6A: Regression Model with XGBoost
# --------------------------
# Predict the numeric 'Sleep Quality' value directly from the scaled features.
# 80/20 hold-out split; the fixed seed makes the partition reproducible.
X_train_r, X_test_r, y_train_r, y_test_r = train_test_split(
    X_scaled,
    y,
    test_size=0.2,
    random_state=42,
)

# Default hyper-parameters; only the seed is pinned.
regressor = xgb.XGBRegressor(random_state=42)
regressor.fit(X_train_r, y_train_r)
y_pred_r = regressor.predict(X_test_r)

# Label / metric pairs, evaluated on the held-out split in display order.
regression_metrics = (
    ("MAE:", mean_absolute_error),
    ("MSE:", mean_squared_error),
    ("R²:", r2_score),
)

print("📊 Regression Evaluation (XGBoost)")
for metric_label, metric_fn in regression_metrics:
    print(metric_label, metric_fn(y_test_r, y_pred_r))

# --------------------------
# 🧠 Step 6B: Classification Model with XGBoost
# --------------------------
# Convert numeric quality into categories
# Class ids and their display names are kept side by side so the report and
# the confusion matrix always use the same three classes in the same order.
CLASS_LABELS = [0, 1, 2]
CLASS_NAMES = ["Poor", "Average", "Good"]

# Vectorised binning: quality <= 4 -> 0, quality <= 7 -> 1, anything else -> 2.
# np.select takes the first matching condition, so the order of the
# conditions matters. The original index and name are preserved.
y_class = pd.Series(
    np.select(
        [(y <= 4).to_numpy(), (y <= 7).to_numpy()],
        [0, 1],
        default=2,
    ),
    index=y.index,
    name=y.name,
)  # 0=Poor, 1=Average, 2=Good

# Same 80/20 split parameters and seed as the regression experiment.
X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(X_scaled, y_class, test_size=0.2, random_state=42)
classifier = xgb.XGBClassifier(random_state=42)
classifier.fit(X_train_c, y_train_c)
y_pred_c = classifier.predict(X_test_c)

print("\n📊 Classification Evaluation (XGBoost)")
print("Accuracy:", accuracy_score(y_test_c, y_pred_c))
# Passing labels explicitly keeps target_names aligned with the class ids even
# when one class is absent from both the test targets and the predictions;
# without it the number of names would not match the number of classes found.
print(
    "Classification Report:\n",
    classification_report(
        y_test_c,
        y_pred_c,
        labels=CLASS_LABELS,
        target_names=CLASS_NAMES,
    ),
)
# Explicit labels also guarantee a 3x3 matrix in Poor/Average/Good order.
print("Confusion Matrix:\n", confusion_matrix(y_test_c, y_pred_c, labels=CLASS_LABELS))

📊 Regression Evaluation (XGBoost)
MAE: 2.638400077819824
MSE: 9.692822456359863
R²: -0.19400620460510254

📊 Classification Evaluation (XGBoost)
Accuracy: 0.354
Classification Report:
               precision    recall  f1-score   support

        Poor       0.42      0.50      0.46       394
     Average       0.33      0.28      0.30       310
        Good       0.26      0.24      0.25       296

    accuracy                           0.35      1000
   macro avg       0.34      0.34      0.34      1000
weighted avg       0.35      0.35      0.35      1000

Confusion Matrix:
 [[196  90 108]
 [132  87  91]
 [139  86  71]]
