<a href="https://colab.research.google.com/github/ne-adrita/Predicting-Sleep-Quality-through-Behavioral-Pattern-Recognition/blob/main/CatBoost.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install catboost


In [10]:
# 📌 Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import (
    mean_absolute_error, mean_squared_error, r2_score,
    accuracy_score, classification_report, confusion_matrix
)
from catboost import CatBoostRegressor, CatBoostClassifier

# 📂 Load dataset
# Path and column name are lifted into constants so they are easy to retarget.
DATA_PATH = '/content/sleep_pattern_analysis_datasett (1).csv'
WORK_HOURS_COL = 'Work Hours (hrs/day)'

raw_df = pd.read_csv(DATA_PATH)

# 🧹 Clean data
# Build `df` from `raw_df` without in-place mutation so the raw frame stays
# available for inspection and the cleaning steps can be re-run safely.
df = raw_df.drop(columns='Person_ID')

# Non-numeric work-hour entries become NaN here and are removed by dropna below.
df[WORK_HOURS_COL] = pd.to_numeric(df[WORK_HOURS_COL], errors='coerce')

# Drop incomplete rows BEFORE label-encoding: encoding first would either turn
# a missing Gender into its own integer class (which dropna can no longer see)
# or fail outright, depending on the scikit-learn version.
df = df.dropna().copy()
df['Gender'] = LabelEncoder().fit_transform(df['Gender'])

# 🎯 Features and targets
y = df['Sleep Quality']                  # continuous target, used for regression
X = df.drop(columns='Sleep Quality')     # every remaining column is a feature

# Classification target: bucket the score into three ordinal classes.
#   score <= 4 -> 0,  score <= 7 -> 1,  everything else -> 2
# Conditions are evaluated in order, so the first match wins.
y_class = pd.Series(
    np.select([y <= 4, y <= 7], [0, 1], default=2),
    index=y.index,
    name=y.name,
    dtype='int64',
)

# 🔀 Split first, scale second.
# A single call partitions the features and both targets together, so the
# classifier and the regressor are evaluated on exactly the same rows. With the
# same test_size and random_state the row assignment matches what two separate
# calls on same-length inputs would produce.
(
    X_train, X_test,
    y_train_reg, y_test_reg,
    y_train_cls, y_test_cls,
) = train_test_split(X, y, y_class, test_size=0.2, random_state=42)

# ⚖ Feature scaling
# Fit the scaler on the training rows ONLY; fitting on the full dataset would
# leak test-set mean/variance into the training features.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Both tasks share the same scaled feature matrices.
X_train_cls, X_test_cls = X_train_scaled, X_test_scaled
X_train_reg, X_test_reg = X_train_scaled, X_test_scaled

# Kept so any code that still refers to `X_scaled` continues to work; note it is
# now produced with training-set statistics and must not be used for fitting.
X_scaled = scaler.transform(X)

# 🤖 Train CatBoostClassifier
cat_cls = CatBoostClassifier(verbose=0)
cat_cls.fit(X_train_cls, y_train_cls)
y_pred_cls = cat_cls.predict(X_test_cls)

# 📊 Classification Evaluation
# Pin the label order explicitly. Without `labels`, classification_report
# raises if any class is absent from the test split/predictions (the three
# target_names no longer line up), and confusion_matrix silently shrinks.
class_ids = [0, 1, 2]
class_names = ["Poor", "Average", "Good"]

print("🎯 Classification Accuracy:", accuracy_score(y_test_cls, y_pred_cls))
print(
    "📋 Classification Report:\n",
    classification_report(
        y_test_cls, y_pred_cls, labels=class_ids, target_names=class_names
    ),
)
# Rows are true classes, columns are predicted classes, both in class_ids order.
print("🔁 Confusion Matrix:\n", confusion_matrix(y_test_cls, y_pred_cls, labels=class_ids))

# 🤖 Train CatBoostRegressor
# Fit on the regression training split, then predict the held-out rows.
cat_reg = CatBoostRegressor(verbose=0)
cat_reg.fit(X_train_reg, y_train_reg)
y_pred_reg = cat_reg.predict(X_test_reg)

# 📊 Regression Evaluation
# Each entry pairs the printed label with the metric that produces its value;
# every metric is called as metric(y_true, y_pred).
regression_metrics = (
    ("🧮 MAE:", mean_absolute_error),
    ("📉 MSE:", mean_squared_error),
    ("📈 R2 Score:", r2_score),
)
for metric_label, metric_fn in regression_metrics:
    print(metric_label, metric_fn(y_test_reg, y_pred_reg))


🎯 Classification Accuracy: 0.349
📋 Classification Report:
               precision    recall  f1-score   support

        Poor       0.42      0.51      0.46       394
     Average       0.30      0.24      0.26       310
        Good       0.28      0.25      0.26       296

    accuracy                           0.35      1000
   macro avg       0.33      0.33      0.33      1000
weighted avg       0.34      0.35      0.34      1000

🔁 Confusion Matrix:
 [[201  88 105]
 [146  74  90]
 [134  88  74]]
🧮 MAE: 2.535750523934219
📉 MSE: 8.848910093175375
📈 R2 Score: -0.09004916212017577
