In [2]:
!pip install catboost

Collecting catboost
  Downloading catboost-1.2.8-cp311-cp311-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.8-cp311-cp311-manylinux2014_x86_64.whl (99.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 MB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.8


In [3]:
# 🧠 CatBoost Regression and Classification on Sleep Dataset

# 📌 Step 1: Import Libraries
import pandas as pd
import numpy as np
from catboost import CatBoostRegressor, CatBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import (
    mean_absolute_error, mean_squared_error, r2_score,
    accuracy_score, classification_report, confusion_matrix
)

# 📂 Step 2: Load and Clean Dataset
# 'Person ID' is dropped before modelling (presumably a row identifier with
# no predictive meaning — confirm against the dataset description).
df = pd.read_csv('Sleep_Data_Sampled(missing_Dis).csv').drop(columns=['Person ID'])

# Label Encode Categorical Columns
# Each column gets its own encoder; string categories become integer codes.
categorical_cols = ['Gender', 'Occupation', 'BMI Category', 'Blood Pressure']
for col in categorical_cols:
    df[col] = LabelEncoder().fit_transform(df[col])

# 🎯 Step 3: Define Features and Targets
X = df.drop(columns=['Quality of Sleep'])
y_reg = df['Quality of Sleep']
# Classification target: score <= 4 -> 0 (Poor), 5..7 -> 1 (Average), > 7 -> 2 (Good)
y_class = y_reg.apply(lambda x: 0 if x <= 4 else (1 if x <= 7 else 2))

# 🔀 Step 4: Train/Test Split
# Split BEFORE scaling so that no statistic of the test rows reaches the
# preprocessing step. One call splits the features and both targets together;
# with the same test_size and random_state it selects the same rows as two
# separate calls would, so the regression and classification sets stay aligned.
(X_train_raw, X_test_raw,
 y_train_r, y_test_r,
 y_train_c, y_test_c) = train_test_split(
    X, y_reg, y_class, test_size=0.2, random_state=42
)

# ⚖ Step 5: Feature Scaling (optional for CatBoost, but we'll apply it for consistency)
# The scaler learns mean/std from the training rows only and is then applied
# unchanged to the test rows.
scaler = StandardScaler().fit(X_train_raw)
X_train_r = scaler.transform(X_train_raw)
X_test_r = scaler.transform(X_test_raw)

# Both tasks use the same rows; copies keep the two sets independent objects.
X_train_c, X_test_c = X_train_r.copy(), X_test_r.copy()

# Full feature matrix scaled with the train-only statistics, kept under its
# original name for any later cell that refers to it.
X_scaled = scaler.transform(X)

# 🟢 Step 6A: CatBoost Regression
# Fit on the training split, then predict the held-out split.
regressor = CatBoostRegressor(verbose=0, random_state=42)
regressor.fit(X_train_r, y_train_r)
y_pred_r = regressor.predict(X_test_r)

# Each metric is printed as "<label> <value>", in the order MAE, MSE, R².
print("📊 Regression Evaluation (CatBoost)")
regression_metrics = (
    ("MAE:", mean_absolute_error),
    ("MSE:", mean_squared_error),
    ("R² Score:", r2_score),
)
for metric_label, metric_fn in regression_metrics:
    print(metric_label, metric_fn(y_test_r, y_pred_r))

# 🟢 Step 6B: CatBoost Classification
# Fit on the training split, then predict class ids for the held-out split.
classifier = CatBoostClassifier(verbose=0, random_state=42)
classifier.fit(X_train_c, y_train_c)
y_pred_c = classifier.predict(X_test_c)

# Class ids produced when the sleep-quality score was binned, with their
# display names in matching order. Passing the ids explicitly keeps the
# confusion matrix 3x3 and prevents classification_report from raising when a
# rare class (e.g. "Poor") is absent from both the test labels and predictions.
class_ids = [0, 1, 2]
class_names = ["Poor", "Average", "Good"]

print("\n📊 Classification Evaluation (CatBoost)")
print("Accuracy:", accuracy_score(y_test_c, y_pred_c))
print("Confusion Matrix:\n", confusion_matrix(y_test_c, y_pred_c, labels=class_ids))
print(
    "Classification Report:\n",
    classification_report(y_test_c, y_pred_c, labels=class_ids, target_names=class_names),
)


📊 Regression Evaluation (CatBoost)
MAE: 0.029967933097154547
MSE: 0.005551335286922135
R² Score: 0.9949762838507925

📊 Classification Evaluation (CatBoost)
Accuracy: 0.9983333333333333
Confusion Matrix:
 [[   8    1    0]
 [   0 1611    3]
 [   0    1 1376]]
Classification Report:
               precision    recall  f1-score   support

        Poor       1.00      0.89      0.94         9
     Average       1.00      1.00      1.00      1614
        Good       1.00      1.00      1.00      1377

    accuracy                           1.00      3000
   macro avg       1.00      0.96      0.98      3000
weighted avg       1.00      1.00      1.00      3000

