In [1]:
# 🧠 LightGBM Regression and Classification on Sleep Dataset

# 📌 Step 1: Import Libraries
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import (
    mean_absolute_error, mean_squared_error, r2_score,
    accuracy_score, classification_report, confusion_matrix
)

# 📂 Step 2: Load and Clean Dataset
# Read the CSV and drop the 'Person ID' column in a single chained expression.
df = pd.read_csv('Sleep_Data_Sampled(missing_Dis).csv').drop(columns=['Person ID'])

# Label Encode Categorical Columns
# Every listed column is replaced by the integer codes produced by its own,
# freshly fitted LabelEncoder; all other columns are left untouched.
categorical_cols = ['Gender', 'Occupation', 'BMI Category', 'Blood Pressure']
encoded_columns = {
    name: LabelEncoder().fit_transform(df[name])
    for name in categorical_cols
}
df = df.assign(**encoded_columns)

# 🎯 Step 3: Define Features and Targets
def _quality_band(score):
    """Return the class id for one 'Quality of Sleep' score.

    0 when score <= 4, 1 when score <= 7, otherwise 2.
    """
    if score <= 4:
        return 0
    if score <= 7:
        return 1
    return 2


y_reg = df['Quality of Sleep']              # Regression target
X = df.drop(columns=['Quality of Sleep'])   # Every remaining column is a feature
y_class = y_reg.map(_quality_band)          # Classification target

# 🔀 Step 4: Train/Test Split
# Split BEFORE scaling so that no test row contributes to the scaler's
# mean/variance (fitting the scaler on the full dataset leaks test-set
# statistics into preprocessing).
# A single call splits the features and both targets together. With the same
# inputs, test_size and random_state, this yields the same row partition that
# two separate calls would, so regression and classification share one split.
X_train, X_test, y_train_r, y_test_r, y_train_c, y_test_c = train_test_split(
    X, y_reg, y_class, test_size=0.2, random_state=42
)

# ⚖ Step 5: Feature Scaling (important for some models)
# Fit on the training rows only, then apply the learned transform to the test rows.
scaler = StandardScaler()
X_train_r = scaler.fit_transform(X_train)
X_test_r = scaler.transform(X_test)

# Both tasks use the same feature matrices; these names refer to the same
# arrays as the regression ones (they are not copies).
X_train_c, X_test_c = X_train_r, X_test_r

# Kept so any later cell that refers to X_scaled still works: the full feature
# matrix transformed with the train-fitted scaler.
X_scaled = scaler.transform(X)

# 🟢 Step 6A: LightGBM Regression
# fit() returns the fitted estimator, so construction and training are chained.
regressor = lgb.LGBMRegressor(random_state=42).fit(X_train_r, y_train_r)
y_pred_r = regressor.predict(X_test_r)

# Each metric is evaluated on the held-out split and printed under its label.
print("📊 Regression Evaluation (LightGBM)")
regression_metrics = (
    ("MAE:", mean_absolute_error),
    ("MSE:", mean_squared_error),
    ("R² Score:", r2_score),
)
for label, metric in regression_metrics:
    print(label, metric(y_test_r, y_pred_r))

# 🟢 Step 6B: LightGBM Classification
classifier = lgb.LGBMClassifier(random_state=42)
classifier.fit(X_train_c, y_train_c)
y_pred_c = classifier.predict(X_test_c)

# Pin the class ids explicitly and pair them with their display names.
# Without `labels`, scikit-learn infers the classes from the values present in
# y_test_c / y_pred_c; if a rare class is missing from the split, the
# positional `target_names` would no longer line up (or the report would fail)
# and the confusion matrix would change shape.
class_ids = [0, 1, 2]
class_names = ["Poor", "Average", "Good"]

print("\n📊 Classification Evaluation (LightGBM)")
print("Accuracy:", accuracy_score(y_test_c, y_pred_c))
print("Confusion Matrix:\n", confusion_matrix(y_test_c, y_pred_c, labels=class_ids))
print(
    "Classification Report:\n",
    classification_report(
        y_test_c, y_pred_c, labels=class_ids, target_names=class_names
    ),
)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000492 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 270
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 10
[LightGBM] [Info] Start training from score 7.131833
📊 Regression Evaluation (LightGBM)
MAE: 0.028832525948128736
MSE: 0.007132528056228545
R² Score: 0.9935453734049962
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000292 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 270
[LightGBM] [Info] Number of data points in the train set: 12000, number of used features: 10
[LightGBM] [Info] Start training from score -5.991465
[LightGBM] [Info] Start training from score -0.621602
[LightGBM] [Info] S




📊 Classification Evaluation (LightGBM)
Accuracy: 0.9983333333333333
Confusion Matrix:
 [[   7    2    0]
 [   0 1611    3]
 [   0    0 1377]]
Classification Report:
               precision    recall  f1-score   support

        Poor       1.00      0.78      0.88         9
     Average       1.00      1.00      1.00      1614
        Good       1.00      1.00      1.00      1377

    accuracy                           1.00      3000
   macro avg       1.00      0.93      0.96      3000
weighted avg       1.00      1.00      1.00      3000





In [None]:
# Mount Google Drive into the Colab filesystem (only works inside Google Colab).
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)