In [None]:

# Sleep Quality Prediction using Machine Learning (Regression & Classification)

## 📌 Step 1: Import Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, accuracy_score, classification_report, confusion_matrix

## 📂 Step 2: Load Dataset
# The CSV must already be uploaded to the Colab runtime at this path before running the cell.
df = pd.read_csv(filepath_or_buffer='/content/sleep_pattern_analysis_datasett.csv')

## 🧹 Step 3: Data Cleaning
# Remove Person_ID so it is not picked up as a feature when the target column is split off later.
df = df.drop(columns='Person_ID')

# Force the work-hours column to numeric; entries that cannot be parsed become NaN
# and are removed together with any other incomplete rows just below.
df['Work Hours (hrs/day)'] = pd.to_numeric(df['Work Hours (hrs/day)'], errors='coerce')

# Drop incomplete rows BEFORE label-encoding. Encoding first would turn a missing Gender
# into an ordinary integer class (or raise, depending on the scikit-learn version), after
# which dropna could no longer see and remove that row.
# .copy() gives an independent frame so the column assignment below does not warn.
df = df.dropna().copy()

# Encode Gender as integer codes (classes are sorted, so codes follow alphabetical order).
le = LabelEncoder()
df['Gender'] = le.fit_transform(df['Gender'])

## 🎯 Step 4: Feature and Target Split
# 'Sleep Quality' is the prediction target; every other remaining column is a predictor.
y = df['Sleep Quality']
X = df.drop(columns=['Sleep Quality'])

## ⚖️ Step 5: Train/Test Split, then Feature Scaling
# Integer class codes and their display names, in matching order.
CLASS_LABELS = [0, 1, 2]
CLASS_NAMES = ["Poor", "Average", "Good"]

# Bin the numeric target for the classification task:
#   score <= 4 -> 0 (Poor), 4 < score <= 7 -> 1 (Average), score > 7 -> 2 (Good).
# pd.cut uses right-closed intervals by default, which matches these thresholds.
y_class = pd.cut(y, bins=[-np.inf, 4, 7, np.inf], labels=CLASS_LABELS).astype(int)

# Split BEFORE scaling so that no statistic from the held-out rows reaches preprocessing.
# A single call splits the features and both targets on the same rows, so the regressor
# and the classifier are evaluated on an identical test set.
X_train, X_test, y_train_r, y_test_r, y_train_c, y_test_c = train_test_split(
    X, y, y_class, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply the same transform everywhere.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
# Full scaled matrix, kept under its original name for any later cell that expects it.
X_scaled = scaler.transform(X)

# --------------------------
# 🧠 Step 6A: Regression Model
# --------------------------
# Predict the numeric sleep-quality score directly.
X_train_r, X_test_r = X_train_scaled, X_test_scaled
regressor = RandomForestRegressor(random_state=42)
regressor.fit(X_train_r, y_train_r)
y_pred_r = regressor.predict(X_test_r)

print("📊 Regression Evaluation")
print("MAE:", mean_absolute_error(y_test_r, y_pred_r))
print("MSE:", mean_squared_error(y_test_r, y_pred_r))
print("R²:", r2_score(y_test_r, y_pred_r))

# --------------------------
# 🧠 Step 6B: Classification Model
# --------------------------
# Predict the binned class (0=Poor, 1=Average, 2=Good) from the same scaled features.
X_train_c, X_test_c = X_train_scaled, X_test_scaled
classifier = RandomForestClassifier(random_state=42)
classifier.fit(X_train_c, y_train_c)
y_pred_c = classifier.predict(X_test_c)

print("\n📊 Classification Evaluation")
print("Accuracy:", accuracy_score(y_test_c, y_pred_c))
# Passing labels explicitly keeps the rows/columns aligned with CLASS_NAMES even when a
# class happens to be absent from the test split or from the predictions.
print("Classification Report:\n", classification_report(y_test_c, y_pred_c, labels=CLASS_LABELS, target_names=CLASS_NAMES))
print("Confusion Matrix:\n", confusion_matrix(y_test_c, y_pred_c, labels=CLASS_LABELS))


📊 Regression Evaluation
MAE: 2.47025
MSE: 8.358985100000002
R²: -0.02969796375910061

📊 Classification Evaluation
Accuracy: 0.366
Classification Report:
               precision    recall  f1-score   support

        Poor       0.40      0.64      0.50       394
     Average       0.35      0.23      0.28       310
        Good       0.25      0.14      0.18       296

    accuracy                           0.37      1000
   macro avg       0.33      0.34      0.32      1000
weighted avg       0.34      0.37      0.33      1000

Confusion Matrix:
 [[254  61  79]
 [190  70  50]
 [186  68  42]]
