In [None]:
import pandas as pd

# Location of the raw CSV; change this if the file lives on a mounted Drive instead.
DATA_PATH = '/content/Sleep_health_and_lifestyle_dataset.csv'

# Read the file, report (rows, columns), then preview the first five rows.
df = pd.read_csv(filepath_or_buffer=DATA_PATH)
print("Shape:", df.shape)
df.head(n=5)


In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import joblib

from sklearn.model_selection import train_test_split, GridSearchCV, validation_curve
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# 1) Load & clean
# Start again from the file on disk so re-running this cell never operates on
# an already-cleaned frame; the identifier column is dropped if it is present.
df = pd.read_csv(DATA_PATH).drop(columns='Person ID', errors='ignore')

# A missing target value is kept as its own class, labelled 'None'.
df['Sleep Disorder'] = df['Sleep Disorder'].fillna('None')

# Rows lacking any of these three fields are discarded.
df = df.dropna(subset=['Age', 'Blood Pressure', 'BMI Category'])

# Split the "<first>/<second>" blood-pressure string into two integer columns,
# appended in that order, and retire the original text column.
bp = df['Blood Pressure'].str.split('/', expand=True).astype(int)
df = df.assign(systolic_bp=bp[0], diastolic_bp=bp[1]).drop(columns='Blood Pressure')

# Remove the word 'Weight' (case-sensitive) and surrounding whitespace from the labels.
df['BMI Category'] = df['BMI Category'].str.replace('Weight', '').str.strip()

# 2) Prep features & target
X = df.drop(columns='Sleep Disorder')

# Keep the fitted encoder under the name `le`: the final plotting cell looks for
# `le` in globals() to turn the integer classes back into readable labels.
le = LabelEncoder()
y = le.fit_transform(df['Sleep Disorder'])

# Infer column roles from dtype instead of hard-coding three names. Any numeric
# column left out of `numeric_feats` would be one-hot encoded, and with
# handle_unknown='ignore' a value not seen during fit encodes as all zeros.
# NOTE(review): assumes every numeric-dtype column is a genuine measurement or
# ordinal score rather than a numerically coded category - confirm against the data.
numeric_feats = X.select_dtypes(include='number').columns.tolist()
categorical_feats = [c for c in X.columns if c not in numeric_feats]

# Scale the numeric columns; one-hot encode everything else.
preprocessor = ColumnTransformer([
    ('num', Pipeline([('scaler', StandardScaler())]), numeric_feats),
    ('cat', Pipeline([('onehot', OneHotEncoder(handle_unknown='ignore'))]), categorical_feats)
])

# 3) Build pipelines + param grids
def make_pipe(clf):
    """Wrap ``clf`` in the three-step modelling pipeline used for every candidate.

    Steps, in order:
      * ``'prep'``   - the module-level ``preprocessor``
      * ``'select'`` - ``SelectKBest`` scoring with ``f_classif``, keeping 10 features
      * ``'clf'``    - the estimator passed in

    Parameters
    ----------
    clf : estimator
        Classifier placed as the final pipeline step.

    Returns
    -------
    Pipeline
        Unfitted pipeline; hyper-parameters of ``clf`` are addressable with the
        ``clf__`` prefix.
    """
    steps = [
        ('prep', preprocessor),
        ('select', SelectKBest(score_func=f_classif, k=10)),
        ('clf', clf),
    ]
    return Pipeline(steps=steps)
# Candidate models: name -> (pipeline, hyper-parameter grid).
# Grid keys carry the 'clf__' prefix because the estimator is the pipeline step
# named 'clf'.
models = {
    'logreg': (make_pipe(LogisticRegression(max_iter=1000)),
               {'clf__C':[0.01,0.1,1,10]}),
    'knn':    (make_pipe(KNeighborsClassifier()),
               {'clf__n_neighbors':[3,5,7]}),
    'svm':    (make_pipe(SVC()),
               {'clf__C':[0.1,1,10],'clf__kernel':['linear','rbf']}),
    # Seeded so the fitted tree (and therefore its scores) is the same on every
    # run, in line with the fixed random_state used for the train/test split.
    'dt':     (make_pipe(DecisionTreeClassifier(random_state=42)),
               {'clf__max_depth':[None,5,10]})
}

# 4) Train, tune, evaluate
# Stratified 80/20 split with a fixed seed so the held-out set is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)

best_models = {}  # name -> (refit best pipeline, accuracy on the held-out test set)
cv_scores = {}    # name -> mean cross-validated accuracy of the best parameter set
for name,(pipe,params) in models.items():
    print(f"\n▶️ {name}")
    # 5-fold grid search on the training split only; the best setting is refit
    # on all of X_train and exposed through g.predict / g.best_estimator_.
    g = GridSearchCV(pipe, params, cv=5, n_jobs=-1, scoring='accuracy')
    g.fit(X_train, y_train)
    y_pred = g.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print(" Best params:", g.best_params_)
    print(" CV acc:    ", g.best_score_)
    print(" Test acc:  ", acc)
    print(classification_report(y_test, y_pred))

    # Quick heat-map of the confusion matrix (ticks intentionally hidden).
    cm = confusion_matrix(y_test, y_pred)
    plt.figure(figsize=(4,3))
    plt.imshow(cm, cmap='Blues')
    plt.title(f"{name} Confusion")
    plt.colorbar()
    plt.xticks([],[])
    plt.yticks([],[])
    plt.show()

    best_models[name] = (g.best_estimator_, acc)
    cv_scores[name] = g.best_score_

# 5) Identify & save best
# Rank candidates by cross-validated accuracy, which is computed from the
# training split alone. Picking the winner by test accuracy and then reporting
# that same number would make the reported figure optimistically biased.
# `best_acc` remains the winner's test-set accuracy and is now an honest estimate.
best_name = max(cv_scores, key=cv_scores.get)
best_model, best_acc = best_models[best_name]
print(f"\n🏆 Best: {best_name} ({best_acc:.3f})")
print(f" Selected on CV accuracy: {cv_scores[best_name]:.3f}")

# Persist the fitted pipeline (preprocessing + feature selection + classifier).
OUTDIR = '/content/drive/MyDrive/models/'  # or './models/'
os.makedirs(OUTDIR, exist_ok=True)
joblib.dump(best_model, os.path.join(OUTDIR, 'sleep_disorder_model.joblib'))
print("Model saved to", OUTDIR)


In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# 1) Score the held-out split with the winning pipeline (the in-memory
#    `best_model`, not a copy reloaded from disk)
best_pred = best_model.predict(X_test)

# 2) Tabulate true vs. predicted classes
cm = confusion_matrix(y_test, best_pred)

# 3) Draw the matrix
#    Class names appear on the axes only if a fitted LabelEncoder is available
#    under the global name `le`, e.g.
#      le = LabelEncoder(); y = le.fit_transform(df['Sleep Disorder'])
#    Otherwise the display falls back to its default labels.
if 'le' in globals():
    display_labels = le.classes_
else:
    display_labels = None

fig, ax = plt.subplots(figsize=(6,6))
disp = ConfusionMatrixDisplay(confusion_matrix=cm,
                              display_labels=display_labels)
disp.plot(ax=ax, cmap='Blues', colorbar=True)
plt.xticks(rotation=45)
ax.set_title(f'{best_name} Confusion Matrix (acc={best_acc:.2f})')
fig.tight_layout()
plt.show()
