# In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import RFE
import joblib

# Data preparation
# Read the dataset, discard every row that has a missing value in any column,
# then remove the columns that are not used as model inputs.
# NOTE(review): if the CSV stores "no disorder" as the literal text "None",
# pandas' default NA parsing may read it as missing and dropna() would then
# discard those rows -- confirm against the data file.
data = pd.read_csv("Sleep_health_and_lifestyle_dataset.csv").dropna()
data = data.drop(columns=['Person ID', 'Occupation', 'Gender', 'BMI Category'])

# The two label columns are pulled out; everything else is a feature.
y_disorder = data['Sleep Disorder']
y_insomnia = data['Insomnia']
X = data.drop(columns=['Sleep Disorder', 'Insomnia'])

# Standardise the features and persist the fitted scaler.
# NOTE(review): the scaler is fitted on all rows *before* the train/test
# split, so the held-out rows contribute to the scaling statistics.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)
joblib.dump(scaler, "scaler.pkl")

# 70/30 hold-out splits, one per target, each stratified on its own labels.
split_options = {"test_size": 0.3, "random_state": 42}
X_train_d, X_test_d, y_train_d, y_test_d = train_test_split(
    X_scaled, y_disorder, stratify=y_disorder, **split_options
)
X_train_i, X_test_i, y_train_i, y_test_i = train_test_split(
    X_scaled, y_insomnia, stratify=y_insomnia, **split_options
)

def _fit_rfe_logreg(X_train, y_train, model_path, features_path, n_features=9):
    """Select features with RFE, fit a logistic regression, and save both.

    Recursive feature elimination (driven by a liblinear logistic
    regression) picks ``n_features`` columns of ``X_train``; a fresh
    liblinear logistic regression is then fitted on just those columns.
    The fitted classifier is dumped to ``model_path`` and the RFE support
    mask to ``features_path``.

    Args:
        X_train: 2-D NumPy array of training features (it is indexed as
            ``X_train[:, mask]``).
        y_train: Training labels aligned with the rows of ``X_train``.
        model_path: Destination file for the fitted classifier.
        features_path: Destination file for the RFE support mask.
        n_features: Number of features RFE should keep.

    Returns:
        A ``(selector, X_selected, classifier)`` tuple: the fitted RFE
        object, the training matrix restricted to the selected columns,
        and the fitted classifier.
    """
    selector = RFE(
        LogisticRegression(solver='liblinear'),
        n_features_to_select=n_features,
    )
    selector.fit(X_train, y_train)
    X_selected = X_train[:, selector.support_]

    classifier = LogisticRegression(solver='liblinear')
    classifier.fit(X_selected, y_train)

    # Only the mask is stored (not the RFE object); whoever loads the model
    # must apply the same mask to the scaled feature columns.
    joblib.dump(classifier, model_path)
    joblib.dump(selector.support_, features_path)
    return selector, X_selected, classifier


# Sleep Disorder Model
rfe_d, X_train_sel_d, model_d = _fit_rfe_logreg(
    X_train_d, y_train_d, "model_disorder.pkl", "features_disorder.pkl"
)

# Insomnia Model
rfe_i, X_train_sel_i, model_i = _fit_rfe_logreg(
    X_train_i, y_train_i, "model_insomnia.pkl", "features_insomnia.pkl"
)

# Save metrics (optional)
# Each model is scored on its own held-out split, restricted to the columns
# its RFE step selected; the reports are stored as plain dictionaries.
y_pred_d = model_d.predict(X_test_d[:, rfe_d.support_])
y_pred_i = model_i.predict(X_test_i[:, rfe_i.support_])

report_d = classification_report(y_test_d, y_pred_d, output_dict=True)
report_i = classification_report(y_test_i, y_pred_i, output_dict=True)

joblib.dump(report_d, "report_disorder.pkl")
joblib.dump(report_i, "report_insomnia.pkl")
