In [16]:
# Imports come first: the load below needs `pd`, which previously was only
# imported after `pd.read_csv` had already been called (NameError on a fresh
# interpreter/kernel).
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Load the dataset
# NOTE: this is an absolute path on one specific machine; adjust it when
# running elsewhere.
file_path = r"C:\Users\Arin\Downloads\Sleep_health_and_lifestyle_dataset.csv"
df = pd.read_csv(file_path)

# 🔹 Drop Person ID (not useful for prediction)
df = df.drop(columns=["Person ID"])

# 🔹 Split Blood Pressure ("<systolic>/<diastolic>" text) into two float columns
bp_parts = df["Blood Pressure"].str.split("/", expand=True).astype(float)
df["Systolic_BP"] = bp_parts[0]
df["Diastolic_BP"] = bp_parts[1]
df = df.drop(columns=["Blood Pressure"])

# 🔹 Restore the "no disorder" label if it was parsed as missing
# NOTE(review): pandas >= 2.0 treats the literal text "None" as NaN when
# reading CSVs. Presumably this dataset uses "None" for people without a
# disorder - confirm against the CSV. A column mixing NaN with strings makes
# LabelEncoder fail, so missing targets are mapped back to the "None" label.
# This is a no-op when the column has no missing values.
df["Sleep Disorder"] = df["Sleep Disorder"].fillna("None")

# 🔹 Encode categorical features where needed
# One fitted encoder per column is kept so predictions can be encoded/decoded
# later with the same mapping.
label_encoders = {}
categorical_cols = ["Gender", "Occupation", "BMI Category", "Sleep Disorder"]

for col in categorical_cols:
    encoder = LabelEncoder()
    df[col] = encoder.fit_transform(df[col])
    label_encoders[col] = encoder

# 🔹 Define features & target
X = df.drop(columns=["Sleep Disorder"])  # Features
y = df["Sleep Disorder"]  # Target

# 🔹 Train-test split (90% train, 10% test)
# Split BEFORE scaling: fitting the scaler on all rows would let test-set
# statistics (mean/std) leak into training and bias the test metrics.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42, stratify=y
)

# 🔹 Normalize numerical features (statistics learned from the training rows only)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Kept so any later code that still refers to X_scaled keeps working.
X_scaled = scaler.transform(X)

# 🔹 Train Random Forest Model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
rf_pred = rf_model.predict(X_test)

# 🔹 Train Logistic Regression Model
lr_model = LogisticRegression(max_iter=1000, random_state=42)
lr_model.fit(X_train, y_train)
lr_pred = lr_model.predict(X_test)

# 🔹 Evaluate Models (held-out test rows; precision is class-frequency weighted)
rf_accuracy = accuracy_score(y_test, rf_pred)
rf_precision = precision_score(y_test, rf_pred, average="weighted")

lr_accuracy = accuracy_score(y_test, lr_pred)
lr_precision = precision_score(y_test, lr_pred, average="weighted")

print(f"✅ Random Forest Accuracy: {rf_accuracy:.2f}, Precision: {rf_precision:.2f}")
print(f"✅ Logistic Regression Accuracy: {lr_accuracy:.2f}, Precision: {lr_precision:.2f}")

# --------------------------------------------------------
# 🛠 FUNCTION TO PREDICT NEW DATA
# --------------------------------------------------------
def _encode_category(value, column, encoder):
    """Return the integer code `encoder` assigns to `value`, or -1 if unknown.

    An exact match against ``encoder.classes_`` is tried first. If that fails,
    the comparison is repeated ignoring case and surrounding whitespace, so
    input such as ``" male"`` still resolves to ``"Male"``. A value that
    matches nothing keeps the historical ``-1`` fallback, but a warning is
    emitted because the models were never trained on that code.
    """
    import warnings

    known_labels = list(encoder.classes_)
    if value in known_labels:
        return encoder.transform([value])[0]

    wanted = str(value).strip().casefold()
    for label in known_labels:
        if str(label).strip().casefold() == wanted:
            return encoder.transform([label])[0]

    warnings.warn(
        f"Unknown value {value!r} for '{column}'; encoding it as -1, so the "
        f"prediction may be unreliable. Known values: {known_labels}"
    )
    return -1


def predict_sleep_disorder(input_data, model_choice="rf"):
    """Predict the sleep disorder label for one new person.

    Args:
        input_data: The twelve feature values for one person, in the order
            given by ``feature_names`` below. Categorical features (those
            listed in the module-level ``categorical_cols``) are passed as
            their text labels.
        model_choice: ``"rf"`` selects the random forest model; any other
            value selects the logistic regression model.

    Returns:
        The predicted class, decoded back to its original label with the
        "Sleep Disorder" label encoder.

    Notes:
        Reads the module-level ``scaler``, ``label_encoders``,
        ``categorical_cols``, ``rf_model`` and ``lr_model``; none of them is
        modified. Category text that matches no known label is encoded as -1
        and triggers a warning.
    """
    feature_names = [
        "Gender", "Age", "Occupation", "Sleep Duration",
        "Quality of Sleep", "Physical Activity Level",
        "Stress Level", "BMI Category", "Heart Rate",
        "Daily Steps", "Systolic_BP", "Diastolic_BP"
    ]

    # 🔹 Convert input data into a single-row DataFrame
    df_input = pd.DataFrame([input_data], columns=feature_names)

    # 🔹 Encode categorical values with the encoders fitted during training
    for col in categorical_cols:
        if col not in df_input.columns:
            # categorical_cols also names the target, which is not an input.
            continue
        encoder = label_encoders[col]
        df_input[col] = [
            _encode_category(value, col, encoder) for value in df_input[col]
        ]

    # 🔹 Normalize numerical features
    df_input_scaled = scaler.transform(df_input)

    # 🔹 Choose model (anything other than "rf" falls back to logistic regression)
    model = rf_model if model_choice == "rf" else lr_model

    # 🔹 Make prediction and convert it back to a readable class
    prediction = model.predict(df_input_scaled)
    return label_encoders["Sleep Disorder"].inverse_transform([prediction[0]])[0]
# 🔹 Predict on the training rows and the held-out rows with each fitted model
rf_train_pred, rf_test_pred = rf_model.predict(X_train), rf_model.predict(X_test)
lr_train_pred, lr_test_pred = lr_model.predict(X_train), lr_model.predict(X_test)

# 🔹 Accuracy per model and split
rf_train_accuracy, rf_test_accuracy = (
    accuracy_score(y_train, rf_train_pred),
    accuracy_score(y_test, rf_test_pred),
)
lr_train_accuracy, lr_test_accuracy = (
    accuracy_score(y_train, lr_train_pred),
    accuracy_score(y_test, lr_test_pred),
)

# 🔹 Weighted precision per model and split
rf_train_precision, rf_test_precision = (
    precision_score(y_train, rf_train_pred, average="weighted"),
    precision_score(y_test, rf_test_pred, average="weighted"),
)
lr_train_precision, lr_test_precision = (
    precision_score(y_train, lr_train_pred, average="weighted"),
    precision_score(y_test, lr_test_pred, average="weighted"),
)

# 🔹 Report train vs. test side by side for each model
print("\n🔹 Model Performance Metrics:")
print(f"✅ Random Forest Train Accuracy: {rf_train_accuracy:.2f}, Test Accuracy: {rf_test_accuracy:.2f}")
print(f"✅ Random Forest Train Precision: {rf_train_precision:.2f}, Test Precision: {rf_test_precision:.2f}")
print(f"✅ Logistic Regression Train Accuracy: {lr_train_accuracy:.2f}, Test Accuracy: {lr_test_accuracy:.2f}")
print(f"✅ Logistic Regression Train Precision: {lr_train_precision:.2f}, Test Precision: {lr_test_precision:.2f}")

# --------------------------------------------------------
# 🎯 USER INPUT FOR PREDICTION
# --------------------------------------------------------

print("\n📌 Enter your details for sleep disorder prediction:")

# Text answers are stripped so stray spaces do not stop them from matching
# the category labels; int()/float() already tolerate surrounding whitespace.
gender = input("Gender (Male/Female): ").strip()
age = int(input("Age: "))
occupation = input("Occupation: ").strip()
sleep_duration = float(input("Sleep Duration (hours): "))
quality_of_sleep = int(input("Quality of Sleep (1-10): "))
physical_activity = int(input("Physical Activity Level (minutes/day): "))
stress_level = int(input("Stress Level (1-10): "))
bmi_category = input("BMI Category (Underweight/Normal/Overweight/Obese): ").strip()
heart_rate = int(input("Heart Rate (bpm): "))
daily_steps = int(input("Daily Steps: "))
systolic_bp = float(input("Systolic BP: "))
diastolic_bp = float(input("Diastolic BP: "))

# Combine user input into a list (order must match what predict_sleep_disorder expects)
user_input = [
    gender, age, occupation, sleep_duration, quality_of_sleep,
    physical_activity, stress_level, bmi_category, heart_rate,
    daily_steps, systolic_bp, diastolic_bp
]

# Predict with both models
# The logistic regression call and its print were missing, so only the random
# forest result was ever shown.
predicted_disorder_rf = predict_sleep_disorder(user_input, model_choice="rf")
predicted_disorder_lr = predict_sleep_disorder(user_input, model_choice="lr")

print(f"\n🔮 Random Forest Prediction: {predicted_disorder_rf}")
print(f"🔮 Logistic Regression Prediction: {predicted_disorder_lr}")


✅ Random Forest Accuracy: 0.95, Precision: 0.95
✅ Logistic Regression Accuracy: 0.92, Precision: 0.93

🔹 Model Performance Metrics:
✅ Random Forest Train Accuracy: 0.93, Test Accuracy: 0.95
✅ Random Forest Train Precision: 0.93, Test Precision: 0.95
✅ Logistic Regression Train Accuracy: 0.90, Test Accuracy: 0.92
✅ Logistic Regression Train Precision: 0.90, Test Precision: 0.93

📌 Enter your details for sleep disorder prediction:


Gender (Male/Female):  Male
Age:  28
Occupation:  Sales Representative
Sleep Duration (hours):  4
Quality of Sleep (1-10):  6
Physical Activity Level (minutes/day):  30
Stress Level (1-10):  8
BMI Category (Underweight/Normal/Overweight/Obese):  Overweight
Heart Rate (bpm):  90
Daily Steps:  3000
Systolic BP:  140
Diastolic BP:  90



🔮 Random Forest Prediction: Sleep Apnea
🔮 Logistic Regression Prediction: Insomnia
