In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load the dataset
file_path = "Sleep_health_and_lifestyle_dataset.csv"  # Replace with your dataset path
data = pd.read_csv(file_path)

# "Person ID" is only a row identifier, so it is removed before modelling.
data = data.drop(columns=["Person ID"])

# Forward-fill missing cells from the previous row, only when something is missing.
# DataFrame.ffill() replaces fillna(method='ffill'): the `method` argument is
# deprecated since pandas 2.1, and reassigning avoids mutating the frame in place.
# NOTE(review): this fill also covers the "Sleep Disorder" target column. If rows
# without a disorder arrive here as missing values (for example because the CSV
# spells them "None" and read_csv parses that text as NA -- TODO confirm against
# the file and the installed pandas version), they inherit the previous row's
# diagnosis. Decide explicitly how such rows should be labelled.
if data.isnull().any().any():
    data = data.ffill()

# Split the dataset into features (X) and the prediction target (y).
# "Sleep Disorder" is the target column.
X = data.drop(columns=["Sleep Disorder"])
y = data["Sleep Disorder"]

# Print the preprocessing summary
print("Preprocessing complete!")
print("Shape of Features (X):", X.shape)
print("Shape of Target (y):", y.shape)


In [None]:
from sklearn.preprocessing import LabelEncoder

# Per-column lookup of {original label: integer code}, kept for later reference
label_encoders = {}

# Object-dtype columns are the ones that get label-encoded
categorical_columns = X.select_dtypes(include=["object"]).columns

# Give every categorical feature its own LabelEncoder and overwrite the column
# with the resulting integer codes
for column in categorical_columns:
    le = LabelEncoder()
    X[column] = le.fit_transform(X[column])

    # Record which code each original label received, then report it
    known_labels = le.classes_
    assigned_codes = le.transform(known_labels)
    label_encoders[column] = dict(zip(known_labels, assigned_codes))
    print(f"Label Encoding for '{column}': {label_encoders[column]}")

# The target is encoded the same way, but only when it is an object-dtype column
if y.dtype == "object":
    target_encoder = LabelEncoder()
    y = target_encoder.fit_transform(y)
    target_label_codes = dict(
        zip(target_encoder.classes_, target_encoder.transform(target_encoder.classes_))
    )
    print(f"\nLabel Encoding for Target ('Sleep Disorder'): {target_label_codes}")


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# One seed shared by the split and by every estimator below that draws random
# numbers, so the printed accuracies and the fitted forest reused by later
# cells are the same on every run.
SEED = 42

# Split the data into training and testing sets (20% held out for testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)

# Train and evaluate K-Nearest Neighbors (KNN); it has no random component to seed
print("Training KNN...")
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
knn_predictions = knn.predict(X_test)
print(f"KNN Accuracy: {accuracy_score(y_test, knn_predictions):.4f}")

# Train and evaluate Support Vector Machine (SVM)
# random_state only affects the probability estimates enabled by probability=True
print("\nTraining SVM...")
svm = SVC(probability=True, random_state=SEED)
svm.fit(X_train, y_train)
svm_predictions = svm.predict(X_test)
print(f"SVM Accuracy: {accuracy_score(y_test, svm_predictions):.4f}")

# Train and evaluate Decision Tree (DT); seeded so tie-breaking between splits is repeatable
print("\nTraining Decision Tree...")
dt = DecisionTreeClassifier(random_state=SEED)
dt.fit(X_train, y_train)
dt_predictions = dt.predict(X_test)
print(f"Decision Tree Accuracy: {accuracy_score(y_test, dt_predictions):.4f}")

# Train and evaluate Random Forest (RF); seeded so bootstrapping and feature sampling are repeatable
print("\nTraining Random Forest...")
rf = RandomForestClassifier(random_state=SEED)
rf.fit(X_train, y_train)
rf_predictions = rf.predict(X_test)
print(f"Random Forest Accuracy: {accuracy_score(y_test, rf_predictions):.4f}")


In [None]:
# Train and evaluate Artificial Neural Network (ANN)
print("\nTraining ANN...")

# Feed-forward classifier built layer by layer: two ReLU hidden layers followed
# by a softmax layer with one unit per distinct value in the encoded target
ann = Sequential()
ann.add(Dense(64, activation='relu', input_dim=X_train.shape[1]))
ann.add(Dense(32, activation='relu'))
ann.add(Dense(len(set(y)), activation='softmax'))

# Sparse categorical cross-entropy is used with the integer class labels in y_train
ann.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
ann.fit(X_train, y_train, epochs=50, batch_size=16, verbose=1)

# evaluate() returns [loss, accuracy] for the single metric compiled above
ann_scores = ann.evaluate(X_test, y_test, verbose=0)
ann_accuracy = ann_scores[1]
print(f"ANN Accuracy: {ann_accuracy:.4f}")

In [None]:
from sklearn.preprocessing import StandardScaler
import numpy as np

# Class probabilities from the already-fitted random forest become extra features.
# The forest was fitted on X_train, so its probabilities for those rows are in-sample.
rf_proba_train = rf.predict_proba(X_train)
rf_proba_test = rf.predict_proba(X_test)

# Append the probability columns to the right of the original feature columns
hybrid_train_raw = np.concatenate((X_train, rf_proba_train), axis=1)
hybrid_test_raw = np.concatenate((X_test, rf_proba_test), axis=1)

# Standardize for the ANN: statistics are learned from the training rows only
# and then applied unchanged to the test rows
scaler = StandardScaler().fit(hybrid_train_raw)
X_train_hybrid = scaler.transform(hybrid_train_raw)
X_test_hybrid = scaler.transform(hybrid_test_raw)

# Train the ANN on the hybrid features
print("\nTraining RF + ANN Hybrid Model...")
hybrid_ann = Sequential()
hybrid_ann.add(Dense(128, activation='relu', input_dim=X_train_hybrid.shape[1]))
hybrid_ann.add(Dense(64, activation='relu'))
# One softmax unit per distinct value in the encoded target
hybrid_ann.add(Dense(len(set(y)), activation='softmax'))
hybrid_ann.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
hybrid_ann.fit(X_train_hybrid, y_train, epochs=15, batch_size=16, verbose=1)

# Evaluate the hybrid model; evaluate() returns [loss, accuracy]
hybrid_scores = hybrid_ann.evaluate(X_test_hybrid, y_test, verbose=0)
hybrid_accuracy = hybrid_scores[1]
print(f"\nHybrid Model (RF + ANN) Accuracy: {hybrid_accuracy:.4f}")


In [None]:
import joblib

class HybridModel:
    """Bundle of a random forest, a feature scaler and a neural network.

    ``predict`` runs the three stages in sequence: forest class probabilities
    are appended to the input features, the combined matrix is scaled, and the
    network produces the final output.
    """

    def __init__(self, rf_model, ann_model, scaler):
        """Store the three components.

        Args:
            rf_model: Object exposing ``predict_proba(X)``.
            ann_model: Object exposing ``predict(X)``.
            scaler: Object exposing ``transform(X)``.
        """
        self.rf_model, self.ann_model, self.scaler = rf_model, ann_model, scaler

    def predict(self, X):
        """Return ``ann_model.predict`` evaluated on the scaled, augmented features.

        Args:
            X: Feature matrix passed to ``rf_model.predict_proba`` and also
                stacked with that call's result.

        Returns:
            Whatever ``ann_model.predict`` returns for the transformed input.
        """
        forest_probabilities = self.rf_model.predict_proba(X)
        # Probability columns go to the right of the original feature columns
        augmented = np.hstack([X, forest_probabilities])
        scaled = self.scaler.transform(augmented)
        return self.ann_model.predict(scaled)

# Wrap the fitted forest, the hybrid network and the fitted scaler in one object
hybrid_model = HybridModel(rf, hybrid_ann, scaler)

# Write that object to disk with joblib and report where it went
hybrid_model_path = "hybrid_model.pkl"
joblib.dump(hybrid_model, hybrid_model_path)
print(f"Hybrid model saved to {hybrid_model_path}")


In [None]:
import joblib
import numpy as np

# Define the HybridModel class
class HybridModel:
    """Bundle of a random forest, a feature scaler and a neural network.

    ``predict`` runs the three stages in sequence: forest class probabilities
    are appended to the input features, the combined matrix is scaled, and the
    network produces the final output.
    """

    def __init__(self, rf_model, ann_model, scaler):
        """Store the three components.

        Args:
            rf_model: Object exposing ``predict_proba(X)``.
            ann_model: Object exposing ``predict(X)``.
            scaler: Object exposing ``transform(X)``.
        """
        self.rf_model, self.ann_model, self.scaler = rf_model, ann_model, scaler

    def predict(self, X):
        """Return ``ann_model.predict`` evaluated on the scaled, augmented features.

        Args:
            X: Feature matrix passed to ``rf_model.predict_proba`` and also
                stacked with that call's result.

        Returns:
            Whatever ``ann_model.predict`` returns for the transformed input.
        """
        forest_probabilities = self.rf_model.predict_proba(X)
        # Probability columns go to the right of the original feature columns
        augmented = np.hstack([X, forest_probabilities])
        scaled = self.scaler.transform(augmented)
        return self.ann_model.predict(scaled)

def _ask_number(prompt, cast):
    """Prompt for one numeric answer, convert it with ``cast`` and reject negatives.

    Raises:
        ValueError: If the text cannot be converted or the number is below zero.
    """
    value = cast(input(prompt).strip())
    if value < 0:
        raise ValueError(f"value must not be negative: {value}")
    return value


def predict_user_input(hybrid_model_path):
    """
    Load the hybrid model, prompt for one person's details and print the prediction.

    Numeric answers that cannot be parsed or are negative, and categorical
    answers that are missing from the encoding tables below, stop the function
    with a printed explanation instead of a prediction.

    Args:
        hybrid_model_path (str): Path to the saved hybrid model.

    Returns:
        None
    """
    # joblib.load unpickles the file, and unpickling can execute arbitrary code:
    # only load model files that come from a trusted source.
    hybrid_model = joblib.load(hybrid_model_path)
    print("Hybrid model loaded successfully!")

    # Collect user input; text answers are title-cased to match the table keys
    print("\nEnter the following details for prediction:")
    try:
        gender = input("Gender (Male/Female): ").strip().title()
        age = _ask_number("Age (e.g., 25): ", int)
        occupation = input("Occupation (e.g., Engineer, Doctor): ").strip().title()
        sleep_duration = _ask_number("Sleep Duration (in hours, e.g., 6.5): ", float)
        quality_of_sleep = _ask_number("Quality of Sleep (1-10): ", int)
        physical_activity = _ask_number("Physical Activity Level (e.g., 30): ", int)
        stress_level = _ask_number("Stress Level (1-10): ", int)
        bmi_category = input("BMI Category (e.g., Normal, Overweight): ").strip().title()
        blood_pressure = input("Blood Pressure (e.g., 120/80): ").strip()
        heart_rate = _ask_number("Heart Rate (e.g., 75): ", int)
        daily_steps = _ask_number("Daily Steps (e.g., 5000): ", int)
    except ValueError as e:
        print(f"Invalid input. Error: {e}")
        return

    # Encoding tables for the categorical answers.
    # NOTE(review): these are hard-coded and presumably copied from the label
    # encoding printed at training time -- verify they still match whenever the
    # dataset or the encoders change.
    gender_mapping = {'Female': 0, 'Male': 1}
    occupation_mapping = {'Accountant': 0, 'Doctor': 1, 'Engineer': 2, 'Lawyer': 3, 'Manager': 4,
                          'Nurse': 5, 'Sales Representative': 6, 'Salesperson': 7, 'Scientist': 8,
                          'Software Engineer': 9, 'Teacher': 10}
    bmi_mapping = {'Normal': 0, 'Normal Weight': 1, 'Obese': 2, 'Overweight': 3}
    blood_pressure_mapping = {'115/75': 0, '115/78': 1, '117/76': 2, '118/75': 3, '118/76': 4,
                              '119/77': 5, '120/80': 6, '121/79': 7, '122/80': 8, '125/80': 9,
                              '125/82': 10, '126/83': 11, '128/84': 12, '128/85': 13, '129/84': 14,
                              '130/85': 15, '130/86': 16, '131/86': 17, '132/87': 18, '135/88': 19,
                              '135/90': 20, '139/91': 21, '140/90': 22, '140/95': 23, '142/92': 24}

    # Encode the categorical answers; -1 marks an answer missing from its table
    categorical_codes = {
        "Gender": gender_mapping.get(gender, -1),
        "Occupation": occupation_mapping.get(occupation, -1),
        "BMI Category": bmi_mapping.get(bmi_category, -1),
        "Blood Pressure": blood_pressure_mapping.get(blood_pressure, -1),
    }

    # Assemble the feature row in the order the values were asked for above
    input_data = [
        categorical_codes["Gender"],
        age,
        categorical_codes["Occupation"],
        sleep_duration,
        quality_of_sleep,
        physical_activity,
        stress_level,
        categorical_codes["BMI Category"],
        categorical_codes["Blood Pressure"],
        heart_rate,
        daily_steps
    ]

    # Debug: Print the mapped input data
    print("\nMapped Input Data:", input_data)

    # Only the categorical codes can be "unmapped"; report them by field name so
    # the user knows which answer to correct
    unmapped_fields = [name for name, code in categorical_codes.items() if code == -1]
    if unmapped_fields:
        print("Some inputs could not be mapped correctly. Please check your inputs.")
        print("Unmapped Values:", unmapped_fields)
        return

    # Convert input data to a single-row 2-D NumPy array for prediction
    input_array = np.array(input_data).reshape(1, -1)

    # Predict using the hybrid model; the index of the largest output is the class
    prediction = hybrid_model.predict(input_array)
    predicted_class = int(np.argmax(prediction))

    # Map the prediction back to the sleep disorder label
    # NOTE(review): hard-coded; confirm these codes match the target encoding
    # that was printed when the model was trained.
    target_mapping = {0: 'Insomnia', 1: 'Sleep Apnea', 2: 'No-disorder'}
    print("\nPrediction Result:")
    print(f"The predicted sleep disorder class is: {target_mapping.get(predicted_class, 'Unknown')}")

# Example usage: runs the interactive prediction when this code executes as the main module
if __name__ == "__main__":
    # File name of the saved hybrid model that will be loaded
    hybrid_model_path = "hybrid_model.pkl"
    predict_user_input(hybrid_model_path=hybrid_model_path)
