# Load and Preprocess the Dataset and Train Modal

In [None]:
!pip install xgboost

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from xgboost import XGBRegressor
from sklearn.impute import SimpleImputer
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from xgboost import plot_importance
from sklearn.preprocessing import LabelEncoder, StandardScaler
from google.colab import drive


csv_file = './Data/Sleep_health_and_lifestyle_dataset2.csv'

# data = pd.read_csv('Sleep_health_and_lifestyle_dataset2.csv')
data = pd.read_csv(csv_file)

data.columns = data.columns.str.strip()

print(data.columns)

data['Stress_Level'].fillna('Low')

numerical_columns = ['Technology_Usage_Hours', 'Social_Media_Usage_Hours', 'Gaming_Hours',
                     'Screen_Time_Hours', 'Sleep_Hours', 'Physical_Activity_Hours']

imputer = SimpleImputer(strategy='mean')
data[numerical_columns] = imputer.fit_transform(data[numerical_columns])



data['Optimal_Bedtime_Duration'] = np.where(data['Age'] <= 30, 8, 7)


data = pd.get_dummies(data, drop_first=True)

columns_to_drop = ['User_ID', 'Mental_Health_Status']
existing_columns = [col for col in columns_to_drop if col in data.columns]

X = data.drop(columns=existing_columns)
y = data['Optimal_Bedtime_Duration']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = XGBRegressor(
    objective='reg:squarederror',
    random_state=42,
    n_estimators=500,
    learning_rate=0.01,
    max_depth=5,
    alpha=0.1,
    reg_lambda=0.1,
    subsample=0.8,
    colsample_bytree=0.8
)

model.fit(X_train, y_train)

y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')


: 

# Predict Optimal Bedtime Pattern

In [2]:
user_input = {
    'Age': 17,
    'Gender_Male': 1,
    'Gender_Female': 0,
    'Work_Environment_Impact_Neutral': 1,
    'Work_Environment_Impact_Positive': 0,
    'Physical_Activity_Hours': 6.35,
    'Stress_Level': 6,
    'BMI_Category_Overweight': 1,
    'BMI_Category_Underweight': 0,
    'Technology_Usage_Hours': 2,
    'Social_Media_Usage_Hours': 1.5,
    'Gaming_Hours': 0,
    'Screen_Time_Hours': 0,
    'Sleep_Hours': 0
}

user_input_df = pd.DataFrame([user_input])

missing_columns = [col for col in X.columns if col not in user_input_df.columns]

if missing_columns:
    missing_df = pd.DataFrame(0, index=user_input_df.index, columns=missing_columns)
    user_input_df = pd.concat([user_input_df, missing_df], axis=1)

user_input_df = user_input_df[X.columns]

predicted_bedtime_duration = model.predict(user_input_df)

print(f'Predicted Optimal Bedtime Duration: {predicted_bedtime_duration[0]} hours')


Predicted Optimal Bedtime Duration: 7.807015419006348 hours
