In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import joblib 
import os   

# --- Phase 1: Data Simulation ---
# Builds a synthetic data set in which three sleep metrics are computed from
# randomly drawn environmental conditions and a per-sample sensitivity
# multiplier, then writes the result to a CSV file.
print("--- Data Simulation ---")
n_samples = 10000

# Seed NumPy's global RNG so every run draws the same samples. The train/test
# split and the random forests later in this script already pin
# random_state=42; without a seed here the data, the reported metrics and the
# saved models still changed from run to run.
random_seed = 42
np.random.seed(random_seed)

# Environmental Factors (uniform draws; the order of these calls determines
# which random values each variable receives, so keep it stable).
avg_temperature = np.random.uniform(-5, 38, n_samples)
humidity = np.random.uniform(10, 95, n_samples)
noise_level = np.random.uniform(30, 90, n_samples)

# Individual Variability Factor: multiplies every environmental effect below.
sensitivity = np.random.uniform(0.8, 1.2, n_samples)

# Reference conditions at which the simulated penalties are zero.
optimal_temp = 20.0
optimal_humidity = 50.0
optimal_noise = 40.0

# Temperature is penalised symmetrically around the optimum.
temp_deviation = avg_temperature - optimal_temp
abs_temp_deviation = np.abs(temp_deviation)

# Noise is only penalised above the optimum (values below it clamp to 0),
# which also keeps the fractional powers below well-defined.
noise_deviation_above_optimal = np.maximum(0, noise_level - optimal_noise)
noise_impact_factor = 0.003 * noise_deviation_above_optimal**1.8

humidity_deviation = humidity - optimal_humidity
abs_humidity_deviation = np.abs(humidity_deviation)

# Extra penalty when temperature and noise are both away from their optimum.
temp_noise_interaction_impact = (abs_temp_deviation / 10) * (noise_deviation_above_optimal / 10) * 0.05

# --- Sleep Metrics ---
# Sleep duration: start from the base value and subtract sensitivity-scaled
# penalties, then clip to the range [2.5, 8.5].
base_sleep = 8.0
temp_effect_dur = -0.02 * abs_temp_deviation**1.5
humidity_effect_dur = -0.005 * abs_humidity_deviation
noise_effect_dur = -noise_impact_factor * 10
interaction_effect_dur = -temp_noise_interaction_impact * 5
total_duration_effect = sensitivity * (temp_effect_dur + humidity_effect_dur + noise_effect_dur + interaction_effect_dur)
sleep_duration = base_sleep + total_duration_effect
sleep_duration = np.clip(sleep_duration, 2.5, 8.5)

# Sleep latency: the base value plus sensitivity-scaled increases from
# temperature and noise, clipped to [5, 80].
base_latency = 15.0
temp_effect_lat = 0.3 * abs_temp_deviation**1.2
noise_effect_lat = 0.02 * noise_deviation_above_optimal**1.9
total_latency_increase = sensitivity * (temp_effect_lat + noise_effect_lat)
sleep_latency = base_latency + total_latency_increase
sleep_latency = np.clip(sleep_latency, 5, 80)

# Deep sleep share: the base value minus sensitivity-scaled reductions,
# clipped to [4, 25]. Only humidity above the optimum reduces it.
base_deep_sleep = 22.0
temp_effect_deep = -0.18 * abs_temp_deviation**1.1
noise_effect_deep = -0.015 * noise_deviation_above_optimal**1.7
humidity_effect_deep = -0.03 * np.maximum(0, humidity_deviation)
total_deep_reduction = sensitivity * (temp_effect_deep + noise_effect_deep + humidity_effect_deep)
deep_sleep_percentage = base_deep_sleep + total_deep_reduction
deep_sleep_percentage = np.clip(deep_sleep_percentage, 4, 25)

# One row per simulated sample: the four inputs followed by the three metrics.
df_refined = pd.DataFrame({
    'Avg Temperature (°C)': avg_temperature,
    'Avg Humidity (%)': humidity,
    'Noise Level (dB)': noise_level,
    'Sensitivity Factor': sensitivity,
    'Sleep Duration (hrs)': sleep_duration,
    'Sleep Latency (mins)': sleep_latency,
    'Deep Sleep (%)': deep_sleep_percentage
})

# Written relative to the current working directory, without the index column.
csv_file_path_refined = 'simulated_sleep_data_refined.csv'
df_refined.to_csv(csv_file_path_refined, index=False)
print(f'Refined data saved to {csv_file_path_refined}')
# Optional diagnostics; uncomment to inspect the simulated data.
# print("\nRefined Data Head:\n", df_refined.head())
# print("\nRefined Data Description:\n", df_refined.describe())
# print("\nGenerating Pairplot for Refined Data...")
# sns.pairplot(df_refined, vars=['Avg Temperature (°C)', 'Avg Humidity (%)', 'Noise Level (dB)',
#                               'Sleep Duration (hrs)', 'Sleep Latency (mins)', 'Deep Sleep (%)'])
# plt.suptitle('Refined Simulated Sleep Data Relationships', y=1.02)
# plt.show()
print("--- Refined Data Simulation Complete ---")


# --- Phase 2: Model Training ---
# Fits one random-forest regressor per sleep metric on the same four input
# columns and records MSE / R2 on a held-out 20% split.
print("\n--- Model Training ---")
features = [
    'Avg Temperature (°C)',
    'Avg Humidity (%)',
    'Noise Level (dB)',
    'Sensitivity Factor',
]
targets = [
    'Sleep Duration (hrs)',
    'Sleep Latency (mins)',
    'Deep Sleep (%)',
]
models = {}
evaluation_results = {}

# The test features of the first target are kept so the load example at the
# end of the script has data to predict on.
target_for_loading_example = targets[0]
X_test_for_loading_example = None

# The feature matrix is the same for every target, so select it once.
X = df_refined[features]

for target in targets:
    print(f"\n--- Training Model for: {target} ---")
    y = df_refined[target]
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=0.2,
        random_state=42,
    )

    if target == target_for_loading_example:
        X_test_for_loading_example = X_test.copy()

    print(f"Training set size: {len(X_train)}, Testing set size: {len(X_test)}")
    model = RandomForestRegressor(
        n_estimators=100,
        max_depth=20,
        min_samples_leaf=5,
        n_jobs=-1,
        random_state=42,
    )
    print("Training model...")
    model.fit(X_train, y_train)
    models[target] = model
    print("Model training complete.")

    # Score the fitted model on the held-out rows.
    y_pred = model.predict(X_test)
    mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)
    evaluation_results[target] = {'MSE': mse, 'R2': r2}
    print(f"\n--- Model Evaluation for {target} ---")
    print(f"Mean Squared Error (MSE): {mse:.4f}")
    print(f"R-squared (R2) Score: {r2:.4f}")

print("\n--- All Model Training Complete ---")
print("\nSummary of Evaluation Results:")
for target, metrics in evaluation_results.items():
    print(f"{target}: MSE={metrics['MSE']:.4f}, R2={metrics['R2']:.4f}")


# --- Phase 3: Save Trained Models ---
print("\n--- Saving Trained Models ---")

model_dir = 'saved_sleep_models'
os.makedirs(model_dir, exist_ok=True)


def _filename_safe(target_name):
    """Return *target_name* rewritten so it can be used inside a file name.

    Unit suffixes are replaced first and remaining spaces become underscores,
    e.g. 'Sleep Duration (hrs)' -> 'Sleep_Duration_hrs'. Saving and loading
    both go through this function so the two paths cannot drift apart.
    """
    return (
        target_name
        .replace(' (%)', '_percent')
        .replace(' (°C)', '_C')
        .replace(' (hrs)', '_hrs')
        .replace(' (mins)', '_mins')
        .replace(' ', '_')
    )


def _model_path(safe_name):
    """Return the .joblib path inside ``model_dir`` for an already-safe name."""
    return os.path.join(model_dir, f'sleep_model_{safe_name}.joblib')


for target, model in models.items():
    filename_safe_target = _filename_safe(target)
    model_filename = _model_path(filename_safe_target)

    # Best effort: a failure to write one model is reported and the remaining
    # models are still attempted.
    try:
        joblib.dump(model, model_filename)
    except Exception as e:
        print(f"Error saving model for '{target}': {e}")
    else:
        print(f"Successfully saved model for '{target}' to '{model_filename}'")

print("--- Model Saving Complete ---")


# --- Phase 4: Load and Use Saved Model ---
print("\n--- Loading and Testing a Saved Model (Example) ---")

target_to_load = target_for_loading_example
print(f"Attempting to load model for: {target_to_load}")

# Rebuild the path with the same helpers that were used for saving.
filename_safe_target_load = _filename_safe(target_to_load)
model_load_path = _model_path(filename_safe_target_load)

if not os.path.exists(model_load_path):
    print(f"Model file not found at '{model_load_path}'. Cannot load.")
else:
    # NOTE: joblib.load unpickles the file; only load files from a trusted
    # source (here, the file written by the save step above).
    try:
        loaded_model = joblib.load(model_load_path)
    except Exception as e:
        print(f"Error loading model from '{model_load_path}': {e}")
    else:
        print(f"Successfully loaded model from '{model_load_path}'")

        if X_test_for_loading_example is None:
            print("Sample test data not available for prediction.")
        else:
            print("\nMaking predictions with the loaded model on sample test data:")
            # Select a few samples from the stored X_test
            sample_data_to_predict = X_test_for_loading_example.head(5)

            # Prediction failures are reported separately so they are not
            # mistaken for a failure to load the file.
            try:
                predictions = loaded_model.predict(sample_data_to_predict)
            except Exception as e:
                print(f"Error making predictions with model from '{model_load_path}': {e}")
            else:
                print("Sample Input Data:")
                print(sample_data_to_predict)
                print("\nPredicted Output:")
                print(predictions)

print("--- Loading Example Complete ---")

--- Data Simulation ---
Refined data saved to simulated_sleep_data_refined.csv
--- Refined Data Simulation Complete ---

--- Model Training ---

--- Training Model for: Sleep Duration (hrs) ---
Training set size: 8000, Testing set size: 2000
Training model...
Model training complete.

--- Model Evaluation for Sleep Duration (hrs) ---
Mean Squared Error (MSE): 0.0098
R-squared (R2) Score: 0.9975

--- Training Model for: Sleep Latency (mins) ---
Training set size: 8000, Testing set size: 2000
Training model...
Model training complete.

--- Model Evaluation for Sleep Latency (mins) ---
Mean Squared Error (MSE): 0.3326
R-squared (R2) Score: 0.9973

--- Training Model for: Deep Sleep (%) ---
Training set size: 8000, Testing set size: 2000
Training model...
Model training complete.

--- Model Evaluation for Deep Sleep (%) ---
Mean Squared Error (MSE): 0.1033
R-squared (R2) Score: 0.9936

--- All Model Training Complete ---

Summary of Evaluation Results:
Sleep Duration (hrs): MSE=0.0098, R2=