In [23]:
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

df_sim = pd.read_csv('...data/processed/sim_cleaned_data.csv')
df_mit = pd.read_csv('...data/processed/mit_cleaned_data.csv')

print("Both clean datasets are loaded and ready for audit!")

# --- Fixes Scale Bias---
#This converts the 1-9 simulated scale to 1-5 scale to match MIT's Likert range
#Maps the current 1-9 scale to 1-5 here.
sim_sleep_conversion = {1: 4, 2: 5, 3: 7, 4: 8, 5: 9}
df_sim['sleep_hours'] = df_sim['sleep_hours'].map(sim_sleep_conversion)

print(f"Test: Sleep hours now range from {df_sim['sleep_hours'].min()} to {df_sim['sleep_hours'].max()}")

#Formula: ((Score - Min_Old / (Max_old - Min_Old)) * (Max_New-Min_New) + Min_New
#This converts the stress_self__report (calculation example for 1-10 scale to 1-5)
df_sim['stress_self_report'] = ((df_sim['stress_self_report'] - 1) / (10-1)) * (5-1) + 1

print("Simulation scales normalized to 1-5 to match MIT Benchmark")

#--- FIXED AUDIT BLOCK ---

#1. map the MIT Likert HR (1-5) to realistic BPM values (60-100)
#2. this conversion makes the data comparable
hr_map = {1: 65, 2: 70, 3: 80, 4: 90, 5: 95}
df_mit['heart_rate_normalized'] = df_mit ['heart_rate'].map(hr_map).fillna(75)

comparison_df = pd.DataFrame({
    'Metric': [
        'Avg Sleep Hours', 'Avg Heart Rate', 'Avg Stress Score'],
    'Simulated': [
        df_sim['sleep_hours'].mean(),
        df_sim['heart_rate'].mean(),
        df_sim['stress_self_report'].mean()
    ],
    'MIT Benchmark': [
        df_mit['sleep_hours'].mean(),
        df_mit['heart_rate_normalized'].mean(),
        df_mit['stress_self_report'].mean()
    ]
})
print("---CORRECTED STATISTICAL AUDIT---")
print (comparison_df.round(2))

Both clean datasets are loaded and ready for audit!
Test: Sleep hours now range from nan to nan
Simulation scales normalized to 1-5 to match MIT Benchmark
---CORRECTED STATISTICAL AUDIT---
             Metric  Simulated  MIT Benchmark
0   Avg Sleep Hours        NaN           6.36
1    Avg Heart Rate      70.75          77.89
2  Avg Stress Score       2.91           2.64


In [None]:
#--- FIXED AUDIT BLOCK ---

#1. map the MIT Likert HR (1-5) to realistic BPM values (60-100)
#2. this conversion makes the data comparable
hr_map = {1: 65, 2: 70, 3: 80, 4: 90, 5: 95}
df_mit['heart_rate_normalized'] = df_mit ['heart_rate'].map(hr_map).fillna(75)













