In [2]:
import pandas as pd
import os
import sys
import importlib

# Notebook driver cell: locate the project root, make it importable, load the
# CSV, and print the data-quality index (DQI) before and after remediation.

# 1. Resolve the project root.
# Works only when the current working directory is the project folder or a
# directory below it; otherwise we fall back to the working directory itself.
current_path = os.getcwd()
root_name = 'Diabetic_Clinical_Remediation_Pipeline'

if root_name in current_path:
    # Keep everything up to and including the first occurrence of the root
    # folder name, dropping any sub-directories after it.
    repo_root = current_path.split(root_name)[0] + root_name
else:
    # Fallback: treat the working directory as the root. (The original code
    # referenced an undefined name here and raised NameError.)
    repo_root = current_path

# 2. Add the root to sys.path so the DataAuditor and remediator modules can be
# imported; the membership check avoids duplicate entries on cell re-runs.
if repo_root not in sys.path:
    sys.path.append(repo_root)

# 3. Safety check: print the resolved paths to make a wrong location obvious.
print(f"üìÇ Looking in Root: {repo_root}")
data_path = os.path.join(repo_root, 'data', 'diabetic_data.csv')
print(f"üìÑ Looking for Data: {data_path}")

# 4. Load the data and audit it before and after remediation.
if os.path.exists(data_path):
    # Imported here (not at the top) so a missing data file produces the
    # friendly message below rather than an import error.
    import DataAuditor
    import remediator
    # Reload so edits made to the modules during the notebook session are
    # picked up without restarting the kernel.
    importlib.reload(DataAuditor)
    importlib.reload(remediator)

    df = pd.read_csv(data_path)
    auditor = DataAuditor.DataAuditor()
    pipeline = remediator.DataRemediatorPipeline(df)

    # NOTE(review): calculate_dqi is assumed to return a numeric percentage,
    # since the results are subtracted and rounded below — confirm in
    # DataAuditor.
    baseline = auditor.calculate_dqi(df)
    clean_df = pipeline.run_full_remediation()
    final = auditor.calculate_dqi(clean_df)

    print(f"üìä Baseline DQI: {baseline}%")
    print(f"‚úÖ Final DQI: {final}%")
    print(f"üöÄ Improvement: {round(final - baseline, 2)}%")
else:
    print("‚ùå ERROR: Data file not found at the path above. Please check your sidebar!")

üìÇ Looking in Root: /home/15c39be2-1358-4133-aeca-6a5f4c251228/Diabetic_Clinical_Remediation_Pipeline
üìÑ Looking for Data: /home/15c39be2-1358-4133-aeca-6a5f4c251228/Diabetic_Clinical_Remediation_Pipeline/data/diabetic_data.csv
üìä Baseline DQI: 92.65%
‚úÖ Final DQI: 100.0%
üöÄ Improvement: 7.35%
