In [1]:
import sys
import os
import pandas as pd

# Make the sibling ``src`` directory importable so ``modeling.py`` can be found.
# The path is resolved relative to the current working directory, so this
# assumes the notebook is run from a folder that sits next to ``src``.
src_dir = os.path.abspath(os.path.join(os.getcwd(), '..', 'src'))
# Guard the append: re-running this cell would otherwise add the same
# directory to sys.path again on every execution.
if src_dir not in sys.path:
    sys.path.append(src_dir)
from modeling import train_and_save_models

# Project root is the parent of the current working directory; the dataset
# is read from <project_root>/data/insurance.csv.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
csv_path = os.path.join(project_root, 'data', 'insurance.csv')
df = pd.read_csv(csv_path)

# Derive BMI = Weight / (Height / 100)^2.
# NOTE(review): presumably Height is in cm and Weight in kg - confirm against the dataset.
height_m = df['Height'] / 100
df['BMI'] = df['Weight'] / height_m ** 2

# Fit the models via the project helper and keep the metrics it returns.
results = train_and_save_models(df)

# Summarise the metrics returned for each model: RMSE to 2 decimals, R2 to 3.
print("Model training and saving complete. Results:")
for name, scores in results.items():
    rmse, r2 = scores['rmse'], scores['r2']
    print(f"{name}: RMSE={rmse:.2f}, R2={r2:.3f}")

# Optional sanity check: report whether <project_root>/models/<name>_model.pkl
# is present on disk for every model listed in the results.
print("\nSaved model files:")
models_dir = os.path.join(project_root, "models")
for name in results:
    pkl_path = os.path.join(models_dir, f"{name}_model.pkl")
    print(pkl_path, "exists:", os.path.exists(pkl_path))



random_forest saved to models/random_forest_model.pkl | RMSE: 2135.17 | R2: 0.893
gradient_boosting saved to models/gradient_boosting_model.pkl | RMSE: 2382.75 | R2: 0.867
linear_regression saved to models/linear_regression_model.pkl | RMSE: 3494.41 | R2: 0.714
Model training and saving complete. Results:
random_forest: RMSE=2135.17, R2=0.893
gradient_boosting: RMSE=2382.75, R2=0.867
linear_regression: RMSE=3494.41, R2=0.714

Saved model files:
c:\Users\tanus\Documents\Scalar Study Materials\Projects\insurance-cost-prediction\models\random_forest_model.pkl exists: True
c:\Users\tanus\Documents\Scalar Study Materials\Projects\insurance-cost-prediction\models\gradient_boosting_model.pkl exists: True
c:\Users\tanus\Documents\Scalar Study Materials\Projects\insurance-cost-prediction\models\linear_regression_model.pkl exists: True


