In [9]:
# 1. Save the trained model using joblib (.pkl format)
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import os

# Make sure the output folder for model artifacts is present
os.makedirs('../models', exist_ok=True)

# Read the preprocessed dataset; every column except 'num' is a feature,
# and the target is binarised: 1 when 'num' > 0, otherwise 0
df = pd.read_csv('../data/heart_disease_preprocessed.csv')
X = df.drop(columns='num')
y = df['num'].gt(0).astype(int)

# Hold out 20% of the rows for evaluation (seeded so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the logistic regression configuration picked during hyperparameter tuning
model = LogisticRegression(
    C=1,
    penalty='l2',
    solver='liblinear',
    random_state=42,
).fit(X_train, y_train)

# Report accuracy on the held-out split
print(f"Model accuracy: {model.score(X_test, y_test):.3f}")

# Persist the fitted estimator to model.pkl with joblib
joblib.dump(model, '../models/model.pkl')
print("Model saved as: ../models/model.pkl")

Model accuracy: 0.867
Model saved as: ../models/model.pkl


In [10]:
# 2. Ensure reproducibility: reload the exported model from disk and verify
#    that it behaves exactly like the in-memory model it was saved from.
#
# NOTE(review): model.pkl holds only the fitted LogisticRegression estimator,
# not a preprocessing + model Pipeline. Inputs must already be preprocessed
# the same way as ../data/heart_disease_preprocessed.csv before predicting.
#
# joblib.load unpickles the file, so only load artifacts from a trusted
# source (here: the file written by the previous cell).
loaded_model = joblib.load('../models/model.pkl')

# Test the loaded model on the held-out split
accuracy = loaded_model.score(X_test, y_test)
print(f"Loaded model accuracy: {accuracy:.3f}")

# Equal accuracy alone does not prove a lossless round trip: two models can
# score the same while disagreeing on individual rows. Require identical
# predictions from the reloaded and the original estimator.
assert (loaded_model.predict(X_test) == model.predict(X_test)).all(), \
    "Reloaded model predictions differ from the in-memory model"

# Quick prediction test on the first test row (explicit positional slice,
# kept two-dimensional because predict expects a 2D input)
sample = X_test.iloc[:1]
sample_prediction = loaded_model.predict(sample)
sample_probability = loaded_model.predict_proba(sample)

print(f"Sample prediction: {'Disease' if sample_prediction[0] == 1 else 'No Disease'}")
# Confidence is the probability of the more likely class for that row
print(f"Confidence: {sample_probability[0].max():.3f}")
print("\nModel successfully exported and verified!")

Loaded model accuracy: 0.867
Sample prediction: No Disease
Confidence: 0.940

Model successfully exported and verified!
