# %% [markdown]
# # 🏥 HEART DISEASE PREDICTION PIPELINE
# ## Complete ML System - From Data to Deployment
# 
# **Objective:** Predict heart disease with 90%+ accuracy using patient medical data
# **Algorithms:** SVM, Logistic Regression, Random Forest, XGBoost
# **Author:** Your Name | **Date:** 2024

In [None]:
# ## 1. Environment Setup

# %%

import sys, os
from datetime import datetime

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

sys.path.append('..')
from src.config.settings import ProjectConfig
from src.utils.logger import setup_logging
from src.data.loader import DataLoader
from src.data.preprocessor import DataPreprocessor
from src.data.validator import DataValidator
from src.eda.outlier_detector import detect_outliers_iqr

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_recall_curve
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from xgboost import XGBClassifier






# Load the project configuration registered under the name "heart"; the
# resulting object is passed to the loader, validator and preprocessor below.
config = ProjectConfig.load("heart")
# Create the logger used by this notebook.
# NOTE(review): handler/level setup lives in src.utils.logger - not visible here.
logger = setup_logging("heart_pipeline")
logger.info("‚úÖ Heart disease prediction pipeline initialized")

ModuleNotFoundError: No module named 'matplotlib'

In [None]:
# ## 2. Data Loading & Validation

# Read the heart-disease table through the project loader.
loader = DataLoader(config)
df = loader.load_heart_disease()

# Build the data-quality report for the freshly loaded frame.
validator = DataValidator(config)
report = validator.generate_quality_report(df)

n_patients, n_features = df.shape[0], df.shape[1]
print(f"‚úÖ Dataset loaded: {n_patients} patients, {n_features} features")

In [None]:
# %% [markdown]
# ## 3. Initial Data Inspection

# %%
# Quick look at the raw frame: head, schema and summary statistics.
print("üìä FIRST 5 ROWS:")
display(df.head())

print("\nüìä DATA INFO:")
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() only appended a stray "None" line.
df.info()

print("\nüìä BASIC STATISTICS:")
display(df.describe())

In [None]:
# %% [markdown]
# ## 4. Missing Values Analysis

# %%
# Per-column null counts, computed once and reused for every figure below.
null_counts = df.isnull().sum()

missing = pd.DataFrame(
    {
        'Column': df.columns,
        'Missing': null_counts.values,
        'Percentage': (null_counts / len(df) * 100).values,
    }
)
missing = missing.sort_values('Percentage', ascending=False)

print("üîç MISSING VALUES REPORT:")
# Only columns that actually contain nulls are shown.
has_nulls = missing['Missing'] > 0
display(missing[has_nulls])

print(f"\n‚úÖ Total missing: {null_counts.sum()} cells")

In [None]:
# %% [markdown]
# ## 5. Duplicate Check

# %%
# Count fully identical rows and report their share of the dataset.
duplicates = df.duplicated().sum()
duplicate_share = (duplicates / len(df)) * 100

print(f"üìä DUPLICATE ROWS: {duplicates}")
print(f"Percentage: {duplicate_share:.2f}%")

if duplicates:
    print("‚ö†Ô∏è Duplicates found - will remove during preprocessing")

In [None]:
# %% [markdown]
# ## 6. Target Variable Distribution

# %%
# Absolute and relative class frequencies of the label column.
label_column = df['target']
target_dist = label_column.value_counts()
target_pct = label_column.value_counts(normalize=True) * 100

print("üéØ TARGET DISTRIBUTION:")
# Lookups are by class label (0 / 1), not by position.
print(f"No Disease: {target_dist[0]} ({target_pct[0]:.1f}%)")
print(f"Disease:    {target_dist[1]} ({target_pct[1]:.1f}%)")

# Flag the dataset as imbalanced when the minority class is below 30 %.
minority_share = target_pct.min()
if minority_share < 30:
    print("‚ö†Ô∏è Imbalanced dataset - will apply SMOTE")

In [None]:
# %% [markdown]
# ## 7. Data Types Analysis

# %%
# One row per column: its dtype and number of distinct (non-null) values.
dtype_df = pd.DataFrame(
    {
        'Column': df.columns,
        'Type': df.dtypes.values,
        'Unique': df.nunique().tolist(),
    }
)

print("üìä DATA TYPES:")
display(dtype_df)

# Names of all numeric columns; later cells reuse this list.
numeric_cols = list(df.select_dtypes(include=[np.number]).columns)
print(f"\n‚úÖ Numeric columns: {len(numeric_cols)}")

In [None]:
# %% [markdown]
# ## 8. Detailed Statistics

# %%
# describe() transposed (one row per column), extended with shape statistics.
numeric_frame = df[numeric_cols]
stats_df = df.describe().T.assign(
    skew=numeric_frame.skew(),
    kurtosis=numeric_frame.kurtosis(),
)

print("üìà STATISTICAL SUMMARY:")
display(stats_df.round(2))

In [None]:
# %% [markdown]
# ## 9. Outlier Detection (IQR Method)


# IQR-based outlier summary for every numeric column.
outliers = detect_outliers_iqr(df, numeric_cols)
outlier_df = pd.DataFrame(outliers).T.round(2)

print("üîç OUTLIER REPORT:")
display(outlier_df)

# This cell used to read the share of outliers from 'percentage', while the
# following IQR cell reads 'outlier_percentage' from the same helper's output,
# so one of the two raised KeyError. Accept whichever field the helper emits.
# NOTE(review): confirm the real key in src.eda.outlier_detector and drop the fallback.
pct_col = 'outlier_percentage' if 'outlier_percentage' in outlier_df.columns else 'percentage'

cols_with_outliers = outlier_df[outlier_df[pct_col] > 5].index.tolist()
print(f"\n‚ö†Ô∏è Columns with >5% outliers: {cols_with_outliers}")

In [None]:
# %% [markdown]
# ## 9b. Outlier Detection using IQR Method (Target Excluded)

# %%
# IQR outlier scan over the numeric feature columns (label column left out).
numerical_cols = [
    name
    for name in df.select_dtypes(include=[np.number]).columns.tolist()
    if name != 'target'
]

outlier_info_iqr = detect_outliers_iqr(df, numerical_cols)

print("üîç OUTLIER DETECTION (IQR METHOD):")
print("=" * 70)
outlier_df = pd.DataFrame(outlier_info_iqr).T
display(outlier_df.round(2))

# List every column whose outlier share exceeds 5 %.
# NOTE(review): assumes the helper reports the share under 'outlier_percentage'.
significant_outliers = outlier_df[outlier_df['outlier_percentage'] > 5]
if not significant_outliers.empty:
    print("\n‚ö†Ô∏è Columns with >5% outliers:")
    for col, share in significant_outliers['outlier_percentage'].items():
        print(f"   - {col}: {share:.2f}% outliers")

In [None]:
# %% [markdown]
# ## 10. Feature Correlation with Target

# %%
# Signed Pearson correlation of every numeric feature with the label,
# strongest positive first. Kept signed because the printout shows direction.
correlations = (
    df[numeric_cols]
    .corr()['target']
    .drop('target')
    .sort_values(ascending=False)
)

print("üìä FEATURE CORRELATION WITH TARGET:")
for feat, corr in correlations.items():
    print(f"{feat:10}: {corr:+.3f}")

# Rank by magnitude: a strongly negative correlation is just as informative as
# a positive one, and head(5) on the signed series silently dropped those.
top_features = (
    correlations.abs().sort_values(ascending=False).head(5).index.tolist()
)
print(f"\n‚úÖ Top 5 features: {top_features}")

In [None]:
# %% [markdown]
# ## 11. Target Distribution Visualization

# Bar and pie view of the class balance.
# value_counts() orders by frequency, not by label, so the raw values can be
# [disease, no-disease] whenever class 1 is the majority. Sorting by label
# guarantees position 0 is class 0 and matches the fixed labels and colours.
class_counts = target_dist.sort_index()
class_labels = ['No Disease', 'Disease']
class_colors = ['#4ECDC4', '#FF6B6B']

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

ax1.bar(class_labels, class_counts.values, color=class_colors)
ax1.set_title('Target Distribution', fontweight='bold')
for i, v in enumerate(class_counts.values):
    # Put the count just above each bar.
    ax1.text(i, v+5, str(v), ha='center', fontweight='bold')

ax2.pie(class_counts.values, labels=class_labels,
        autopct='%1.1f%%', colors=class_colors)
ax2.set_title('Target Distribution (%)', fontweight='bold')

plt.tight_layout()
plt.show()

In [None]:
# %% [markdown]
# ## 12. Age Distribution by Target

# %%
# Age by class: overlaid histograms on the left, box plot on the right.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
ax1, ax2 = axes

for cls in (0, 1):
    ages = df.loc[df['target'] == cls, 'age']
    ax1.hist(ages, bins=20, alpha=0.7, label=f'Target {cls}')
ax1.set(xlabel='Age', ylabel='Count')
ax1.set_title('Age Distribution by Target', fontweight='bold')
ax1.legend(['No Disease', 'Disease'])

df.boxplot(column='age', by='target', ax=ax2)
ax2.set_title('Age Boxplot by Target', fontweight='bold')
ax2.set_xlabel('Target (0=No Disease, 1=Disease)')

# Sets the figure-level title.
plt.suptitle('Age Analysis', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

In [None]:
# %% [markdown]
# ## 13. Chest Pain Type Analysis

# %%
# Contingency table of chest-pain type vs. label, with readable column names.
cp_cross = pd.crosstab(df['cp'], df['target']).set_axis(
    ['No Disease', 'Disease'], axis=1
)

fig, ax = plt.subplots(figsize=(10, 6))
cp_cross.plot(kind='bar', stacked=True, color=['#4ECDC4', '#FF6B6B'], ax=ax)
ax.set_title('Chest Pain Type vs Disease', fontweight='bold', fontsize=14)
ax.set(xlabel='Chest Pain Type (0-3)', ylabel='Count')
ax.legend(['No Disease', 'Disease'])
plt.xticks(rotation=0)
plt.show()

In [None]:
# %% [markdown]
# ## 14. Maximum Heart Rate Analysis

# %%
# Maximum heart rate by class: box plot on the left, histograms on the right.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
ax1, ax2 = axes

df.boxplot(column='thalach', by='target', ax=ax1)
ax1.set_title('Max Heart Rate by Target', fontweight='bold')
ax1.set_ylabel('Max Heart Rate')

for cls in (0, 1):
    rates = df.loc[df['target'] == cls, 'thalach']
    ax2.hist(rates, bins=20, alpha=0.7, label=f'Target {cls}')
ax2.set(xlabel='Max Heart Rate', ylabel='Frequency')
ax2.set_title('Heart Rate Distribution', fontweight='bold')
ax2.legend(['No Disease', 'Disease'])

plt.tight_layout()
plt.show()

In [None]:
# %% [markdown]
# ## 15. ST Depression (Oldpeak) Analysis

# %%
# ST depression (oldpeak) by class: box plot on the left, histograms on the right.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
ax1, ax2 = axes

df.boxplot(column='oldpeak', by='target', ax=ax1)
ax1.set_title('ST Depression by Target', fontweight='bold')
ax1.set_ylabel('Oldpeak')

for cls in (0, 1):
    depression = df.loc[df['target'] == cls, 'oldpeak']
    ax2.hist(depression, bins=20, alpha=0.7, label=f'Target {cls}')
ax2.set(xlabel='Oldpeak', ylabel='Frequency')
ax2.set_title('ST Depression Distribution', fontweight='bold')
ax2.legend(['No Disease', 'Disease'])

plt.tight_layout()
plt.show()

In [None]:
# %% [markdown]
# ## 16. Correlation Heatmap

# Lower-triangle heatmap of pairwise feature correlations.
plt.figure(figsize=(14, 12))
# Restrict to numeric columns explicitly: pandas >= 2.0 raises on df.corr()
# when any non-numeric column is present instead of silently dropping it.
corr = df.select_dtypes(include=[np.number]).corr()
# Hide the upper triangle (and diagonal) so each pair is shown only once.
mask = np.triu(np.ones_like(corr, dtype=bool))
sns.heatmap(corr, mask=mask, annot=True, fmt='.2f', cmap='RdBu_r',
            center=0, square=True, linewidths=0.5)
plt.title('Feature Correlation Matrix', fontsize=16, fontweight='bold', pad=20)
plt.tight_layout()
plt.show()

In [None]:
# %% [markdown]
# ## 17. Gender Analysis

# %%
# Contingency table of sex vs. label with human-readable row/column names.
# NOTE(review): the row labels assume sex is coded 0 = female, 1 = male.
gender_cross = (
    pd.crosstab(df['sex'], df['target'])
    .set_axis(['Female', 'Male'], axis=0)
    .set_axis(['No Disease', 'Disease'], axis=1)
)

fig, ax = plt.subplots(figsize=(8, 6))
gender_cross.plot(kind='bar', color=['#4ECDC4', '#FF6B6B'], ax=ax)
ax.set_title('Gender vs Heart Disease', fontweight='bold', fontsize=14)
ax.set(xlabel='Gender', ylabel='Count')
ax.legend(['No Disease', 'Disease'])
plt.xticks(rotation=0)
plt.show()

In [None]:
# %% [markdown]
# ## 18. Feature Importance (Correlation Based)

# %%
# Horizontal bar chart ranking features by the strength of their correlation
# with the label.
plt.figure(figsize=(10, 8))
# The axis is labelled "Absolute Correlation" and the chart is read as an
# importance ranking, so plot magnitudes; the signed series mixed negative
# bars into the ranking and contradicted the label.
correlations.abs().sort_values().plot(kind='barh', color='#2E86AB')
plt.title('Feature Importance - Correlation with Target', fontweight='bold', fontsize=14)
plt.xlabel('Absolute Correlation')
plt.tight_layout()
plt.show()

In [None]:
# %% [markdown]
# ## 19. Data Preprocessing

# %%

# Run the project preprocessing pipeline and obtain the train/test split.
# NOTE(review): cleaning/scaling/resampling details live in DataPreprocessor.
preprocessor = DataPreprocessor(config)
X_train, X_test, y_train, y_test = preprocessor.prepare_dataset(df)

print(f"‚úÖ Training set: {X_train.shape}")
print(f"‚úÖ Testing set:  {X_test.shape}")

# The previous message claimed "Classes balanced" but printed only the number
# of distinct classes. Show the per-class sample counts so the balance of the
# training labels can actually be judged.
train_classes, train_counts = np.unique(y_train, return_counts=True)
train_class_counts = dict(zip(train_classes.tolist(), train_counts.tolist()))
print(f"‚úÖ Training class counts: {train_class_counts}")

In [None]:
# %% [markdown]
# ## 20. Training: Logistic Regression


# Fit the logistic-regression baseline (fit() returns the estimator itself).
lr_model = LogisticRegression(max_iter=1000, random_state=42).fit(X_train, y_train)

# Hard labels plus the probability from the second predict_proba column.
# NOTE(review): column 1 is the "disease" class only if labels are 0/1.
lr_pred = lr_model.predict(X_test)
lr_proba = lr_model.predict_proba(X_test)[:, 1]

print("‚úÖ Logistic Regression trained")

In [None]:
# %% [markdown]
# ## 21. Training: Support Vector Machine

svm_model = SVC(probability=True, random_state=42)
svm_model.fit(X_train, y_train)
svm_pred = svm_model.predict(X_test)
svm_proba = svm_model.predict_proba(X_test)[:, 1]

print("‚úÖ SVM trained")

In [None]:
# %% [markdown]
# ## 24. Model Comparison: Accuracy

# Display names and test-set predictions, kept in the same order so the
# metric cells below can index them in parallel.
models = ['Logistic Regression', 'SVM', 'Random Forest', 'XGBoost']
predictions = [lr_pred, svm_pred, rf_pred, xgb_pred]

accuracies = []
for y_hat in predictions:
    accuracies.append(accuracy_score(y_test, y_hat))

for idx, name in enumerate(models):
    print(f"{name:20}: {accuracies[idx]:.4f}")

best_acc_idx = np.argmax(accuracies)
print(f"\nüèÜ Best model: {models[best_acc_idx]} with accuracy {accuracies[best_acc_idx]:.4f}")

In [None]:
# %% [markdown]
# ## 25. Model Comparison: Precision


# Precision of each model on the held-out set, in `models` order.
precisions = []
for y_hat in predictions:
    precisions.append(precision_score(y_test, y_hat))

for idx, name in enumerate(models):
    print(f"{name:20}: {precisions[idx]:.4f}")

best_prec_idx = np.argmax(precisions)
print(f"\nüèÜ Best model: {models[best_prec_idx]} with precision {precisions[best_prec_idx]:.4f}")

In [None]:
# %% [markdown]
# ## 26. Model Comparison: Recall

# Recall of each model on the held-out set, in `models` order.
recalls = []
for y_hat in predictions:
    recalls.append(recall_score(y_test, y_hat))

for idx, name in enumerate(models):
    print(f"{name:20}: {recalls[idx]:.4f}")

best_rec_idx = np.argmax(recalls)
print(f"\nüèÜ Best model: {models[best_rec_idx]} with recall {recalls[best_rec_idx]:.4f}")

In [None]:
# %% [markdown]
# ## 27. Model Comparison: F1 Score


# F1 of each model on the held-out set, in `models` order.
f1_scores = []
for y_hat in predictions:
    f1_scores.append(f1_score(y_test, y_hat))

for idx, name in enumerate(models):
    print(f"{name:20}: {f1_scores[idx]:.4f}")

# The F1 winner drives every "best model" cell that follows.
best_f1_idx = np.argmax(f1_scores)
best_model, best_f1 = models[best_f1_idx], f1_scores[best_f1_idx]
print(f"\nüèÜ Best model: {best_model} with F1 score {best_f1:.4f}")

In [None]:
# %% [markdown]
# ## 28. Model Comparison: ROC-AUC

# %%
from sklearn.metrics import roc_auc_score

# Positive-class probabilities in `models` order; reused by the curve plots.
probabilities = [lr_proba, svm_proba, rf_proba, xgb_proba]

roc_aucs = []
for scores in probabilities:
    roc_aucs.append(roc_auc_score(y_test, scores))

for idx, name in enumerate(models):
    print(f"{name:20}: {roc_aucs[idx]:.4f}")

best_auc_idx = np.argmax(roc_aucs)
print(f"\nüèÜ Best model: {models[best_auc_idx]} with ROC-AUC {roc_aucs[best_auc_idx]:.4f}")

In [None]:
# %% [markdown]
# ## 29. Complete Results Summary

# %%
# One row per model, one column per metric, rounded to four decimals.
metric_table = {
    'Model': models,
    'Accuracy': accuracies,
    'Precision': precisions,
    'Recall': recalls,
    'F1-Score': f1_scores,
    'ROC-AUC': roc_aucs,
}
results_df = pd.DataFrame(metric_table).round(4)

print("üìä MODEL COMPARISON SUMMARY:")
display(results_df)

# The overall winner is the row with the highest F1 score.
winner_row = results_df['F1-Score'].idxmax()
best_overall = results_df.loc[winner_row]
print(f"\nüèÜ OVERALL BEST MODEL: {best_overall['Model']}")
print(f"   F1-Score: {best_overall['F1-Score']:.4f}")
print(f"   Accuracy: {best_overall['Accuracy']:.4f}")

In [None]:
# %% [markdown]
# ## 30. Confusion Matrix (Best Model)

# %%
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Predictions of the F1-best model, picked by its position in `models`.
candidate_preds = [lr_pred, svm_pred, rf_pred, xgb_pred]
best_pred = candidate_preds[best_f1_idx]
cm = confusion_matrix(y_test, best_pred)

disp = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=['No Disease', 'Disease'],
)
disp.plot(cmap='Blues')
plt.title(f'Confusion Matrix - {best_model}', fontweight='bold', fontsize=14)
plt.grid(False)
plt.show()

# Flattened 2x2 matrix in row-major order: TN, FP, FN, TP.
tn, fp, fn, tp = cm.ravel()
for line in (
    f"True Negatives:  {tn}",
    f"False Positives: {fp}",
    f"False Negatives: {fn}",
    f"True Positives:  {tp}",
):
    print(line)

In [None]:
# %% [markdown]
# ## 31. ROC Curves Comparison

# %%
from sklearn.metrics import roc_curve

# One ROC curve per model on shared axes, with the chance diagonal for reference.
fig, ax = plt.subplots(figsize=(10, 8))

for name, proba in zip(models, probabilities):
    fpr, tpr, _ = roc_curve(y_test, proba)
    auc = roc_auc_score(y_test, proba)
    ax.plot(fpr, tpr, linewidth=2, label=f'{name} (AUC = {auc:.3f})')

ax.plot([0, 1], [0, 1], 'k--', label='Random Classifier')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('ROC Curves Comparison', fontweight='bold', fontsize=14)
ax.legend(loc='lower right')
ax.grid(True, alpha=0.3)
plt.show()

In [None]:
# %% [markdown]
# ## 32. Feature Importance (Best Model)

# %%
# Feature importances are only available for the tree-based models.
# An explicit name -> estimator mapping replaces the fragile
# `models.index(best_model) - 2` arithmetic, which silently depended on the
# exact ordering of the `models` list.
tree_models = {'Random Forest': rf_model, 'XGBoost': xgb_model}

if best_model in tree_models:
    best_model_obj = tree_models[best_model]
    importances = best_model_obj.feature_importances_

    # Prefer the column names of the matrix the model was trained on; fall back
    # to the raw frame's columns, and to generic names if the preprocessing
    # changed the number of features (which would otherwise raise ValueError
    # when building the DataFrame).
    feature_names = list(getattr(X_train, 'columns', df.drop('target', axis=1).columns))
    if len(feature_names) != len(importances):
        feature_names = [f'feature_{i}' for i in range(len(importances))]

    feat_imp = pd.DataFrame({
        'Feature': feature_names,
        'Importance': importances
    }).sort_values('Importance', ascending=True)

    plt.figure(figsize=(10, 8))
    plt.barh(feat_imp['Feature'], feat_imp['Importance'], color='#2E86AB')
    plt.xlabel('Importance')
    plt.title(f'Feature Importance - {best_model}', fontweight='bold', fontsize=14)
    plt.tight_layout()
    plt.show()

    print("Top 5 Most Important Features:")
    # feat_imp is ascending (for the bar chart), so reverse the tail to list
    # the most important feature first.
    for _, row in feat_imp.tail(5).iloc[::-1].iterrows():
        print(f"   {row['Feature']}: {row['Importance']:.4f}")

In [None]:
# %% [markdown]
# ## 33. Precision-Recall Curve

# %%

# One precision-recall curve per model on shared axes.
fig, ax = plt.subplots(figsize=(10, 8))

for name, proba in zip(models, probabilities):
    precision, recall, _ = precision_recall_curve(y_test, proba)
    ax.plot(recall, precision, linewidth=2, label=name)

ax.set_xlabel('Recall')
ax.set_ylabel('Precision')
ax.set_title('Precision-Recall Curves', fontweight='bold', fontsize=14)
ax.legend(loc='best')
ax.grid(True, alpha=0.3)
plt.show()

In [None]:
# %% [markdown]
# ## 34. Learning Curve Analysis

# %%
from sklearn.model_selection import learning_curve

# Only drawn for the tree-based winners, for which best_model_obj was set by
# the feature-importance cell above.
if best_model in ['Random Forest', 'XGBoost']:
    size_fractions = np.linspace(0.1, 1.0, 10)
    train_sizes, train_scores, test_scores = learning_curve(
        best_model_obj,
        X_train,
        y_train,
        cv=5,
        n_jobs=-1,
        train_sizes=size_fractions,
    )

    # Average the per-fold scores at every training-set size.
    train_mean = train_scores.mean(axis=1)
    test_mean = test_scores.mean(axis=1)

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(train_sizes, train_mean, 'o-', color='blue', label='Training score')
    ax.plot(train_sizes, test_mean, 'o-', color='red', label='Cross-validation score')
    ax.set_xlabel('Training examples')
    ax.set_ylabel('Score')
    ax.set_title(f'Learning Curve - {best_model}', fontweight='bold', fontsize=14)
    ax.legend(loc='best')
    ax.grid(True, alpha=0.3)
    plt.show()

In [None]:
# %% [markdown]
# ## 35. Save Model for Deployment


# Persist the F1-best estimator; the file name embeds the model's display name.
best_model_obj = [lr_model, svm_model, rf_model, xgb_model][best_f1_idx]
model_path = f'../models/heart_{best_model.lower().replace(" ", "_")}_v1.0.0.pkl'

# joblib.dump does not create missing parent directories, so a fresh checkout
# without ../models failed here with FileNotFoundError.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
joblib.dump(best_model_obj, model_path)

print(f"‚úÖ Model saved: {model_path}")
print(f"üìÖ Saved at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

In [None]:
# %% [markdown]
# ## 36. Save Preprocessor & Feature Names

# %%
# Persist the fitted scaler and the ordered list of raw feature names.
# NOTE(review): the names come from the raw frame; confirm they match the
# columns the model was actually trained on after preprocessing.
artifacts = (
    (preprocessor.scaler, '../models/heart_scaler.pkl'),
    (df.drop('target', axis=1).columns.tolist(), '../models/heart_features.pkl'),
)
for payload, destination in artifacts:
    joblib.dump(payload, destination)

print("‚úÖ Preprocessor saved")
print("‚úÖ Feature names saved")
print(f"üìä Features: {df.shape[1] - 1} total")

In [None]:
# %% [markdown]
# ## 37. Model Metadata

# %%
import json

# Metadata key -> column of the winning row in the results table.
metric_columns = {
    'accuracy': 'Accuracy',
    'precision': 'Precision',
    'recall': 'Recall',
    'f1_score': 'F1-Score',
    'roc_auc': 'ROC-AUC',
}

# Description of the exported model, written next to the pickles.
metadata = dict(
    model_name=best_model,
    version='1.0.0',
    training_date=datetime.now().isoformat(),
    dataset_shape=df.shape,
    features=df.drop('target', axis=1).columns.tolist(),
    # Plain floats: numpy scalars are not JSON serialisable.
    metrics={key: float(best_overall[col]) for key, col in metric_columns.items()},
)

with open('../models/heart_metadata.json', 'w') as f:
    json.dump(metadata, f, indent=2)

print("‚úÖ Model metadata saved")

In [None]:
# %% [markdown]
# ## 38. Test Single Prediction

# %%
# Smoke-test the saved estimator on the first held-out sample.
# The original mixed access styles (X_test[0] is array-style, y_test.iloc[0]
# is pandas-style), so it failed whenever both objects were the same kind.
# Handle either container explicitly; for a DataFrame keep the one-row frame
# so the feature names seen at fit time are preserved.
if hasattr(X_test, 'iloc'):
    sample = X_test.iloc[[0]]
else:
    sample = X_test[0].reshape(1, -1)
actual = y_test.iloc[0] if hasattr(y_test, 'iloc') else y_test[0]

pred = best_model_obj.predict(sample)[0]
proba = best_model_obj.predict_proba(sample)[0]

print("üîç SINGLE PREDICTION TEST:")
print(f"Actual:    {actual}")
print(f"Predicted: {pred}")
print(f"Probability: No Disease: {proba[0]:.3f}, Disease: {proba[1]:.3f}")
print(f"Risk Level: {'HIGH' if pred == 1 else 'LOW'}")

In [None]:
# %% [markdown]
# ## 39. Export Results to CSV

# %%
# Write the comparison table to disk, then render a shaded version inline.
report_path = '../reports/heart_model_comparison.csv'
# to_csv does not create missing parent directories; make sure ../reports
# exists so a fresh checkout does not fail here.
os.makedirs(os.path.dirname(report_path), exist_ok=True)
results_df.to_csv(report_path, index=False)
print("‚úÖ Results exported to reports/heart_model_comparison.csv")

# Last expression of the cell: the Styler is displayed as the cell output.
results_df.style.background_gradient(cmap='Blues', subset=['Accuracy', 'Precision', 'Recall', 'F1-Score', 'ROC-AUC'])

In [None]:
# %% [markdown]
# ## 40. Pipeline Complete - Final Summary

# %%
# Human-readable run summary, printed and stored alongside the CSV report.
summary = f"""
‚ïî‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïó
‚ïë                    PIPELINE COMPLETED SUCCESSFULLY               ‚ïë
‚ï†‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ï£
‚ïë  Dataset:        Heart Disease                                    ‚ïë
‚ïë  Samples:        {df.shape[0]:,} patients                         ‚ïë
‚ïë  Features:       {df.shape[1]-1}                                  ‚ïë
‚ïë  Best Model:     {best_model}                                     ‚ïë
‚ïë  F1-Score:       {best_overall['F1-Score']:.4f}                   ‚ïë
‚ïë  Accuracy:       {best_overall['Accuracy']:.4f}                   ‚ïë
‚ïë  ROC-AUC:        {best_overall['ROC-AUC']:.4f}                    ‚ïë
‚ïë  Model Saved:    models/heart_{best_model.lower().replace(' ', '_')}_v1.0.0.pkl  ‚ïë
‚ïë  Reports:        reports/heart_model_comparison.csv               ‚ïë
‚ïö‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïù
"""

print(summary)

summary_path = '../reports/heart_summary.txt'
# open() does not create missing parent directories.
os.makedirs(os.path.dirname(summary_path), exist_ok=True)
# The banner contains non-ASCII characters; without an explicit encoding the
# platform default is used and the write can raise UnicodeEncodeError
# (e.g. cp1252 on Windows).
with open(summary_path, 'w', encoding='utf-8') as f:
    f.write(summary)

In [None]:
# %% [markdown]
# ## 41. Environment Information

# %%
import sklearn
import xgboost
import imblearn

# Versions of the interpreter and the key libraries, for reproducibility.
environment_lines = [
    "üîß ENVIRONMENT:",
    f"Python:        {sys.version.split()[0]}",
    f"Pandas:        {pd.__version__}",
    f"NumPy:         {np.__version__}",
    f"Scikit-learn:  {sklearn.__version__}",
    f"XGBoost:       {xgboost.__version__}",
    f"Imbalanced:    {imblearn.__version__}",
    f"Joblib:        {joblib.__version__}",
]
for line in environment_lines:
    print(line)

print("\n‚úÖ All systems ready for deployment!")

In [None]:
# %% [markdown]
# ## 42. Next Steps - Deployment

# %%
# Static deployment cheat-sheet shown at the end of the run.
next_steps = """
üöÄ READY FOR DEPLOYMENT:

1. Start API server:
   $ uvicorn api.main:app --reload

2. Access API documentation:
   http://localhost:8000/docs

3. Make predictions via API:
   curl -X POST http://localhost:8000/predict/heart \\
        -H "Content-Type: application/json" \\
        -d '{"age": 55, "sex": 1, "cp": 0, "trestbps": 140, ...}'

4. Run with Docker:
   $ docker build -t heart-model .
   $ docker run -p 8000:8000 heart-model

5. Monitor performance:
   $ python scripts/monitor.py

üéâ CONGRATULATIONS! Your winning project is complete!
"""
print(next_steps)