# Heart Failure Prediction Model Training
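This notebook trains a Random Forest classifier on a heart failure clinical records dataset to predict the `DEATH_EVENT` outcome, evaluates it on a held-out test split, and saves the fitted model and scaler to `heart_failure_model.pkl`.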

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import pickle
import warnings
# Blanket filter: no category or module is given, so every warning raised after
# this line is suppressed (unless a later filter overrides it). This also hides
# deprecation and data-quality warnings from pandas and scikit-learn.
warnings.filterwarnings('ignore')

In [None]:
# Read the clinical-records CSV into a DataFrame.
# The path is relative, so the file has to sit in the notebook's working directory.
df = pd.read_csv(
    filepath_or_buffer='heart_failure_clinical_records_dataset.csv',
)

# Sanity check: the (rows, columns) tuple first, then the per-column summary
# that DataFrame.info() prints by itself under the heading.
print("Dataset shape:", df.shape)
print("\nDataset info:")
df.info()

In [None]:
# Quick look at the data: leading rows, summary statistics, and how often each
# value of the target column occurs.
# Each heading and its table are written by a single print call; sep="\n" puts
# the table on the line after its heading.
print("First 5 rows:", df.head(), sep="\n")
print("\nDataset description:", df.describe(), sep="\n")
print("\nTarget variable distribution:", df['DEATH_EVENT'].value_counts(), sep="\n")

In [None]:
# Data preprocessing
# The label is the DEATH_EVENT column; every remaining column is used as a feature.
y = df['DEATH_EVENT']
X = df.drop(columns=['DEATH_EVENT'])

# Echo what ended up on each side of the split between inputs and label
print("Features:", list(X.columns))
print("Target variable:", y.name)

In [None]:
# Hold out 20% of the rows for testing.
# stratify=y asks for the same class proportions in both partitions, and the
# fixed random_state makes the partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Report the (rows, columns) shape of each partition
print("Training set size:", X_train.shape)
print("Testing set size:", X_test.shape)

In [None]:
# Standardize the features.
# The scaler is fitted on the training partition only; the test partition is
# transformed with those same fitted statistics.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Train a random forest: 100 trees, depth capped at 10, seeded for repeatability.
# fit() returns the estimator itself, so construction and training are chained.
rf_model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
    max_depth=10,
).fit(X_train_scaled, y_train)

# Predict on the held-out partition and measure the fraction classified correctly
y_pred = rf_model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)")

# Per-class metrics and the confusion matrix; each heading and its body are
# written by one print call, separated by a newline.
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")
print("\nConfusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")

In [None]:
# Pair every feature name with the importance score the fitted forest assigned
# to it, ordered from most to least important.
feature_importance = pd.DataFrame(
    {
        'feature': X.columns,
        'importance': rf_model.feature_importances_,
    }
).sort_values('importance', ascending=False)

print("Feature Importance:", feature_importance, sep="\n")

In [None]:
# Persist everything needed to score new data later: the fitted forest, the
# fitted scaler, and the feature column names in training order.
# NOTE: the result is a pickle file; only load it back from a trusted location.
with open('heart_failure_model.pkl', 'wb') as model_file:
    pickle.dump(
        {
            'model': rf_model,
            'scaler': scaler,
            'feature_names': X.columns.tolist()
        },
        model_file,
    )

# Confirm the save and repeat the test-set accuracy as a percentage
print("Model saved successfully as 'heart_failure_model.pkl'")
print(f"Final Model Accuracy: {accuracy*100:.2f}%")