# Heart Failure Pipeline بدون كلاس مخصص
النوت بوك ده بينشئ بايبلاين تحتوي على تنظيف القيم المتطرفة، Scaling، SMOTE، وتدريب موديل، بدون الحاجة لتعريف كلاس مخصص.

In [1]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, FunctionTransformer
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib


In [2]:

def handle_outliers(X_df):
    X = X_df.copy()
    # creatinine_phosphokinase
    Q1 = X['creatinine_phosphokinase'].quantile(0.25)
    Q3 = X['creatinine_phosphokinase'].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    median_val = X['creatinine_phosphokinase'].median()
    X['creatinine_phosphokinase'] = X['creatinine_phosphokinase'].apply(
        lambda x: median_val if x < lower_bound or x > upper_bound else x
    )

    # ejection_fraction
    q01, q99 = X['ejection_fraction'].quantile([0.01, 0.99])
    X['ejection_fraction'] = X['ejection_fraction'].clip(lower=q01, upper=q99)

    # platelets
    q01, q99 = X['platelets'].quantile([0.01, 0.99])
    X['platelets'] = X['platelets'].clip(lower=q01, upper=q99)

    # serum_creatinine
    q01, q99 = X['serum_creatinine'].quantile([0.01, 0.99])
    X['serum_creatinine'] = X['serum_creatinine'].clip(lower=q01, upper=q99)

    return X


In [3]:

df = pd.read_csv('heart_failure_clinical_records_dataset.csv')
X = df.drop('DEATH_EVENT', axis=1)
y = df['DEATH_EVENT']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)


In [4]:

pipeline = Pipeline(steps=[
    ('outlier_handler', FunctionTransformer(handle_outliers)),
    ('smote', SMOTE(random_state=42)),
    ('scaler', StandardScaler()),
    ('model', RandomForestClassifier(
        n_estimators=300, max_depth=15, min_samples_split=2,
        min_samples_leaf=1, random_state=42, class_weight='balanced'
    ))
])

In [5]:

pipeline.fit(X_train, y_train)

y_pred = pipeline.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


Accuracy: 0.8666666666666667
              precision    recall  f1-score   support

           0       0.92      0.89      0.90        61
           1       0.77      0.83      0.80        29

    accuracy                           0.87        90
   macro avg       0.84      0.86      0.85        90
weighted avg       0.87      0.87      0.87        90



In [6]:

import joblib
joblib.dump(pipeline, "pipeline_with_smote.pkl")


['pipeline_with_smote.pkl']