# **1. Import Library**


In [26]:
import os
from getpass import getpass

import pandas as pd
import mlflow
import mlflow.sklearn
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.metrics import precision_score, recall_score, f1_score

# **2. Load Dataset**

In [27]:
# Read the preprocessed CSV, then separate the "target" label column from
# the remaining columns, which are used as model features.
df = pd.read_csv("heart_preprocessed.csv")

y = df["target"]
X = df.drop(columns="target")

# **3. Train Test Split**

In [28]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# partition identical across re-runs.
split = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split

# **4. Hyperparameter Tuning**

In [29]:
# Search space: 2 x 3 x 2 = 12 hyperparameter combinations.
param_grid = dict(
    n_estimators=[50, 100],
    max_depth=[None, 10, 20],
    min_samples_split=[2, 5],
)

# Base estimator; seeded so each candidate forest is reproducible.
rf = RandomForestClassifier(random_state=42)

# 3-fold cross-validated search ranked by accuracy. The search is only
# configured here; fitting happens in a later cell.
grid_search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    scoring="accuracy",
    cv=3,
    n_jobs=-1,
)

# **5. MLflow Manual Logging**

In [31]:
# SECURITY: an access token used to be hardcoded in this cell. It has been
# exposed in the notebook source and must be revoked and regenerated on
# DagsHub. Credentials are now taken from the environment; if the token is
# not set, it is requested interactively so it is never stored in the notebook.
os.environ.setdefault("MLFLOW_TRACKING_USERNAME", "maiamaiaa")
if not os.environ.get("MLFLOW_TRACKING_PASSWORD"):
    os.environ["MLFLOW_TRACKING_PASSWORD"] = getpass("DagsHub access token: ")

# Point MLflow at the remote tracking server and select the experiment.
mlflow.set_tracking_uri(
    "https://dagshub.com/maiamaiaa/Eksperimen_SML_EugeniaGraselaMaia.mlflow"
)
mlflow.set_experiment("Heart Disease Tuning")

# Run the grid search on the training split and keep the best refit estimator.
grid_search.fit(X_train, y_train)
model = grid_search.best_estimator_

# Evaluate the tuned model on the held-out split.
y_pred = model.predict(X_test)

acc = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Log the winning hyperparameters, the test metrics and the fitted model in a
# single named run; the `with` block ends the run when it exits.
with mlflow.start_run(run_name="RF_Manual_Logging"):
    mlflow.log_param("model", "RandomForest")
    mlflow.log_param("n_estimators", grid_search.best_params_.get("n_estimators", 100))
    mlflow.log_param("max_depth", grid_search.best_params_.get("max_depth"))
    mlflow.log_param("min_samples_split", grid_search.best_params_.get("min_samples_split"))

    mlflow.log_metric("accuracy", acc)
    mlflow.log_metric("precision", precision)
    mlflow.log_metric("recall", recall)
    mlflow.log_metric("f1_score", f1)

    mlflow.sklearn.log_model(model, "model")



🏃 View run RF_Manual_Logging at: https://dagshub.com/maiamaiaa/Eksperimen_SML_EugeniaGraselaMaia.mlflow/#/experiments/0/runs/6ae5c99240704979aaea31ecd17a55b0
🧪 View experiment at: https://dagshub.com/maiamaiaa/Eksperimen_SML_EugeniaGraselaMaia.mlflow/#/experiments/0


# **6. Log Parameters**

In [None]:
# Log every hyperparameter of the winning grid-search candidate in one call.
# NOTE(review): no run is opened explicitly in this cell; confirm which MLflow
# run these parameters are meant to land in.
tuned_params = grid_search.best_params_
mlflow.log_params(tuned_params)

# **7. Log Metrics**

In [None]:
# `accuracy` was never assigned anywhere in the notebook (the training cell
# stores the score as `acc`), so this cell raised NameError on a fresh kernel.
# Compute it here from the held-out predictions before logging.
accuracy = accuracy_score(y_test, y_pred)
mlflow.log_metric("accuracy", accuracy)

# **8. Save & Log Model**

In [None]:
# `best_model` was never assigned anywhere in the notebook, so this cell
# raised NameError on a fresh kernel. The tuned estimator is exposed by the
# fitted grid search as `best_estimator_`.
best_model = grid_search.best_estimator_

mlflow.sklearn.log_model(
    sk_model=best_model,
    artifact_path="model",
)



<mlflow.models.model.ModelInfo at 0x22fd2672cf0>

# **9. Confusion Matrix Artifact**

In [None]:
# Confusion matrix of the tuned model on the held-out split.
cm = confusion_matrix(y_test, y_pred)

# Explicit figure/axes so the plot does not depend on pyplot's global state.
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)
ax.set_title("Confusion Matrix")
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")

# Save the figure to disk first; MLflow uploads artifacts from a file path.
fig.savefig("confusion_matrix.png")
mlflow.log_artifact("confusion_matrix.png")

plt.close(fig)

# `accuracy` was never assigned anywhere in the notebook, so the final print
# raised NameError on a fresh kernel. Compute it from the test predictions.
accuracy = accuracy_score(y_test, y_pred)

print("Training selesai!")
print("Best Params:", grid_search.best_params_)
print("Accuracy:", accuracy)

# Close the run that the fluent logging calls above opened implicitly.
# Leaving it active makes a later `mlflow.start_run(...)` in the same kernel
# fail because a run is already active. This is a no-op if no run is active.
mlflow.end_run()

Training selesai!
Best Params: {'max_depth': 10, 'min_samples_split': 5, 'n_estimators': 50}
Accuracy: 0.9180327868852459
