# **1. Import Library**


In [33]:
import os

import pandas as pd
import mlflow
import mlflow.sklearn
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.metrics import ConfusionMatrixDisplay, f1_score, precision_score, recall_score

# **2. Load Dataset**

In [34]:
# Read the CSV into a frame, then separate the label column ("target")
# from the remaining columns, which are used as model features.
df = pd.read_csv("heart_preprocessed.csv")

y = df["target"]
X = df.drop(columns="target")

# **3. Train Test Split**

In [35]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) repeatable between runs.
split_options = {"test_size": 0.2, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# **4. Hyperparameter Tuning**

In [36]:
# Candidate hyperparameters: 2 x 3 x 2 = 12 combinations, each scored with
# 3-fold cross-validation on accuracy.
param_grid = dict(
    n_estimators=[50, 100],
    max_depth=[None, 10, 20],
    min_samples_split=[2, 5],
)

# Seeded base estimator so repeated searches build the same forests.
rf = RandomForestClassifier(random_state=42)

# The search object is only configured here; it is fitted in a later cell.
# n_jobs=-1 asks for all available CPU cores.
grid_search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    scoring="accuracy",
    cv=3,
    n_jobs=-1,
)

# **5. MLflow Manual Logging**

In [42]:
# Fit the grid search, evaluate the best model on the held-out split, and log
# parameters, metrics, plots and the model itself to the remote MLflow server.

# End any run that is still active in this kernel (e.g. from a previous
# execution of this cell) before a new one is started below.
mlflow.end_run()

# --- Tracking configuration --------------------------------------------------
# SECURITY: credentials must never be stored in the notebook. The access token
# is read from the environment; set MLFLOW_TRACKING_PASSWORD before launching
# Jupyter. NOTE(review): the token that used to be hardcoded in this cell is
# exposed in the file history and should be revoked on DagsHub.
os.environ.setdefault("MLFLOW_TRACKING_USERNAME", "maiamaiaa")
if not os.environ.get("MLFLOW_TRACKING_PASSWORD"):
    raise RuntimeError(
        "MLFLOW_TRACKING_PASSWORD is not set. Export your DagsHub access "
        "token as an environment variable before running this cell."
    )

mlflow.set_tracking_uri("https://dagshub.com/maiamaiaa/Eksperimen_SML_EugeniaGraselaMaia.mlflow")
mlflow.set_experiment("Heart Disease Tuning")

# --- Fit and evaluate --------------------------------------------------------
grid_search.fit(X_train, y_train)
model = grid_search.best_estimator_
y_pred = model.predict(X_test)

# precision/recall/f1 use scikit-learn's default averaging here.
acc = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# --- Plot artifacts ----------------------------------------------------------
# Save and close explicit figure handles instead of relying on pyplot's
# implicit "current figure", so each file always contains the intended plot.
cm = confusion_matrix(y_test, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot()
disp.figure_.savefig("confusion_matrix.png")
plt.close(disp.figure_)

# Importances are labelled with the columns the model was trained on.
feat_importances = pd.Series(model.feature_importances_, index=X_train.columns)
fig, ax = plt.subplots(figsize=(10, 6))
feat_importances.nlargest(10).plot(kind="barh", ax=ax)
ax.set_title("Feature Importance")
fig.savefig("feature_importance.png")
plt.close(fig)

# --- Log everything to MLflow ------------------------------------------------
with mlflow.start_run(run_name="RF_Manual_Logging"):
    mlflow.log_param("model", "RandomForest")
    # best_params_ holds exactly the keys searched in param_grid
    # (n_estimators, max_depth, min_samples_split).
    mlflow.log_params(grid_search.best_params_)

    mlflow.log_metrics(
        {
            "accuracy": acc,
            "precision": precision,
            "recall": recall,
            "f1_score": f1,
        }
    )

    mlflow.log_artifact("confusion_matrix.png")
    mlflow.log_artifact("feature_importance.png")

    mlflow.sklearn.log_model(model, "model")

🏃 View run adventurous-mink-928 at: https://dagshub.com/maiamaiaa/Eksperimen_SML_EugeniaGraselaMaia.mlflow/#/experiments/0/runs/44da064f94b14857821319ee8f22a949
🧪 View experiment at: https://dagshub.com/maiamaiaa/Eksperimen_SML_EugeniaGraselaMaia.mlflow/#/experiments/0




🏃 View run RF_Manual_Logging at: https://dagshub.com/maiamaiaa/Eksperimen_SML_EugeniaGraselaMaia.mlflow/#/experiments/0/runs/f73ec25509c04b4f839e5884eee81bdd
🧪 View experiment at: https://dagshub.com/maiamaiaa/Eksperimen_SML_EugeniaGraselaMaia.mlflow/#/experiments/0
