In [None]:
# Imports
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay

In [None]:
# Load the heart dataset from CSV and preview the first rows
csv_path = "../data/heart.csv"
data = pd.read_csv(csv_path)
data.head()

In [None]:
# Data check: size of the loaded frame as a (rows, columns) tuple
data.shape

In [None]:
# Target balance: number of rows for each distinct value in the "target" column
data["target"].value_counts()

In [None]:
# Dataset info: column names, dtypes and non-null counts for the loaded frame
data.info()

In [None]:
# Separate the label column ("target") from the predictor columns
y = data["target"]
X = data.drop(columns="target")

X.shape, y.shape

In [None]:
# Hold out 20% of the rows as a test set, stratified on y, with a fixed seed
split_options = dict(test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

X_train.shape, X_test.shape

In [None]:
# Fit a random forest on the training split (only the seed is set explicitly)
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)
model

In [None]:
# Evaluate model on the test split
y_pred = model.predict(X_test)

print("Test accuracy:", model.score(X_test, y_test))
print(classification_report(y_test, y_pred))

# Pin the row/column order to the classes the model was fitted on, so the
# printed matrix and the plot below always refer to the same classes.
cm = confusion_matrix(y_test, y_pred, labels=model.classes_)
print("Confusion Matrix:\n", cm)

# Without display_labels the plot ticks are positional indices (0..n_classes-1)
# rather than the actual class labels. The trailing semicolon keeps the
# display object's repr out of the cell output; only the figure is shown.
ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=model.classes_).plot();

In [None]:
# Rank the feature columns by the fitted forest's importance scores, highest first
fi = (
    pd.Series(model.feature_importances_, index=X.columns)
    .sort_values(ascending=False)
)
fi

In [None]:
# Compare n_estimators
# Fit one forest per tree count on the same training split and with the same
# seed, so the number of trees is the only setting that differs between models.
forests = {
    n_trees: RandomForestClassifier(n_estimators=n_trees, random_state=42).fit(X_train, y_train)
    for n_trees in (1, 10, 100)
}

# Keep the per-model names defined in case other cells refer to them.
model_one, model_small, model_default = forests[1], forests[10], forests[100]

# Score every forest on the test split, in the order the forests were fitted.
for n_trees, forest in forests.items():
    print(f"n_estimators={n_trees} accuracy:", forest.score(X_test, y_test))