In [71]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder

In [73]:
# Read the CKD CSV file into a DataFrame (the path is relative to the
# current working directory)
dataset = pd.read_csv(filepath_or_buffer="./CKD.csv")

In [75]:
# Split the frame into the target column ("classification") and the
# remaining columns, which serve as model inputs
dependent = dataset["classification"]
independent = dataset.drop(columns="classification")

In [77]:
# One-hot encode the categorical input columns. drop_first=True keeps k-1
# indicator columns for a k-level category by removing the first level.
independent = pd.get_dummies(
    data=independent,
    drop_first=True,
)

In [79]:
# Replace the target's class labels with integer codes. The fitted encoder
# is kept in `label_encoder` so the codes can be mapped back to labels.
label_encoder = LabelEncoder().fit(dependent)
dependent = label_encoder.transform(dependent)

In [81]:
# Hold out one third of the rows for testing; the fixed random_state makes
# the shuffle (and therefore the split) repeatable across runs
x_train, x_test, y_train, y_test = train_test_split(
    independent,
    dependent,
    test_size=1 / 3,
    random_state=0,
)

In [83]:
# Baseline model: a seeded 10-tree random forest that splits on entropy.
# fit() returns the estimator itself, so construction and training chain.
classifier = RandomForestClassifier(
    criterion='entropy',
    n_estimators=10,
    random_state=0,
).fit(x_train, y_train)

# Score the held-out rows and print per-class precision / recall / F1
y_predict = classifier.predict(x_test)
report = classification_report(y_test, y_predict)
print(report)

              precision    recall  f1-score   support

           0       0.98      0.98      0.98        51
           1       0.99      0.99      0.99        82

    accuracy                           0.98       133
   macro avg       0.98      0.98      0.98       133
weighted avg       0.98      0.98      0.98       133



In [85]:
# Hyper-parameter search for the random forest: every combination of forest
# size and split criterion below is cross-validated on the training split.
param_grid = {
    'n_estimators': [10, 50, 100, 1000],
    # NOTE(review): scikit-learn documents 'entropy' and 'log_loss' as the
    # same Shannon information gain criterion, so those candidates are likely
    # redundant -- confirm before trimming the grid.
    'criterion': ['gini', 'entropy', 'log_loss']
}

# Seed the forest (same seed as the baseline classifier) so that the CV
# scores, and therefore best_params_, are reproducible on a re-run. An
# unseeded forest can pick a different "best" candidate each time.
grid = GridSearchCV(
    RandomForestClassifier(random_state=0),
    param_grid,
    refit=True,             # retrain the best candidate on all of x_train
    verbose=3,
    n_jobs=-1,              # use every available CPU core
    scoring='f1_weighted',  # candidates are ranked by weighted F1
)
grid.fit(x_train, y_train)

print("Best Parameters:", grid.best_params_)

# Because refit=True, grid.predict delegates to the refitted best estimator
y_predict = grid.predict(x_test)
grid_report = classification_report(y_test, y_predict)
print(grid_report)

Fitting 5 folds for each of 12 candidates, totalling 60 fits
Best Parameters: {'criterion': 'entropy', 'n_estimators': 1000}
              precision    recall  f1-score   support

           0       0.98      0.98      0.98        51
           1       0.99      0.99      0.99        82

    accuracy                           0.98       133
   macro avg       0.98      0.98      0.98       133
weighted avg       0.98      0.98      0.98       133

