In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold, cross_val_score
from sklearn.preprocessing import LabelEncoder
import numpy as np


In [3]:

# Load the three tabular datasets; paths are resolved relative to the
# notebook's working directory.
df1 = pd.read_csv("winequality-white.csv")
df2 = pd.read_excel("Dry_Bean_Dataset.xlsx")
df3 = pd.read_excel("Raisin_Dataset.xlsx")

# Display names, listed in the same order as the feature/target lists below.
datasets_name = ['wine', 'drybean', 'raisin']
datasets = {
    # Feature frames: every column except the target column.
    'X': [df1.drop(columns='quality'), df2.drop(columns='Class'), df3.drop(columns='Class')],
    # Target columns, position-aligned with 'X'.
    'y': [df1['quality'], df2['Class'], df3['Class']]
}

# Hyperparameter settings evaluated on every dataset; each dict is passed
# verbatim to RandomForestClassifier as keyword arguments.
# NOTE(review): the first setting uses max_features=10 -- confirm every dataset
# has at least 10 feature columns, or that the installed scikit-learn accepts a
# value larger than the number of features.
combinations = [
    {'n_estimators': 50, 'criterion': 'gini', 'max_depth': 1, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_features': 10},
    {'n_estimators': 100, 'criterion': 'entropy', 'max_depth': 5, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'sqrt'},
    {'n_estimators': 200, 'criterion': 'log_loss', 'max_depth': 10, 'min_samples_split': 10, 'min_samples_leaf': 8, 'max_features': 'log2'}
]

# One shuffled 5-fold splitter with a fixed seed, reused for every
# dataset/setting pair.
# NOTE(review): KFold does not stratify by class; consider StratifiedKFold if
# the targets are imbalanced -- left unchanged here to keep the scores as-is.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# `results`: one flat record per (dataset, setting) pair.
# `res`: dataset name -> list of mean CV accuracies, in `combinations` order.
results = []
res = {name: [] for name in datasets_name}

# Walk names, features and targets together instead of indexing parallel lists.
for dataset_name, X, y in zip(datasets_name, datasets['X'], datasets['y']):
    print(f"\nDataset: {dataset_name}")
    for params in combinations:
        model = RandomForestClassifier(**params, random_state=42)
        cv_scores = cross_val_score(model, X, y, cv=kf, scoring='accuracy')
        mean_score = np.mean(cv_scores)
        # Unpacking `params` keeps the record in sync with whatever keys the
        # setting defines, in the same key order as the setting itself.
        results.append({
            'Dataset': dataset_name,
            **params,
            'Mean_CV_Accuracy': mean_score
        })
        res[dataset_name].append(mean_score)


Dataset: wine

Dataset: drybean

Dataset: raisin


In [6]:
# Dump every cross-validation record, one dict per line.
for record in results:
    print(record)

# Blank separator line, then the per-dataset lists of mean accuracies.
print(f"\n{res}")

{'Dataset': 'wine', 'n_estimators': 50, 'criterion': 'gini', 'max_depth': 1, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_features': 10, 'Mean_CV_Accuracy': 0.47264493131266805}
{'Dataset': 'wine', 'n_estimators': 100, 'criterion': 'entropy', 'max_depth': 5, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'Mean_CV_Accuracy': 0.5485991536553334}
{'Dataset': 'wine', 'n_estimators': 200, 'criterion': 'log_loss', 'max_depth': 10, 'min_samples_split': 10, 'min_samples_leaf': 8, 'max_features': 'log2', 'Mean_CV_Accuracy': 0.6031079193679515}
{'Dataset': 'drybean', 'n_estimators': 50, 'criterion': 'gini', 'max_depth': 1, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_features': 10, 'Mean_CV_Accuracy': 0.40827357668086084}
{'Dataset': 'drybean', 'n_estimators': 100, 'criterion': 'entropy', 'max_depth': 5, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'Mean_CV_Accuracy': 0.8968484375215023}
{'Dataset': 'drybean', 'n_estimators': 200, '