In [None]:
import os
import src.processing.data_processing as dp 

import numpy as np
import pandas as pd

from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split

In [None]:
# Hyper-parameter grids for the grid searches run further down.
# Every key carries the "model__" prefix, matching the pipeline step that is
# named 'model' in the experiment cells.

# Experiment 1: grids for the decision tree and the random forest.
decision_tree_parameters = dict(
    model__min_samples_leaf=[1, 5, 10],
    model__min_samples_split=[5, 10, 20],
    model__max_depth=[4, 5, 10, None],
)

random_forest_parameters = dict(
    model__n_estimators=[100, 200, 300],
    model__min_samples_leaf=[1, 5, 10],
    model__min_samples_split=[5, 10, 20],
    model__max_depth=[4, 5, 10, None],
)

# Experiment 2: grids for k-nearest-neighbours and the RBF-kernel SVC.
knn_parameters = dict(
    model__n_neighbors=range(10, 110, 10),  # 10, 20, ..., 100
    model__weights=["uniform", "distance"],
    model__p=[1, 2],
)

radial_svc_parameters = dict(
    model__C=[0.01, 0.05, 0.1, 0.2, 1, 2, 3, 10],
    model__kernel=["rbf"],
    model__gamma=[0.01, 0.05, 0.1, 0.2, 1],
)

# Each entry pairs an estimator class (not an instance) with the grid that is
# searched for it; the experiment cells instantiate the class themselves.
models_exp1 = [
    (DecisionTreeClassifier, decision_tree_parameters),
    (RandomForestClassifier, random_forest_parameters),
]
models_exp2 = [
    (SVC, radial_svc_parameters),
    (KNeighborsClassifier, knn_parameters),
]

In [None]:
# Read the Dry Bean spreadsheet from the local "data" directory.
dataset_path = os.path.join("data", "Dry_Bean_Dataset.xlsx")
df = pd.read_excel(dataset_path)

# Different processing here since there is no "eval" data
# (presumably a separate evaluation/validation set -- TODO confirm).
# The helper returns four values; the last one is discarded.
X, y, size_pack, _ = dp.df_category_split(df)

# Hold out 20% of the rows for testing; the split is stratified on the labels
# and seeded so that it is the same on every run.
test_size = 0.2
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=test_size,
    stratify=y,
    random_state=12,
)

In [None]:
# Experiment 1: tree-based classifiers in a pipeline with no scaling step.
# For each (estimator class, grid) pair: run a 5-fold grid search on the
# training split, then report the refitted best pipeline's accuracy on the
# held-out test split together with the winning parameters.
for model, parameters in models_exp1:
    # Seed the estimator (same seed as the train/test split). Both classifiers
    # in this experiment are randomised, so without a fixed random_state the
    # selected parameters and the reported accuracy can change between runs.
    pipe = Pipeline([('model', model(random_state=12))])

    # refit=True retrains the best configuration on the whole training split,
    # which is what grid.score() below evaluates.
    grid = GridSearchCV(pipe, parameters, cv=5, n_jobs=-1, refit=True, verbose=2, scoring="accuracy")
    grid.fit(X_train, y_train)
    print(f"Model: {model}")
    print(f"Accuracy: {round(grid.score(X_test, y_test),3)}")
    print(f"Best Parameters: {grid.best_params_}")
    print("#" * 64)

In [None]:
# Experiment 2: SVC and k-nearest-neighbours, each preceded by a
# StandardScaler step inside the pipeline so the scaler is fitted per CV fold.
# For each (estimator class, grid) pair: run a 5-fold grid search on the
# training split, then report the refitted best pipeline's accuracy on the
# held-out test split together with the winning parameters.
for model, parameters in models_exp2:
    pipe = Pipeline([('scaler', StandardScaler()),
                     ('model', model())])

    # refit must be a real boolean (or a str/callable): passing the int 1 is
    # deprecated and later rejected by scikit-learn's parameter validation.
    # True retrains the best configuration on the whole training split, which
    # is what grid.score() below evaluates.
    grid = GridSearchCV(pipe, parameters, cv=5, n_jobs=-1, refit=True, verbose=2, scoring="accuracy")
    grid.fit(X_train, y_train)
    print(f"Model: {model}")
    print(f"Accuracy: {round(grid.score(X_test, y_test),3)}")
    print(f"Best Parameters: {grid.best_params_}")
    print("#" * 64)