In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import optuna

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
df = pd.read_csv('../data/data.csv')
df_model = df[['contrast_score', 'sharpness_score', 'noise_score', 'y']]
X = df_model.iloc[:, :-1].values
y = df_model.iloc[:, -1].values

In [3]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True, random_state=0
)

In [4]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [5]:
def objective(trial):
    classifier_name = trial.suggest_categorical("classifier", [
        "LogisticRegression", "KNN", "DecisionTree", "RandomForest", "MLP", "SVC"
    ])

    if classifier_name == "LogisticRegression":
        C = trial.suggest_loguniform("lr_C", 1e-4, 1e2)
        model = LogisticRegression(C=C, max_iter=1000)

    elif classifier_name == "KNN":
        n_neighbors = trial.suggest_int("knn_n_neighbors", 1, 30)
        model = KNeighborsClassifier(n_neighbors=n_neighbors)

    elif classifier_name == "DecisionTree":
        max_depth = trial.suggest_int("dt_max_depth", 1, 20)
        model = DecisionTreeClassifier(max_depth=max_depth)

    elif classifier_name == "RandomForest":
        n_estimators = trial.suggest_int("rf_n_estimators", 10, 200)
        max_depth = trial.suggest_int("rf_max_depth", 2, 20)
        model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth)

    elif classifier_name == "MLP":
        hidden_layer_sizes = trial.suggest_categorical("mlp_hidden_layer_sizes", [(50,), (100,), (50, 50)])
        alpha = trial.suggest_loguniform("mlp_alpha", 1e-5, 1e-1)
        model = MLPClassifier(hidden_layer_sizes=hidden_layer_sizes, alpha=alpha, max_iter=1000)

    elif classifier_name == "SVC":
        C = trial.suggest_loguniform("svc_C", 1e-2, 1e2)
        gamma = trial.suggest_loguniform("svc_gamma", 1e-4, 1e-1)
        model = SVC(C=C, gamma=gamma)

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    return acc


In [6]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)

[I 2025-07-27 17:37:33,043] A new study created in memory with name: no-name-4441ee74-147c-4a55-a19a-0d65d9de6c64
  C = trial.suggest_loguniform("lr_C", 1e-4, 1e2)
[I 2025-07-27 17:37:33,060] Trial 0 finished with value: 0.653125 and parameters: {'classifier': 'LogisticRegression', 'lr_C': 63.71479949014432}. Best is trial 0 with value: 0.653125.
  C = trial.suggest_loguniform("lr_C", 1e-4, 1e2)
[I 2025-07-27 17:37:33,073] Trial 1 finished with value: 0.65 and parameters: {'classifier': 'LogisticRegression', 'lr_C': 0.3854948269848581}. Best is trial 0 with value: 0.653125.
  C = trial.suggest_loguniform("lr_C", 1e-4, 1e2)
[I 2025-07-27 17:37:33,082] Trial 2 finished with value: 0.653125 and parameters: {'classifier': 'LogisticRegression', 'lr_C': 17.80404060754396}. Best is trial 0 with value: 0.653125.
  alpha = trial.suggest_loguniform("mlp_alpha", 1e-5, 1e-1)
[I 2025-07-27 17:37:34,214] Trial 3 finished with value: 0.928125 and parameters: {'classifier': 'MLP', 'mlp_hidden_layer_si

In [7]:
def get_best_model(params):
    name = params['classifier']
    if name == "LogisticRegression":
        return LogisticRegression(C=params['lr_C'], max_iter=1000)
    elif name == "KNN":
        return KNeighborsClassifier(n_neighbors=params['knn_n_neighbors'])
    elif name == "DecisionTree":
        return DecisionTreeClassifier(max_depth=params['dt_max_depth'])
    elif name == "RandomForest":
        return RandomForestClassifier(n_estimators=params['rf_n_estimators'], max_depth=params['rf_max_depth'])
    elif name == "MLP":
        return MLPClassifier(hidden_layer_sizes=params['mlp_hidden_layer_sizes'], alpha=params['mlp_alpha'], max_iter=1000)
    elif name == "SVC":
        return SVC(C=params['svc_C'], gamma=params['svc_gamma'])

In [8]:
best_model = get_best_model(study.best_params)
best_model.fit(X_train, y_train)
final_y_pred = best_model.predict(X_test)
print("Acurácia final com melhor modelo:", accuracy_score(y_test, final_y_pred))
print(f'{best_model}')

Acurácia final com melhor modelo: 0.921875
MLPClassifier(alpha=0.04988728586343318, hidden_layer_sizes=(50, 50),
              max_iter=1000)


In [13]:
import cv2
from visao.ImageLoader import ImageLoader
from visao.ImageVectorizer import ImageVectorizer

loader = ImageLoader()
vec = ImageVectorizer()

kali = loader.load('../real/suica.png')
# kali = cv2.resize(kali, (512, 512))
kalivec = vec(kali)
print(best_model.predict(scaler.transform(np.array([kalivec]))))

[1]
