<a href="https://colab.research.google.com/github/messias077/REP/blob/main/Problema_Generalizacao.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
""" Avaliando a generalização de algoritmos """

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import make_scorer, mean_squared_error, accuracy_score
from sklearn.model_selection import KFold, GridSearchCV, cross_validate
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_wine

# Carregando os dados
wine = load_wine()
X, y = wine.data, wine.target

kf = KFold(n_splits=10)

# Definição dos modelos

# Logistic Regression convencional
lr_conv = LogisticRegression(max_iter=10000)

# KNN convencional
knn_conv = KNeighborsRegressor()

# Logistic Regression aninhado
lr_anin = Pipeline([
    ("padronizacao", StandardScaler()),
    ("classificador", LogisticRegression(max_iter=3000))
])

# KNN Aninhado
parametros = {'n_neighbors': [3, 5, 7]}
grid_search_knn = GridSearchCV(KNeighborsRegressor(), parametros, scoring='neg_mean_squared_error')
knn_anin = Pipeline([
    ("padronizacao", StandardScaler()),
    ("gsknn", grid_search_knn)
])


# Guarda as acurácias de cada uma das rodadas para calcular a média
acuracias_lr_conv = []
acuracias_lr_anin = []
acuracias_knn_conv = []
acuracias_knn_anin = []

# Guarda os scores de cada uma das rodadas para calcular a média
scores_lr_conv = []
scores_lr_anin = []
scores_knn_conv = []
scores_knn_anin = []

# Rodando todas os n_splits do KFold
for treinamento, teste in kf.split(X, y):
    X_treinamento = X[treinamento]
    y_treinamento = y[treinamento]

    X_teste = X[teste]
    y_teste = y[teste]

    # Convencional
    scores = cross_validate(lr_conv, X_treinamento, y_treinamento)
    scores_lr_conv.append(np.mean(scores['test_score']))

    lr_conv.fit(X_treinamento, y_treinamento)
    y_pred_lr_conv = lr_conv.predict(X_teste)
    acur_lr_conv = accuracy_score(y_teste, y_pred_lr_conv)
    acuracias_lr_conv.append(acur_lr_conv)

    scores = cross_validate(knn_conv, X_treinamento, y_treinamento,
                            scoring=make_scorer(mean_squared_error, greater_is_better=False, squared=False))
    scores_knn_conv.append(np.mean(scores['test_score']))

    knn_conv.fit(X_treinamento, y_treinamento)
    y_pred_knn_conv = knn_conv.predict(X_teste).astype(int)
    acur_knn_conv = accuracy_score(y_teste, y_pred_knn_conv)
    acuracias_knn_conv.append(acur_knn_conv)


    # Com aninhamento
    scores = cross_validate(lr_anin, X_treinamento, y_treinamento)
    scores_lr_anin.append(np.mean(scores['test_score']))

    lr_anin.fit(X_treinamento, y_treinamento)
    y_pred_lr_anin = lr_anin.predict(X_teste)
    acur_lr_anin = accuracy_score(y_teste, y_pred_lr_anin)
    acuracias_lr_anin.append(acur_lr_anin)

    scores = cross_validate(knn_anin, X_treinamento, y_treinamento,
                            scoring=make_scorer(mean_squared_error, greater_is_better=False, squared=False))
    scores_knn_anin.append(np.mean(scores['test_score']))

    knn_anin.fit(X_treinamento, y_treinamento)
    y_pred_knn_anin = knn_anin.predict(X_teste).astype(int)
    acur_knn_anin = accuracy_score(y_teste, y_pred_knn_anin)
    acuracias_knn_anin.append(acur_knn_anin)

print("\n  --------------------------------------")
print("\n         **** Resultados ****")
print("\n  --------------------------------------")

print("\n* K-Fold Convencional")
print("\n - Logistic Regression\n")
print(f"   Acurácia global..: {np.mean(acuracias_lr_conv)}")
print(f"   Desvio padrão....: {np.std(acuracias_lr_conv)}")
print(f"   Scores...........: {scores_lr_conv}")
print(f"   Média dos scores.: {np.mean(scores_lr_conv)}")

print("\n - KNeighbors Regressor\n")
print(f"   Acurácia global..: {np.mean(acuracias_knn_conv)}")
print(f"   Desvio padrão....: {np.std(acuracias_knn_conv)}")
print(f"   Scores...........: {scores_knn_conv}")
print(f"   Média dos scores.: {np.mean(scores_knn_conv)}")

print("\n\n* K-Fold Aninhado")
print("\n - Logistic Regression\n")
print(f"   Acurácia global..: {np.mean(acuracias_lr_anin)}")
print(f"   Desvio padrão....: {np.std(acuracias_lr_anin)}")
print(f"   Scores...........: {scores_lr_anin}")
print(f"   Média dos scores.: {np.mean(scores_lr_anin)}")

print("\n - KNeighbors Regressor\n")
print(f"   Acurácia global..: {np.mean(acuracias_knn_anin)}")
print(f"   Desvio padrão....: {np.std(acuracias_knn_anin)}")
print(f"   Scores...........: {scores_knn_anin}")
print(f"   Média dos scores.: {np.mean(scores_knn_anin)}")



  --------------------------------------

         **** Resultados ****

  --------------------------------------

* K-Fold Convencional

 - Logistic Regression

   Acurácia global..: 0.95
   Desvio padrão....: 0.05241100628920338
   Scores...........: [0.9625, 0.96875, 0.95625, 0.9625, 0.98125, 0.9625, 0.95, 0.9625, 0.9564393939393939, 0.9626893939393939]
   Média dos scores.: 0.9625378787878789

 - KNeighbors Regressor

   Acurácia global..: 0.6333333333333333
   Desvio padrão....: 0.37531879453454525
   Scores...........: [-0.6616160638564985, -0.5900939227141654, -0.63085262449558, -0.6636404742256687, -0.6412747515106212, -0.6523655065364247, -0.644378796529961, -0.6479419333664767, -0.5931703964807868, -0.5875551645043373]
   Média dos scores.: -0.631288963422052


* K-Fold Aninhado

 - Logistic Regression

   Acurácia global..: 0.9666666666666666
   Desvio padrão....: 0.03685138655950446
   Scores...........: [0.98125, 0.98125, 0.9875, 0.98125, 0.975, 0.96875, 0.96875, 0.975, 0