In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import seaborn as sns




In [None]:
from sklearn.model_selection import train_test_split

# Load the treated advertising dataset and echo it to the cell output.
# NOTE(review): this is an absolute, machine-specific path; the cell only
# runs on a machine where this exact file exists.
df = pd.read_csv(
    'C:/Users/lgrodrigues/Documents/Projeto Final/Projeto-Final---Grupo-4/Dataset/advertising_tratado.csv',
    sep=',',
)
print(df)

# Features (four columns) and the target column.
X = df[['Daily Time Spent on Site', 'Age', 'Area Income', 'Daily Internet Usage']]
y = df['Clicked on Ad']

# Train/test split: 20% held out, stratified on the target, fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Min-max scaling applied to every feature column through a ColumnTransformer.
colunas_num = X_train.columns
pipe_features_mm = Pipeline(steps=[('scaler', MinMaxScaler())])
pre_processador = ColumnTransformer(
    transformers=[('pre_process', pipe_features_mm, colunas_num)]
)




In [3]:
# Model pipeline: the shared preprocessing step followed by a classifier slot.
# The classifier set here is only a placeholder (every grid entry below
# overrides the 'classifier' step), but it is seeded so the pipeline is
# deterministic if it is ever fitted on its own.
pipe_models = Pipeline([
    ('pre_processador', pre_processador),
    ('classifier', DecisionTreeClassifier(random_state=42))
])

# Candidates for the model search: one grid entry per classifier.
# There is deliberately no entry that leaves the 'classifier' step untouched:
# such an entry would add a duplicate decision-tree candidate, and
# best_params_ would not name the winning classifier if it were selected.
param_grid = [
    {'classifier': [DecisionTreeClassifier(random_state=42)]},
    {'classifier': [KNeighborsClassifier(n_neighbors=5)]},
    {'classifier': [LogisticRegression(random_state=1)]},
]

# 5-fold stratified cross-validation, shuffled with a fixed seed.
stratified_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)


# Build the search object: every candidate is scored on five metrics and the
# candidate with the best F1 is the one that gets refit.
grid_search = GridSearchCV(
    estimator=pipe_models,
    param_grid=param_grid,
    scoring=['precision', 'accuracy', 'f1', 'recall', 'roc_auc'],
    refit='f1',
    cv=stratified_cv,
)

# Run the cross-validated search on the training split.
grid_search.fit(X_train, y_train)

# Keep the winning pipeline and the parameters that produced it.
best_model = grid_search.best_estimator_
best_params = grid_search.best_params_

# Predictions of the winning pipeline on both splits.
y_pred_train = best_model.predict(X_train)
y_pred = best_model.predict(X_test)




In [None]:
# Evaluate the selected model on the test split.
# ROC AUC is a ranking metric, so it is computed from the predicted probability
# of the positive class (second column of predict_proba) instead of the hard
# labels; hard labels collapse the ROC curve to a single threshold.
y_score_test = best_model.predict_proba(X_test)[:, 1]

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_score_test)

print("Melhores hiperparâmetros:", best_params)
print("Acurácia no conjunto de teste:", accuracy)
print('Precisão no conjunto de teste:', precision)
print('F1 no conjunto de teste:', f1)
print('Recall no conjunto de teste:', recall)
print('ROC_AUC no conjunto de teste:', roc_auc)

print("\nClassification Report:")
print(classification_report(y_test, y_pred))


In [None]:
# Evaluate the selected model on the training split (to compare against the
# test metrics and spot overfitting).
# ROC AUC is computed from the predicted probability of the positive class
# (second column of predict_proba), not from hard labels, which would collapse
# the ROC curve to a single threshold.
y_score_train = best_model.predict_proba(X_train)[:, 1]

accuracy_train = accuracy_score(y_train, y_pred_train)
precision_train = precision_score(y_train, y_pred_train)
recall_train = recall_score(y_train, y_pred_train)
f1_train = f1_score(y_train, y_pred_train)
roc_auc_train = roc_auc_score(y_train, y_score_train)

# Print the training-set results.
print("Acurácia no conjunto de treinamento:", accuracy_train)
print('Precisão no conjunto de treinamento:', precision_train)
print('Recall no conjunto de treinamento:', recall_train)
print('F1 no conjunto de treinamento:', f1_train)
print('ROC AUC no conjunto de treinamento:', roc_auc_train)


In [None]:
# Test-set confusion matrix: raw counts first, then the rendered figure.
print(confusion_matrix(y_test, y_pred))

# Draw the matrix and customise it through the returned display object.
disp_teste = ConfusionMatrixDisplay.from_predictions(y_test, y_pred, cmap='mako')
eixo_teste = disp_teste.ax_
eixo_teste.set_title('Matriz de Confusão Teste')
eixo_teste.set_xlabel('Predição')
eixo_teste.set_ylabel('Verdadeira')
eixo_teste.grid(False)

# Save the figure to disk before showing it.
disp_teste.figure_.savefig('Modelo de Machine Learning/MatrizConfusaoTeste.png')
plt.show()


# Per-class precision/recall/F1 summary for the test split.
print(classification_report(y_test, y_pred))



In [None]:
# Training-set confusion matrix: raw counts first, then the rendered figure.
print(confusion_matrix(y_train, y_pred_train))

# Draw the matrix and customise it through the returned display object.
disp_treino = ConfusionMatrixDisplay.from_predictions(y_train, y_pred_train, cmap='mako')
eixo_treino = disp_treino.ax_
eixo_treino.set_title('Matriz de Confusão Treino')
eixo_treino.set_xlabel('Predição')
eixo_treino.set_ylabel('Verdadeira')
eixo_treino.grid(False)

# Save the figure to disk before showing it.
disp_treino.figure_.savefig('Modelo de Machine Learning/MatrizConfusaoTreino.png')
plt.show()

# Per-class precision/recall/F1 summary for the training split.
print(classification_report(y_train, y_pred_train))

In [None]:
import joblib

# Persist the selected pipeline (preprocessing + classifier) to disk.
# NOTE(review): the file name says "regressao_logistica", but best_model is
# whichever candidate the grid search selected. Confirm the name still fits.
caminho_modelo = 'Modelo de Machine Learning/modelo_regressao_logistica.joblib'
joblib.dump(best_model, caminho_modelo)