In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report, accuracy_score
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import GradientBoostingClassifier

In [11]:
# Load the dataset and discard every row that contains a missing value.
# NOTE(review): the file name suggests the data was already oversampled before
# the split below; if so, synthetic rows may land in the test set and SMOTE is
# applied a second time further down -- confirm against the data pipeline.
data = (
    pd.read_csv('../data/water_potability_oversampled.csv')
    .dropna()
)

# Separate the 'Potability' target column from the remaining feature columns.
y = data['Potability']
X = data.drop(columns='Potability')

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Resample with SMOTE on the training portion only; X_test / y_test are left
# untouched by this step.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train, y_train)

In [12]:
# Baseline: gradient boosting with only the random seed set, fitted on the
# SMOTE-resampled training data (fit() returns the fitted estimator itself).
gb_classifier = GradientBoostingClassifier(random_state=42).fit(X_resampled, y_resampled)

# Predict on the held-out test split.
gb_predictions = gb_classifier.predict(X_test)

# Compute the four headline metrics in one pass; every scorer takes
# (y_true, y_pred) in that order.
gb_accuracy, gb_precision, gb_recall, gb_f1 = (
    scorer(y_test, gb_predictions)
    for scorer in (accuracy_score, precision_score, recall_score, f1_score)
)

# Emit one metric per line; accuracy is shown as a percentage, the others as
# fractions rounded to two decimals.
print(
    f'Acurácia do modelo (Gradient Boosting): {gb_accuracy * 100:.2f}%',
    f'Precision (Gradient Boosting): {gb_precision:.2f}',
    f'Recall (Gradient Boosting): {gb_recall:.2f}',
    f'F1-score (Gradient Boosting): {gb_f1:.2f}',
    sep='\n',
)

# Per-class precision / recall / F1 / support table.
gb_report = classification_report(y_test, gb_predictions)
print('\nClassification Report (Gradient Boosting):\n', gb_report)

Acurácia do modelo (Gradient Boosting): 66.67%
Precision (Gradient Boosting): 0.68
Recall (Gradient Boosting): 0.66
F1-score (Gradient Boosting): 0.67

Classification Report (Gradient Boosting):
               precision    recall  f1-score   support

           0       0.65      0.67      0.66       232
           1       0.68      0.66      0.67       248

    accuracy                           0.67       480
   macro avg       0.67      0.67      0.67       480
weighted avg       0.67      0.67      0.67       480



In [16]:
# Gradient Boosting classifier with adjusted hyperparameters.
# NOTE(review): learning_rate=0.1, min_samples_split=2 and min_samples_leaf=1
# look identical to the scikit-learn defaults, so the effective changes are
# probably n_estimators and max_depth -- confirm against the installed version.
gb_params = {
    'n_estimators': 500,      # more trees
    'learning_rate': 0.1,     # learning rate
    'max_depth': 5,           # maximum depth of each tree
    'min_samples_split': 2,
    'min_samples_leaf': 1,
    'random_state': 42,
}
gb_classifier = GradientBoostingClassifier(**gb_params)

# Fit on the SMOTE-resampled training data, then predict the held-out test split.
gb_classifier.fit(X_resampled, y_resampled)
gb_predictions = gb_classifier.predict(X_test)

# Compute the four headline metrics in one pass; every scorer takes
# (y_true, y_pred) in that order.
gb_accuracy, gb_precision, gb_recall, gb_f1 = (
    scorer(y_test, gb_predictions)
    for scorer in (accuracy_score, precision_score, recall_score, f1_score)
)

# Emit one metric per line; accuracy is shown as a percentage, the others as
# fractions rounded to two decimals.
print(
    f'Acurácia do modelo (Gradient Boosting): {gb_accuracy * 100:.2f}%',
    f'Precision (Gradient Boosting): {gb_precision:.2f}',
    f'Recall (Gradient Boosting): {gb_recall:.2f}',
    f'F1-score (Gradient Boosting): {gb_f1:.2f}',
    sep='\n',
)

# Per-class precision / recall / F1 / support table.
gb_report = classification_report(y_test, gb_predictions)
print('\nClassification Report (Gradient Boosting):\n', gb_report)

Acurácia do modelo (Gradient Boosting): 73.96%
Precision (Gradient Boosting): 0.75
Recall (Gradient Boosting): 0.75
F1-score (Gradient Boosting): 0.75

Classification Report (Gradient Boosting):
               precision    recall  f1-score   support

           0       0.73      0.73      0.73       232
           1       0.75      0.75      0.75       248

    accuracy                           0.74       480
   macro avg       0.74      0.74      0.74       480
weighted avg       0.74      0.74      0.74       480

