In [2]:
import numpy as np
import pandas as pd
import basedosdados as bd

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, precision_score, recall_score
from sklearn.metrics import confusion_matrix, fbeta_score, brier_score_loss, precision_recall_curve, auc

from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier, LocalOutlierFactor
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, IsolationForest
from sklearn.svm import SVC, OneClassSVM

## Dados

In [2]:
# TODO rode isto para criar o csv com vários hexágonos (O finalizado)
# main_table = bd.read_sql(query = """ SELECT * FROM `rj-cor-dev.clima_pluviometro.main_table_fields_1H_mais_frequentes`""", billing_project_id = "projeto-fgv1", use_bqstorage_api = True)
# main_table.to_csv("csvs/main_table_mult_hexag.csv")

In [3]:
main_table_k = pd.read_csv("csvs/main_table_mult_hexag.csv")

## Pré-processamento

In [4]:
main_table = main_table_k.copy()

In [5]:
main_table.fillna(0, inplace=True)

main_table["target"] = main_table["alagamento_pop"].apply(lambda x: 1 if x > 0 else 0)

main_table["id_h3"] = main_table["id_h3"].astype("category")

# Aplicar one-hot encoding na coluna "estacao_ano"
one_hot_encoder = OneHotEncoder(sparse=False)
encoded_cols = one_hot_encoder.fit_transform(main_table[["estacao_ano"]])
encoded_labels = one_hot_encoder.categories_[0]

# Adicionar as colunas codificadas ao DataFrame original
for i, label in enumerate(encoded_labels):
    main_table[f"estacao_ano_{label}"] = encoded_cols[:, i]


# Transformar a coluna id_h3 em categórica
label_encoder = LabelEncoder()
main_table["id_h3"] = label_encoder.fit_transform(main_table["id_h3"])


main_table.drop(columns=["data_hora", "estacao_ano", "alagamento_fim",
                         "estacoes", "Unnamed: 0", "alagamento_pop", "alagamento_inicio", 
                         "quinzenas", "alagamento_lat", "alagamento_long", "id_alagamento",
                         "gravidade_alagamento"], inplace=True)

In [6]:
X = main_table.drop(columns=["target"])
y = main_table["target"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=0.5)

std = StandardScaler()

X_train_standard = std.fit_transform(X_train)
X_val_standard = std.transform(X_val)
X_test_standard = std.transform(X_test)

X_test

Unnamed: 0,id_h3,chuva_15min,chuva_1h,chuva_4h,chuva_24h,chuva_96h,estacao_ano_Inverno,estacao_ano_Outono,estacao_ano_Primavera,estacao_ano_Verão
1187439,49,0.599317,2.600339,1.741515e+01,24.436614,40.027015,0.0,1.0,0.0,0.0
985198,15,0.000000,0.000000,0.000000e+00,0.012229,0.692578,0.0,0.0,1.0,0.0
1982444,23,0.000000,0.000000,0.000000e+00,0.000000,2.202850,0.0,0.0,0.0,1.0
1945507,19,0.000000,0.000000,0.000000e+00,0.000000,12.789241,0.0,0.0,1.0,0.0
1543190,11,0.000000,0.000000,0.000000e+00,28.797095,28.797095,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...
2517434,4,0.000000,0.000000,4.510000e-07,4.200053,4.200053,0.0,1.0,0.0,0.0
3332888,46,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.0,0.0,1.0,0.0
1680138,43,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.0,0.0,1.0,0.0
2150023,14,0.000000,0.000000,0.000000e+00,0.000000,0.380969,0.0,1.0,0.0,0.0


## Modelos

In [6]:
# Naive Model
# predicts majority class always

# predictid minority proportion

naive_pred = np.ones(len(y_test)) * y_train.mean()
y_pred_nv = np.where(naive_pred > 0.5, 1, 0)

# metrics
print("Naive Model")

print("R2: ", r2_score(y_test, y_pred_nv))
print("MSE: ", mean_squared_error(y_test, y_pred_nv))
print("Precision: ", precision_score(y_test, y_pred_nv))
print("Recall: ", recall_score(y_test, y_pred_nv))
print("F0.5: ", fbeta_score(y_test, y_pred_nv, beta=0.5))
recall_nv, precision_nv, th_nv = precision_recall_curve(y_test, y_pred_nv)
print("Area under Precision Recall Curve: ", auc(recall_nv, precision_nv))
print("Brier Score: ", brier_score_loss(y_test, y_pred_nv))
print("Confusion Matrix: \n", confusion_matrix(y_test, y_pred_nv))

Naive Model
R2:  -0.0019201578502179295
MSE:  0.0019164779101138818


Precision:  0.0
Recall:  0.0
F0.5:  0.0
Area under Precision Recall Curve:  0.4990417610449431
Brier Score:  0.0019164779101138818
Confusion Matrix: 
 [[604117      0]
 [  1160      0]]


In [8]:
# Logistic Regression

logreg = LogisticRegression(max_iter=1000)
logreg.fit(X_train_standard, y_train)

y_pred_lr = logreg.predict(X_test_standard)

print("R²: ", r2_score(y_test, y_pred_lr))
print("Precison: ", precision_score(y_test, y_pred_lr))
print("Recall: ", recall_score(y_test, y_pred_lr))
print("Brier Score: ", brier_score_loss(y_test, y_pred_lr))
print("F-0.5 Score: ", fbeta_score(y_test, y_pred_lr, beta=0.5))
recall_lr, precision_lr, th_lr = precision_recall_curve(y_test, y_pred_lr)
print("Area under Precision Recall Curve: ", auc(recall_lr, precision_lr))
print("confusion_matrix: \n", confusion_matrix(y_test, y_pred_lr))

R²:  -0.031188295272667643
Precison:  0.40425531914893614
Recall:  0.061389337641357025
Brier Score:  0.002104821428866453
F-0.5 Score:  0.1909547738693467
Area under Precision Recall Curve:  0.23173687495812517
confusion_matrix: 
 [[603927    112]
 [  1162     76]]


In [7]:
# naive bayes
from sklearn.naive_bayes import GaussianNB

gnb = GaussianNB()
gnb.fit(X_train_standard, y_train)

y_pred_gnb = gnb.predict(X_test_standard)

print("R²: ", r2_score(y_test, y_pred_gnb))
print("Precison: ", precision_score(y_test, y_pred_gnb))
print("Recall: ", recall_score(y_test, y_pred_gnb))
print("Brier Score: ", brier_score_loss(y_test, y_pred_gnb))
print("F-0.5 Score: ", fbeta_score(y_test, y_pred_gnb, beta=0.5))
recall_gnb, precision_gnb, th_gnb = precision_recall_curve(y_test, y_pred_gnb)
print("Area under Precision Recall Curve: ", auc(recall_gnb, precision_gnb))
print("confusion_matrix: \n", confusion_matrix(y_test, y_pred_gnb))


R²:  -11.225153374320678
Precison:  0.04502801120448179
Recall:  0.5543103448275862
Brier Score:  0.023384334775648174
F-0.5 Score:  0.05516472203157173
Area under Precision Recall Curve:  0.29817977729537226
confusion_matrix: 
 [[590480  13637]
 [   517    643]]


In [9]:
# KNN

knn = KNeighborsClassifier(n_neighbors=13)
knn.fit(X_train_standard, y_train)

y_pred_knn = knn.predict(X_test_standard)

print("R²: ", r2_score(y_test, y_pred_knn))
print("Precison: ", precision_score(y_test, y_pred_knn, average="macro"))
print("Recall: ", recall_score(y_test, y_pred_knn, average="macro"))
print("Brier Score: ", brier_score_loss(y_test, y_pred_knn))
print("F-0.5 Score: ", fbeta_score(y_test, y_pred_knn, beta=0.5))
recall_knn, precision_knn, th_knn = precision_recall_curve(y_test, y_pred_knn)
print("Area under Precision Recall Curve: ", auc(recall_knn, precision_knn))
print("confusion_matrix: \n", confusion_matrix(y_test, y_pred_knn))

R²:  0.0465150613569838
Precison:  0.8148854146741138
Recall:  0.558088787936563
Brier Score:  0.0019462163604432351
F-0.5 Score:  0.3348837209302326
Area under Precision Recall Curve:  0.3728061674895485
confusion_matrix: 
 [[603955     84]
 [  1094    144]]


In [10]:
# Neural Network
a
# 10 camadas ocultas com 10 neurônios cada relu
mlp = MLPClassifier(hidden_layer_sizes=(5,5), max_iter=1000, activation="relu", 
                    solver="adam", random_state=1, early_stopping=True)
mlp.fit(X_train_standard, y_train)

y_pred_nn = mlp.predict(X_test_standard)

print("R²: ", r2_score(y_test, y_pred_nn))
print("Precison: ", precision_score(y_test, y_pred_nn))
print("Recall: ", recall_score(y_test, y_pred_nn))
print("Brier Score: ", brier_score_loss(y_test, y_pred_nn))
print("F-0.5 Score: ", fbeta_score(y_test, y_pred_nn, beta=0.5))
recall_nn, precision_nn, th_nn = precision_recall_curve(y_test, y_pred_nn)
print("Area under Precision Recall Curve: ", auc(recall_nn, precision_nn))
print("confusion_matrix: \n", confusion_matrix(y_test, y_pred_nn))

NameError: name 'a' is not defined

In [None]:
# Decision Tree

dt = DecisionTreeClassifier()
dt.fit(X_train, y_train)

y_pred_dt = dt.predict(X_test)

print("R²: ", r2_score(y_test, y_pred_dt))
print("Precison: ", precision_score(y_test, y_pred_dt))
print("Recall: ", recall_score(y_test, y_pred_dt))
print("Brier Score: ", brier_score_loss(y_test, y_pred_dt))
print("F-0.5 Score: ", fbeta_score(y_test, y_pred_dt, beta=0.5))
recall_dt, precision_dt, th_dt = precision_recall_curve(y_test, y_pred_dt)
print("Area under Precision Recall Curve: ", auc(recall_dt, precision_dt))
print("confusion matrix:\n", confusion_matrix(y_test, y_pred_dt))

R²:  -0.2226327881759511
Precison:  0.3942232630757221
Recall:  0.4102355808285946
Brier Score:  0.0024815084663715953
F-0.5 Score:  0.39732494099134535
Area under Precision Recall Curve:  0.4007953677918318
confusion matrix:
 [[603270    776]
 [   726    505]]


In [None]:
# Random Forest

rf = RandomForestClassifier(n_estimators=10, criterion="entropy")
rf.fit(X_train, y_train)

y_pred_rf = rf.predict(X_test)

print("R²: ", r2_score(y_test, y_pred_rf))
print("Precison: ", precision_score(y_test, y_pred_rf))
print("Recall: ", recall_score(y_test, y_pred_rf))
print("Brier Score: ", brier_score_loss(y_test, y_pred_rf))
print("F-0.5 Score: ", fbeta_score(y_test, y_pred_rf, beta=0.5))
recall_rf, precision_rf, th_rf = precision_recall_curve(y_test, y_pred_rf)
print("Area under Precision Recall Curve: ", auc(recall_rf, precision_rf))
print("confusion matrix:\n", confusion_matrix(y_test, y_pred_rf))

R²:  0.1965788535754569
Precison:  0.7904761904761904
Recall:  0.2696994313566206
Brier Score:  0.001630658359726208
F-0.5 Score:  0.5702507729302644
Area under Precision Recall Curve:  0.5287966665312727
confusion matrix:
 [[603958     88]
 [   899    332]]


In [None]:
# SVM

svm = SVC(kernel="rbf", gamma="auto", verbose=2)

svm.fit(X_train_standard, y_train)

y_pred_svm = svm.predict(X_test_standard)

print("R²: ", r2_score(y_test, y_pred_svm))
print("Precison: ", precision_score(y_test, y_pred_svm))
print("Recall: ", recall_score(y_test, y_pred_svm))
print("Brier Score: ", brier_score_loss(y_test, y_pred_svm))
print("F-0.5 Score: ", fbeta_score(y_test, y_pred_svm, beta=0.5))
recall_svm, precision_svm, th_svm = precision_recall_curve(y_test, y_pred_svm)
print("Area under Precision Recall Curve: ", auc(recall_svm, precision_svm))
print("confusion matrix:\n", confusion_matrix(y_test, y_pred_svm))


[LibSVM]

## Imbalanced Models

In [None]:
# Isolation Forest

isolation_forest = IsolationForest(n_estimators=100, contamination=0.001)
isolation_forest.fit(X_train)

y_pred_if = isolation_forest.predict(X_test)
y_pred_if = np.where(y_pred_if == -1, 1, 0)

print("R²: ", r2_score(y_test, y_pred_if))
print("Precison: ", precision_score(y_test, y_pred_if))
print("Recall: ", recall_score(y_test, y_pred_if))
print("Brier Score: ", brier_score_loss(y_test, y_pred_if))
print("F-0.5 Score: ", fbeta_score(y_test, y_pred_if, beta=0.5))
recall_if, precision_if, th_if = precision_recall_curve(y_test, y_pred_if)
print("Area under Precision Recall Curve: ", auc(recall_if, precision_if))
print("confusion matrix:\n", confusion_matrix(y_test, y_pred_if))


R²:  -0.2950790718960974
Precison:  0.2009966777408638
Recall:  0.0982940698619009
Brier Score:  0.0026285485818889533
F-0.5 Score:  0.16625446551250342
Area under Precision Recall Curve:  0.1485285297779022
confusion matrix:
 [[603565    481]
 [  1110    121]]


In [None]:
# One Class SVM

one_class_svm = OneClassSVM()
one_class_svm.fit(X_train_standard)

y_pred_ocs = one_class_svm.predict(X_test_standard)
y_pred_ocs = np.where(y_pred_ocs == -1, 1, 0)

print("R²: ", r2_score(y_test, y_pred_ocs))
print("Precison: ", precision_score(y_test, y_pred_ocs))
print("Recall: ", recall_score(y_test, y_pred_ocs))
print("Brier Score: ", brier_score_loss(y_test, y_pred_ocs))
print("F-0.5 Score: ", fbeta_score(y_test, y_pred_ocs, beta=0.5))
recall_ocs, precision_ocs, th_ocs = precision_recall_curve(y_test, y_pred_ocs)
print("Area under Precision Recall Curve: ", auc(recall_ocs, precision_ocs))
print("confusion matrix:\n", confusion_matrix(y_test, y_pred_ocs))

R²:  -41.6422356732674
Precison:  0.02059202059202059
Recall:  0.8827586206896552
Brier Score:  0.5319797838968282
F-0.5 Score:  0.025590787316566034
Area under Precision Recall Curve:  0.4397809323522356
confusion matrix:
 [[5243 6088]
 [  17  128]]


In [None]:
# Local Outlier Factor

local_outlier_factor = LocalOutlierFactor(n_neighbors=20, contamination=0.01, novelty=True)
local_outlier_factor.fit(X_train_standard)

y_pred_lof = local_outlier_factor.predict(X_test_standard)
y_pred_lof = np.where(y_pred_lof == -1, 1, 0)

print("R²: ", r2_score(y_test, y_pred_lof))
print("Precison: ", precision_score(y_test, y_pred_lof))
print("Recall: ", recall_score(y_test, y_pred_lof))
print("Brier Score: ", brier_score_loss(y_test, y_pred_lof))
print("F-0.5 Score: ", fbeta_score(y_test, y_pred_lof, beta=0.5))
recall_lof, precision_lof, th_lof = precision_recall_curve(y_test, y_pred_lof)
print("Area under Precision Recall Curve: ", auc(recall_lof, precision_lof))
print("confusion matrix:\n", confusion_matrix(y_test, y_pred_lof))

In [None]:
# Smote
from imblearn.over_sampling import SMOTE

smote = SMOTE()

X_train_smote, y_train_smote = smote.fit_resample(X_train_standard, y_train)

# Logistic Regression

logreg = LogisticRegression(max_iter=1000)
logreg.fit(X_train_smote, y_train_smote)

y_pred_lr = logreg.predict(X_test_standard)

print("R²: ", r2_score(y_test, y_pred_lr))
print("Precison: ", precision_score(y_test, y_pred_lr))
print("Recall: ", recall_score(y_test, y_pred_lr))
print("Brier Score: ", brier_score_loss(y_test, y_pred_lr))
print("F-0.5 Score: ", fbeta_score(y_test, y_pred_lr, beta=0.5))
recall_lr, precision_lr, th_lr = precision_recall_curve(y_test, y_pred_lr)
print("Area under Precision Recall Curve: ", auc(recall_lr, precision_lr))
print("confusion_matrix: \n", confusion_matrix(y_test, y_pred_lr))

R²:  -40.05273281433582
Precison:  0.018803139306736428
Recall:  0.7809847198641766
Brier Score:  0.07974200242203157
F-0.5 Score:  0.023363299304179998
Area under Precision Recall Curve:  0.398160838785707
confusion_matrix: 
 [[556091  48008]
 [   258    920]]


In [None]:
# neural network

mlp = MLPClassifier(hidden_layer_sizes=(5,5), max_iter=1000, activation="relu",
                    solver="adam", random_state=1, early_stopping=True)
mlp.fit(X_train_smote, y_train_smote)

y_pred_nn = mlp.predict(X_test_standard)

print("R²: ", r2_score(y_test, y_pred_nn))
print("Precison: ", precision_score(y_test, y_pred_nn))
print("Recall: ", recall_score(y_test, y_pred_nn))
print("Brier Score: ", brier_score_loss(y_test, y_pred_nn))
print("F-0.5 Score: ", fbeta_score(y_test, y_pred_nn, beta=0.5))
recall_nn, precision_nn, th_nn = precision_recall_curve(y_test, y_pred_nn)
print("Area under Precision Recall Curve: ", auc(recall_nn, precision_nn))
print("confusion_matrix: \n", confusion_matrix(y_test, y_pred_nn))

R²:  -60.03899883329871
Precison:  0.013883915043662709
Recall:  0.8556876061120543
Brier Score:  0.11856389719087294
F-0.5 Score:  0.01728478047642891
Area under Precision Recall Curve:  0.432979975788415
confusion_matrix: 
 [[532505  71594]
 [   170   1008]]


In [None]:
# Decision Tree

dt = DecisionTreeClassifier()
dt.fit(X_train_smote, y_train_smote)

y_pred_dt = dt.predict(X_test)

print("R²: ", r2_score(y_test, y_pred_dt))
print("Precison: ", precision_score(y_test, y_pred_dt))
print("Recall: ", recall_score(y_test, y_pred_dt))
print("Brier Score: ", brier_score_loss(y_test, y_pred_dt))
print("F-0.5 Score: ", fbeta_score(y_test, y_pred_dt, beta=0.5))
recall_dt, precision_dt, th_dt = precision_recall_curve(y_test, y_pred_dt)
print("Area under Precision Recall Curve: ", auc(recall_dt, precision_dt))
print("confusion matrix:\n", confusion_matrix(y_test, y_pred_dt))

R²:  -5.953260903269285
Precison:  0.024983027834351662
Recall:  0.15619694397283532
Brier Score:  0.01350621285791464
F-0.5 Score:  0.030028069717344473
Area under Precision Recall Curve:  0.08946488119946629
confusion matrix:
 [[596918   7181]
 [   994    184]]


In [None]:
# Random Forest

rf = RandomForestClassifier(n_estimators=10, criterion="entropy")
rf.fit(X_train_smote, y_train_smote)

y_pred_rf = rf.predict(X_test)

print("R²: ", r2_score(y_test, y_pred_rf))
print("Precison: ", precision_score(y_test, y_pred_rf))
print("Recall: ", recall_score(y_test, y_pred_rf))
print("Brier Score: ", brier_score_loss(y_test, y_pred_rf))
print("F-0.5 Score: ", fbeta_score(y_test, y_pred_rf, beta=0.5))
recall_rf, precision_rf, th_rf = precision_recall_curve(y_test, y_pred_rf)
print("Area under Precision Recall Curve: ", auc(recall_rf, precision_rf))
print("confusion matrix:\n", confusion_matrix(y_test, y_pred_rf))

R²:  -2.9856856994152747
Precison:  0.025175961017866812
Recall:  0.07894736842105263
Brier Score:  0.007741909902408319
F-0.5 Score:  0.02914629559984957
Area under Precision Recall Curve:  0.05101173220922061
confusion matrix:
 [[600498   3601]
 [  1085     93]]


In [None]:
# kNN

knn = KNeighborsClassifier(n_neighbors=11)

knn.fit(X_train_smote, y_train_smote)

y_pred_knn = knn.predict(X_test_standard)

print("R²: ", r2_score(y_test, y_pred_knn))
print("Precison: ", precision_score(y_test, y_pred_knn, average="macro"))
print("Recall: ", recall_score(y_test, y_pred_knn, average="macro"))
print("Brier Score: ", brier_score_loss(y_test, y_pred_knn))
print("F-0.5 Score: ", fbeta_score(y_test, y_pred_knn, beta=0.5))
recall_knn, precision_knn, th_knn = precision_recall_curve(y_test, y_pred_knn)
print("Area under Precision Recall Curve: ", auc(recall_knn, precision_knn))
print("confusion_matrix: \n", confusion_matrix(y_test, y_pred_knn))

R²:  -7.547194962318416
Precison:  0.5376349519349483
Recall:  0.8290123679145666
Brier Score:  0.016602315964426205
F-0.5 Score:  0.09229553168735759
Area under Precision Recall Curve:  0.37334061807216173
confusion_matrix: 
 [[594434   9665]
 [   384    794]]


In [None]:
#svm

svm = SVC(verbose=True)

svm.fit(X_train_smote, y_train_smote)

y_pred_svm = svm.predict(X_test_standard)

print("R²: ", r2_score(y_test, y_pred_svm))
print("Precison: ", precision_score(y_test, y_pred_svm))
print("Recall: ", recall_score(y_test, y_pred_svm))
print("Brier Score: ", brier_score_loss(y_test, y_pred_svm))
print("F-0.5 Score: ", fbeta_score(y_test, y_pred_svm, beta=0.5))
recall_svm, precision_svm, th_svm = precision_recall_curve(y_test, y_pred_svm)
print("Area under Precision Recall Curve: ", auc(recall_svm, precision_svm))
print("confusion matrix:\n", confusion_matrix(y_test, y_pred_svm))


[LibSVM]

## Validation

In [7]:
# Juntar os dataset de treino e teste para treinar o modelo final
X_train_final = pd.concat([X_train, X_test])
y_train_final = pd.concat([y_train, y_test])

# grid search para o random forest
random_forest_final = RandomForestClassifier()

from sklearn.model_selection import GridSearchCV

param_grid = {"n_estimators": [10, 50, 100, 200, 500],
                "criterion": ["gini", "entropy"]}
grid_search = GridSearchCV(random_forest_final, param_grid, cv=5, verbose=2, n_jobs=-1)
grid_search.fit(X_train_final, y_train_final)

param_grid = grid_search.best_params_ 

# treinar o modelo final com os melhores parâmetros

random_forest_final = RandomForestClassifier(n_estimators=param_grid["n_estimators"],
                                            criterion=param_grid["criterion"])

random_forest_final.fit(X_train_final, y_train_final)

Fitting 5 folds for each of 10 candidates, totalling 50 fits


In [8]:
# salvar o modelo final

import pickle

pickle.dump(random_forest_final, open("random_forest_final.sav", "wb"))

# carregar o modelo final

random_forest_final = pickle.load(open("random_forest_final.sav", "rb"))

# testar o modelo final

y_pred_rf_final = random_forest_final.predict(X_val)

print("R²: ", r2_score(y_val, y_pred_rf_final))
print("Precison: ", precision_score(y_val, y_pred_rf_final))
print("Recall: ", recall_score(y_val, y_pred_rf_final))
print("Brier Score: ", brier_score_loss(y_val, y_pred_rf_final))
print("F-0.5 Score: ", fbeta_score(y_val, y_pred_rf_final, beta=0.5))
recall_rf_final, precision_rf_final, th_rf_final = precision_recall_curve(y_val, y_pred_rf_final)
print("Area under Precision Recall Curve: ", auc(recall_rf_final, precision_rf_final))
print("confusion matrix:\n", confusion_matrix(y_val, y_pred_rf_final))


R²:  0.24233480354293757
Precison:  0.7777777777777778
Recall:  0.3413173652694611
Brier Score:  0.0014604883383971307
F-0.5 Score:  0.6193728655696988
Area under Precision Recall Curve:  0.5582522967981631
confusion matrix:
 [[603994    114]
 [   770    399]]
