In [1]:
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.metrics import f1_score

# Revisão de Métodos de Classificação

In [2]:
# Columns used as model inputs and as the label, shared by both splits so the
# train and test selections cannot drift apart.
feature_cols = ['lead_time', 'adr']
target_col = 'is_cancelled'

# Load the train and test tables from their CSV files.
tb_hotel_train = pd.read_csv('data/tb_hotel_train_clean.csv')
tb_hotel_test = pd.read_csv('data/tb_hotel_test_clean.csv')

# Split each table into a feature frame (X_*) and a label series (y_*).
X_train, y_train = tb_hotel_train[feature_cols], tb_hotel_train[target_col]
X_test, y_test = tb_hotel_test[feature_cols], tb_hotel_test[target_col]

## kNN - k-Nearest Neighbors

**Hiperparâmetros**

1. `n_neighbors` - quanto maior, menor a complexidade
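1. `weights = "distance"` - pondera o voto de cada vizinho pelo inverso da distância (vizinhos mais próximos pesam mais); o padrão `"uniform"` dá o mesmo peso a todos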

In [3]:
from sklearn.neighbors import KNeighborsClassifier

In [5]:
# k-nearest-neighbours classifier with k = 5, fitted on the training split.
knn_fit = KNeighborsClassifier(
    n_neighbors=5,
)
knn_fit.fit(X=X_train, y=y_train)

In [14]:
# Predict a class label for every row of the test features with the fitted kNN model.
knn_pred = knn_fit.predict(X_test)


In [15]:
# Display the predicted labels as the cell output (quick sanity check).
knn_pred

array([0, 0, 1, ..., 0, 0, 0])

In [16]:
# F1 score of the kNN predictions against the true test labels
# (f1_score called with its default settings).
knn_f1 = f1_score(y_true=y_test, y_pred=knn_pred)
print(f"F1 kNN: {knn_f1}")

F1 kNN: 0.6347706241386101


## Árvores de Decisão

**Hiperparâmetros**

1. `max_depth` - quanto maior, maior a complexidade
1. `min_samples_split` - quanto maior, menor a complexidade
1. `min_samples_leaf` - quanto maior, menor a complexidade

In [18]:
from sklearn.tree import DecisionTreeClassifier

In [25]:
# Decision tree with depth capped at 50; a node needs at least 10 samples to be split.
# random_state is pinned because scikit-learn permutes the candidate features at
# every split and breaks ties between equally good splits at random, so without a
# seed the fitted tree (and the F1 computed from it) can differ between runs.
dt_fit = DecisionTreeClassifier(max_depth=50, min_samples_split=10, random_state=42)
dt_fit.fit(X_train, y_train)

In [26]:
# Evaluate the fitted decision tree on the test split: predict labels, then
# compare them with the true labels via the F1 score (default settings).
dt_pred = dt_fit.predict(X=X_test)
dt_f1 = f1_score(y_true=y_test, y_pred=dt_pred)
print(f"F1 DT: {dt_f1}")

F1 DT: 0.6433213696769813


## Redes Neurais

**Hiperparâmetros**

1. `hidden_layer_sizes` - quanto maior (mais camadas e/ou mais neurônios por camada), maior a complexidade

In [27]:
from sklearn.neural_network import MLPClassifier

In [28]:
# Multi-layer perceptron with three hidden layers (10, 10 and 2 units) and ReLU
# activations.
# random_state is pinned so that weight initialisation and batch shuffling are
# seeded; without it every run trains a different network and reports a different F1.
# NOTE(review): no feature-scaling step is visible in this notebook. scikit-learn
# documents MLPs as sensitive to feature scaling, so if the cleaned CSVs are not
# already standardised, consider scaling X_train / X_test first -- TODO confirm.
nn_fit = MLPClassifier(hidden_layer_sizes=(10, 10, 2), activation='relu', random_state=42)
nn_fit.fit(X_train, y_train)

In [29]:
# Evaluate the fitted neural network on the test split: predict labels, then
# compare them with the true labels via the F1 score (default settings).
nn_pred = nn_fit.predict(X=X_test)
nn_f1 = f1_score(y_true=y_test, y_pred=nn_pred)
print(f"F1 NN: {nn_f1}")

F1 NN: 0.39213490137514784


## Floresta Aleatória

**Hiperparâmetros**

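1. `n_estimators` - quanto maior, maior o modelo e o custo computacional; ao contrário de `max_depth`, mais árvores tendem a reduzir a variância do ensemble em vez de causar sobreajuste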
1. `max_depth` - quanto maior, maior a complexidade

In [30]:
from sklearn.ensemble import RandomForestClassifier

In [31]:
# Random forest of 100 trees, each limited to depth 15.
# random_state is pinned because the forest draws bootstrap samples and random
# feature subsets while growing its trees; without a seed the fitted forest
# (and the F1 computed from it) changes from run to run.
rf_fit = RandomForestClassifier(n_estimators=100, max_depth=15, random_state=42)
rf_fit.fit(X_train, y_train)

In [32]:
# Evaluate the fitted random forest on the test split: predict labels, then
# compare them with the true labels via the F1 score (default settings).
rf_pred = rf_fit.predict(X=X_test)
rf_f1 = f1_score(y_true=y_test, y_pred=rf_pred)
print(f"F1 RF: {rf_f1}")

F1 RF: 0.5905643994211288


## Boosting

**Hiperparâmetros**

1. `iterations` - quanto maior, maior a complexidade
1. `depth` - quanto maior, maior a complexidade

In [33]:
from catboost import CatBoostClassifier


In [35]:
# CatBoost gradient-boosting classifier: 1000 boosting iterations, trees of
# depth 8, with the per-iteration training log switched off.
cat_fit = CatBoostClassifier(
    iterations=1000,
    depth=8,
    verbose=False,
)
cat_fit.fit(X_train, y_train)

<catboost.core.CatBoostClassifier at 0x7fd0fa644400>

In [36]:
# Evaluate the fitted CatBoost model on the test split: predict labels, then
# compare them with the true labels via the F1 score (default settings).
cat_pred = cat_fit.predict(X_test)
cat_f1 = f1_score(y_true=y_test, y_pred=cat_pred)
print(f"F1 Cat: {cat_f1}")

F1 Cat: 0.5684516880093131
