In [None]:
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.metrics import f1_score

# Revisão de Métodos de Classificação

In [None]:
# Load the train and test tables from their CSV files.
tb_hotel_train = pd.read_csv('data/tb_hotel_train_clean.csv')
tb_hotel_test = pd.read_csv('data/tb_hotel_test_clean.csv')

# The same predictor columns and target column are selected from both tables,
# so name them once instead of repeating the literals per split.
feature_cols = ['lead_time', 'adr']
target_col = 'is_cancelled'

X_train, y_train = tb_hotel_train[feature_cols], tb_hotel_train[target_col]
X_test, y_test = tb_hotel_test[feature_cols], tb_hotel_test[target_col]

## kNN - Nearest Neighbors

**Hiperparâmetros**

1. `n_neighbors` - quanto maior, menor a complexidade
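1. `weights = "distance"` - pondera cada vizinho pelo inverso da distância, de modo que vizinhos mais próximos têm mais influência na predição (o padrão, `"uniform"`, dá o mesmo peso a todos)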

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# k-nearest-neighbours classifier that looks at the 5 closest training points.
knn_fit = KNeighborsClassifier(n_neighbors=5)
# Kept as the cell's last expression so the fitted estimator is still displayed.
knn_fit.fit(X=X_train, y=y_train)

In [None]:
# Predict on the test features and score the predictions with F1.
knn_pred = knn_fit.predict(X=X_test)
knn_f1 = f1_score(y_true=y_test, y_pred=knn_pred)
print(f"F1 kNN: {knn_f1}")

## Árvores de Decisão

**Hiperparâmetros**

1. `max_depth` - quanto maior, maior a complexidade
1. `min_samples_split` - quanto maior, menor a complexidade
1. `min_samples_leaf` - quanto maior, menor a complexidade

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Decision tree limited mainly by min_samples_split: a node needs at least
# 5000 samples before it may be split further; max_depth caps the depth at 50.
# random_state is pinned because scikit-learn permutes the features at every
# split, so ties between equally good splits could otherwise be resolved
# differently from one run to the next.
dt_fit = DecisionTreeClassifier(
    max_depth=50,
    min_samples_split=5000,
    random_state=42,
)
dt_fit.fit(X_train, y_train)

In [None]:
# Predict on the test features and score the predictions with F1.
dt_pred = dt_fit.predict(X=X_test)
dt_f1 = f1_score(y_true=y_test, y_pred=dt_pred)
print(f"F1 DT: {dt_f1}")

## Redes Neurais

**Hiperparâmetros**

1. `hidden_layer_sizes` - quanto maior (mais camadas e/ou mais neurônios por camada), maior a complexidade

In [None]:
from sklearn.neural_network import MLPClassifier

In [None]:
# Multi-layer perceptron with two hidden layers of 10 ReLU units each.
# random_state is pinned because weight initialisation and batch shuffling are
# random; without it the reported F1 changes on every Restart & Run All.
# NOTE(review): the features are fed unscaled; MLPs are usually sensitive to
# feature scale - consider standardising the inputs (TODO confirm impact).
nn_fit = MLPClassifier(
    hidden_layer_sizes=(10, 10),
    activation='relu',
    random_state=42,
)
nn_fit.fit(X_train, y_train)

In [None]:
# Predict on the test features and score the predictions with F1.
nn_pred = nn_fit.predict(X=X_test)
nn_f1 = f1_score(y_true=y_test, y_pred=nn_pred)
print(f"F1 NN: {nn_f1}")

## Floresta Aleatória

**Hiperparâmetros**

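1. `n_estimators` - quanto maior, maior o modelo e o custo computacional (mas, ao contrário dos demais hiperparâmetros, mais árvores reduzem a variância e normalmente não causam overfitting)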
1. `max_depth` - quanto maior, maior a complexidade

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Random forest of 100 trees, each limited to depth 15.
# random_state is pinned because bootstrap sampling and per-split feature
# sampling are random; without it the reported F1 is not reproducible.
rf_fit = RandomForestClassifier(
    n_estimators=100,
    max_depth=15,
    random_state=42,
)
rf_fit.fit(X_train, y_train)

In [None]:
# Predict on the test features and score the predictions with F1.
rf_pred = rf_fit.predict(X=X_test)
rf_f1 = f1_score(y_true=y_test, y_pred=rf_pred)
print(f"F1 RF: {rf_f1}")

## Boosting

**Hiperparâmetros**

1. `iterations` - quanto maior, maior a complexidade
1. `depth` - quanto maior, maior a complexidade

In [None]:
from catboost import CatBoostClassifier


In [None]:
# Gradient-boosted trees: 1000 boosting iterations with trees of depth 8.
# verbose=100 logs only every 100th iteration; the default prints one line per
# iteration, which would flood the cell output with ~1000 lines.
cat_fit = CatBoostClassifier(iterations=1000, depth=8, verbose=100)
cat_fit.fit(X_train, y_train)

In [None]:
# Predict on the test features with the CatBoost model and score with F1.
cat_pred = cat_fit.predict(X_test)
cat_f1 = f1_score(y_test, cat_pred)
# Label fixed: this cell reports the boosting model, but the message was
# copy-pasted from the random-forest cell and still said "F1 RF".
print(f"F1 CatBoost: {cat_f1}")