In [1]:
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.metrics import f1_score

# Revisão de Métodos de Classificação

In [2]:
tb_hotel_train = pd.read_csv('data/tb_hotel_train_clean.csv')
tb_hotel_test = pd.read_csv('data/tb_hotel_test_clean.csv')

X_train = tb_hotel_train[['lead_time', 'adr']]
y_train = tb_hotel_train['is_cancelled']

X_test = tb_hotel_test[['lead_time', 'adr']]
y_test = tb_hotel_test['is_cancelled']

## kNN - Neareast Neighbors

**Hiperparâmetros**

1. `n_neighbors` - quanto maior, menor a complexidade
1. `weights = "distance"`

In [3]:
from sklearn.neighbors import KNeighborsClassifier

In [4]:
knn_fit = KNeighborsClassifier(n_neighbors= 5)
knn_fit.fit(X_train, y_train)

In [5]:
knn_pred = knn_fit.predict(X_test)
knn_f1 = f1_score(y_test, knn_pred)
print(f"F1 kNN: {knn_f1}")

F1 kNN: 0.6347706241386101


## Árvores de Decisão

**Hiperparâmetros**

1. `max_depth` - quanto maior, maior a complexidade
1. `min_samples_split` - quanto maior, menor a complexidade
1. `min_samples_leaf` - quanto maior, menor a complexidade

In [6]:
from sklearn.tree import DecisionTreeClassifier

In [7]:
dt_fit = DecisionTreeClassifier(max_depth = 50, min_samples_split=5000)
dt_fit.fit(X_train, y_train)

In [8]:
dt_pred = dt_fit.predict(X_test)
dt_f1 = f1_score(y_test, dt_pred)
print(f"F1 DT: {dt_f1}")

F1 DT: 0.4629786274178086


## Redes Neurais

**Hiperparâmetros**

1. `hidden_layer_size` - quanto maior, maior a complexidade

In [9]:
from sklearn.neural_network import MLPClassifier

In [66]:
nn_fit = MLPClassifier(hidden_layer_sizes=(2, 4, 2), activation = 'relu', solver = "adam", max_iter=50)
nn_fit.fit(X_train, y_train)

In [67]:
nn_pred = nn_fit.predict(X_test)
nn_f1 = f1_score(y_test, nn_pred)
print(f"F1 NN: {nn_f1}")

F1 NN: 0.4503064798598949


## Floresta Aleatória

**Hiperparâmetros**

1. `n_estimators` - quanto maior, maior a complexidade
1. `max_depth` - quanto maior, maior a complexidade

In [12]:
from sklearn.ensemble import RandomForestClassifier

In [13]:
rf_fit = RandomForestClassifier(n_estimators=100, max_depth=15)
rf_fit.fit(X_train, y_train)

In [14]:
rf_pred = rf_fit.predict(X_test)
rf_f1 = f1_score(y_test, rf_pred)
print(f"F1 RF: {rf_f1}")

F1 RF: 0.5890900703494407


## Boosting

**Hiperparâmetros**

1. `iterations` - quanto maior, maior a complexidade
1. `depth` - quanto maior, maior a complexidade

In [15]:
from catboost import CatBoostClassifier


In [16]:
cat_fit = CatBoostClassifier(iterations = 1000, depth = 8)
cat_fit.fit(X_train, y_train)

Learning rate set to 0.068297
0:	learn: 0.6776406	total: 68.8ms	remaining: 1m 8s
1:	learn: 0.6644654	total: 74.8ms	remaining: 37.3s
2:	learn: 0.6530214	total: 81ms	remaining: 26.9s
3:	learn: 0.6436734	total: 88.1ms	remaining: 21.9s
4:	learn: 0.6345437	total: 95.6ms	remaining: 19s
5:	learn: 0.6265774	total: 103ms	remaining: 17.1s
6:	learn: 0.6205719	total: 110ms	remaining: 15.6s
7:	learn: 0.6156647	total: 117ms	remaining: 14.5s
8:	learn: 0.6110568	total: 124ms	remaining: 13.7s
9:	learn: 0.6075006	total: 131ms	remaining: 13s
10:	learn: 0.6042538	total: 138ms	remaining: 12.4s
11:	learn: 0.6013234	total: 145ms	remaining: 12s
12:	learn: 0.5988000	total: 152ms	remaining: 11.5s
13:	learn: 0.5961242	total: 159ms	remaining: 11.2s
14:	learn: 0.5938815	total: 166ms	remaining: 10.9s
15:	learn: 0.5921346	total: 172ms	remaining: 10.6s
16:	learn: 0.5906867	total: 180ms	remaining: 10.4s
17:	learn: 0.5892230	total: 187ms	remaining: 10.2s
18:	learn: 0.5877851	total: 194ms	remaining: 9.99s
19:	learn: 0.5

169:	learn: 0.5446290	total: 1.33s	remaining: 6.48s
170:	learn: 0.5443903	total: 1.33s	remaining: 6.47s
171:	learn: 0.5441054	total: 1.34s	remaining: 6.46s
172:	learn: 0.5439841	total: 1.35s	remaining: 6.46s
173:	learn: 0.5438139	total: 1.36s	remaining: 6.45s
174:	learn: 0.5436053	total: 1.36s	remaining: 6.43s
175:	learn: 0.5435312	total: 1.37s	remaining: 6.43s
176:	learn: 0.5433562	total: 1.38s	remaining: 6.42s
177:	learn: 0.5432354	total: 1.39s	remaining: 6.41s
178:	learn: 0.5430514	total: 1.4s	remaining: 6.4s
179:	learn: 0.5429851	total: 1.4s	remaining: 6.39s
180:	learn: 0.5428633	total: 1.41s	remaining: 6.38s
181:	learn: 0.5427161	total: 1.42s	remaining: 6.37s
182:	learn: 0.5425678	total: 1.43s	remaining: 6.36s
183:	learn: 0.5423935	total: 1.43s	remaining: 6.35s
184:	learn: 0.5422290	total: 1.44s	remaining: 6.35s
185:	learn: 0.5420575	total: 1.45s	remaining: 6.34s
186:	learn: 0.5419598	total: 1.46s	remaining: 6.33s
187:	learn: 0.5418528	total: 1.46s	remaining: 6.32s
188:	learn: 0.5

336:	learn: 0.5219971	total: 2.64s	remaining: 5.19s
337:	learn: 0.5218995	total: 2.65s	remaining: 5.19s
338:	learn: 0.5218081	total: 2.66s	remaining: 5.18s
339:	learn: 0.5215458	total: 2.66s	remaining: 5.17s
340:	learn: 0.5214217	total: 2.67s	remaining: 5.16s
341:	learn: 0.5212910	total: 2.68s	remaining: 5.15s
342:	learn: 0.5211978	total: 2.69s	remaining: 5.14s
343:	learn: 0.5211273	total: 2.69s	remaining: 5.14s
344:	learn: 0.5210424	total: 2.7s	remaining: 5.13s
345:	learn: 0.5209192	total: 2.71s	remaining: 5.12s
346:	learn: 0.5208693	total: 2.72s	remaining: 5.12s
347:	learn: 0.5207078	total: 2.73s	remaining: 5.11s
348:	learn: 0.5206513	total: 2.73s	remaining: 5.1s
349:	learn: 0.5206106	total: 2.74s	remaining: 5.09s
350:	learn: 0.5205529	total: 2.75s	remaining: 5.09s
351:	learn: 0.5203522	total: 2.76s	remaining: 5.08s
352:	learn: 0.5202824	total: 2.77s	remaining: 5.07s
353:	learn: 0.5201768	total: 2.77s	remaining: 5.06s
354:	learn: 0.5200168	total: 2.78s	remaining: 5.06s
355:	learn: 0.

501:	learn: 0.5073357	total: 3.96s	remaining: 3.92s
502:	learn: 0.5072737	total: 3.96s	remaining: 3.92s
503:	learn: 0.5071666	total: 3.97s	remaining: 3.91s
504:	learn: 0.5070072	total: 3.98s	remaining: 3.9s
505:	learn: 0.5069042	total: 3.99s	remaining: 3.89s
506:	learn: 0.5068233	total: 4s	remaining: 3.88s
507:	learn: 0.5067609	total: 4s	remaining: 3.88s
508:	learn: 0.5067004	total: 4.01s	remaining: 3.87s
509:	learn: 0.5065961	total: 4.02s	remaining: 3.86s
510:	learn: 0.5065387	total: 4.03s	remaining: 3.85s
511:	learn: 0.5064254	total: 4.04s	remaining: 3.85s
512:	learn: 0.5063845	total: 4.04s	remaining: 3.84s
513:	learn: 0.5062819	total: 4.05s	remaining: 3.83s
514:	learn: 0.5062195	total: 4.06s	remaining: 3.82s
515:	learn: 0.5060788	total: 4.06s	remaining: 3.81s
516:	learn: 0.5059919	total: 4.07s	remaining: 3.8s
517:	learn: 0.5059446	total: 4.08s	remaining: 3.8s
518:	learn: 0.5058938	total: 4.09s	remaining: 3.79s
519:	learn: 0.5058434	total: 4.09s	remaining: 3.78s
520:	learn: 0.5057437

666:	learn: 0.4958741	total: 5.26s	remaining: 2.63s
667:	learn: 0.4958431	total: 5.27s	remaining: 2.62s
668:	learn: 0.4958047	total: 5.28s	remaining: 2.61s
669:	learn: 0.4957786	total: 5.29s	remaining: 2.6s
670:	learn: 0.4957369	total: 5.29s	remaining: 2.6s
671:	learn: 0.4956988	total: 5.3s	remaining: 2.59s
672:	learn: 0.4956589	total: 5.31s	remaining: 2.58s
673:	learn: 0.4955914	total: 5.32s	remaining: 2.57s
674:	learn: 0.4955564	total: 5.33s	remaining: 2.56s
675:	learn: 0.4954997	total: 5.34s	remaining: 2.56s
676:	learn: 0.4954201	total: 5.34s	remaining: 2.55s
677:	learn: 0.4953551	total: 5.35s	remaining: 2.54s
678:	learn: 0.4953094	total: 5.36s	remaining: 2.53s
679:	learn: 0.4952416	total: 5.37s	remaining: 2.52s
680:	learn: 0.4952118	total: 5.38s	remaining: 2.52s
681:	learn: 0.4951799	total: 5.38s	remaining: 2.51s
682:	learn: 0.4951336	total: 5.39s	remaining: 2.5s
683:	learn: 0.4950599	total: 5.4s	remaining: 2.5s
684:	learn: 0.4950008	total: 5.41s	remaining: 2.49s
685:	learn: 0.4949

831:	learn: 0.4875213	total: 6.59s	remaining: 1.33s
832:	learn: 0.4874696	total: 6.6s	remaining: 1.32s
833:	learn: 0.4874213	total: 6.61s	remaining: 1.31s
834:	learn: 0.4873977	total: 6.61s	remaining: 1.31s
835:	learn: 0.4873354	total: 6.62s	remaining: 1.3s
836:	learn: 0.4872843	total: 6.63s	remaining: 1.29s
837:	learn: 0.4872361	total: 6.64s	remaining: 1.28s
838:	learn: 0.4871878	total: 6.64s	remaining: 1.27s
839:	learn: 0.4871754	total: 6.65s	remaining: 1.27s
840:	learn: 0.4871606	total: 6.66s	remaining: 1.26s
841:	learn: 0.4870940	total: 6.67s	remaining: 1.25s
842:	learn: 0.4870658	total: 6.68s	remaining: 1.24s
843:	learn: 0.4870469	total: 6.68s	remaining: 1.24s
844:	learn: 0.4869944	total: 6.69s	remaining: 1.23s
845:	learn: 0.4869532	total: 6.7s	remaining: 1.22s
846:	learn: 0.4869285	total: 6.71s	remaining: 1.21s
847:	learn: 0.4868624	total: 6.71s	remaining: 1.2s
848:	learn: 0.4868178	total: 6.72s	remaining: 1.2s
849:	learn: 0.4867741	total: 6.73s	remaining: 1.19s
850:	learn: 0.486

992:	learn: 0.4804982	total: 7.88s	remaining: 55.5ms
993:	learn: 0.4804511	total: 7.88s	remaining: 47.6ms
994:	learn: 0.4803938	total: 7.89s	remaining: 39.7ms
995:	learn: 0.4803398	total: 7.9s	remaining: 31.7ms
996:	learn: 0.4802911	total: 7.91s	remaining: 23.8ms
997:	learn: 0.4802740	total: 7.92s	remaining: 15.9ms
998:	learn: 0.4802460	total: 7.92s	remaining: 7.93ms
999:	learn: 0.4802083	total: 7.93s	remaining: 0us


<catboost.core.CatBoostClassifier at 0x144e00700>

In [17]:
cat_pred = cat_fit.predict(X_test)
cat_f1 = f1_score(y_test, cat_pred)
print(f"F1 RF: {cat_f1}")

F1 RF: 0.5643992519869099
