## Dicionário de Dados

| Variável | Definição                                      | Chave                                          |
|----------|------------------------------------------------|------------------------------------------------|
| survival | Sobrevivência                                  | 0 = Não, 1 = Sim                               |
| pclass   | Classe do Bilhete                              | 1 = 1ª, 2 = 2ª, 3 = 3ª                         |
| sex      | Sexo                                           |                                                |
| Age      | Idade em anos                                  |                                                |
| sibsp    | Número de irmãos / cônjuges a bordo do Titanic |                                                |
| parch    | Número de pais / filhos a bordo do Titanic     |                                                |
| ticket   | Número do Bilhete                              |                                                |
| fare     | Tarifa do Passageiro                           |                                                |
| cabin    | Número da Cabine                               |                                                |
| embarked | Porto de Embarque                              | C = Cherbourg, Q = Queenstown, S = Southampton |

- pclass: Classes econômicas:
    * 1st = Upper
    * 2nd = Middle
    * 3rd = Lower
- age: Idade estimada
- sibsp: Irmãos e cônjuges a bordo.
    * Sibling = Irmão, irmã, meio-irmão, meia-irmã
    * Spouse = Marido, esposa (amantes e noivos foram ignorados)
- parch: Pais e filhos a bordo.
    * Parent = Mãe, pai
    * Child = Filha, filho, enteada, enteado
    * Algumas crianças viajaram apenas com uma babá, portanto parch = 0 para elas.

In [None]:
# Parte do código teve IA como copiloto, principalmente na questão de autocompletar códigos e correção de erros.

In [None]:
# Resultados das análises.
# Os resultados abaixo mostram como devemos sempre nos basear em outras métricas além da acurácia.

from IPython.display import Image

Image(filename='./output/resultado.png')

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns

sns.set_style('whitegrid')

# Machine learning
from sklearn import model_selection, metrics
from sklearn.svm import LinearSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import SGDClassifier
from catboost import cv

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV, cross_val_predict
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from catboost import CatBoostClassifier

In [None]:
# Verificando os arquivos no diretório
import os

for dirname, _, filenames in os.walk('input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

In [None]:
# Testando a leitura do arquivo
train_data = pd.read_csv('input/train.csv')
train_data.describe()

In [None]:
# Testando a leitura do arquivo
test_data = pd.read_csv('input/test.csv')
test_data.describe()

In [None]:
train_data.info()

In [None]:
test_data.info()

In [None]:
train_data.isnull().sum()

In [None]:
test_data.isnull().sum()

In [None]:
# Salvar PassengerId para auxiliar na criação do arquivo que será enviado ao Kaggle
passengerId = test_data['PassengerId']
passengerId

In [None]:
# Build a single dataset from the train and test rows.
# ignore_index=True gives every row a unique positional label. Without it the
# test rows reuse the row labels of the train rows, and any later
# index-aligned column assignment into `df` (made after titanic_df has been
# re-indexed) silently copies train values onto test rows.
titanic_df = pd.concat([train_data, test_data], sort=False, ignore_index=True)

In [None]:
# Para auxiliar no momento de separar o DF titanic_df em train e test
train_index = len(train_data)
test_index = len(titanic_df) - len(test_data)

In [None]:
# Verificando o novo dataset
titanic_df.info()

In [None]:
# Verificando os dados faltantes
titanic_df.isnull().sum()

In [None]:
titanic_df.info()

In [None]:
# Verificando a distribuição dos dados
titanic_df.describe()

In [None]:
# Verificando a distribuição dos dados
train_data.describe()

In [None]:
# Criando o df onde iremos processar os modelos de ML

df = pd.DataFrame()

In [None]:
# Sobreviventes
titanic_df['Survived'].nunique()

In [None]:
titanic_df['Survived'].unique()

In [None]:
titanic_df['Survived'].isnull().sum()

In [None]:
titanic_df['Survived'].value_counts()

In [None]:
sns.countplot(data=titanic_df, x='Survived')

In [None]:
# Criando uma função para ajudar no momento de visualizar as informações de cada coluna

def titanic_func(data, column, count=True):
    """Print a quick profile of one column and draw its distribution.

    The profile lists the number of distinct values, the distinct values
    themselves, the number of nulls and the frequency of each value.

    Args:
        data: DataFrame holding ``column``. It must also hold a 'Survived'
            column when ``count`` is true, because that column is the plot hue.
        column: Name of the column to inspect.
        count: When true, draw a count plot split by 'Survived'; otherwise
            draw a distribution plot with a KDE curve.
    """
    values = data[column]

    print(f'Quantidade de valores únicos: {values.nunique()}')
    print(f'\nQuais são os valores únicos: {values.unique()}')
    print(f'\nQuantidade de valores nulos: {values.isnull().sum()}')
    print(f'\nQuantidade por opção: \n{values.value_counts()}')

    # Continuous columns: distribution plot, nothing else to do.
    if not count:
        sns.displot(values, kde=True)
        return

    # Discrete columns: one bar per value, split by survival outcome.
    sns.countplot(data=data, x=column, hue='Survived')


titanic_func(titanic_df, 'Survived')

In [None]:
df['Survived'] = titanic_df['Survived']

In [None]:
df

In [None]:
### Pclass

In [None]:
titanic_func(titanic_df, 'Pclass')

In [None]:
df['Pclass'] = titanic_df['Pclass']
df.head()

In [None]:
### Sex

In [None]:
titanic_df['Sex'].unique()

In [None]:
# Encode sex as an integer feature (female = 1, male = 0). An explicit map
# returns an integer column directly, so 'Sex' is reliably picked up by the
# later select_dtypes(include=[np.number]) calls instead of depending on
# replace() downcasting an object column.
# Run this once on the raw column: values outside the mapping become NaN.
titanic_df['Sex'] = titanic_df['Sex'].map({'female': 1, 'male': 0})

In [None]:
titanic_func(titanic_df, 'Sex')

In [None]:
df['Sex'] = titanic_df['Sex']
df.head()

In [None]:
### Age

In [None]:
titanic_func(titanic_df, 'Age', count=False)

In [None]:
# Cria uma cópia do dataframe original, mantendo apenas as colunas numéricas
numeric_df = titanic_df.select_dtypes(include=[np.number])
correlation_matrix = numeric_df.corr()

In [None]:
titanic_df[titanic_df['Pclass'] == 1]['Age'].mean()

In [None]:
titanic_df[titanic_df['Pclass'] == 2]['Age'].mean()

In [None]:
titanic_df[titanic_df['Pclass'] == 3]['Age'].mean()

In [None]:
for i in sorted(numeric_df['Pclass'].unique()):
    print(f'Pessoas da {i}º classe tem em média {round(numeric_df[numeric_df["Pclass"] == i]["Age"].mean(), 2)} anos')

In [None]:
titanic_df[titanic_df['Pclass'] == 1]['Age'].isnull().sum()

In [None]:
titanic_df['Age'].isnull().sum()

In [None]:
# Preenchendo os valores nulos com a média de idade de cada classe
titanic_df['Age'] = titanic_df['Age'].fillna(titanic_df.groupby('Pclass')['Age'].transform('mean'))


In [None]:
titanic_df['Age'].isnull().sum()

In [None]:
titanic_func(titanic_df, 'Age', count=False)

In [None]:
df['Age'] = titanic_df['Age']

In [None]:
df.head()

In [None]:
### SibSp

In [None]:
titanic_func(titanic_df, 'SibSp')

In [None]:
df['SibSp'] = titanic_df['SibSp']
df.head()

In [None]:
### Parch

In [None]:
titanic_func(titanic_df, 'Parch')

In [None]:
df['Parch'] = titanic_df['Parch']

In [None]:
df.head()

In [None]:
### FamilySize 

In [None]:
titanic_df['FamilySize'] = titanic_df['SibSp'] + titanic_df['Parch'] + 1

In [None]:
df['FamilySize'] = titanic_df['FamilySize']
df.head()

In [None]:
titanic_func(titanic_df, 'FamilySize')

In [None]:
### Fare

In [None]:
titanic_func(titanic_df, 'Fare', count=False)

In [None]:
titanic_df['Fare'].isnull().sum()

In [None]:
# Preenchendo os valores nulos com a média de tarifa de cada classe
titanic_df['Fare'] = titanic_df['Fare'].fillna(titanic_df.groupby('Pclass')['Fare'].transform('mean'))

In [None]:
titanic_df['Fare'].isnull().sum()

In [None]:
titanic_func(titanic_df, 'Fare', count=False)

In [None]:
# media da tarifa por classe
titanic_df.groupby('Pclass')['Fare'].mean()

In [None]:
titanic_df.isnull().sum()

In [None]:
df['Fare'] = titanic_df['Fare']
df.head()

In [None]:
### Cabin

In [None]:
titanic_df['Cabin'].isnull().sum()

In [None]:
titanic_df['Cabin'].nunique()

In [None]:
titanic_df['Cabin'].unique()

In [None]:
titanic_df['Cabin'].value_counts()

In [None]:
# Missing cabins get the placeholder 'N'. Assign the result back instead of
# calling fillna(inplace=True) on the selected column: the in-place call acts
# on an intermediate object and is not guaranteed to update titanic_df.
titanic_df['Cabin'] = titanic_df['Cabin'].fillna('N')

In [None]:
titanic_df['Cabin'].isnull().sum()

In [None]:
titanic_df['Cabin'].nunique()

In [None]:
titanic_df['Cabin'].unique()

In [None]:
titanic_df['Cabin'].value_counts()

In [None]:
titanic_df['Cabin'] = titanic_df['Cabin'].apply(lambda x: x[0])

In [None]:
titanic_df['Cabin'].unique()

In [None]:
titanic_df['Cabin'].value_counts()

In [None]:
titanic_func(titanic_df, 'Cabin')

In [None]:
df['Cabin'] = titanic_df['Cabin']

In [None]:
df.head()

In [None]:
### Embarked

In [None]:
print(titanic_df[titanic_df.index.duplicated()])

In [None]:
titanic_df = titanic_df.reset_index(drop=True)

In [None]:
titanic_func(titanic_df, 'Embarked')

In [None]:
titanic_df[titanic_df['Embarked'] == 'S']['Survived'].mean()

In [None]:
titanic_df[titanic_df['Embarked'] == "S"]['Pclass'].mean()

In [None]:
titanic_df[titanic_df['Embarked'] == 'C']['Survived'].mean()

In [None]:
titanic_df[titanic_df['Embarked'] == "C"]['Pclass'].mean()

In [None]:
titanic_df[titanic_df['Embarked'] == 'Q']['Survived'].mean()

In [None]:
titanic_df[titanic_df['Embarked'] == "Q"]['Pclass'].mean()

In [None]:
# Missing ports of embarkation are filled with 'S'. Assign the result back
# instead of calling fillna(inplace=True) on the selected column: the in-place
# call acts on an intermediate object and is not guaranteed to update
# titanic_df.
titanic_df['Embarked'] = titanic_df['Embarked'].fillna('S')

In [None]:
titanic_df['Embarked'].isnull().sum()

In [None]:
titanic_func(titanic_df, 'Embarked')

In [None]:
df['Embarked'] = titanic_df['Embarked']

In [None]:
### Nome

In [None]:
titanic_df.head()

In [None]:
titanic_df['Name']

In [None]:
# Extract the title: the run of letters that follows a space and ends with a
# period. The pattern is a raw string so that "\." reaches the regex engine
# as written instead of relying on an invalid string escape.
titanic_df['Title'] = titanic_df['Name'].str.extract(r' ([A-Za-z]+)\.', expand=False)

In [None]:
titanic_df['Title'].unique()

In [None]:
titanic_df['Title'].value_counts()

In [None]:
titanic_df['Title'].isnull().sum()

In [None]:
titanic_df['Title'] = titanic_df['Title'].replace(
    ['Lady', 'Countess', 'Capt', 'Col', 'Don', 'Dr', 'Major', 'Rev', 'Sir', 'Jonkheer', 'Dona'], 'Rare')

In [None]:
titanic_df['Title'] = titanic_df['Title'].replace('Mlle', 'Miss')

In [None]:
titanic_df['Title'] = titanic_df['Title'].replace('Ms', 'Miss')

In [None]:
titanic_df['Title'] = titanic_df['Title'].replace('Mme', 'Mrs')

In [None]:
titanic_df['Title'].unique()

In [None]:
titanic_df['Title'].value_counts()

In [None]:
titanic_func(titanic_df, 'Title')

In [None]:
df['Title'] = titanic_df['Title']

In [None]:
df.head()

In [None]:
titanic_df.isnull().sum()

In [None]:
df.isnull().sum()

In [None]:
# Verificando a correlação entre as variáveis numéricas

In [None]:
numeric_df = df.select_dtypes(include=[np.number])
correlation_matrix = numeric_df.corr()

In [None]:
numeric_df.corr()

In [None]:
# Verificando a correlação entre as variáveis categóricas

In [None]:
categoric_df = df.select_dtypes(exclude=[np.number])

In [None]:
categoric_df.head()

In [None]:
categoric_df = pd.get_dummies(categoric_df)

In [None]:
categoric_df.head()

In [None]:
categoric_df.corr()

In [None]:
# Verificando a correlação entre as variáveis numéricas e categóricas

In [None]:
df = pd.concat([numeric_df, categoric_df], axis=1)

In [None]:
df.head()

In [None]:
df.corr()

In [None]:
train = df[:train_index].copy()
test = df[test_index:].copy()

train['Survived'] = train['Survived'].astype(int)

In [None]:
train.head()

In [None]:
test.head()

In [None]:
X = train.drop('Survived', axis=1)
y = train['Survived']

In [None]:
X_test = test.drop('Survived', axis=1)

In [None]:
### Testando varios modelos

In [None]:
def func_acuracia(algoritmo, X_train, y_train, vc):
    """Return training accuracy and cross-validated accuracy, in percent.

    Args:
        algoritmo: Estimator exposing ``fit`` and ``score``; it is fitted on
            the data passed in.
        X_train: Feature matrix.
        y_train: Target labels.
        vc: Value forwarded as ``cv`` to ``cross_val_predict``.

    Returns:
        Tuple ``(acuracia, acuracia_vc)``, each rounded to two decimals:
        the score of the fitted model on the data it was trained on, and the
        accuracy of the cross-validated predictions.
    """
    fitted = algoritmo.fit(X_train, y_train)
    train_score = fitted.score(X_train, y_train)

    # Out-of-fold predictions for every training row.
    cv_predictions = model_selection.cross_val_predict(
        algoritmo, X_train, y_train, cv=vc, n_jobs=-1)
    cv_score = metrics.accuracy_score(y_train, cv_predictions)

    return round(train_score * 100, 2), round(cv_score * 100, 2)

In [None]:
from sklearn.metrics import recall_score

from sklearn.metrics import f1_score


def func_acuracia_GS(algoritmo, params, X_train, y_train, cv):
    """Tune an estimator with GridSearchCV and report its scores in percent.

    Args:
        algoritmo: Estimator to tune.
        params: Parameter grid handed to ``GridSearchCV``.
        X_train: Feature matrix.
        y_train: Target labels.
        cv: Cross-validation setting used both for the grid search and for
            the cross-validated predictions of the selected model.

    Returns:
        Tuple ``(modelo, acuracia, acuracia_cv, recall, f1)``: the best
        estimator found, its score on the training data, and the accuracy,
        macro recall and macro F1 of its cross-validated predictions. All
        scores are percentages rounded to two decimals.
    """
    def as_percent(score):
        return round(score * 100, 2)

    # Search the grid and keep the best candidate.
    search = GridSearchCV(algoritmo, params, cv=cv, n_jobs=-1, verbose=1)
    search.fit(X_train, y_train)
    best_model = search.best_estimator_

    # Score on the same data the model was trained on.
    train_accuracy = as_percent(best_model.score(X_train, y_train))

    # Out-of-fold predictions of the selected model drive the other metrics.
    cv_predictions = cross_val_predict(best_model, X_train, y_train, cv=cv, n_jobs=-1)
    cv_accuracy = as_percent(accuracy_score(y_train, cv_predictions))

    # average='macro' can be dropped for binary labels.
    macro_recall = as_percent(recall_score(y_train, cv_predictions, average='macro'))
    macro_f1 = as_percent(f1_score(y_train, cv_predictions, average='macro'))

    print(f"Melhores parâmetros: {search.best_params_}")
    print(f"Acurácia após GridSearchCV: {train_accuracy}")
    print(f"Acurácia de validação cruzada: {cv_accuracy}")
    print(f"Recall: {macro_recall}")
    print(f"F1-Score: {macro_f1}")

    return best_model, train_accuracy, cv_accuracy, macro_recall, macro_f1

In [None]:
### Regressão Logística

In [None]:
acc_log, acc_log_vc = func_acuracia(LogisticRegression(max_iter=1000), X, y, 10)

print(f'Acurácia Regressão Logística: {acc_log}')
print(f'Acurácia Regressão Logística com Validação Cruzada: {acc_log_vc}')

In [None]:
### Regressao Logistica com GridSearchCV

In [None]:
# Hyper-parameter grid for LogisticRegression.
# "No regularisation" is spelled None: the string 'none' is rejected by
# current scikit-learn releases, so every such candidate became a failed fit.
# NOTE(review): 'elasticnet' presumably still fails for every candidate here
# (it needs solver='saga' and an l1_ratio), as does penalty=None with
# 'liblinear'; confirm and prune the grid if those failed fits are unwanted.
log_params = {'penalty': ['l2', 'elasticnet', None],
              'C': [0.1, 0.5, 1, 5, 10],
              'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
              'max_iter': [1000, 10000, 25000, 50000]}

modelo, acc_log, acc_log_cv, recall_log, f1_log = func_acuracia_GS(LogisticRegression(), log_params, X, y, 10)
print(f'Parâmetros otimizados do Modelo: {modelo}')
print(f'Acurácia Regressão Logística: {acc_log}')
print(f'Acurácia Regressão Logística com Validação Cruzada: {acc_log_cv}')
print(f'Recall Regressão Logística: {recall_log}')
print(f'F1-Score Regressão Logística: {f1_log}')

In [None]:
### KNN

In [None]:
acc_knn, acc_knn_vc = func_acuracia(KNeighborsClassifier(n_neighbors=3), X, y, 10)

print(f'Acurácia KNN: {acc_knn}')
print(f'Acurácia KNN com Validação Cruzada: {acc_knn_vc}')

In [None]:
### KNN com GridSearchCV

In [None]:
knn_params = {'n_neighbors': [3, 5, 10, 20],
              'weights': ['uniform', 'distance'],
              'metric': ['euclidean', 'manhattan', 'minkowski']}

modelo, acc_knn, acc_knn_cv, recall_knn, f1_knn = func_acuracia_GS(KNeighborsClassifier(), knn_params, X, y, 10)
print(f'Parâmetros otimizados do Modelo: {modelo}')
print(f'Acurácia KNN: {acc_knn}')
print(f'Acurácia KNN com Validação Cruzada: {acc_knn_cv}')
print(f'Recall KNN: {recall_knn}')
print(f'F1-Score KNN: {f1_knn}')

In [None]:
### Gaussian Naive Bayes

In [None]:
acc_gaussian, acc_gaussian_vc = func_acuracia(GaussianNB(), X, y, 10)

print(f'Acurácia Gaussian Naive Bayes: {acc_gaussian}')
print(f'Acurácia Gaussian Naive Bayes com Validação Cruzada: {acc_gaussian_vc}')

In [None]:
### Gaussian Naive Bayes com GridSearchCV

In [None]:
gaussian_params = {'var_smoothing': np.logspace(0, -9, num=100)}

modelo, acc_gaussian, acc_gaussian_cv, recall_gaussian, f1_gaussian = func_acuracia_GS(GaussianNB(), gaussian_params,
                                                                                       X, y, 10)
print(f'Parâmetros otimizados do Modelo: {modelo}')
print(f'Acurácia Gaussian Naive Bayes: {acc_gaussian}')
print(f'Acurácia Gaussian Naive Bayes com Validação Cruzada: {acc_gaussian_cv}')
print(f'Recall Gaussian Naive Bayes: {recall_gaussian}')
print(f'F1-Score Gaussian Naive Bayes: {f1_gaussian}')

In [None]:
### Linear SVC

In [None]:
acc_linear_svc, acc_linear_svc_vc = func_acuracia(LinearSVC(max_iter=10000), X, y, 10)

print(f'Acurácia Linear SVC: {acc_linear_svc}')
print(f'Acurácia Linear SVC com Validação Cruzada: {acc_linear_svc_vc}')

In [None]:
### Linear SVC com GridSearchCV

In [None]:
svc_params = {'C': [0.1, 0.5, 1, 5, 10],
              'max_iter': [1000, 10000, 25000, 50000]}

modelo, acc_svc, acc_svc_cv, recall_svc, f1_svc = func_acuracia_GS(LinearSVC(), svc_params, X, y, 10)
print(f'Parâmetros otimizados do Modelo: {modelo}')
print(f'Acurácia Linear SVC: {acc_svc}')
print(f'Acurácia Linear SVC com Validação Cruzada: {acc_svc_cv}')
print(f'Recall Linear SVC: {recall_svc}')
print(f'F1-Score Linear SVC: {f1_svc}')

In [None]:
### Stochastic Gradient Descent

In [None]:
acc_sgd, acc_sgd_vc = func_acuracia(SGDClassifier(max_iter=1000), X, y, 10)

print(f'Acurácia Stochastic Gradient Descent: {acc_sgd}')
print(f'Acurácia Stochastic Gradient Descent com Validação Cruzada: {acc_sgd_vc}')

In [None]:
### Stochastic Gradient Descent com GridSearchCV

In [None]:
# Hyper-parameter grid for SGDClassifier.
# The logistic loss is named 'log_loss' and "no penalty" is spelled None;
# the older spellings 'log' and 'none' are rejected by current scikit-learn
# releases, so every candidate using them became a failed fit.
sgd_params = {
    'loss': ['hinge', 'log_loss', 'modified_huber', 'squared_hinge', 'perceptron'],
    'alpha': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
    'penalty': [None, 'l1', 'l2', 'elasticnet']
}

modelo, acc_sgd, acc_sgd_cv, recall_sgd, f1_sgd = func_acuracia_GS(SGDClassifier(), sgd_params, X, y, 10)
print(f'Parâmetros otimizados do Modelo: {modelo}')
print(f'Acurácia Stochastic Gradient Descent: {acc_sgd}')
print(f'Acurácia Stochastic Gradient Descent com Validação Cruzada: {acc_sgd_cv}')
print(f'Recall Stochastic Gradient Descent: {recall_sgd}')
print(f'F1-Score Stochastic Gradient Descent: {f1_sgd}')

In [None]:
### Decision Tree

In [None]:
acc_decision_tree, acc_decision_tree_vc = func_acuracia(DecisionTreeClassifier(), X, y, 10)

print(f'Acurácia Decision Tree: {acc_decision_tree}')
print(f'Acurácia Decision Tree com Validação Cruzada: {acc_decision_tree_vc}')

In [None]:
### Decision Tree com GridSearchCV

In [None]:
decision_tree_params = {'criterion': ['gini', 'entropy'],
                        'splitter': ['best', 'random'],
                        'max_depth': [None, 5, 10, 15, 20],
                        'min_samples_split': [2, 5, 10]}

modelo, acc_decision_tree, acc_decision_tree_cv, recall_decision_tree, f1_decision_tree = func_acuracia_GS(
    DecisionTreeClassifier(), decision_tree_params, X, y, 10)
print(f'Parâmetros otimizados do Modelo: {modelo}')
print(f'Acurácia Decision Tree: {acc_decision_tree}')
print(f'Acurácia Decision Tree com Validação Cruzada: {acc_decision_tree_cv}')
print(f'Recall Decision Tree: {recall_decision_tree}')
print(f'F1-Score Decision Tree: {f1_decision_tree}')

In [None]:
### Random Forest

In [None]:
acc_random_forest, acc_random_forest_vc = func_acuracia(RandomForestClassifier(n_estimators=100), X, y, 10)

print(f'Acurácia Random Forest: {acc_random_forest}')
print(f'Acurácia Random Forest com Validação Cruzada: {acc_random_forest_vc}')

In [None]:
### Random Forest com GridSearchCV

In [None]:
random_forest_params = {'n_estimators': [50, 100, 200],
                        'criterion': ['gini', 'entropy'],
                        'max_depth': [None, 5, 10, 15, 20],
                        'min_samples_split': [2, 5, 10]}

modelo, acc_random_forest, acc_random_forest_cv, recall_random_forest, f1_random_forest = func_acuracia_GS(
    RandomForestClassifier(), random_forest_params, X, y, 10)
print(f'Parâmetros otimizados do Modelo: {modelo}')
print(f'Acurácia Random Forest: {acc_random_forest}')
print(f'Acurácia Random Forest com Validação Cruzada: {acc_random_forest_cv}')
print(f'Recall Random Forest: {recall_random_forest}')
print(f'F1-Score Random Forest: {f1_random_forest}')

In [None]:
### Gradient Boosting Classifier

In [None]:
acc_gradient_boosting, acc_gradient_boosting_vc = func_acuracia(GradientBoostingClassifier(), X, y, 10)

print(f'Acurácia Gradient Boosting Classifier: {acc_gradient_boosting}')
print(f'Acurácia Gradient Boosting Classifier com Validação Cruzada: {acc_gradient_boosting_vc}')

In [None]:
### Gradient Boosting Classifier com GridSearchCV

In [None]:
# Hyper-parameter grid for GradientBoostingClassifier.
# Uses the current option names: 'log_loss' replaces 'deviance' and
# 'squared_error' replaces 'mse'. The 'mae' criterion has no replacement and
# is dropped. The old spellings are rejected by current scikit-learn
# releases, so every candidate using them became a failed fit.
gradient_boosting_params = {'loss': ['log_loss', 'exponential'],
                            'learning_rate': [0.01, 0.1, 1],
                            'n_estimators': [50, 100, 200],
                            'criterion': ['friedman_mse', 'squared_error'],
                            'max_depth': [None, 5, 10, 15, 20],
                            'min_samples_split': [2, 5, 10]}

modelo, acc_gradient_boosting, acc_gradient_boosting_cv, recall_gradient_boosting, f1_gradient_boosting = func_acuracia_GS(
    GradientBoostingClassifier(), gradient_boosting_params, X, y, 10)
print(f'Parâmetros otimizados do Modelo: {modelo}')
print(f'Acurácia Gradient Boosting Classifier: {acc_gradient_boosting}')
print(f'Acurácia Gradient Boosting Classifier com Validação Cruzada: {acc_gradient_boosting_cv}')
print(f'Recall Gradient Boosting Classifier: {recall_gradient_boosting}')
print(f'F1-Score Gradient Boosting Classifier: {f1_gradient_boosting}')

In [None]:
### CatBoost

In [None]:
acc_catboost, acc_catboost_vc = func_acuracia(CatBoostClassifier(verbose=False), X, y, 10)

print(f'Acurácia CatBoost: {acc_catboost}')
print(f'Acurácia CatBoost com Validação Cruzada: {acc_catboost_vc}')

In [None]:
### CatBoost com GridSearchCV (cancelado pois demorou muito)

In [None]:
##catboost_params = {'iterations': [50, 100, 200],
##                   'learning_rate': [0.01, 0.1, 1],
##                   'depth': [None, 5, 10, 15, 20]}
##
##modelo, acc_catboost, acc_catboost_cv, recall_catboost, f1_catboost = func_acuracia_GS(
##    CatBoostClassifier(verbose=True),
##    catboost_params, X, y, 3)
##print(f'Parâmetros otimizados do Modelo: {modelo}')
##print(f'Acurácia CatBoost: {acc_catboost}')
##print(f'Acurácia CatBoost com Validação Cruzada: {acc_catboost_cv}')
##print(f'Recall CatBoost: {recall_catboost}')
##print(f'F1-Score CatBoost: {f1_catboost}')

In [None]:
### Comparando os modelos

In [None]:
# CatBoost was not tuned with GridSearchCV, so recall and F1 were never
# computed for it. NaN marks them as missing; 0 would read as a real score.
recall_catboost = np.nan
f1_catboost = np.nan

# Comparison table. Every row except CatBoost reports the GridSearchCV-tuned
# model in all four metric columns, so that a row never mixes the tuned
# training accuracy with the cross-validated accuracy of the untuned baseline.
# CatBoost has only baseline results available.
models = pd.DataFrame({
    'Modelo': ['Regressão Logística', 'KNN', 'Gaussian Naive Bayes', 'Linear SVC', 'Stochastic Gradient Descent',
               'Decision Tree', 'Random Forest', 'Gradient Boosting Classifier', 'CatBoost'],
    'Acurácia': [acc_log, acc_knn, acc_gaussian, acc_svc, acc_sgd, acc_decision_tree, acc_random_forest,
                 acc_gradient_boosting, acc_catboost],
    'Acurácia com Validação Cruzada': [acc_log_cv, acc_knn_cv, acc_gaussian_cv, acc_svc_cv, acc_sgd_cv,
                                       acc_decision_tree_cv, acc_random_forest_cv, acc_gradient_boosting_cv,
                                       acc_catboost_vc],
    'Recall': [recall_log, recall_knn, recall_gaussian, recall_svc, recall_sgd, recall_decision_tree,
               recall_random_forest, recall_gradient_boosting, recall_catboost],
    'F1-Score': [f1_log, f1_knn, f1_gaussian, f1_svc, f1_sgd, f1_decision_tree, f1_random_forest, f1_gradient_boosting,
                 f1_catboost]})

In [None]:
models