# Importar os módulos necessários

In [65]:
import warnings
warnings.filterwarnings("ignore")

In [66]:
import pandas as pd
from sklearn.metrics import (
        make_scorer,
        confusion_matrix, 
        cohen_kappa_score, 
        accuracy_score, 
        precision_score, 
        recall_score, 
        f1_score, 
        roc_auc_score
)
from sklearn.model_selection import cross_validate, StratifiedKFold
from sklearn.tree import DecisionTreeClassifier # decision trees for classification
from sklearn.neural_network import  MLPClassifier # neural networks for classification
from sklearn.naive_bayes import GaussianNB # naive bayes for classification
from sklearn.svm import SVC # support vector machines for classification

# Definir as Métricas para Avaliação dos Modelos

In [67]:
def specificity_score(y_true, y_pred):
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    return tn / (tn+fp)

In [68]:
METRICS = {
        "accuracy": make_scorer(accuracy_score),
        "precision": make_scorer(precision_score),
        "recall": make_scorer(recall_score),
        "f1": make_scorer(f1_score),
        "AUC": make_scorer(roc_auc_score, needs_proba=True),
        "specificity": make_scorer(specificity_score),
        "kappa":make_scorer(cohen_kappa_score)
}

# Ler o Conjunto de Dados

In [69]:
d = pd.read_csv("Xcenario1.csv")
d

Unnamed: 0,id_body_type,id_modelos,id_categoria_marca,ano,id_city,id_insurance,seats,id_engine_capacity_group,id_fuel_type,id_kms_group,id_max_power_group,id_owner_type,resale_price_Lakh
0,0.364112,3.086522,0.073531,2019.0,1.096102,0.256633,5.0,0.126137,0.180334,0.364786,0.206644,0.150465,5.66
1,0.598869,2.403638,0.073531,2018.0,1.281577,1.306401,5.0,0.126137,0.180334,0.364786,0.206644,1.365236,6.64
2,0.578053,2.583846,0.073531,2015.0,0.922710,0.256633,5.0,0.126137,0.180334,0.255059,0.446040,0.620328,5.65
3,0.598869,3.650793,0.073531,2021.0,0.922710,0.256633,7.0,0.126137,0.514919,0.364786,0.446040,0.150465,23.00
4,0.364112,2.747703,0.073531,2019.0,1.170307,1.306401,5.0,0.126137,0.180334,0.255059,0.206644,0.150465,6.87
...,...,...,...,...,...,...,...,...,...,...,...,...,...
13420,1.347597,3.650793,0.903640,2021.0,1.027199,0.256633,7.0,1.105074,0.514919,0.255059,0.446040,0.620328,26.50
13421,0.364112,2.951823,0.073531,2017.0,1.223199,0.256633,5.0,0.126137,0.180334,0.364786,0.206644,0.150465,5.87
13422,0.578053,3.282816,0.903640,2017.0,1.027199,0.445408,5.0,0.126137,0.514919,0.255059,0.446040,0.620328,7.43
13423,0.598869,2.747703,0.073531,2017.0,0.868320,0.445408,5.0,0.126137,0.514919,0.364786,0.206644,0.620328,9.45


In [70]:
X, y = d.drop("resale_price_Lakh", axis=1), d["resale_price_Lakh"]

# Definir o Método de Validação Cruzada

In [71]:
splitter = StratifiedKFold(n_splits=10, shuffle=True, random_state=1234)

# Implementação dos Algoritmos de Machine Learning

### Árvores de Decisão

In [73]:
dt = DecisionTreeClassifier(max_depth=3, random_state=1234)
splitter = StratifiedKFold(10, random_state=1234, shuffle=True)
scores = cross_validate(dt, X, y, cv=splitter, scoring=METRICS)
dt_scores = pd.DataFrame(scores)
pd.DataFrame(dt_scores.mean()).T

ValueError: Supported target types are: ('binary', 'multiclass'). Got 'continuous' instead.

### Redes Neuronais

In [27]:
nn = MLPClassifier(hidden_layer_sizes=(50,50), max_iter=20, random_state=1234)
scores_nn = cross_validate(nn, X, y, cv=splitter, scoring=METRICS)
nn_scores = pd.DataFrame(scores_nn)
pd.DataFrame(nn_scores.mean()).T

ValueError: Supported target types are: ('binary', 'multiclass'). Got 'continuous' instead.

### Naive Bayes

In [None]:
nb = GaussianNB()
scores_nb = cross_validate(nb, X, y, cv=splitter, scoring=METRICS)
nb_scores = pd.DataFrame(scores_nb)
pd.DataFrame(nb_scores.mean()).T

### Support Vector Machine

In [55]:
svm = SVC(random_state=1234, probability=True)
scores_svm = cross_validate(svm, X, y, cv=splitter, scoring=METRICS)
svm_scores = pd.DataFrame(scores_svm)
pd.DataFrame(svm_scores.mean()).T

ValueError: Supported target types are: ('binary', 'multiclass'). Got 'continuous' instead.