# Importar os módulos necessários

In [20]:
import warnings
# Ignore every warning from this point on (no category or module filter is given).
# NOTE(review): this likely also hides scikit-learn convergence warnings raised by the
# models trained further down — confirm that silencing them is intended.
warnings.filterwarnings("ignore")

In [21]:
import pandas as pd
# KFold is needed for the evaluation method
from sklearn.model_selection import KFold
from sklearn.metrics import (
        make_scorer,
        confusion_matrix, 
        cohen_kappa_score, 
        accuracy_score, 
        precision_score, 
        recall_score, 
        f1_score, 
        roc_auc_score
)
from sklearn.model_selection import cross_validate, StratifiedKFold
from sklearn.tree import DecisionTreeClassifier # decision trees for classification
from sklearn.neural_network import  MLPClassifier # neural networks for classification
from sklearn.naive_bayes import GaussianNB # naive bayes for classification
from sklearn.svm import SVC # support vector machines for classification

# Definir as Métricas para Avaliação dos Modelos

In [22]:
# Regression metrics
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

def custom_regression_metrics(y_true, y_pred):
    """Compute three regression error measures for a set of predictions.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted target values, aligned with ``y_true``.

    Returns:
        A ``(mse, mae, r2)`` tuple, in that order.
    """
    return (
        mean_squared_error(y_true, y_pred),
        mean_absolute_error(y_true, y_pred),
        r2_score(y_true, y_pred),
    )


In [23]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.metrics import make_scorer
import numpy as np

# Root mean squared error (RMSE)
def root_mean_squared_error(y_true, y_pred):
    """Return the square root of the mean squared error between targets and predictions."""
    mse = mean_squared_error(y_true, y_pred)
    return np.sqrt(mse)

# Scorers handed to cross_validate, keyed by the label that appears in the results table.
# make_scorer is called with its defaults, so each scorer reports the raw metric value.
# NOTE(review): the default treats higher scores as better, so the error-based scorers
# here look suited to reporting only, not to model selection — confirm against callers.
REGRESSION_METRICS = {
    label: make_scorer(metric)
    for label, metric in (
        ("MSE", mean_squared_error),
        ("MAE", mean_absolute_error),
        ("R2", r2_score),
        ("RMSE", root_mean_squared_error),  # helper defined in this notebook
    )
}



# Ler o Conjunto de Dados

In [24]:
# Read treino.csv (relative path, resolved against the current working directory).
d = pd.read_csv("treino.csv")
# Bare last expression: the notebook renders the frame as the cell output.
d

Unnamed: 0,id_nome,id_ano,engine_capacity,id_insurance,id_transmission_type,kms_driven,owner_type,id_fuel_type,max_power,seats,mileage,id_body_type,id_city,resale_price_Lakh
0,3.366610,2.275529,1199.0,0.230259,0.142984,30910.0,1.0,0.186675,83.8,5.0,23.84,0.374572,1.089000,5.66
1,2.889488,2.153535,1199.0,1.321287,0.142984,48089.0,3.0,0.186675,88.7,5.0,17.50,0.597602,1.289242,6.64
2,3.030818,2.236276,1497.0,0.230259,0.142984,51000.0,2.0,0.186675,117.3,5.0,17.40,0.562698,0.918161,5.65
3,3.667640,1.703852,1956.0,0.230259,0.551807,30000.0,1.0,0.500914,167.6,7.0,14.08,0.597602,0.918161,23.00
4,3.445791,1.879943,1197.0,1.321287,0.551807,61113.0,1.0,0.186675,83.1,5.0,21.40,0.374572,1.173949,6.87
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13804,3.667640,1.703852,2199.0,0.230259,0.551807,80000.0,2.0,0.500914,197.2,7.0,14.11,1.365886,1.033162,26.50
13805,3.366610,2.331848,1197.0,0.230259,0.142984,17923.0,1.0,0.186675,81.8,5.0,18.60,0.374572,1.221517,5.87
13806,3.445791,2.171633,1498.0,0.439582,0.142984,63389.0,2.0,0.500914,108.6,5.0,21.13,0.562698,1.033162,7.43
13807,3.103368,2.162490,1248.0,0.439582,0.142984,40000.0,2.0,0.500914,88.5,5.0,24.30,0.597602,0.870141,9.45


In [25]:
# Target is the `resale_price_Lakh` column; every other column is kept as a feature.
y = d["resale_price_Lakh"]
X = d.drop(columns="resale_price_Lakh")

# Definir o Método de Validação Cruzada

In [26]:
# Regression: plain 10-fold split, shuffled with a fixed seed so the folds are repeatable.
kf = KFold(
    n_splits=10,
    shuffle=True,
    random_state=1234,
)

# Classification: same settings, but using the stratified splitter.
splitter = StratifiedKFold(
    n_splits=10,
    shuffle=True,
    random_state=1234,
)


# Implementação dos Algoritmos de Machine Learning

### Árvores de Decisão

In [27]:
from sklearn.model_selection import KFold, cross_validate
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score



# Shuffled 10-fold splitter and a depth-3 decision tree, both with a fixed seed
kf = KFold(n_splits=10, shuffle=True, random_state=1234)
dt_regressor = DecisionTreeRegressor(max_depth=3, random_state=1234)

# Cross-validate, then average every returned array (timings and metrics) over the folds
scores = cross_validate(dt_regressor, X, y, scoring=REGRESSION_METRICS, cv=kf)
dt_scores = {name: fold_values.mean() for name, fold_values in scores.items()}  # averaged as-is; no sign flip is applied
pd.DataFrame(dt_scores, index=[0])



Unnamed: 0,fit_time,score_time,test_MSE,test_MAE,test_R2,test_RMSE
0,0.022585,0.003118,38.004513,3.221406,0.681167,6.14637


### Redes Neuronais

In [29]:
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import cross_validate

# Neural network for regression with hidden_layer_sizes=(50, 50) and a fixed seed.
# NOTE(review): max_iter=20 is a small optimisation budget, so training may stop before
# converging — confirm this cap is deliberate.
nn_regressor = MLPRegressor(
    hidden_layer_sizes=(50, 50),
    max_iter=20,
    random_state=1234,
)

# Cross-validate on the shared folds and show the fold averages as a single row
scores_nn = cross_validate(nn_regressor, X, y, scoring=REGRESSION_METRICS, cv=kf)
nn_scores = pd.DataFrame(scores_nn)
nn_scores.mean().to_frame().T


Unnamed: 0,fit_time,score_time,test_MSE,test_MAE,test_R2,test_RMSE
0,1.667878,0.003257,70.189836,4.815917,0.440163,8.138709


### Random Forest

In [30]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_validate
from sklearn.metrics import make_scorer, mean_squared_error, mean_absolute_error



# Random forest regressor with 100 estimators and a fixed seed
rf_regressor = RandomForestRegressor(
    n_estimators=100,
    random_state=1234,
)

# Cross-validate on the shared folds and show the fold averages as a single row
scores_rf = cross_validate(rf_regressor, X, y, scoring=REGRESSION_METRICS, cv=kf)
rf_scores = pd.DataFrame(scores_rf)
rf_scores.mean().to_frame().T


Unnamed: 0,fit_time,score_time,test_MSE,test_MAE,test_R2,test_RMSE
0,13.804738,0.046662,17.923186,1.553114,0.852978,4.170272


Random Forest

### Support Vector Machine