In [None]:
!pip install catboost

## **DataSet**

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor, AdaBoostRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

from xgboost import XGBRegressor
from lightgbm import LGBMRegressor

import warnings
# Ignore every Python warning raised from this point on, presumably to keep
# cell outputs free of library notices. This is a blanket filter (no category
# or module is given), so it also hides warnings that may flag real problems.
warnings.filterwarnings("ignore")

In [None]:
# Show every column when a DataFrame is displayed (no column truncation).
pd.options.display.max_columns = None

In [None]:
# Load the car price CSV into a DataFrame.
# NOTE(review): the absolute path looks like a Colab Google Drive mount — it
# will only resolve in a session where that drive is mounted.
carPrice = pd.read_csv(
    filepath_or_buffer="/content/drive/MyDrive/Estudos Manu + Rodriguinho/2° ano/Machine Learning/Abril/03.04/CarPrice_Assignment(1).csv",
)

## **Análise Exploratória**

In [None]:
# Peek at five randomly drawn rows (no seed is set, so rows differ per run).
carPrice.sample(n=5)

In [None]:
# Print the DataFrame summary (columns, non-null counts, dtypes, memory usage).
carPrice.info()

In [None]:
# Count the missing values in each column
carPrice.isna().sum()

In [None]:
# Descriptive statistics for the DataFrame's columns.
carPrice.describe()

## **Exploração Gráfica**

In [None]:
# Inspect the distribution of the target variable.
# histplot returns the Axes it drew on, so labels and title are set directly
# on that Axes instead of going through the pyplot state machine.
(
    sns.histplot(carPrice["price"])
    .set(xlabel='Price', ylabel='Ocorrências', title='Histograma com Seaborn')
)
plt.show()

In [None]:
# Pairwise correlation between the int64 / float64 columns.
# `num_cols` is reused further down to build the feature matrix.
num_cols = carPrice.select_dtypes(include=['int64', 'float64']).columns
correlacao = carPrice.loc[:, num_cols].corr()

# Render the correlation matrix as an annotated heatmap; heatmap returns the
# Axes it drew on, which is used to set the title.
plt.figure(figsize=(8, 6))
sns.heatmap(
    correlacao,
    annot=True,
    fmt=".2f",
    cmap='coolwarm',
    linewidths=0.5,
).set_title("Matriz de Correlação")
plt.show()

## **Treinando modelos e escolhendo variáveis**

In [None]:
def assertividade(y_test, y_pred):
    """Compute regression error metrics for a set of predictions.

    Parameters
    ----------
    y_test : array-like
        Observed target values.
    y_pred : array-like
        Predicted target values, in the same order as ``y_test``.

    Returns
    -------
    dict
        Metrics keyed by "MSE", "MAE", "RMSE", "MAPE" (in percent) and "R²".

    Notes
    -----
    Both inputs are converted to NumPy arrays first, so every metric compares
    values strictly by position. With raw pandas objects the MAPE arithmetic
    aligns by index label instead: a ``y_pred`` Series whose index differs
    from ``y_test``'s would produce NaNs that the mean silently skips,
    yielding a wrong MAPE.

    MAPE divides by ``y_test``; where ``y_test`` is zero the result is
    ``inf`` (or ``nan`` for 0/0) rather than an exception.
    """
    # Strip any pandas index so comparisons are positional, not label-based.
    y_true = np.asarray(y_test)
    y_hat = np.asarray(y_pred)

    mse = mean_squared_error(y_true, y_hat)
    mae = mean_absolute_error(y_true, y_hat)
    rmse = np.sqrt(mse)
    # Keep division by zero silent, as pandas arithmetic was in the original.
    with np.errstate(divide="ignore", invalid="ignore"):
        mape = np.mean(np.abs((y_true - y_hat) / y_true)) * 100
    r2 = r2_score(y_true, y_hat)

    return {
        "MSE": mse,
        "MAE": mae,
        "RMSE": rmse,
        "MAPE": mape,
        "R²": r2
    }

In [None]:
# Features: the numeric columns minus the target ('price') and 'car_ID'
# (presumably a row identifier with no predictive meaning — verify).
X = carPrice[num_cols].drop(columns=['price', 'car_ID'])
# Target: the price column.
y = carPrice.loc[:, 'price']

In [None]:
# Split into train and test sets: 30% of the rows are held out for testing,
# and the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

### **Bagging models**

#### **Random Forest**

In [None]:
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)

In [None]:
rf_model.fit(X_train, y_train)

In [None]:
y_pred = rf_model.predict(X_test)

In [None]:
assertividade(y_test, y_pred)

#### **Extra Trees Regressor**

In [None]:
et_model = ExtraTreesRegressor(n_estimators=100, random_state=42)

In [None]:
et_model.fit(X_train, y_train)

In [None]:
y_pred = et_model.predict(X_test)

In [None]:
assertividade(y_test, y_pred)

### **Boosting models**

#### **Gradient Boosting Regressor**

In [None]:
gb_model = GradientBoostingRegressor(n_estimators=100, random_state=42)

In [None]:
gb_model.fit(X_train, y_train)

In [None]:
y_pred = gb_model.predict(X_test)

In [None]:
assertividade(y_test, y_pred)

#### **AdaBoost Regressor**

In [None]:
ada_model = AdaBoostRegressor(n_estimators=100, random_state=42)

In [None]:
ada_model.fit(X_train, y_train)

In [None]:
y_pred = ada_model.predict(X_test)

In [None]:
assertividade(y_test, y_pred)

#### **XGBoost Regressor**

In [None]:
xgb_model = XGBRegressor(n_estimators=100, random_state=42)

In [None]:
xgb_model.fit(X_train, y_train)

In [None]:
y_pred = xgb_model.predict(X_test)

In [None]:
assertividade(y_test, y_pred)

#### **LightGBM Regressor**

In [None]:
lgbm_model = LGBMRegressor(n_estimators=100, random_state=42)

In [None]:
lgbm_model.fit(X_train, y_train)

In [None]:
y_pred = lgbm_model.predict(X_test)

In [None]:
assertividade(y_test, y_pred)