In [None]:
import pandas as pd
import numpy as np
# Seed NumPy's global random number generator so that code drawing from it
# produces the same values on every fresh run of the notebook.
np.random.seed(42)

In [None]:
# Location of the raw CSV, relative to the notebook's working directory.
caminho_dados = "insurance.csv"

# Load the raw table into a DataFrame; later cells read from `dataset`.
dataset = pd.read_csv(caminho_dados)

# Preview the first rows as a quick sanity check of the load.
dataset.head()

In [None]:
# Size of the loaded table as a (rows, columns) tuple.
dataset.shape

In [None]:
# Print a concise summary of the table: column names, non-null counts and dtypes.
dataset.info()

In [None]:
# Descriptive statistics (by default pandas summarizes the numeric columns),
# transposed so that each column of the table becomes one row of the summary.
dataset.describe().T

## Label Encoder

In [None]:
from sklearn.preprocessing import LabelEncoder

# Source text column -> name of the integer-coded column added to `dataset`.
colunas_categoricas = {
    "gênero": "genero_tipo",
    "fumante": "fumante_tipo",
    "região": "regiao_tipo",
}

# Fit one encoder per column and keep every fitted encoder, keyed by its source
# column. Refitting a single shared encoder would retain only the mapping of the
# last column; with one encoder each, the integer codes of any column can be
# inspected through `.classes_` or mapped back with `.inverse_transform(...)`.
label_encoders = {}
for coluna_origem, coluna_codificada in colunas_categoricas.items():
    label_encoder = LabelEncoder()
    dataset[coluna_codificada] = label_encoder.fit_transform(dataset[coluna_origem])
    label_encoders[coluna_origem] = label_encoder

# After the loop `label_encoder` stays bound to the last encoder fitted
# (the one for "região").
# NOTE(review): integer codes impose an arbitrary order on the categories; for a
# multi-valued nominal column such as "região" one-hot encoding may suit linear
# models better — confirm before relying on the coefficients.

dataset.head()

In [None]:
# Text columns to discard; only the remaining columns are kept for modelling.
colunas_texto = ["fumante", "região", "gênero"]

# Work on an independent copy so later changes never touch `dataset`.
dataset_tratado = dataset.drop(columns=colunas_texto).copy()

dataset_tratado.head()

In [None]:
# Pairwise correlation between all columns of the treated table.
corr_matrix = dataset_tratado.corr()

# Correlation of each column with the target, shown from highest to lowest.
correlacao_com_alvo = corr_matrix["encargos"]
correlacao_com_alvo.sort_values(ascending=False)

## Preparando os dados para colocar no algoritmo

In [None]:
coluna_alvo = "encargos"

# Target (our y): an independent copy of the column to be predicted.
y = dataset_tratado[coluna_alvo].copy()

# Features (our X): every remaining column, with the target removed.
X = dataset_tratado.drop(columns=[coluna_alvo])

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. The fixed random_state keeps the
# shuffle, and therefore the split, identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Preview the first rows of the training features produced by the split.
X_train.head()

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.metrics import r2_score

# Candidate regressors, evaluated one after another on the same train/test
# split. Each label is the exact prefix printed on that model's report line.
# The tree-based estimators receive an explicit random_state so their scores
# are reproducible when this cell is re-run on its own, instead of depending on
# how much of NumPy's global random stream has already been consumed.
modelos = [
    ("LinearRegressor", LinearRegression()),
    ("DecisionTreeRegressor", DecisionTreeRegressor(random_state=42)),
    ("RandomForestRegressor", RandomForestRegressor(random_state=42)),
]

for nome_modelo, modelo in modelos:
    # Fit on the training split, then predict the held-out split.
    modelo.fit(X_train, y_train)
    predictions = modelo.predict(X_test)

    # Absolute / squared error metrics and the coefficient of determination.
    mae = mean_absolute_error(y_test, predictions)
    mse = mean_squared_error(y_test, predictions)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, predictions)

    # Mean absolute percentage error, computed by hand.
    # NOTE(review): this divides by |y_test|, so a target equal to zero would
    # produce inf/NaN — confirm the target column has no zeros.
    errors = np.abs(y_test - predictions)
    relative_errors = errors / np.abs(y_test)
    mape = np.mean(relative_errors) * 100

    print(f"{nome_modelo}: mae {mae}, mse {mse}, rmse {rmse}, r2 {r2}, mape {mape}")

# After the loop, `modelo`, `predictions` and the metric variables hold the
# values of the last model in the list (the random forest).