# Modelos de Regressão

In [3]:
import numpy as np 
import pandas as pd

# Read the house data CSV (path is relative to the notebook's working directory)
# and preview the first five rows.
df = pd.read_csv(filepath_or_buffer='data/kc_house_data.csv')
df.head(n=5)

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,7129300520,20141013T000000,221900.0,3,1.0,1180,5650,1.0,0,0,...,7,1180,0,1955,0,98178,47.5112,-122.257,1340,5650
1,6414100192,20141209T000000,538000.0,3,2.25,2570,7242,2.0,0,0,...,7,2170,400,1951,1991,98125,47.721,-122.319,1690,7639
2,5631500400,20150225T000000,180000.0,2,1.0,770,10000,1.0,0,0,...,6,770,0,1933,0,98028,47.7379,-122.233,2720,8062
3,2487200875,20141209T000000,604000.0,4,3.0,1960,5000,1.0,0,0,...,7,1050,910,1965,0,98136,47.5208,-122.393,1360,5000
4,1954400510,20150218T000000,510000.0,3,2.0,1680,8080,1.0,0,0,...,8,1680,0,1987,0,98074,47.6168,-122.045,1800,7503


In [4]:
# Dataset dimensions as a (rows, columns) tuple.
df.shape

(21613, 21)

In [5]:
# Narrow the frame to the target (price) plus a small set of candidate columns.
colunas_selecionadas = [
    'id', 'date', 'price',
    'bedrooms', 'bathrooms',
    'sqft_living', 'sqft_lot',
    'floors', 'waterfront',
]
df = df.loc[:, colunas_selecionadas]

# The record identifier and the date column are not used as model inputs.
colunas_para_remover = ['id', 'date']
df = df.drop(columns=colunas_para_remover)

# Report the size of the reduced frame.
rows, cols = df.shape
print(f'Linhas: {rows}. Colunas: {cols}')

Linhas: 21613. Colunas: 7


In [6]:
# TARGET: the column the models are trained to predict.
y = df['price']
# FEATURES: every remaining column, with the target removed from the inputs.
x = df.drop(columns=['price'])

In [7]:
# Feature scaler
from sklearn.preprocessing import StandardScaler

# Standardize every feature column (zero mean, unit variance per column).
# NOTE(review): the variable is named `min_max_scaler` but holds a StandardScaler,
# not a MinMaxScaler; the name is kept so any other cell that uses it still works.
# NOTE(review): the scaler is fitted on the full dataset, before the train/test
# split, so test-set statistics leak into training. Consider fitting on the
# training rows only and applying `transform` to the test rows.
min_max_scaler = StandardScaler()
x = min_max_scaler.fit(x).transform(x)

In [8]:
# Display the scaled feature matrix (NumPy abbreviates the repr of large arrays).
x

array([[-0.39873715, -1.44746357, -0.97983502, -0.22832133, -0.915427  ,
        -0.08717263],
       [-0.39873715,  0.1756067 ,  0.53363434, -0.18988538,  0.93650577,
        -0.08717263],
       [-1.47395936, -1.44746357, -1.42625404, -0.12329847, -0.915427  ,
        -0.08717263],
       ...,
       [-1.47395936, -1.77207762, -1.15404732, -0.33213703,  0.93650577,
        -0.08717263],
       [-0.39873715,  0.50022075, -0.52252773, -0.30707641,  0.93650577,
        -0.08717263],
       [-1.47395936, -1.77207762, -1.15404732, -0.33875227,  0.93650577,
        -0.08717263]])

In [9]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=23,
)

# Report how many examples ended up in each partition.
print(f'Exemplos para o treinamento: {len(y_train)}. E para o teste: {len(y_test)}')

Exemplos para o treinamento: 15129. E para o teste: 6484


## Regressão Linear

In [10]:
# métricas
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error

from sklearn.linear_model import LinearRegression

# Fit a linear regression on the training split (fit returns the estimator itself),
# then predict the target for the test split.
model = LinearRegression().fit(x_train, y_train)
y_pred = model.predict(x_test)

In [11]:
# Evaluate the linear-regression predictions on the held-out test split.
# The `squared=True` argument was dropped from this call: it is the default
# (plain MSE), and the `squared` keyword is deprecated in scikit-learn 1.4 and
# removed in 1.6, where passing it raises a TypeError.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # root of the MSE, in the same units as the target
mape = mean_absolute_percentage_error(y_test, y_pred)  # a fraction, not multiplied by 100

print(f'MSE: {mse} RMSE: {rmse} MAPE: {mape}')

MSE: 58290430743.82153 RMSE: 241434.11263494132 MAPE: 0.3468906172169916




## K-NN Regressor

In [12]:
from sklearn.neighbors import KNeighborsRegressor

# k-nearest-neighbours regression: 7 neighbours, Euclidean distance.
model = KNeighborsRegressor(metric='euclidean', n_neighbors=7)
model.fit(x_train, y_train)

# Score the test-split predictions with MAPE.
y_pred = model.predict(x_test)
mape = mean_absolute_percentage_error(y_test, y_pred)
print(f'MAPE: {mape}')

MAPE: 0.3246391981351605


## Support Vector Machines Regressor - SVR

In [13]:
from sklearn.svm import SVR

# Support vector regression with the library's default hyperparameters.
model = SVR().fit(x_train, y_train)
y_pred = model.predict(x_test)

# Score the test-split predictions with MAPE.
mape = mean_absolute_percentage_error(y_test, y_pred)
print(f'MAPE: {mape}')

MAPE: 0.4214532788664194


In [14]:
from sklearn.svm import SVR

# Support vector regression with a linear kernel and C=100.
model = SVR(C=100, kernel="linear")
model.fit(x_train, y_train)

# Score the test-split predictions with MAPE.
y_pred = model.predict(x_test)
mape = mean_absolute_percentage_error(y_test, y_pred)
print(f'MAPE: {mape}')

MAPE: 0.3208912662881946


## Árvore de decisão Regressor

In [15]:
from sklearn.tree import DecisionTreeRegressor

# Decision-tree regression.
# random_state is fixed because tree fitting is randomized (features are permuted
# at each split), so an unseeded tree yields a different MAPE on every run; the
# two different scores recorded under this cell are an example of that.
# The value 23 matches the seed used for the train/test split.
model = DecisionTreeRegressor(random_state=23)
model.fit(x_train, y_train)

y_pred = model.predict(x_test)

# Score the test-split predictions with MAPE.
mape = mean_absolute_percentage_error(y_test, y_pred)

print(f'MAPE: {mape}')
MAPE: 0.4132546879741096

MAPE: 0.41117873508584263


## XGBoost

In [16]:
from xgboost import XGBRegressor

# XGBoost regressor with default hyperparameters.
model = XGBRegressor()
model.fit(x_train, y_train)

# Predict on the test split and score the result with MAPE.
y_pred = model.predict(x_test)
mape = mean_absolute_percentage_error(y_test, y_pred)
print(f'MAPE: {mape}')
MAPE: 0.3115478188030054

MAPE: 0.3115478188030054


In [17]:
# Explicit hyperparameters passed to the regressor as keyword arguments.
params = dict(n_estimators=100, max_depth=6, learning_rate=0.1)

# Train the model
model = XGBRegressor(**params)
model.fit(x_train, y_train)

# Predict on the test split and score the result with MAPE.
y_pred = model.predict(x_test)
mape = mean_absolute_percentage_error(y_test, y_pred)
print(f'MAPE: {mape}')

MAPE: 0.30671574201583884
