# Modelo Linear (*SGDRegressor*)

## Instalação de Pacotes Necessários

In [None]:
%time
import os
import numpy as np
from numpy import mean
from numpy import std
import seaborn as sns
import statsmodels.api as sm
from statsmodels.distributions.empirical_distribution import ECDF

!pip install --upgrade pandas
import pandas as pd
from pandas import read_csv

import matplotlib.pyplot as plt
%matplotlib inline

import math as math

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error, r2_score, make_scorer
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import SVR
from sklearn.model_selection import RepeatedKFold

!pip install --upgrade scipy
import scipy as scipy
from scipy import stats
from scipy.stats import t

!pip install --upgrade shap
import shap as shap
shap.initjs()

## Importação de Dados Geolocalizados

In [None]:
!pip install --upgrade gspread

In [None]:
# Authorize access to Google Drive (uses the Colab-only `google.colab` login flow)

from google.colab import auth
auth.authenticate_user()

import gspread
from google.auth import default
# Credentials picked up after the login above; the second return value is discarded
creds, _ = default()

# gspread client used by the next cell to open the spreadsheet
gc = gspread.authorize(creds)

In [None]:
# Open the spreadsheet by title and take its first worksheet
planilha = gc.open('nome_basededados')
pagina = planilha.sheet1
# Cell output: the values of row 1 (presumably the header row — confirm)
pagina.row_values(1)

In [None]:
# Build the working DataFrame from the worksheet records.
# NOTE(review): gspread appears to return blank cells as empty strings by default
# (`default_blank=''`), not NaN, so the isna()/dropna() cells below may not catch
# blanks — confirm against the sheet before relying on them.
df_Uniao = pd.DataFrame(pagina.get_all_records())

In [None]:
df_Uniao.head(5)

In [None]:
df_Uniao.columns

In [None]:
df_Uniao.shape

In [None]:
df_Uniao.isna().sum()

In [None]:
df_Uniao = df_Uniao.dropna()

In [None]:
df_Uniao = df_Uniao.reset_index(drop=True)

In [None]:
df_Uniao.dtypes

In [None]:
# Cast the four numeric columns that must be float, in a single astype call
colunas_float = [
    "% Superior Completo",
    "% Rede Geral de Distribuicao de Agua",
    "% Microcomputador com Acesso a Internet",
    "Rendimento Domiciliar Medio",
]
df_Uniao = df_Uniao.astype({coluna: float for coluna in colunas_float})

df_Uniao.dtypes

In [None]:
df_Uniao.describe()

In [None]:
df_Uniao[df_Uniao["Fonte"]  == "ME"].count()

In [None]:
df_Uniao[df_Uniao["Fonte"]  == "EB"].count()

In [None]:
# Remove the influential observations (Cook's distance) by index label.
# The frame is reassigned instead of mutated in place, and labels that are already
# gone are ignored, so re-running this cell no longer raises KeyError.
pontos_influentes = [2730, 4341, 4581]

df_Uniao = df_Uniao.drop(index=pontos_influentes, errors='ignore')

# Abordagem 2 

Transformação da área do terreno (área projetada horizontal do imóvel) e do valor total atualizado por logaritmo neperiano (sem remoção de *outliers*). A variável dependente é o logaritmo neperiano do valor total atualizado.

## Preparação dos Conjuntos de Treinamento e de Teste

### Cenário A

In [None]:
df_Uniao.columns

In [None]:
df_Uniao.describe()

In [None]:
df_Uniao['CUB*AreaConstruida'] = df_Uniao['CUBJan22'] * df_Uniao['AreaConstruidaTratada']

In [None]:
df_Uniao.columns

In [None]:
# U1

# Columns excluded from the U1 feature set
listaaremover = ['Fonte', 'Id', 'Município', 'UF', 'TipologiaMunicipalIBGE', 'IVS_2010', 'TaxaHomicidios_2019',
       'TipoImovel', 'CUBJan22', 'VidaUtil', 'AreaConstruidaTratada',
       'AreaProjetadaTratada', 'IndiceFipeZapAvaliacao', 'IndiceFipeZapJan22',
       'ValorTotalAtualizado', 'ValorUnitarioAtualizado', 'CodigoAP', 'NomeAP',
       '% Superior Completo', '% Rede Geral de Distribuicao de Agua',
       '% Microcomputador com Acesso a Internet', 'xcoord', 'ycoord', 'LatLong',
       'qtd_equipamento', 'qtd_hospital',
       'qtd_parque', 'qtd_delegacia', 'qtd_escola',
       'qtd_universidade', 'qtd_atracaoturistica', 'qtd_supermercado',
       'qtd_restaurante', 'qtd_padaria', 'qtd_cafeteria', 'qtd_loja', 'qtd_parada_onibus', 'qtd_estacao_vlt',
       'qtd_estacao_metro', 'qtd_estacao_trem', 'qtd_aeroporto',
       'acessibilidade', 'GoogleTrends2022', 'GrauUrbanizacaoIBGE', 'Rendimento Domiciliar Medio', 'qtd_shopping', 'qtd_estabelecimento', 'IdadeAparente']

# Index.drop keeps the original column order and raises KeyError on an unknown
# name, so a typo in the list above still fails loudly.
col_list = df_Uniao.columns.drop(listaaremover).tolist()

# Explicit copy: a later cell adds a column to this frame, which would otherwise be
# a write into a slice of df_Uniao (SettingWithCopyWarning).
X_final_log_N1 = df_Uniao[col_list].copy()

# Target: natural log of the updated total value as an (n, 1) NumPy array.
# Series[:, np.newaxis] is no longer supported by pandas >= 2.0, so convert first.
y_final_log_N1 = np.log(df_Uniao['ValorTotalAtualizado']).to_numpy()[:, np.newaxis]

In [None]:
# U2

# Columns excluded from the U2 feature set
listaaremover = ['Fonte', 'Id', 'Município', 'UF', 'TipologiaMunicipalIBGE', 'IVS_2010', 'TaxaHomicidios_2019',
       'TipoImovel', 'CUBJan22', 'VidaUtil', 'AreaConstruidaTratada',
       'AreaProjetadaTratada', 'IndiceFipeZapAvaliacao', 'IndiceFipeZapJan22',
       'ValorTotalAtualizado', 'ValorUnitarioAtualizado', 'CodigoAP', 'NomeAP',
       '% Superior Completo', '% Rede Geral de Distribuicao de Agua',
       '% Microcomputador com Acesso a Internet', 'xcoord', 'ycoord', 'LatLong',
       'qtd_equipamento', 'qtd_hospital',
       'qtd_parque', 'qtd_delegacia', 'qtd_escola',
       'qtd_universidade', 'qtd_atracaoturistica', 'qtd_supermercado',
       'qtd_restaurante', 'qtd_padaria', 'qtd_cafeteria', 'qtd_loja', 'qtd_parada_onibus', 'qtd_estacao_vlt',
       'qtd_estacao_metro', 'qtd_estacao_trem', 'qtd_aeroporto',
       'acessibilidade', 'GoogleTrends2022', 'GrauUrbanizacaoIBGE', 'Rendimento Domiciliar Medio', 'qtd_shopping', 'qtd_estabelecimento']

# Index.drop keeps the original column order and raises KeyError on an unknown
# name, so a typo in the list above still fails loudly.
col_list = df_Uniao.columns.drop(listaaremover).tolist()

# Explicit copy: a later cell adds a column to this frame, which would otherwise be
# a write into a slice of df_Uniao (SettingWithCopyWarning).
X_final_log_N2 = df_Uniao[col_list].copy()

# Target: natural log of the updated total value as an (n, 1) NumPy array.
# Series[:, np.newaxis] is no longer supported by pandas >= 2.0, so convert first.
y_final_log_N2 = np.log(df_Uniao['ValorTotalAtualizado']).to_numpy()[:, np.newaxis]

In [None]:
# U3

# Columns excluded from the U3 feature set
listaaremover = ['Fonte', 'Id', 'Município', 'UF', 'TipologiaMunicipalIBGE', 'IVS_2010', 'TaxaHomicidios_2019',
       'TipoImovel', 'CUBJan22', 'VidaUtil', 'AreaConstruidaTratada',
       'AreaProjetadaTratada', 'IndiceFipeZapAvaliacao', 'IndiceFipeZapJan22',
       'ValorTotalAtualizado', 'ValorUnitarioAtualizado', 'CodigoAP', 'NomeAP',
       '% Superior Completo', '% Rede Geral de Distribuicao de Agua',
       '% Microcomputador com Acesso a Internet', 'xcoord', 'ycoord', 'LatLong', 'qtd_hospital',
       'qtd_parque', 'qtd_delegacia', 'qtd_escola',
       'qtd_universidade', 'qtd_atracaoturistica', 'qtd_supermercado',
       'qtd_restaurante', 'qtd_padaria', 'qtd_cafeteria', 'qtd_loja', 'qtd_parada_onibus', 'qtd_estacao_vlt',
       'qtd_estacao_metro', 'qtd_estacao_trem', 'qtd_aeroporto',
       'acessibilidade', 'GoogleTrends2022', 'GrauUrbanizacaoIBGE']

# Index.drop keeps the original column order and raises KeyError on an unknown
# name, so a typo in the list above still fails loudly.
col_list = df_Uniao.columns.drop(listaaremover).tolist()

# Explicit copy: a later cell adds a column to this frame, which would otherwise be
# a write into a slice of df_Uniao (SettingWithCopyWarning).
X_final_log_N3 = df_Uniao[col_list].copy()

# Target: natural log of the updated total value as an (n, 1) NumPy array.
# Series[:, np.newaxis] is no longer supported by pandas >= 2.0, so convert first.
y_final_log_N3 = np.log(df_Uniao['ValorTotalAtualizado']).to_numpy()[:, np.newaxis]

In [None]:
X_final_log_N3.dtypes

In [None]:
# Shapiro-Wilk test

# Columns left out of the normality tests (same exclusions as U3, plus 'Vocacao')
listaaremover_testesnormalidade = ['Fonte', 'Id', 'Município', 'UF', 'TipologiaMunicipalIBGE', 'IVS_2010', 'TaxaHomicidios_2019',
       'TipoImovel', 'CUBJan22', 'VidaUtil', 'AreaConstruidaTratada',
       'AreaProjetadaTratada', 'IndiceFipeZapAvaliacao', 'IndiceFipeZapJan22',
       'ValorTotalAtualizado', 'ValorUnitarioAtualizado', 'CodigoAP', 'NomeAP',
       '% Superior Completo', '% Rede Geral de Distribuicao de Agua',
       '% Microcomputador com Acesso a Internet', 'xcoord', 'ycoord', 'LatLong', 'qtd_hospital',
       'qtd_parque', 'qtd_delegacia', 'qtd_escola',
       'qtd_universidade', 'qtd_atracaoturistica', 'qtd_supermercado',
       'qtd_restaurante', 'qtd_padaria', 'qtd_cafeteria', 'qtd_loja', 'qtd_parada_onibus', 'qtd_estacao_vlt',
       'qtd_estacao_metro', 'qtd_estacao_trem', 'qtd_aeroporto',
       'acessibilidade', 'GoogleTrends2022', 'GrauUrbanizacaoIBGE', 'Vocacao']

# Index.drop preserves column order and raises KeyError on an unknown name
col_list = df_Uniao.columns.drop(listaaremover_testesnormalidade).tolist()

logArea_normalidade = np.log(df_Uniao.loc[:,'AreaProjetadaTratada'])

# assign() returns a new frame, so the log-area column is not written into a
# slice of df_Uniao (which is what triggered SettingWithCopyWarning).
X_testesnormalidade = df_Uniao[col_list].assign(logArea=logArea_normalidade)

# One test per column; the column name is printed before its result
for variavel in X_testesnormalidade.columns:
  shapiro_test = stats.shapiro(X_testesnormalidade[variavel])
  print(variavel)
  print(shapiro_test)

In [None]:
# Jarque-Bera test, one column at a time (name first, result on the next line)

for variavel, serie in X_testesnormalidade.items():
  jarque_bera_test = stats.jarque_bera(serie)
  print(variavel, jarque_bera_test, sep='\n')

Há evidências de que as variáveis têm assimetria e curtose significativamente
diferentes das de uma distribuição normal. Portanto, não se aplicará a transformação StandardScaler().

In [None]:
# Natural log of the projected area, added as the 'logArea' feature of U1, U2 and U3
logArea = np.log(df_Uniao.loc[:,'AreaProjetadaTratada'])

# assign() builds new frames (aligned on the index) instead of writing through .loc
# into frames that may be slices of df_Uniao, which avoids SettingWithCopyWarning.
X_final_log_N1 = X_final_log_N1.assign(logArea=logArea)
X_final_log_N2 = X_final_log_N2.assign(logArea=logArea)
X_final_log_N3 = X_final_log_N3.assign(logArea=logArea)

In [None]:
# U1: hold out 33% for testing, stratified on the 'Vocacao' column, fixed seed
X_train_log_N1, X_test_log_N1, y_train_log_N1, y_test_log_N1 = train_test_split(
    X_final_log_N1,
    y_final_log_N1,
    test_size=0.33,
    shuffle=True,
    random_state=7,
    stratify=X_final_log_N1['Vocacao'],
)

In [None]:
# U2: hold out 33% for testing, stratified on the 'Vocacao' column, fixed seed
X_train_log_N2, X_test_log_N2, y_train_log_N2, y_test_log_N2 = train_test_split(
    X_final_log_N2,
    y_final_log_N2,
    test_size=0.33,
    shuffle=True,
    random_state=7,
    stratify=X_final_log_N2['Vocacao'],
)

In [None]:
# U3: hold out 33% for testing, stratified on the 'Vocacao' column, fixed seed
X_train_log_N3, X_test_log_N3, y_train_log_N3, y_test_log_N3 = train_test_split(
    X_final_log_N3,
    y_final_log_N3,
    test_size=0.33,
    shuffle=True,
    random_state=7,
    stratify=X_final_log_N3['Vocacao'],
)

In [None]:
# Frequency plot (histogram) of the training target

fig, ax = plt.subplots(figsize=(12,10))

# 'go' is a plot() format string (green circles), not a colour value; use the same
# colour as the test-set histogram instead.
sns.histplot(data=y_train_log_N3, kde = False, legend = False, color = 'green', ax = ax)

plt.rcParams.update({'font.size': 10})

ax.set_title('Histograma dos Conjuntos de Treinamento e de Validação para a Variável Explicada (Ln Valor Total Atualizado)')
ax.set_ylabel('Número de Ocorrências')
ax.set_xlabel('Ln Valor Total Atualizado')

In [None]:
# Frequency plot (histogram) of the test target

fig, ax = plt.subplots(figsize=(12,10))
sns.histplot(data=y_test_log_N3, kde=False, legend=False, color="green", ax=ax)

plt.rcParams.update({'font.size': 10})

ax.set_title('Histograma do Conjunto de Teste para a Variável Explicada (Ln Valor Total Atualizado)')
ax.set_ylabel('Número de Ocorrências')
ax.set_xlabel('Ln Valor Total Atualizado')

In [None]:
# One-hot encode 'Vocacao' on each split. The test frame is then aligned to the
# training columns, so a category missing from one split cannot leave the two frames
# with different dummy columns (a dummy absent from the test split is filled with 0).
X_train_log_N1 = pd.get_dummies(X_train_log_N1, columns = ['Vocacao'])
X_test_log_N1 = pd.get_dummies(X_test_log_N1, columns = ['Vocacao']).reindex(
    columns=X_train_log_N1.columns, fill_value=0
)

In [None]:
# One-hot encode 'Vocacao' on each split. The test frame is then aligned to the
# training columns, so a category missing from one split cannot leave the two frames
# with different dummy columns (a dummy absent from the test split is filled with 0).
X_train_log_N2 = pd.get_dummies(X_train_log_N2, columns = ['Vocacao'])
X_test_log_N2 = pd.get_dummies(X_test_log_N2, columns = ['Vocacao']).reindex(
    columns=X_train_log_N2.columns, fill_value=0
)

In [None]:
# One-hot encode 'Vocacao' on each split. The test frame is then aligned to the
# training columns, so a category missing from one split cannot leave the two frames
# with different dummy columns (a dummy absent from the test split is filled with 0).
X_train_log_N3 = pd.get_dummies(X_train_log_N3, columns = ['Vocacao'])
X_test_log_N3 = pd.get_dummies(X_test_log_N3, columns = ['Vocacao']).reindex(
    columns=X_train_log_N3.columns, fill_value=0
)

In [None]:
X_train_log_N1.columns

In [None]:
X_train_log_N2.columns

In [None]:
X_train_log_N3.columns

In [None]:
# U1

# Columns rescaled to [0, 1]; 'logArea' is not in the list and is left as is
colunas_escala_N1 = ['CapitalUF', 'IDHM_2010', 'Terreno', 'CUB*AreaConstruida',
       'Vocacao_Comercial', 'Vocacao_Institucional', 'Vocacao_Misto',
       'Vocacao_Residencial']

mms_N1_A = MinMaxScaler()

# Fit on the training split only, then apply the same scaling to the test split
X_train_log_N1[colunas_escala_N1] = mms_N1_A.fit_transform(X_train_log_N1[colunas_escala_N1])
X_test_log_N1[colunas_escala_N1] = mms_N1_A.transform(X_test_log_N1[colunas_escala_N1])

In [None]:
# U2

# Columns rescaled to [0, 1]; 'logArea' is not in the list and is left as is
colunas_escala_N2 = ['CapitalUF', 'IDHM_2010', 'Terreno', 'IdadeAparente',
       'CUB*AreaConstruida', 'Vocacao_Comercial',
       'Vocacao_Institucional', 'Vocacao_Misto', 'Vocacao_Residencial']

mms_N2_A = MinMaxScaler()

# Fit on the training split only, then apply the same scaling to the test split
X_train_log_N2[colunas_escala_N2] = mms_N2_A.fit_transform(X_train_log_N2[colunas_escala_N2])
X_test_log_N2[colunas_escala_N2] = mms_N2_A.transform(X_test_log_N2[colunas_escala_N2])

In [None]:
# U3

# Columns rescaled to [0, 1]; 'logArea' is not in the list and is left as is.
# A single list is shared by the fit and transform calls: the original test-side
# lists named 'qtd_equipamento_semshopping', which does not match the
# 'qtd_equipamento' column the scaler is fitted on.
colunas_escala_N3 = ['CapitalUF', 'IDHM_2010', 'Terreno', 'IdadeAparente',
       'Rendimento Domiciliar Medio', 'qtd_equipamento',
       'qtd_shopping', 'qtd_estabelecimento', 'CUB*AreaConstruida', 'Vocacao_Comercial', 'Vocacao_Institucional',
       'Vocacao_Misto', 'Vocacao_Residencial']

mms_N3_A = MinMaxScaler()

# Fit on the training split only, then apply the same scaling to the test split
X_train_log_N3[colunas_escala_N3] = mms_N3_A.fit_transform(X_train_log_N3[colunas_escala_N3])
X_test_log_N3[colunas_escala_N3] = mms_N3_A.transform(X_test_log_N3[colunas_escala_N3])

### Cenário B

In [None]:
df_EB = df_Uniao[df_Uniao.Fonte == "EB"].sample(130,random_state=0)

In [None]:
df_EB.head()

In [None]:
df_EB.describe()

In [None]:
df_Uniao_reduzido = df_Uniao.drop(df_EB.index)

In [None]:
df_Uniao_reduzido.head()

In [None]:
df_Uniao_reduzido.describe()

In [None]:
df_Uniao_reduzido.isna().sum()

In [None]:
# U1, U2 and U3 (Scenario B)

# Columns excluded from the Scenario B feature frames
listaaremover = ['Fonte', 'Id', 'Município', 'UF', 'TipologiaMunicipalIBGE', 'IVS_2010', 'TaxaHomicidios_2019',
       'TipoImovel', 'CUBJan22', 'VidaUtil', 'AreaConstruidaTratada',
       'AreaProjetadaTratada', 'IndiceFipeZapAvaliacao', 'IndiceFipeZapJan22',
       'ValorTotalAtualizado', 'ValorUnitarioAtualizado', 'CodigoAP', 'NomeAP',
       '% Superior Completo', '% Rede Geral de Distribuicao de Agua',
       '% Microcomputador com Acesso a Internet', 'xcoord', 'ycoord', 'LatLong', 'qtd_hospital',
       'qtd_parque', 'qtd_delegacia', 'qtd_escola',
       'qtd_universidade', 'qtd_atracaoturistica', 'qtd_supermercado',
       'qtd_restaurante', 'qtd_padaria', 'qtd_cafeteria', 'qtd_loja', 'qtd_parada_onibus', 'qtd_estacao_vlt',
       'qtd_estacao_metro', 'qtd_estacao_trem', 'qtd_aeroporto',
       'acessibilidade', 'GoogleTrends2022', 'GrauUrbanizacaoIBGE']

# Index.drop keeps the original column order and raises KeyError on an unknown name
col_list = df_Uniao.columns.drop(listaaremover).tolist()

# Explicit copies: later cells add and overwrite columns on these frames, which
# would otherwise be writes into slices (SettingWithCopyWarning).
X_final_log_reduzido = df_Uniao_reduzido[col_list].copy()
X_final_log_EB = df_EB[col_list].copy()

# Targets: natural log of the updated total value as (n, 1) NumPy arrays.
# Series[:, np.newaxis] is no longer supported by pandas >= 2.0, so convert first.
y_final_log_reduzido = np.log(df_Uniao_reduzido['ValorTotalAtualizado']).to_numpy()[:, np.newaxis]
y_final_log_EB = np.log(df_EB['ValorTotalAtualizado']).to_numpy()[:, np.newaxis]

In [None]:
# Natural log of the projected area for the reduced set and for the held-out EB sample
logArea_reduzido = np.log(df_Uniao_reduzido.loc[:,'AreaProjetadaTratada'])
logArea_EB = np.log(df_EB.loc[:,'AreaProjetadaTratada'])

# assign() returns new frames (aligned on the index) instead of setting a column on
# frames that may be slices of their source, which avoids SettingWithCopyWarning.
X_final_log_reduzido = X_final_log_reduzido.assign(logArea=logArea_reduzido)
X_final_log_EB = X_final_log_EB.assign(logArea=logArea_EB)

In [None]:
# One-hot encode 'Vocacao'. The EB sample is drawn without stratification, so it may
# lack one of the categories; aligning it to the training columns guarantees both
# frames expose the same dummy columns (a missing dummy is filled with 0).
X_final_log_reduzido = pd.get_dummies(X_final_log_reduzido, columns = ['Vocacao'])
X_final_log_EB = pd.get_dummies(X_final_log_EB, columns = ['Vocacao']).reindex(
    columns=X_final_log_reduzido.columns, fill_value=0
)

In [None]:
X_final_log_reduzido.columns

In [None]:
X_final_log_EB.columns

In [None]:
# Variable transformation (Scenario B)

# Columns rescaled to [0, 1]; 'logArea' is not in the list and is left as is
colunas_escala_B = ['CapitalUF', 'IDHM_2010', 'Terreno', 'IdadeAparente',
       'Rendimento Domiciliar Medio', 'qtd_equipamento',
       'qtd_shopping', 'qtd_estabelecimento', 'CUB*AreaConstruida',
       'Vocacao_Comercial', 'Vocacao_Institucional', 'Vocacao_Misto',
       'Vocacao_Residencial']

mms_B = MinMaxScaler()

# Fit on the reduced (training) set only, then apply the same scaling to the EB sample
X_final_log_reduzido[colunas_escala_B] = mms_B.fit_transform(X_final_log_reduzido[colunas_escala_B])
X_final_log_EB[colunas_escala_B] = mms_B.transform(X_final_log_EB[colunas_escala_B])

In [None]:
X_final_log_reduzido.columns

In [None]:
# Scenario B training frames: one column subset per model (U1, U2, U3)
colunas_trainB_N1 = ['CapitalUF', 'IDHM_2010', 'Terreno',
      'CUB*AreaConstruida', 'logArea',
      'Vocacao_Comercial', 'Vocacao_Institucional', 'Vocacao_Misto',
      'Vocacao_Residencial']

colunas_trainB_N2 = ['CapitalUF', 'IDHM_2010', 'Terreno', 'IdadeAparente',
      'CUB*AreaConstruida', 'logArea',
      'Vocacao_Comercial', 'Vocacao_Institucional', 'Vocacao_Misto',
      'Vocacao_Residencial']

colunas_trainB_N3 = ['CapitalUF', 'IDHM_2010', 'Terreno', 'IdadeAparente',
       'Rendimento Domiciliar Medio', 'qtd_equipamento',
       'qtd_shopping', 'qtd_estabelecimento', 'CUB*AreaConstruida', 'logArea',
       'Vocacao_Comercial', 'Vocacao_Institucional', 'Vocacao_Misto',
       'Vocacao_Residencial']

X_trainB_N1 = X_final_log_reduzido[colunas_trainB_N1]
X_trainB_N2 = X_final_log_reduzido[colunas_trainB_N2]
X_trainB_N3 = X_final_log_reduzido[colunas_trainB_N3]

In [None]:
# Scenario B test frames (EB sample): same column subsets, in the same order, as the
# training frames of U1, U2 and U3.
X_testeEB_N1 = X_final_log_EB[['CapitalUF', 'IDHM_2010', 'Terreno', 
      'CUB*AreaConstruida', 'logArea',
      'Vocacao_Comercial', 'Vocacao_Institucional', 'Vocacao_Misto',
      'Vocacao_Residencial']]

X_testeEB_N2 = X_final_log_EB[['CapitalUF', 'IDHM_2010', 'Terreno', 'IdadeAparente', 
      'CUB*AreaConstruida', 'logArea',
      'Vocacao_Comercial', 'Vocacao_Institucional', 'Vocacao_Misto',
      'Vocacao_Residencial']]

# 'IdadeAparente' (not 'IdadeAparenteCategorica'): the U3 training frame is built with
# 'IdadeAparente', and the test frame must carry the same features as the fitted model.
X_testeEB_N3 = X_final_log_EB[['CapitalUF', 'IDHM_2010', 'Terreno', 'IdadeAparente',
       'Rendimento Domiciliar Medio', 'qtd_equipamento',
       'qtd_shopping', 'qtd_estabelecimento', 'CUB*AreaConstruida', 'logArea',
       'Vocacao_Comercial', 'Vocacao_Institucional', 'Vocacao_Misto',
       'Vocacao_Residencial']]

In [None]:
X_final_log_reduzido.isna().sum()

## Modelagem Linear (*SGDRegressor*)

### Definições Gerais

In [None]:
# Cross-validation splitter passed to the cross_val_score calls below: 10 shuffled
# folds with a fixed seed (original note: tests were run with 2 to 15 folds).

cv = KFold(n_splits=10, random_state=0, shuffle=True)

In [None]:
# Critical value used in the 90% confidence-interval cells below.
# NOTE(review): 1.645 is the two-sided 90% quantile of the standard normal
# distribution; Student's t only approaches it for large degrees of freedom.
# Confirm the approximation is intended for the test-set sizes used here.

t_student_bicaudal_90 = 1.645

### Cenário A

In [None]:
# Grid search over alpha and random_state on the U3 training set (10-fold CV, R2 score).
# NOTE(review): the search runs on SGDRegressor() with default settings, while the
# models fitted below use learning_rate='constant' and eta0=0.01 — confirm that the
# selected parameters are meant to carry over.
grid = dict(
    alpha=[0.1, 0.01, 0.001, 0.0001],
    random_state=[0, 1, 12, 123, 1234, 12345],
)

sgdregressor_cv = GridSearchCV(
    estimator=SGDRegressor(),
    param_grid=grid,
    scoring='r2',
    cv=10,
)
sgdregressor_cv.fit(X_train_log_N3, y_train_log_N3.ravel())

sgdregressor_cv.best_params_

melhores parâmetros com base em U3: {'alpha': 0.0001, 'random_state': 123}

#### U1

In [None]:
# U1 model (Scenario A): L2-penalised SGDRegressor with a constant learning rate
nn_log_A_N1 = SGDRegressor(
    penalty='l2',
    alpha=0.0001,
    learning_rate='constant',
    eta0=0.01,
    max_iter=1000,
    random_state=123,
    verbose=False,
    early_stopping=False,
    validation_fraction=0.2,
)
nn_log_A_N1.fit(X_train_log_N1, y_train_log_N1.ravel())

# Return value is not used here; the evaluation cell predicts again
nn_log_A_N1.predict(X_test_log_N1)

# Cross-validated R2 on the training split, using the shared `cv` splitter
scores_log = cross_val_score(nn_log_A_N1, X_train_log_N1, y_train_log_N1.ravel(), scoring='r2', cv=cv)

r2_medio, r2_dp = mean(scores_log), std(scores_log)
print('Coeficiente de Determinacao Medio (R2) para os 10 folds: %.4f (dp: %.4f)' % (r2_medio, r2_dp))

In [None]:
# U1

# The targets are stored as (n, 1) columns while predict() returns shape (n,).
# Subtracting them directly broadcasts to an (n, n) matrix of all pairwise
# differences, which corrupts MAE and STD, so the targets are flattened first.

# ---- Training split ----
y_train_obs = np.ravel(y_train_log_N1)
Y_hat_train_sgd = nn_log_A_N1.predict(X_train_log_N1)
residuos_train = y_train_obs - Y_hat_train_sgd

rmse_train = np.sqrt(mean_squared_error(y_train_obs, Y_hat_train_sgd))
RSQ_train = r2_score(y_train_obs, Y_hat_train_sgd)
MBE_train = np.mean(residuos_train)          # mean bias error
MAE_train = np.mean(np.abs(residuos_train))  # mean absolute error
STD_train = np.std(residuos_train)           # standard deviation of the residuals

print("############ Avaliação do Modelo no Conjunto de Treinamento ############")
print("RMSE: %0.4f" % rmse_train)
print("R2: %0.4f" % RSQ_train)
print("MBE: %0.4f" % MBE_train)
print("MAE: %0.4f" % MAE_train)
print("STD: %0.4f" % STD_train)
print("########################################")
print(" ")

# ---- Test split (rmse_test and STD_test are reused by the confidence-interval cell) ----
y_test_obs = np.ravel(y_test_log_N1)
Y_hat_test_sgd = nn_log_A_N1.predict(X_test_log_N1)
residuos_test = y_test_obs - Y_hat_test_sgd

rmse_test = np.sqrt(mean_squared_error(y_test_obs, Y_hat_test_sgd))
RSQ_test = r2_score(y_test_obs, Y_hat_test_sgd)
MBE_test = np.mean(residuos_test)
MAE_test = np.mean(np.abs(residuos_test))
STD_test = np.std(residuos_test)

print("############ Avaliação do Modelo no Conjunto de Teste ############")
print("RMSE: %0.4f" % rmse_test)
print("R2: %0.4f" % RSQ_test)
print("MBE: %0.4f" % MBE_test)
print("MAE: %0.4f" % MAE_test)
print("STD: %0.4f" % STD_test)
print("########################################")
print(" ")

In [None]:
# Test residuals (observed minus predicted) as a 1-D array. The target is flattened
# first because (n, 1) - (n,) would broadcast to an (n, n) matrix.
residuos_U1_A = np.ravel(y_test_log_N1) - Y_hat_test_sgd

In [None]:
# Empirical CDF of the U1 test residuals evaluated at 0, i.e. the share of
# residuals that are <= 0 (ECDF is statsmodels' empirical distribution function).
ecdf = ECDF(np.ravel(residuos_U1_A))
ecdf_neg_U1_A = ecdf(0)
ecdf_neg_U1_A

In [None]:
# Confidence interval for U1 (SGDRegressor) - Scenario A

# Term shared by both bounds: critical value times the standard error of the residuals
n_teste = len(y_test_log_N1)
margem = t_student_bicaudal_90 * STD_test / (n_teste**(0.5))

# The RMSE is split between the bounds by the share of non-positive residuals,
# then mapped back from the log scale with exp()
dif_relativa_sup = math.exp(rmse_test * (1 - ecdf_neg_U1_A) + margem)
dif_relativa_inf = math.exp(- rmse_test * ecdf_neg_U1_A - margem)

print("Semiamplitude Superior do IC 90 U1 (Cenário A): %0.4f" % (dif_relativa_sup - 1))
print("Semiamplitude Inferior do IC 90 U1 (Cenário A): %0.4f" % (dif_relativa_inf - 1))

#### U2

In [None]:
# U2 model (Scenario A): L2-penalised SGDRegressor with a constant learning rate
nn_log_A_N2 = SGDRegressor(
    penalty='l2',
    alpha=0.0001,
    learning_rate='constant',
    eta0=0.01,
    max_iter=1000,
    random_state=123,
    verbose=False,
    early_stopping=False,
    validation_fraction=0.2,
)
nn_log_A_N2.fit(X_train_log_N2, y_train_log_N2.ravel())

# Return value is not used here; the evaluation cell predicts again
nn_log_A_N2.predict(X_test_log_N2)

# Cross-validated R2 on the training split, using the shared `cv` splitter
scores_log = cross_val_score(nn_log_A_N2, X_train_log_N2, y_train_log_N2.ravel(), scoring='r2', cv=cv)

r2_medio, r2_dp = mean(scores_log), std(scores_log)
print('Coeficiente de Determinacao Medio (R2) para os 10 folds: %.4f (dp: %.4f)' % (r2_medio, r2_dp))

In [None]:
# U2

# The targets are stored as (n, 1) columns while predict() returns shape (n,).
# Subtracting them directly broadcasts to an (n, n) matrix of all pairwise
# differences, which corrupts MAE and STD, so the targets are flattened first.

# ---- Training split ----
y_train_obs = np.ravel(y_train_log_N2)
Y_hat_train_sgd = nn_log_A_N2.predict(X_train_log_N2)
residuos_train = y_train_obs - Y_hat_train_sgd

rmse_train = np.sqrt(mean_squared_error(y_train_obs, Y_hat_train_sgd))
RSQ_train = r2_score(y_train_obs, Y_hat_train_sgd)
MBE_train = np.mean(residuos_train)          # mean bias error
MAE_train = np.mean(np.abs(residuos_train))  # mean absolute error
STD_train = np.std(residuos_train)           # standard deviation of the residuals

print("############ Avaliação do Modelo no Conjunto de Treinamento ############")
print("RMSE: %0.4f" % rmse_train)
print("R2: %0.4f" % RSQ_train)
print("MBE: %0.4f" % MBE_train)
print("MAE: %0.4f" % MAE_train)
print("STD: %0.4f" % STD_train)
print("########################################")
print(" ")

# ---- Test split (rmse_test and STD_test are reused by the confidence-interval cell) ----
y_test_obs = np.ravel(y_test_log_N2)
Y_hat_test_sgd = nn_log_A_N2.predict(X_test_log_N2)
residuos_test = y_test_obs - Y_hat_test_sgd

rmse_test = np.sqrt(mean_squared_error(y_test_obs, Y_hat_test_sgd))
RSQ_test = r2_score(y_test_obs, Y_hat_test_sgd)
MBE_test = np.mean(residuos_test)
MAE_test = np.mean(np.abs(residuos_test))
STD_test = np.std(residuos_test)

print("############ Avaliação do Modelo no Conjunto de Teste ############")
print("RMSE: %0.4f" % rmse_test)
print("R2: %0.4f" % RSQ_test)
print("MBE: %0.4f" % MBE_test)
print("MAE: %0.4f" % MAE_test)
print("STD: %0.4f" % STD_test)
print("########################################")
print(" ")

In [None]:
# Test residuals (observed minus predicted) as a 1-D array. The target is flattened
# first because (n, 1) - (n,) would broadcast to an (n, n) matrix.
residuos_U2_A = np.ravel(y_test_log_N2) - Y_hat_test_sgd

In [None]:
# Empirical CDF of the U2 test residuals evaluated at 0, i.e. the share of
# residuals that are <= 0 (ECDF is statsmodels' empirical distribution function).
ecdf = ECDF(np.ravel(residuos_U2_A))
ecdf_neg_U2_A = ecdf(0)
ecdf_neg_U2_A

In [None]:
# Confidence interval for U2 (SGDRegressor) - Scenario A

# Term shared by both bounds: critical value times the standard error of the residuals
n_teste = len(y_test_log_N2)
margem = t_student_bicaudal_90 * STD_test / (n_teste**(0.5))

# The RMSE is split between the bounds by the share of non-positive residuals,
# then mapped back from the log scale with exp()
dif_relativa_sup = math.exp(rmse_test * (1 - ecdf_neg_U2_A) + margem)
dif_relativa_inf = math.exp(- rmse_test * ecdf_neg_U2_A - margem)

print("Semiamplitude Superior do IC 90 U2 (Cenário A): %0.4f" % (dif_relativa_sup - 1))
print("Semiamplitude Inferior do IC 90 U2 (Cenário A): %0.4f" % (dif_relativa_inf - 1))

#### U3

In [None]:
# U3 model (Scenario A): L2-penalised SGDRegressor with a constant learning rate
nn_log_A_N3 = SGDRegressor(
    penalty='l2',
    alpha=0.0001,
    learning_rate='constant',
    eta0=0.01,
    max_iter=1000,
    random_state=123,
    verbose=False,
    early_stopping=False,
    validation_fraction=0.2,
)
nn_log_A_N3.fit(X_train_log_N3, y_train_log_N3.ravel())

# Return value is not used here; the evaluation cell predicts again
nn_log_A_N3.predict(X_test_log_N3)

# Cross-validated R2 on the training split, using the shared `cv` splitter
scores_log = cross_val_score(estimator=nn_log_A_N3, X=X_train_log_N3, y=y_train_log_N3.ravel(), scoring='r2', cv=cv)

r2_medio, r2_dp = mean(scores_log), std(scores_log)
print('Coeficiente de Determinacao Medio (R2) para os 10 folds: %.4f (dp: %.4f)' % (r2_medio, r2_dp))

In [None]:
nn_log_A_N3.coef_

In [None]:
nn_log_A_N3.feature_names_in_

In [None]:
# U3

# The targets are stored as (n, 1) columns while predict() returns shape (n,).
# Subtracting them directly broadcasts to an (n, n) matrix of all pairwise
# differences, which corrupts MAE and STD, so the targets are flattened first.

# ---- Training split ----
y_train_obs = np.ravel(y_train_log_N3)
Y_hat_train_sgd = nn_log_A_N3.predict(X_train_log_N3)
residuos_train = y_train_obs - Y_hat_train_sgd

rmse_train = np.sqrt(mean_squared_error(y_train_obs, Y_hat_train_sgd))
RSQ_train = r2_score(y_train_obs, Y_hat_train_sgd)
MBE_train = np.mean(residuos_train)          # mean bias error
MAE_train = np.mean(np.abs(residuos_train))  # mean absolute error
STD_train = np.std(residuos_train)           # standard deviation of the residuals

print("############ Avaliação do Modelo no Conjunto de Treinamento ############")
print("RMSE: %0.4f" % rmse_train)
print("R2: %0.4f" % RSQ_train)
print("MBE: %0.4f" % MBE_train)
print("MAE: %0.4f" % MAE_train)
print("STD: %0.4f" % STD_train)
print("########################################")
print(" ")

# ---- Test split (rmse_test and STD_test are reused by the confidence-interval cell) ----
y_test_obs = np.ravel(y_test_log_N3)
Y_hat_test_sgd = nn_log_A_N3.predict(X_test_log_N3)
residuos_test = y_test_obs - Y_hat_test_sgd

rmse_test = np.sqrt(mean_squared_error(y_test_obs, Y_hat_test_sgd))
RSQ_test = r2_score(y_test_obs, Y_hat_test_sgd)
MBE_test = np.mean(residuos_test)
MAE_test = np.mean(np.abs(residuos_test))
STD_test = np.std(residuos_test)

print("############ Avaliação do Modelo no Conjunto de Teste ############")
print("RMSE: %0.4f" % rmse_test)
print("R2: %0.4f" % RSQ_test)
print("MBE: %0.4f" % MBE_test)
print("MAE: %0.4f" % MAE_test)
print("STD: %0.4f" % STD_test)
print("########################################")
print(" ")

In [None]:
# Test residuals (observed minus predicted) as a 1-D array. The target is flattened
# first because (n, 1) - (n,) would broadcast to an (n, n) matrix.
residuos_U3_A = np.ravel(y_test_log_N3) - Y_hat_test_sgd

In [None]:
# Empirical CDF of the U3 test residuals evaluated at 0, i.e. the share of
# residuals that are <= 0 (ECDF is statsmodels' empirical distribution function).
ecdf = ECDF(np.ravel(residuos_U3_A))
ecdf_neg_U3_A = ecdf(0)
ecdf_neg_U3_A

In [None]:
# Confidence interval for U3 (SGDRegressor) - Scenario A

# Term shared by both bounds: critical value times the standard error of the residuals
n_teste = len(y_test_log_N3)
margem = t_student_bicaudal_90 * STD_test / (n_teste**(0.5))

# The RMSE is split between the bounds by the share of non-positive residuals,
# then mapped back from the log scale with exp()
dif_relativa_sup = math.exp(rmse_test * (1 - ecdf_neg_U3_A) + margem)
dif_relativa_inf = math.exp(- rmse_test * ecdf_neg_U3_A - margem)

print("Semiamplitude Superior do IC 90 U3 (Cenário A): %0.4f" % (dif_relativa_sup - 1))
print("Semiamplitude Inferior do IC 90 U3 (Cenário A): %0.4f" % (dif_relativa_inf - 1))

In [None]:
# Observed vs. predicted values (log scale) for the U3 model, Scenario A.
# Predictions are recomputed here so the figure does not depend on whichever
# evaluation cell last assigned Y_hat_train_sgd / Y_hat_test_sgd.
pred_train_N3 = nn_log_A_N3.predict(X_train_log_N3)
pred_test_N3 = nn_log_A_N3.predict(X_test_log_N3)

fig, ax = plt.subplots(figsize=(15,15))

# Labelled series so the legend has entries to show (the plotted series previously
# had no labels, which left the legend empty).
ax.plot(pred_train_N3, y_train_log_N3, 'go', label='dados de treinamento')
ax.plot(pred_test_N3, y_test_log_N3, 'bo', label='dados de teste')

plt.rcParams.update({'font.size': 10})

ax.set_title('Observações vs Projeções do Modelo Linear SGDRegressor (Ln Valor Total Atualizado) - Cenário A')
ax.set_ylabel('Ln Valores Totais Calculados pelas Instituições Atualizados (JAN 22)')
ax.set_xlabel('Projeções do Modelo (Ln Valor Total do Imóvel)')

ax.legend(loc="lower right")

# Identity line (y = x): points on it are predicted exactly
x_bis = np.linspace(0, 30, 1000)
ax.plot(x_bis, x_bis + 0, linestyle='solid', color='red')

ax.set_xlim([8, 20])
ax.set_ylim([8, 20])

### Cenário B

#### U1

In [None]:
# Linear model for U1, scenario B: SGD with a constant learning rate and L2 penalty.
# Per the scikit-learn docs, validation_fraction is only used when
# early_stopping=True, so it has no effect with the settings below.
sgd_B_U1 = SGDRegressor(
    alpha=0.0001,
    learning_rate='constant',
    eta0=0.01,
    max_iter=1000,
    random_state=123,
    verbose=False,
    early_stopping=False,
    validation_fraction=0.2,
    penalty='l2',
)

sgd_B_U1.fit(X_trainB_N1, y_final_log_reduzido.ravel())

# The former bare predict() call was dropped: its result was neither stored nor
# displayed. Predictions are computed where they are used, in the next cell.

# Cross-validated R2 on the training data; `cv` is defined elsewhere in the notebook.
# NOTE(review): the message hard-codes "10 folds" — confirm it matches `cv`.
scores_log = cross_val_score(sgd_B_U1, X_trainB_N1, y_final_log_reduzido.ravel(), scoring='r2', cv=cv)

print('Coeficiente de Determinacao Medio (R2) para os 10 folds: %.4f (dp: %.3f)' % (mean(scores_log), std(scores_log)))

In [None]:
# Train/test metrics for the U1 SGD model, scenario B (targets are on the log scale).
#
# MBE/MAE/STD are computed from explicitly flattened residuals.
# SGDRegressor.predict returns a 1-D array, so if the target were stored as an
# (n, 1) column, `y - y_hat` would broadcast to an (n, n) matrix and MAE/STD
# would be silently wrong.
# NOTE(review): the target's shape is not visible in this cell — confirm.

# ---- Training set ----
Y_hat_train_sgd = sgd_B_U1.predict(X_trainB_N1)
residuos_treino = np.ravel(y_final_log_reduzido) - np.ravel(Y_hat_train_sgd)

rmse_train = np.sqrt(mean_squared_error(y_final_log_reduzido, Y_hat_train_sgd))
RSQ_train = r2_score(y_final_log_reduzido, Y_hat_train_sgd)
MBE_train = np.mean(residuos_treino)          # mean bias error
MAE_train = np.mean(np.abs(residuos_treino))  # mean absolute error
STD_train = np.std(residuos_treino)           # population std (ddof=0) of the residuals

print("############ Avaliação do Modelo no Conjunto de Treinamento ############")
print("RMSE: %0.3f" % rmse_train)
print("R2: %0.3f" % RSQ_train)
print("MBE: %0.3f" % MBE_train)
print("MAE: %0.3f" % MAE_train)
print("STD: %0.3f" % STD_train)
print("########################################")
print(" ")

# ---- Test set ----
Y_hat_test_sgd = sgd_B_U1.predict(X_testeEB_N1)
residuos_teste = np.ravel(y_final_log_EB) - np.ravel(Y_hat_test_sgd)

rmse_test = np.sqrt(mean_squared_error(y_final_log_EB, Y_hat_test_sgd))
RSQ_test = r2_score(y_final_log_EB, Y_hat_test_sgd)
MBE_test = np.mean(residuos_teste)
MAE_test = np.mean(np.abs(residuos_teste))
STD_test = np.std(residuos_teste)

print("############ Avaliação do Modelo no Conjunto de Teste ############")
print("RMSE: %0.3f" % rmse_test)
print("R2: %0.3f" % RSQ_test)
print("MBE: %0.3f" % MBE_test)
print("MAE: %0.3f" % MAE_test)
print("STD: %0.3f" % STD_test)
print("########################################")
print(" ")

In [None]:
# Test-set residuals (observed minus predicted, log scale) for U1, scenario B.
# Both operands are flattened first: if they differ in shape (e.g. an (n, 1)
# target column against an (n,) prediction vector) a plain subtraction would
# broadcast to an (n, n) matrix instead of n residuals.
# NOTE(review): the shape of y_final_log_EB is not visible here — confirm.
residuos_U1_B = np.ravel(y_final_log_EB) - np.ravel(Y_hat_test_sgd)

In [None]:
# Empirical CDF of the flattened U1 (scenario B) test residuals, evaluated at 0.
# The value is later used to split the RMSE between the upper and lower bounds.
# NOTE(review): ECDF is presumably statsmodels' ECDF (its import is not visible
# in this part of the notebook); with its default settings ecdf(0) is the
# fraction of residuals <= 0 — confirm.
ecdf = ECDF(residuos_U1_B.ravel())
ecdf_neg_U1_B = ecdf(0)
ecdf_neg_U1_B

In [None]:
# 90% confidence interval, U1 (SGDRegressor) - scenario B
#
# Everything is computed on the log scale and exponentiated at the end, so each
# bound is a multiplicative factor; subtracting 1 expresses it as a relative
# difference. rmse_test and STD_test come from the most recently executed
# evaluation cell, so this cell depends on execution order.

# Student-t term shared by both bounds.
margem_t_U1_B = t_student_bicaudal_90 * STD_test / (len(y_final_log_EB)**(0.5))

dif_relativa_sup = math.exp(rmse_test * (1 - ecdf_neg_U1_B) + margem_t_U1_B)
dif_relativa_inf = math.exp(- rmse_test * ecdf_neg_U1_B - margem_t_U1_B)

print("Semiamplitude Superior do IC 90 U1 (Cenário B): %0.4f" % (dif_relativa_sup - 1))
print("Semiamplitude Inferior do IC 90 U1 (Cenário B): %0.4f" % (dif_relativa_inf - 1))

#### U2

In [None]:
# Linear model for U2, scenario B: SGD with a constant learning rate and L2 penalty.
# Per the scikit-learn docs, validation_fraction is only used when
# early_stopping=True, so it has no effect with the settings below.
sgd_B_U2 = SGDRegressor(
    alpha=0.0001,
    learning_rate='constant',
    eta0=0.01,
    max_iter=1000,
    random_state=123,
    verbose=False,
    early_stopping=False,
    validation_fraction=0.2,
    penalty='l2',
)

sgd_B_U2.fit(X_trainB_N2, y_final_log_reduzido.ravel())

# The former bare predict() call was dropped: its result was neither stored nor
# displayed. Predictions are computed where they are used, in the next cell.

# Cross-validated R2 on the training data; `cv` is defined elsewhere in the notebook.
# NOTE(review): the message hard-codes "10 folds" — confirm it matches `cv`.
scores_log = cross_val_score(sgd_B_U2, X_trainB_N2, y_final_log_reduzido.ravel(), scoring='r2', cv=cv)

print('Coeficiente de Determinacao Medio (R2) para os 10 folds: %.4f (dp: %.3f)' % (mean(scores_log), std(scores_log)))

In [None]:
# Train/test metrics for the U2 SGD model, scenario B (targets are on the log scale).
#
# MBE/MAE/STD are computed from explicitly flattened residuals.
# SGDRegressor.predict returns a 1-D array, so if the target were stored as an
# (n, 1) column, `y - y_hat` would broadcast to an (n, n) matrix and MAE/STD
# would be silently wrong.
# NOTE(review): the target's shape is not visible in this cell — confirm.

# ---- Training set ----
Y_hat_train_sgd = sgd_B_U2.predict(X_trainB_N2)
residuos_treino = np.ravel(y_final_log_reduzido) - np.ravel(Y_hat_train_sgd)

rmse_train = np.sqrt(mean_squared_error(y_final_log_reduzido, Y_hat_train_sgd))
RSQ_train = r2_score(y_final_log_reduzido, Y_hat_train_sgd)
MBE_train = np.mean(residuos_treino)          # mean bias error
MAE_train = np.mean(np.abs(residuos_treino))  # mean absolute error
STD_train = np.std(residuos_treino)           # population std (ddof=0) of the residuals

print("############ Avaliação do Modelo no Conjunto de Treinamento ############")
print("RMSE: %0.3f" % rmse_train)
print("R2: %0.3f" % RSQ_train)
print("MBE: %0.3f" % MBE_train)
print("MAE: %0.3f" % MAE_train)
print("STD: %0.3f" % STD_train)
print("########################################")
print(" ")

# ---- Test set ----
Y_hat_test_sgd = sgd_B_U2.predict(X_testeEB_N2)
residuos_teste = np.ravel(y_final_log_EB) - np.ravel(Y_hat_test_sgd)

rmse_test = np.sqrt(mean_squared_error(y_final_log_EB, Y_hat_test_sgd))
RSQ_test = r2_score(y_final_log_EB, Y_hat_test_sgd)
MBE_test = np.mean(residuos_teste)
MAE_test = np.mean(np.abs(residuos_teste))
STD_test = np.std(residuos_teste)

print("############ Avaliação do Modelo no Conjunto de Teste ############")
print("RMSE: %0.3f" % rmse_test)
print("R2: %0.3f" % RSQ_test)
print("MBE: %0.3f" % MBE_test)
print("MAE: %0.3f" % MAE_test)
print("STD: %0.3f" % STD_test)
print("########################################")
print(" ")

In [None]:
# Test-set residuals (observed minus predicted, log scale) for U2, scenario B.
# Both operands are flattened first: if they differ in shape (e.g. an (n, 1)
# target column against an (n,) prediction vector) a plain subtraction would
# broadcast to an (n, n) matrix instead of n residuals.
# NOTE(review): the shape of y_final_log_EB is not visible here — confirm.
residuos_U2_B = np.ravel(y_final_log_EB) - np.ravel(Y_hat_test_sgd)

In [None]:
# Empirical CDF of the flattened U2 (scenario B) test residuals, evaluated at 0.
# The value is later used to split the RMSE between the upper and lower bounds.
# NOTE(review): ECDF is presumably statsmodels' ECDF (its import is not visible
# in this part of the notebook); with its default settings ecdf(0) is the
# fraction of residuals <= 0 — confirm.
ecdf = ECDF(residuos_U2_B.ravel())
ecdf_neg_U2_B = ecdf(0)
ecdf_neg_U2_B

In [None]:
# 90% confidence interval, U2 (SGDRegressor) - scenario B
#
# Everything is computed on the log scale and exponentiated at the end, so each
# bound is a multiplicative factor; subtracting 1 expresses it as a relative
# difference. rmse_test and STD_test come from the most recently executed
# evaluation cell, so this cell depends on execution order.

# Student-t term shared by both bounds.
margem_t_U2_B = t_student_bicaudal_90 * STD_test / (len(y_final_log_EB)**(0.5))

dif_relativa_sup = math.exp(rmse_test * (1 - ecdf_neg_U2_B) + margem_t_U2_B)
dif_relativa_inf = math.exp(- rmse_test * ecdf_neg_U2_B - margem_t_U2_B)

print("Semiamplitude Superior do IC 90 U2 (Cenário B): %0.4f" % (dif_relativa_sup - 1))
print("Semiamplitude Inferior do IC 90 U2 (Cenário B): %0.4f" % (dif_relativa_inf - 1))

#### U3

In [None]:
# Linear model for U3, scenario B: SGD with a constant learning rate and L2 penalty.
# Per the scikit-learn docs, validation_fraction is only used when
# early_stopping=True, so it has no effect with the settings below.
sgd_B_U3 = SGDRegressor(
    alpha=0.0001,
    learning_rate='constant',
    eta0=0.01,
    max_iter=1000,
    random_state=123,
    verbose=False,
    early_stopping=False,
    validation_fraction=0.2,
    penalty='l2',
)

sgd_B_U3.fit(X_trainB_N3, y_final_log_reduzido.ravel())

# The former bare predict() call was dropped: its result was neither stored nor
# displayed. Predictions are computed where they are used, in the next cell.

# Cross-validated R2 on the training data; `cv` is defined elsewhere in the notebook.
# NOTE(review): the message hard-codes "10 folds" — confirm it matches `cv`.
scores_log = cross_val_score(sgd_B_U3, X_trainB_N3, y_final_log_reduzido.ravel(), scoring='r2', cv=cv)

print('Coeficiente de Determinacao Medio (R2) para os 10 folds: %.4f (dp: %.3f)' % (mean(scores_log), std(scores_log)))

In [None]:
# Train/test metrics for the U3 SGD model, scenario B (targets are on the log scale).
#
# MBE/MAE/STD are computed from explicitly flattened residuals.
# SGDRegressor.predict returns a 1-D array, so if the target were stored as an
# (n, 1) column, `y - y_hat` would broadcast to an (n, n) matrix and MAE/STD
# would be silently wrong.
# NOTE(review): the target's shape is not visible in this cell — confirm.

# ---- Training set ----
Y_hat_train_sgd = sgd_B_U3.predict(X_trainB_N3)
residuos_treino = np.ravel(y_final_log_reduzido) - np.ravel(Y_hat_train_sgd)

rmse_train = np.sqrt(mean_squared_error(y_final_log_reduzido, Y_hat_train_sgd))
RSQ_train = r2_score(y_final_log_reduzido, Y_hat_train_sgd)
MBE_train = np.mean(residuos_treino)          # mean bias error
MAE_train = np.mean(np.abs(residuos_treino))  # mean absolute error
STD_train = np.std(residuos_treino)           # population std (ddof=0) of the residuals

print("############ Avaliação do Modelo no Conjunto de Treinamento ############")
print("RMSE: %0.3f" % rmse_train)
print("R2: %0.3f" % RSQ_train)
print("MBE: %0.3f" % MBE_train)
print("MAE: %0.3f" % MAE_train)
print("STD: %0.3f" % STD_train)
print("########################################")
print(" ")

# ---- Test set ----
Y_hat_test_sgd = sgd_B_U3.predict(X_testeEB_N3)
residuos_teste = np.ravel(y_final_log_EB) - np.ravel(Y_hat_test_sgd)

rmse_test = np.sqrt(mean_squared_error(y_final_log_EB, Y_hat_test_sgd))
RSQ_test = r2_score(y_final_log_EB, Y_hat_test_sgd)
MBE_test = np.mean(residuos_teste)
MAE_test = np.mean(np.abs(residuos_teste))
STD_test = np.std(residuos_teste)

print("############ Avaliação do Modelo no Conjunto de Teste ############")
print("RMSE: %0.3f" % rmse_test)
print("R2: %0.3f" % RSQ_test)
print("MBE: %0.3f" % MBE_test)
print("MAE: %0.3f" % MAE_test)
print("STD: %0.3f" % STD_test)
print("########################################")
print(" ")

In [None]:
# Test-set residuals (observed minus predicted, log scale) for U3, scenario B.
# Both operands are flattened first: if they differ in shape (e.g. an (n, 1)
# target column against an (n,) prediction vector) a plain subtraction would
# broadcast to an (n, n) matrix instead of n residuals.
# NOTE(review): the shape of y_final_log_EB is not visible here — confirm.
residuos_U3_B = np.ravel(y_final_log_EB) - np.ravel(Y_hat_test_sgd)

In [None]:
# Empirical CDF of the flattened U3 (scenario B) test residuals, evaluated at 0.
# The value is later used to split the RMSE between the upper and lower bounds.
# NOTE(review): ECDF is presumably statsmodels' ECDF (its import is not visible
# in this part of the notebook); with its default settings ecdf(0) is the
# fraction of residuals <= 0 — confirm.
ecdf = ECDF(residuos_U3_B.ravel())
ecdf_neg_U3_B = ecdf(0)
ecdf_neg_U3_B

In [None]:
# 90% confidence interval, U3 (SGDRegressor) - scenario B
#
# Everything is computed on the log scale and exponentiated at the end, so each
# bound is a multiplicative factor; subtracting 1 expresses it as a relative
# difference. rmse_test and STD_test come from the most recently executed
# evaluation cell, so this cell depends on execution order.

# Student-t term shared by both bounds.
margem_t_U3_B = t_student_bicaudal_90 * STD_test / (len(y_final_log_EB)**(0.5))

dif_relativa_sup = math.exp(rmse_test * (1 - ecdf_neg_U3_B) + margem_t_U3_B)
dif_relativa_inf = math.exp(- rmse_test * ecdf_neg_U3_B - margem_t_U3_B)

print("Semiamplitude Superior do IC 90 U3 (Cenário B): %0.4f" % (dif_relativa_sup - 1))
print("Semiamplitude Inferior do IC 90 U3 (Cenário B): %0.4f" % (dif_relativa_inf - 1))

## Interpretabilidade dos Modelos *SGDRegressor* (lineares)

### Cenário A

#### U1

In [None]:
# Show the column order of the U1 test matrix (scenario A) — useful for checking
# the hand-written feature_names list in the following SHAP cell.
X_test_log_N1.columns

In [None]:
# SHAP (SHapley Additive exPlanations) values for the U1 linear model, scenario A

# Display names, written by hand for the summary plot (9 entries).
nomes_atributos_U1_A = [
    'Capital UF', 'IDHM', 'Terreno', 'CUB * Área Construída', 'Ln(Área do Terreno)',
    'Vocação Comercial', 'Vocação Institucional', 'Vocação Misto',
    'Vocação Residencial',
]

# The explainer is built from the fitted model and the training matrix,
# then evaluated on the test matrix.
explainer = shap.LinearExplainer(nn_log_A_N1, X_train_log_N1)
shap_values = explainer.shap_values(X_test_log_N1)

shap.summary_plot(
    shap_values,
    features=X_test_log_N1,
    feature_names=nomes_atributos_U1_A,
    plot_size=(14.0,8.0),
    title=' Valores de Shapley Modelo SGDRegressor Linear U1 (Cenário A)',
)

#### U2

In [None]:
# Show the column order of the U2 test matrix (scenario A) — useful for checking
# the hand-written feature_names list in the following SHAP cell.
X_test_log_N2.columns

In [None]:
# SHAP (SHapley Additive exPlanations) values for the U2 linear model, scenario A

# Display names, written by hand for the summary plot (10 entries).
nomes_atributos_U2_A = [
    'Capital UF', 'IDHM', 'Terreno', 'Idade Aparente', 'CUB * Área Construída', 'Ln(Área do Terreno)',
    'Vocação Comercial', 'Vocação Institucional', 'Vocação Misto',
    'Vocação Residencial',
]

# The explainer is built from the fitted model and the training matrix,
# then evaluated on the test matrix.
explainer = shap.LinearExplainer(nn_log_A_N2, X_train_log_N2)
shap_values = explainer.shap_values(X_test_log_N2)

# `features` must be the same matrix the SHAP values were computed on (N2).
# Passing the N3 test matrix here would pair these values with a different
# feature set than the one they were computed for.
shap.summary_plot(
    shap_values,
    features=X_test_log_N2,
    feature_names=nomes_atributos_U2_A,
    plot_size=(14.0,8.0),
    title=' Valores de Shapley Modelo SGDRegressor Linear U2 (Cenário A)',
)

#### U3

In [None]:
# Show the column order of the U3 test matrix (scenario A) — useful for checking
# the hand-written feature_names list in the following SHAP cell.
X_test_log_N3.columns

In [None]:
# SHAP (SHapley Additive exPlanations) values for the U3 linear model, scenario A

# Display names, written by hand for the summary plot (14 entries).
nomes_atributos_U3_A = [
    'Capital UF', 'IDHM', 'Terreno', 'Idade Aparente',
    'Renda Domiciliar AP (IBGE)', 'Qtd Equipamentos (API Google)',
    'Qtd Shopping Centers (API Google)', 'Qtd Estabelecimentos (API Google)', 'CUB * Área Construída', 'Ln(Área do Terreno)',
    'Vocação Comercial', 'Vocação Institucional', 'Vocação Misto',
    'Vocação Residencial',
]

# The explainer is built from the fitted model and the training matrix,
# then evaluated on the test matrix.
explainer = shap.LinearExplainer(nn_log_A_N3, X_train_log_N3)
shap_values = explainer.shap_values(X_test_log_N3)

shap.summary_plot(
    shap_values,
    features=X_test_log_N3,
    feature_names=nomes_atributos_U3_A,
    plot_size=(14.0,8.0),
    title=' Valores de Shapley Modelo SGDRegressor Linear U3 (Cenário A)',
)

### Cenário B

#### U1

In [None]:
# Show the column order of the U1 test matrix (scenario B) — useful for checking
# the hand-written feature_names list in the following SHAP cell.
X_testeEB_N1.columns

In [None]:
# SHAP (SHapley Additive exPlanations) values for the U1 linear model, scenario B

# Display names, written by hand for the summary plot (9 entries).
nomes_atributos_U1_B = [
    'Capital UF', 'IDHM', 'Terreno', 'CUB * Área Construída', 'Ln(Área do Terreno)',
    'Vocação Comercial', 'Vocação Institucional', 'Vocação Misto',
    'Vocação Residencial',
]

# The explainer is built from the fitted model and the training matrix,
# then evaluated on the test matrix.
explainer = shap.LinearExplainer(sgd_B_U1, X_trainB_N1)
shap_values = explainer.shap_values(X_testeEB_N1)

shap.summary_plot(
    shap_values,
    features=X_testeEB_N1,
    feature_names=nomes_atributos_U1_B,
    plot_size=(14.0,8.0),
    title=' Valores de Shapley Modelo SGDRegressor Linear U1 (Cenário B)',
)

#### U2

In [None]:
# Show the column order of the U2 test matrix (scenario B) — useful for checking
# the hand-written feature_names list in the following SHAP cell.
X_testeEB_N2.columns

In [None]:
# SHAP (SHapley Additive exPlanations) values for the U2 linear model, scenario B

# Display names, written by hand for the summary plot (10 entries).
nomes_atributos_U2_B = [
    'Capital UF', 'IDHM', 'Terreno', 'Idade Aparente', 'CUB * Área Construída', 'Ln(Área do Terreno)',
    'Vocação Comercial', 'Vocação Institucional', 'Vocação Misto',
    'Vocação Residencial',
]

# The explainer is built from the fitted model and the training matrix,
# then evaluated on the test matrix.
explainer = shap.LinearExplainer(sgd_B_U2, X_trainB_N2)
shap_values = explainer.shap_values(X_testeEB_N2)

shap.summary_plot(
    shap_values,
    features=X_testeEB_N2,
    feature_names=nomes_atributos_U2_B,
    plot_size=(14.0,8.0),
    title=' Valores de Shapley Modelo SGDRegressor Linear U2 (Cenário B)',
)

#### U3

In [None]:
# Show the column order of the U3 test matrix (scenario B) — useful for checking
# the hand-written feature_names list in the following SHAP cell.
X_testeEB_N3.columns

In [None]:
# SHAP (SHapley Additive exPlanations) values for the U3 linear model, scenario B

# Display names, written by hand for the summary plot (14 entries).
nomes_atributos_U3_B = [
    'Capital UF', 'IDHM', 'Terreno', 'Idade Aparente',
    'Renda Domiciliar AP (IBGE)', 'Qtd Equipamentos (API Google)',
    'Qtd Shopping Centers (API Google)', 'Qtd Estabelecimentos (API Google)', 'CUB * Área Construída', 'Ln(Área do Terreno)',
    'Vocação Comercial', 'Vocação Institucional', 'Vocação Misto',
    'Vocação Residencial',
]

# The explainer is built from the fitted model and the training matrix,
# then evaluated on the test matrix.
explainer = shap.LinearExplainer(sgd_B_U3, X_trainB_N3)
shap_values = explainer.shap_values(X_testeEB_N3)

shap.summary_plot(
    shap_values,
    features=X_testeEB_N3,
    feature_names=nomes_atributos_U3_B,
    plot_size=(14.0,8.0),
    title=' Valores de Shapley Modelo SGDRegressor Linear U3 (Cenário B)',
)