# Validação Cruzada

In [7]:
import pandas as pd
from keras.layers import Dense, Input
from keras.models import Sequential
from scikeras.wrappers import KerasRegressor
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

In [5]:
# Load the dataset. The CSV is read with an explicit ISO-8859-1 encoding.
base = pd.read_csv('autos.csv', encoding='ISO-8859-1')

# Drop, in a single call, the columns that are not useful for the model.
# 'name', 'seller' and 'offerType' were flagged by the author (after inspecting
# their value_counts) as having too much variability; the remaining five were
# simply judged not useful.
COLUNAS_DESCARTADAS = [
    'dateCrawled', 'dateCreated', 'nrOfPictures', 'postalCode', 'lastSeen',
    'name', 'seller', 'offerType',
]
base = base.drop(columns=COLUNAS_DESCARTADAS)

# Remove inconsistent prices. i1 / i2 keep the discarded rows for inspection.
# The kept range is the exact complement of i1 and i2: a listing priced exactly
# at PRECO_MAXIMO is NOT an outlier according to i2, so it must be kept
# (the previous `< 350000` filter silently dropped it).
PRECO_MINIMO = 10
PRECO_MAXIMO = 350000
i1 = base.loc[base.price <= PRECO_MINIMO]
i2 = base.loc[base.price > PRECO_MAXIMO]
base = base[(base.price > PRECO_MINIMO) & (base.price <= PRECO_MAXIMO)]

# Missing-value treatment: each categorical column gets a fixed fill value
# (presumably the most frequent category of that column — TODO confirm).
valores = {
    'vehicleType': 'limousine',
    'gearbox': 'manuell',
    'model': 'golf',
    'fuelType': 'benzine',
    'notRepairedDamage': 'nein'
}
base = base.fillna(value=valores)

# Split by position: column 0 is the target, the following columns are the
# predictors (assumes `price` is the first remaining column — TODO confirm).
previsores = base.iloc[:, 1:13].values
preco_real = base.iloc[:, 0].values

# Positions (inside `previsores`) of the categorical columns. Declared once so
# the label-encoding step and the one-hot step cannot drift apart.
COLUNAS_CATEGORICAS = [0, 1, 3, 5, 8, 9, 10]

# Turn the categorical values into integer codes, one column at a time.
labelencoder_previsores = LabelEncoder()
for coluna in COLUNAS_CATEGORICAS:
    previsores[:, coluna] = labelencoder_previsores.fit_transform(previsores[:, coluna])

# One-hot encode the categorical columns; every other column is passed through
# unchanged. The transformer output is converted to a dense array for Keras.
encoder2 = ColumnTransformer(
    transformers=[("OneHot", OneHotEncoder(), COLUNAS_CATEGORICAS)],
    remainder='passthrough',
)
previsores = encoder2.fit_transform(previsores).toarray()

In [8]:
#função para criar a rede neural

def criarede(meta=None):
    """Build and compile the feed-forward network used for price regression.

    Architecture: two hidden ReLU layers with 158 units each and a single
    linear output unit, trained with Adam on mean absolute error.

    Parameters
    ----------
    meta : dict, optional
        Metadata dictionary that scikeras passes to model-building functions
        declaring a ``meta`` parameter. When present, ``meta["n_features_in_"]``
        gives the input width, so the network follows whatever the one-hot
        encoding produced. When the function is called directly (no ``meta``),
        the width falls back to 317, the value originally hard-coded here.

    Returns
    -------
    keras.models.Sequential
        The compiled, untrained model.
    """
    n_entradas = meta["n_features_in_"] if meta else 317

    regressor = Sequential()
    # An explicit Input layer replaces `input_dim=` on the first Dense layer,
    # which Keras warns against for Sequential models.
    regressor.add(Input(shape=(n_entradas,)))
    regressor.add(Dense(units=158, activation='relu'))
    regressor.add(Dense(units=158, activation='relu'))
    regressor.add(Dense(units=1, activation='linear'))
    regressor.compile(loss='mean_absolute_error', optimizer='adam',
                      metrics=['mean_absolute_error'])

    return regressor

In [10]:
# Wrap the Keras builder so scikit-learn can clone and fit it once per fold.
# `model=` is the current scikeras argument name; `build_fn=` is deprecated
# and emits a warning on every fit.
regressor = KerasRegressor(model=criarede,
                           epochs=100,
                           batch_size=300)

# 10-fold cross-validation. scikit-learn scorers follow a "greater is better"
# convention, so every entry of `resultados` is the NEGATED mean absolute error.
resultados = cross_val_score(estimator=regressor,
                             X=previsores, y=preco_real,
                             cv=10, scoring='neg_mean_absolute_error')

# Mean (negative, see above) and standard deviation of the per-fold scores.
media = resultados.mean()
desvio = resultados.std()

  X, y = self._initialize(X, y)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 4344.2285 - mean_absolute_error: 4344.2285
Epoch 2/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3508.5220 - mean_absolute_error: 3508.5220
Epoch 3/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 3268.5852 - mean_absolute_error: 3268.5850
Epoch 4/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 3044.2051 - mean_absolute_error: 3044.2051
Epoch 5/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2841.2356 - mean_absolute_error: 2841.2356
Epoch 6/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2816.2729 - mean_absolute_error: 2816.2729
Epoch 7/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 2750.0164 - mean_absolute_error: 2750.0161
Epoch 8/100
[1m1078

  X, y = self._initialize(X, y)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 4312.5254 - mean_absolute_error: 4312.5254
Epoch 2/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3467.6238 - mean_absolute_error: 3467.6238
Epoch 3/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3297.1785 - mean_absolute_error: 3297.1785
Epoch 4/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 3033.8262 - mean_absolute_error: 3033.8262
Epoch 5/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2935.7476 - mean_absolute_error: 2935.7476
Epoch 6/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2844.3186 - mean_absolute_error: 2844.3186
Epoch 7/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 2779.6255 - mean_absolute_error: 2779.6255
Epoch 8/100
[1m1078

  X, y = self._initialize(X, y)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 4442.5757 - mean_absolute_error: 4442.5757
Epoch 2/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3417.4138 - mean_absolute_error: 3417.4138
Epoch 3/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 3241.7913 - mean_absolute_error: 3241.7913
Epoch 4/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 3086.2305 - mean_absolute_error: 3086.2305
Epoch 5/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2989.1616 - mean_absolute_error: 2989.1616
Epoch 6/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2891.3154 - mean_absolute_error: 2891.3154
Epoch 7/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2813.1006 - mean_absolute_error: 2813.1006
Epoch 8/100
[1m1078

  X, y = self._initialize(X, y)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 4350.6538 - mean_absolute_error: 4350.6538
Epoch 2/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3423.9944 - mean_absolute_error: 3423.9944
Epoch 3/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 3068.6255 - mean_absolute_error: 3068.6255
Epoch 4/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 2848.1870 - mean_absolute_error: 2848.1870
Epoch 5/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2808.8835 - mean_absolute_error: 2808.8835
Epoch 6/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 2771.6526 - mean_absolute_error: 2771.6526
Epoch 7/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2728.2000 - mean_absolute_error: 2728.2000
Epoch 8/100
[1m1078

  X, y = self._initialize(X, y)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 4350.2153 - mean_absolute_error: 4350.2153
Epoch 2/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3513.6946 - mean_absolute_error: 3513.6946
Epoch 3/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3397.9802 - mean_absolute_error: 3397.9802
Epoch 4/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3157.8557 - mean_absolute_error: 3157.8557
Epoch 5/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2930.7834 - mean_absolute_error: 2930.7834
Epoch 6/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2850.4817 - mean_absolute_error: 2850.4817
Epoch 7/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2762.6201 - mean_absolute_error: 2762.6201
Epoch 8/100
[1m1078

  X, y = self._initialize(X, y)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 4378.6812 - mean_absolute_error: 4378.6812
Epoch 2/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 3496.9143 - mean_absolute_error: 3496.9141
Epoch 3/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3218.3821 - mean_absolute_error: 3218.3818
Epoch 4/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2969.0945 - mean_absolute_error: 2969.0945
Epoch 5/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 2860.0261 - mean_absolute_error: 2860.0261
Epoch 6/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2785.9849 - mean_absolute_error: 2785.9849
Epoch 7/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 2738.1421 - mean_absolute_error: 2738.1421
Epoch 8/100
[1m1078

  X, y = self._initialize(X, y)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 4384.3867 - mean_absolute_error: 4384.3867
Epoch 2/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3438.1030 - mean_absolute_error: 3438.1030
Epoch 3/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3127.6729 - mean_absolute_error: 3127.6729
Epoch 4/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2916.1829 - mean_absolute_error: 2916.1826
Epoch 5/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 2825.7979 - mean_absolute_error: 2825.7979
Epoch 6/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2767.7417 - mean_absolute_error: 2767.7417
Epoch 7/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2733.4807 - mean_absolute_error: 2733.4810
Epoch 8/100
[1m1078

  X, y = self._initialize(X, y)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 4261.8066 - mean_absolute_error: 4261.8066
Epoch 2/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3454.1284 - mean_absolute_error: 3454.1284
Epoch 3/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 3239.2986 - mean_absolute_error: 3239.2986
Epoch 4/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 3037.1504 - mean_absolute_error: 3037.1504
Epoch 5/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 2884.8516 - mean_absolute_error: 2884.8516
Epoch 6/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2798.7773 - mean_absolute_error: 2798.7773
Epoch 7/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 2765.4260 - mean_absolute_error: 2765.4263
Epoch 8/100
[1m1078

  X, y = self._initialize(X, y)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 4293.5303 - mean_absolute_error: 4293.5303
Epoch 2/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3462.7312 - mean_absolute_error: 3462.7314
Epoch 3/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3166.1777 - mean_absolute_error: 3166.1777
Epoch 4/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2967.7485 - mean_absolute_error: 2967.7485
Epoch 5/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 2838.4443 - mean_absolute_error: 2838.4443
Epoch 6/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 2778.7136 - mean_absolute_error: 2778.7136
Epoch 7/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2759.8826 - mean_absolute_error: 2759.8823
Epoch 8/100
[1m1078

  X, y = self._initialize(X, y)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 4253.3867 - mean_absolute_error: 4253.3867
Epoch 2/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 3431.8967 - mean_absolute_error: 3431.8967
Epoch 3/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 3123.4780 - mean_absolute_error: 3123.4780
Epoch 4/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 2929.9270 - mean_absolute_error: 2929.9270
Epoch 5/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 2818.4795 - mean_absolute_error: 2818.4795
Epoch 6/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 2803.0984 - mean_absolute_error: 2803.0984
Epoch 7/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2747.3621 - mean_absolute_error: 2747.3621
Epoch 8/100
[1m1078

In [11]:
# Show the cross-validation summary: mean score first, then its standard
# deviation, one value per line.
print(media, desvio, sep='\n')

-2248.447364606603
38.31267932721663
