# Regressão - Base de Automóveis

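Base de dados: arquivo local `autos.csv` (anúncios de carros usados). Atenção: o link informado originalmente, https://archive.ics.uci.edu/dataset/53/iris, aponta para a base Iris e não corresponde aos dados usados neste notebook.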

## Importando bibliotecas

In [5]:
!pip install -q tensorflow==2.16.1

In [6]:
# `os` is imported only to set this variable: per the original note, it works
# around a TensorFlow error. Setting CUDA_VISIBLE_DEVICES to "-1" leaves no GPU
# visible to the process; this cell runs before TensorFlow is imported below.
import os
os.environ.update(CUDA_VISIBLE_DEVICES="-1")

In [7]:
import pandas as pd
import tensorflow as tf
import sklearn

In [8]:
# Show the pandas / TensorFlow / scikit-learn versions in use, as a tuple.
tuple(lib.__version__ for lib in (pd, tf, sklearn))

('2.2.2', '2.16.1', '1.4.2')

In [9]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

## Importando base de dados

In [59]:
# Load the car listings from the local CSV, decoding the file as ISO-8859-1.
base = pd.read_csv('autos.csv', encoding='ISO-8859-1')

In [60]:
# Preview the first five rows of the raw data (head() defaults to 5 rows).
base.head()

Unnamed: 0,dateCrawled,name,seller,offerType,price,abtest,vehicleType,yearOfRegistration,gearbox,powerPS,model,kilometer,monthOfRegistration,fuelType,brand,notRepairedDamage,dateCreated,nrOfPictures,postalCode,lastSeen
0,2016-03-24 11:52:17,Golf_3_1.6,privat,Angebot,480,test,,1993,manuell,0,golf,150000,0,benzin,volkswagen,,2016-03-24 00:00:00,0,70435,2016-04-07 03:16:57
1,2016-03-24 10:58:45,A5_Sportback_2.7_Tdi,privat,Angebot,18300,test,coupe,2011,manuell,190,,125000,5,diesel,audi,ja,2016-03-24 00:00:00,0,66954,2016-04-07 01:46:50
2,2016-03-14 12:52:21,"Jeep_Grand_Cherokee_""Overland""",privat,Angebot,9800,test,suv,2004,automatik,163,grand,125000,8,diesel,jeep,,2016-03-14 00:00:00,0,90480,2016-04-05 12:47:46
3,2016-03-17 16:54:04,GOLF_4_1_4__3TÜRER,privat,Angebot,1500,test,kleinwagen,2001,manuell,75,golf,150000,6,benzin,volkswagen,nein,2016-03-17 00:00:00,0,91074,2016-03-17 17:40:17
4,2016-03-31 17:25:20,Skoda_Fabia_1.4_TDI_PD_Classic,privat,Angebot,3600,test,kleinwagen,2008,manuell,69,fabia,90000,7,diesel,skoda,nein,2016-03-31 00:00:00,0,60437,2016-04-06 10:17:21


## Tratando base de dados

### Apagando dados não relevantes

In [61]:
# Remove the crawl/listing metadata columns in a single call; they are treated
# as irrelevant for predicting the price.
base = base.drop(
    columns=['dateCrawled', 'dateCreated', 'nrOfPictures', 'lastSeen', 'postalCode'],
)

In [63]:
# Preview the first five rows after the metadata columns were dropped.
base.head()

Unnamed: 0,name,seller,offerType,price,abtest,vehicleType,yearOfRegistration,gearbox,powerPS,model,kilometer,monthOfRegistration,fuelType,brand,notRepairedDamage
0,Golf_3_1.6,privat,Angebot,480,test,,1993,manuell,0,golf,150000,0,benzin,volkswagen,
1,A5_Sportback_2.7_Tdi,privat,Angebot,18300,test,coupe,2011,manuell,190,,125000,5,diesel,audi,ja
2,"Jeep_Grand_Cherokee_""Overland""",privat,Angebot,9800,test,suv,2004,automatik,163,grand,125000,8,diesel,jeep,
3,GOLF_4_1_4__3TÜRER,privat,Angebot,1500,test,kleinwagen,2001,manuell,75,golf,150000,6,benzin,volkswagen,nein
4,Skoda_Fabia_1.4_TDI_PD_Classic,privat,Angebot,3600,test,kleinwagen,2008,manuell,69,fabia,90000,7,diesel,skoda,nein


In [64]:
# (rows, columns) after dropping the five metadata columns.
base.shape

(371528, 15)

In [65]:
# Frequency of each distinct value in 'name'.
base['name'].value_counts()

name
Ford_Fiesta                                              657
BMW_318i                                                 627
Opel_Corsa                                               622
Volkswagen_Golf_1.4                                      603
BMW_316i                                                 523
                                                        ... 
Audi_A4_Avant_Klima_Gruene_Plakette_TÜV_&AU_NEU_XENON      1
Renault_clio_in_gold_450VB_!!                              1
Fiat_Doblo_1.6_Multijet                                    1
Renault_Laguna_1                                           1
BMW_M135i_vollausgestattet_NP_52.720____Euro               1
Name: count, Length: 233531, dtype: int64

Como a coluna `name` tem 233531 valores distintos (a grande maioria aparece uma única vez), podemos concluir que ela é o título do anúncio; sendo assim, o algoritmo não irá se adaptar bem a ela. Podemos excluir esta coluna e usar a coluna `brand` como referência.

In [66]:
# Drop the 'name' column.
base = base.drop(columns='name')

In [67]:
# Frequency of each seller type.
base['seller'].value_counts()

seller
privat        371525
gewerblich         3
Name: count, dtype: int64

Como praticamente todos os registros são de vendedores privados (`privat`), esta coluna é irrelevante para o algoritmo.

In [68]:
# Drop the 'seller' column.
base = base.drop(columns='seller')

In [69]:
# Frequency of each offer type.
base['offerType'].value_counts()

offerType
Angebot    371516
Gesuch         12
Name: count, dtype: int64

Como praticamente todos os registros são do tipo oferta (`Angebot`), esta coluna é irrelevante para o algoritmo.

In [70]:
# Drop the 'offerType' column.
base = base.drop(columns='offerType')

In [71]:
# Display the frame after the column removals above.
base

Unnamed: 0,price,abtest,vehicleType,yearOfRegistration,gearbox,powerPS,model,kilometer,monthOfRegistration,fuelType,brand,notRepairedDamage
0,480,test,,1993,manuell,0,golf,150000,0,benzin,volkswagen,
1,18300,test,coupe,2011,manuell,190,,125000,5,diesel,audi,ja
2,9800,test,suv,2004,automatik,163,grand,125000,8,diesel,jeep,
3,1500,test,kleinwagen,2001,manuell,75,golf,150000,6,benzin,volkswagen,nein
4,3600,test,kleinwagen,2008,manuell,69,fabia,90000,7,diesel,skoda,nein
...,...,...,...,...,...,...,...,...,...,...,...,...
371523,2200,test,,2005,,0,,20000,1,,sonstige_autos,
371524,1199,test,cabrio,2000,automatik,101,fortwo,125000,3,benzin,smart,nein
371525,9200,test,bus,1996,manuell,102,transporter,150000,3,diesel,volkswagen,nein
371526,3400,test,kombi,2002,manuell,100,golf,150000,6,diesel,volkswagen,


### Análise de valores inconsistentes

In [72]:
# Inspect listings whose price is at most 10.
base[base['price'].le(10)]

Unnamed: 0,price,abtest,vehicleType,yearOfRegistration,gearbox,powerPS,model,kilometer,monthOfRegistration,fuelType,brand,notRepairedDamage
7,0,test,limousine,1980,manuell,50,andere,40000,7,benzin,volkswagen,nein
40,0,test,,1990,,0,corsa,150000,1,benzin,opel,
60,1,control,suv,1994,manuell,286,,150000,11,,sonstige_autos,
91,1,control,limousine,1995,manuell,113,e_klasse,150000,4,diesel,mercedes_benz,nein
115,0,test,,2017,manuell,0,golf,5000,12,benzin,volkswagen,
...,...,...,...,...,...,...,...,...,...,...,...,...
371356,0,control,,2000,manuell,65,corsa,150000,0,,opel,ja
371392,0,test,kleinwagen,2002,manuell,60,fiesta,150000,3,benzin,ford,
371402,0,control,kleinwagen,1999,manuell,53,swift,150000,3,benzin,suzuki,
371431,0,control,kleinwagen,1999,manuell,37,arosa,150000,7,benzin,seat,ja


Os prices acima podem ter sido extraídos de forma equivocada; poderíamos substituir estes registros pela média geral de prices. **Porém**, como são mais de 12 mil registros, isso pode não ser bom; o ideal é deletá-los. **Valores menores ou iguais a 10**

In [73]:
# Mean price over all rows, computed before any price-based filtering.
base['price'].mean()

17295.14186548524

In [74]:
# (rows, columns) before removing the low-price rows.
base.shape

(371528, 12)

In [75]:
# Keep only listings priced strictly above 10.
base = base.loc[base['price'].gt(10)]

In [76]:
# (rows, columns) after removing the low-price rows.
base.shape

(359410, 12)

In [77]:
# Inspect listings priced above 350000.
base[base['price'].gt(350000)]

Unnamed: 0,price,abtest,vehicleType,yearOfRegistration,gearbox,powerPS,model,kilometer,monthOfRegistration,fuelType,brand,notRepairedDamage
1846,579000,control,coupe,1980,manuell,277,andere,20000,12,benzin,bmw,nein
10649,420000,control,coupe,2004,manuell,483,911,50000,4,benzin,porsche,nein
14663,11111111,control,coupe,2003,manuell,64,polo,150000,2,benzin,volkswagen,
16889,1000000,control,kombi,1998,,0,mondeo,150000,0,benzin,ford,ja
20143,1250000,test,coupe,2016,manuell,500,911,5000,3,benzin,porsche,nein
...,...,...,...,...,...,...,...,...,...,...,...,...
364171,3890000,test,coupe,2006,,799,,5000,7,,sonstige_autos,nein
365461,599000,control,coupe,1980,manuell,377,andere,5000,3,benzin,bmw,nein
366653,99999999,control,cabrio,1996,manuell,192,3er,150000,0,,bmw,
366861,3895000,test,coupe,2006,,799,,5000,4,benzin,sonstige_autos,nein


Os prices acima também parecem ter sido extraídos ou informados de forma equivocada. Como são poucos registros (pouco mais de uma centena) e os valores são claramente irreais, o ideal é deletá-los em vez de substituí-los pela média geral de prices. **Valores acima de 350k**

In [78]:
# Remove only the listings priced above 350000, i.e. exactly the rows flagged by
# the `price > 350000` inspection. The previous `< 350000` condition also
# discarded rows priced exactly 350000, which were never flagged as inconsistent.
base = base.loc[base['price'] <= 350000]

In [79]:
# (rows, columns) after removing the extreme high prices.
base.shape

(359291, 12)

### Valores faltantes

In [80]:
# Rows where 'vehicleType' is missing.
base[base['vehicleType'].isna()]

Unnamed: 0,price,abtest,vehicleType,yearOfRegistration,gearbox,powerPS,model,kilometer,monthOfRegistration,fuelType,brand,notRepairedDamage
0,480,test,,1993,manuell,0,golf,150000,0,benzin,volkswagen,
16,300,test,,2016,,60,polo,150000,0,benzin,volkswagen,
22,2900,test,,2018,manuell,90,meriva,150000,5,benzin,opel,nein
26,5555,control,,2017,manuell,125,c4,125000,4,,citroen,nein
31,899,control,,2016,manuell,60,clio,150000,6,benzin,renault,
...,...,...,...,...,...,...,...,...,...,...,...,...
371495,180,control,,1995,,0,,125000,3,benzin,opel,
371504,2600,control,,2005,automatik,0,c_klasse,150000,9,,mercedes_benz,
371509,1900,test,,2000,manuell,110,,150000,7,,volkswagen,nein
371519,5250,control,,2016,automatik,150,159,150000,12,,alfa_romeo,nein


Em valores numéricos faltantes, é comum substituir os faltantes pela média. No caso de dados categóricos, como `vehicleType`, pode-se preencher os NaN com a moda.

In [81]:
# Frequency of each vehicle type (NaN entries are not listed in the output).
base['vehicleType'].value_counts()

vehicleType
limousine     93614
kleinwagen    78014
kombi         65921
bus           29699
cabrio        22509
coupe         18386
suv           14477
andere         3125
Name: count, dtype: int64

In [82]:
# Most frequent vehicle type, used as the fill value for the missing entries.
base['vehicleType'].mode()

0    limousine
Name: vehicleType, dtype: object

In [83]:
# Rows where 'gearbox' is missing.
base[base['gearbox'].isna()]

Unnamed: 0,price,abtest,vehicleType,yearOfRegistration,gearbox,powerPS,model,kilometer,monthOfRegistration,fuelType,brand,notRepairedDamage
15,450,test,kleinwagen,1910,,0,ka,5000,0,benzin,ford,
16,300,test,,2016,,60,polo,150000,0,benzin,volkswagen,
32,245,test,limousine,1994,,0,golf,150000,2,benzin,volkswagen,nein
37,1500,test,,2016,,0,kangoo,150000,1,diesel,renault,nein
70,1200,test,coupe,2001,,0,astra,150000,0,,opel,
...,...,...,...,...,...,...,...,...,...,...,...,...
371443,3300,control,kombi,2006,,0,touran,150000,7,diesel,volkswagen,
371460,3500,control,,1995,,0,polo,150000,0,,volkswagen,
371486,350,control,kleinwagen,1996,,65,punto,150000,0,,fiat,
371495,180,control,,1995,,0,,125000,3,benzin,opel,


In [84]:
# Frequency of each gearbox type.
base['gearbox'].value_counts()

gearbox
manuell      266547
automatik     75508
Name: count, dtype: int64

In [85]:
# Rows where 'model' is missing.
base[base['model'].isna()]

Unnamed: 0,price,abtest,vehicleType,yearOfRegistration,gearbox,powerPS,model,kilometer,monthOfRegistration,fuelType,brand,notRepairedDamage
1,18300,test,coupe,2011,manuell,190,,125000,5,diesel,audi,ja
83,350,control,kleinwagen,1997,manuell,54,,150000,3,,fiat,ja
139,1450,control,limousine,1992,manuell,136,,150000,0,,audi,nein
156,6799,control,kleinwagen,2009,,60,,20000,5,benzin,volkswagen,nein
165,500,control,kleinwagen,1999,manuell,0,,150000,0,benzin,renault,nein
...,...,...,...,...,...,...,...,...,...,...,...,...
371399,560,control,kleinwagen,2001,automatik,170,,90000,0,benzin,fiat,ja
371476,9400,control,kombi,2007,manuell,200,,150000,4,diesel,sonstige_autos,ja
371495,180,control,,1995,,0,,125000,3,benzin,opel,
371509,1900,test,,2000,manuell,110,,150000,7,,volkswagen,nein


In [86]:
# Frequency of each car model.
base['model'].value_counts()

model
golf               28989
andere             25560
3er                19905
polo               12604
corsa              12149
                   ...  
serie_2                8
rangerover             6
serie_3                3
serie_1                1
discovery_sport        1
Name: count, Length: 251, dtype: int64

In [87]:
# Rows where 'fuelType' is missing.
base[base['fuelType'].isna()]

Unnamed: 0,price,abtest,vehicleType,yearOfRegistration,gearbox,powerPS,model,kilometer,monthOfRegistration,fuelType,brand,notRepairedDamage
9,999,test,kleinwagen,1998,manuell,101,golf,150000,0,,volkswagen,
13,2500,control,kombi,2004,manuell,131,passat,150000,2,,volkswagen,nein
26,5555,control,,2017,manuell,125,c4,125000,4,,citroen,nein
36,1600,control,andere,1991,manuell,75,kadett,70000,0,,opel,
41,7500,control,limousine,2002,automatik,306,e_klasse,150000,4,,mercedes_benz,
...,...,...,...,...,...,...,...,...,...,...,...,...
371496,3850,test,cabrio,2006,manuell,108,2_reihe,125000,2,,peugeot,nein
371504,2600,control,,2005,automatik,0,c_klasse,150000,9,,mercedes_benz,
371509,1900,test,,2000,manuell,110,,150000,7,,volkswagen,nein
371519,5250,control,,2016,automatik,150,159,150000,12,,alfa_romeo,nein


In [88]:
# Frequency of each fuel type.
base['fuelType'].value_counts()

fuelType
benzin     217582
diesel     106002
lpg          5222
cng           557
hybrid        271
andere        165
elektro       101
Name: count, dtype: int64

In [89]:
# Rows where 'notRepairedDamage' is missing.
base[base['notRepairedDamage'].isna()]

Unnamed: 0,price,abtest,vehicleType,yearOfRegistration,gearbox,powerPS,model,kilometer,monthOfRegistration,fuelType,brand,notRepairedDamage
0,480,test,,1993,manuell,0,golf,150000,0,benzin,volkswagen,
2,9800,test,suv,2004,automatik,163,grand,125000,8,diesel,jeep,
8,14500,control,bus,2014,manuell,125,c_max,30000,8,benzin,ford,
9,999,test,kleinwagen,1998,manuell,101,golf,150000,0,,volkswagen,
12,999,control,kombi,1995,manuell,115,passat,150000,11,benzin,volkswagen,
...,...,...,...,...,...,...,...,...,...,...,...,...
371507,5999,test,kombi,2005,manuell,140,a4,150000,4,diesel,audi,
371514,999,control,cabrio,2000,manuell,95,megane,150000,4,benzin,renault,
371515,1690,test,kombi,2004,manuell,55,fabia,150000,4,benzin,skoda,
371523,2200,test,,2005,,0,,20000,1,,sonstige_autos,


In [90]:
# Frequency of each 'notRepairedDamage' value.
base['notRepairedDamage'].value_counts()

notRepairedDamage
nein    259301
ja       34004
Name: count, dtype: int64

In [91]:
# Fill value per categorical column with gaps: the most frequent category of
# each column according to the value_counts outputs above.
valores = dict(
    vehicleType='limousine',
    gearbox='manuell',
    model='golf',
    fuelType='benzin',
    notRepairedDamage='nein',
)

In [92]:
# Replace the NaNs of each column listed in `valores` with its chosen category.
base = base.fillna(valores)

In [93]:
# Display the frame after the missing categorical values were filled.
base

Unnamed: 0,price,abtest,vehicleType,yearOfRegistration,gearbox,powerPS,model,kilometer,monthOfRegistration,fuelType,brand,notRepairedDamage
0,480,test,limousine,1993,manuell,0,golf,150000,0,benzin,volkswagen,nein
1,18300,test,coupe,2011,manuell,190,golf,125000,5,diesel,audi,ja
2,9800,test,suv,2004,automatik,163,grand,125000,8,diesel,jeep,nein
3,1500,test,kleinwagen,2001,manuell,75,golf,150000,6,benzin,volkswagen,nein
4,3600,test,kleinwagen,2008,manuell,69,fabia,90000,7,diesel,skoda,nein
...,...,...,...,...,...,...,...,...,...,...,...,...
371523,2200,test,limousine,2005,manuell,0,golf,20000,1,benzin,sonstige_autos,nein
371524,1199,test,cabrio,2000,automatik,101,fortwo,125000,3,benzin,smart,nein
371525,9200,test,bus,1996,manuell,102,transporter,150000,3,diesel,volkswagen,nein
371526,3400,test,kombi,2002,manuell,100,golf,150000,6,diesel,volkswagen,nein


In [94]:
# Element-wise missing-value mask: True wherever a cell is NaN.
base.isnull()

Unnamed: 0,price,abtest,vehicleType,yearOfRegistration,gearbox,powerPS,model,kilometer,monthOfRegistration,fuelType,brand,notRepairedDamage
0,False,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
371523,False,False,False,False,False,False,False,False,False,False,False,False
371524,False,False,False,False,False,False,False,False,False,False,False,False
371525,False,False,False,False,False,False,False,False,False,False,False,False
371526,False,False,False,False,False,False,False,False,False,False,False,False


In [95]:
# Number of missing values remaining in each column.
base.isna().sum()

price                  0
abtest                 0
vehicleType            0
yearOfRegistration     0
gearbox                0
powerPS                0
model                  0
kilometer              0
monthOfRegistration    0
fuelType               0
brand                  0
notRepairedDamage      0
dtype: int64

In [96]:
# Column order; the positional slices used to build X and y rely on it.
base.columns

Index(['price', 'abtest', 'vehicleType', 'yearOfRegistration', 'gearbox',
       'powerPS', 'model', 'kilometer', 'monthOfRegistration', 'fuelType',
       'brand', 'notRepairedDamage'],
      dtype='object')

In [99]:
# Predictors: the eleven columns after 'price' (positions 1 to 11) as a NumPy array.
X = base.iloc[:, 1:12].to_numpy()
X

array([['test', 'limousine', 1993, ..., 'benzin', 'volkswagen', 'nein'],
       ['test', 'coupe', 2011, ..., 'diesel', 'audi', 'ja'],
       ['test', 'suv', 2004, ..., 'diesel', 'jeep', 'nein'],
       ...,
       ['test', 'bus', 1996, ..., 'diesel', 'volkswagen', 'nein'],
       ['test', 'kombi', 2002, ..., 'diesel', 'volkswagen', 'nein'],
       ['control', 'limousine', 2013, ..., 'benzin', 'bmw', 'nein']],
      dtype=object)

In [102]:
# Target: the 'price' column, which sits at position 0.
y = base['price'].to_numpy()
y

array([  480, 18300,  9800, ...,  9200,  3400, 28990])

### Atributos categóricos

In [103]:
# Frequency of each brand.
base['brand'].value_counts()

brand
volkswagen        76755
bmw               38982
opel              38557
mercedes_benz     34498
audi              31884
ford              24761
renault           17367
peugeot           10777
fiat               9339
seat               6815
skoda              5572
mazda              5529
smart              5159
citroen            5038
nissan             4893
toyota             4616
hyundai            3585
mini               3354
sonstige_autos     3331
volvo              3229
mitsubishi         2956
honda              2726
kia                2494
suzuki             2274
alfa_romeo         2250
porsche            2154
chevrolet          1791
chrysler           1373
dacia               889
jeep                784
daihatsu            774
land_rover          759
subaru              746
jaguar              603
daewoo              530
saab                516
trabant             489
lancia              467
rover               462
lada                213
Name: count, dtype: int64

In [104]:
# One-hot encode the categorical columns of X, addressed by position:
# 0 abtest, 1 vehicleType, 3 gearbox, 5 model, 8 fuelType, 9 brand,
# 10 notRepairedDamage. The remaining (numeric) columns are passed through unchanged.
onehotencoder = ColumnTransformer(
    transformers=[("OneHot", OneHotEncoder(), [0, 1, 3, 5, 8, 9, 10])],
    remainder='passthrough',
)

In [105]:
# Fit the encoder on X, then densify the transformed matrix.
# NOTE: X is overwritten, so this cell must not be re-run without first
# re-running the cell that builds X from `base`.
X = onehotencoder.fit_transform(X)
X = X.toarray()
X

array([[0.00e+00, 1.00e+00, 0.00e+00, ..., 0.00e+00, 1.50e+05, 0.00e+00],
       [0.00e+00, 1.00e+00, 0.00e+00, ..., 1.90e+02, 1.25e+05, 5.00e+00],
       [0.00e+00, 1.00e+00, 0.00e+00, ..., 1.63e+02, 1.25e+05, 8.00e+00],
       ...,
       [0.00e+00, 1.00e+00, 0.00e+00, ..., 1.02e+02, 1.50e+05, 3.00e+00],
       [0.00e+00, 1.00e+00, 0.00e+00, ..., 1.00e+02, 1.50e+05, 6.00e+00],
       [1.00e+00, 0.00e+00, 0.00e+00, ..., 3.20e+02, 5.00e+04, 8.00e+00]])

In [106]:
# (rows, columns) of the cleaned frame, for comparison with the encoded matrix.
base.shape

(359291, 12)

In [107]:
# (rows, features) of the one-hot encoded matrix.
X.shape

(359291, 316)

## Estrutura da rede neural

In [108]:
# Rule of thumb used for the hidden-layer width: (inputs + outputs) / 2.
# The number of inputs is read from the encoded matrix instead of being
# hard-coded; there is a single output (the price).
(X.shape[1] + 1) / 2

158.5

In [109]:
# Size the network from the data rather than from hard-coded numbers, so it
# keeps working if the encoded feature count changes.
n_entradas = X.shape[1]            # number of input features
n_ocultos = (n_entradas + 1) // 2  # (inputs + outputs) / 2, rounded down

# Two ReLU hidden layers and a single linear output unit for the price.
regressor = Sequential([
    tf.keras.layers.InputLayer(shape=(n_entradas,)),
    Dense(units=n_ocultos, activation='relu'),
    Dense(units=n_ocultos, activation='relu'),
    Dense(units=1, activation='linear'),
])

In [110]:
# Print the layer-by-layer summary of the model.
regressor.summary()

In [111]:
# Optimise mean absolute error with Adam; the same quantity is also tracked as a metric.
regressor.compile(
    optimizer='adam',
    loss='mean_absolute_error',
    metrics=['mean_absolute_error'],
)

In [112]:
# Train on the full dataset (no validation data is passed) for 100 epochs
# with mini-batches of 300 rows.
regressor.fit(X, y, epochs=100, batch_size=300)

Epoch 1/100


2024-11-25 19:04:07.929526: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 454143824 exceeds 10% of free system memory.


[1m1198/1198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 5ms/step - loss: 4282.7051 - mean_absolute_error: 4282.7051
Epoch 2/100
[1m1198/1198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 6ms/step - loss: 3436.4104 - mean_absolute_error: 3436.4104
Epoch 3/100
[1m1198/1198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 6ms/step - loss: 3207.3306 - mean_absolute_error: 3207.3303
Epoch 4/100
[1m1198/1198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - loss: 3022.9973 - mean_absolute_error: 3022.9973
Epoch 5/100
[1m1198/1198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - loss: 2930.4392 - mean_absolute_error: 2930.4392
Epoch 6/100
[1m1198/1198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 6ms/step - loss: 2836.2195 - mean_absolute_error: 2836.2195
Epoch 7/100
[1m1198/1198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - loss: 2783.9683 - mean_absolute_error: 2783.9683
Epoch 8/100
[1m1198/1198[0m [

<keras.src.callbacks.history.History at 0x74c483f83770>

In [113]:
# (rows, features) of the matrix passed to predict below.
X.shape

(359291, 316)

In [114]:
# Predict prices for the same rows the model was trained on.
previsoes = regressor.predict(X)

[1m  135/11228[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m8s[0m 759us/step

2024-11-25 19:11:10.850909: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 454143824 exceeds 10% of free system memory.


[1m11228/11228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 762us/step


In [115]:
# Predicted prices, one row per listing.
previsoes

array([[ 1193.2343],
       [11197.414 ],
       [13533.362 ],
       ...,
       [ 6754.1504],
       [ 3316.8958],
       [28320.107 ]], dtype=float32)

In [116]:
# Actual prices, for side-by-side comparison with the predictions.
y

array([  480, 18300,  9800, ...,  9200,  3400, 28990])

In [117]:
# Mean of the actual prices.
y.mean()

5916.833945186492

In [118]:
# Mean of the predicted prices.
previsoes.mean()

5841.773

## Validação Cruzada