## Single Output Regression - Autos Dataset

Dataset: https://www.kaggle.com/datasets/vfsousas/autos

In [1]:
import pandas as pd
import tensorflow as tf
import sklearn
import scikeras

In [2]:
# Show the installed versions of the libraries this notebook relies on.
pd.__version__, tf.__version__, sklearn.__version__, scikeras.__version__

('2.2.2', '2.17.0', '1.5.1', '0.13.0')

In [3]:
import time
from scikeras.wrappers import KerasRegressor
from tensorflow.keras import backend as k
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn import metrics

In [4]:
# Timestamp (seconds since the epoch) taken before the data is loaded and the
# model is trained; it is subtracted from a later timestamp to get the run time.
beginning = time.time()
beginning

1725759793.890099

In [5]:
# Read the autos CSV into a DataFrame, decoding the file as ISO-8859-1.
data = pd.read_csv('../data/autos/autos.csv', encoding='ISO-8859-1')

In [6]:
# Remove the columns that are not used as model inputs. A single drop() call
# creates one new DataFrame instead of one intermediate copy per column.
unused_columns = [
    'dateCrawled',
    'dateCreated',
    'nrOfPictures',
    'postalCode',
    'lastSeen',
    'name',
    'seller',
    'offerType',
]
data = data.drop(columns=unused_columns)

In [7]:
# Keep only the rows whose price lies strictly between 10 and 350000.
price_in_range = (data.price > 10) & (data.price < 350000)
data = data.loc[price_in_range]

In [8]:
# Per-column replacement for missing entries.
# NOTE(review): presumably each column's most frequent category - confirm against the data.
values = dict(
    vehicleType='limousine',
    gearbox='manuell',
    model='golf',
    fuelType='benzin',
    notRepairedDamage='nein',
)
data = data.fillna(value=values)

In [9]:
# Column 0 holds the regression target (y); columns 1 through 11 are the
# predictor variables (X). Selection is positional, so it depends on column order.
predictors = data.iloc[:, 1:12]
target = data.iloc[:, 0]
X = predictors.values
y = target.values

In [10]:
# Inspect the predictor matrix and the target vector.
X, y

(array([['test', 'limousine', 1993, ..., 'benzin', 'volkswagen', 'nein'],
        ['test', 'coupe', 2011, ..., 'diesel', 'audi', 'ja'],
        ['test', 'suv', 2004, ..., 'diesel', 'jeep', 'nein'],
        ...,
        ['test', 'bus', 1996, ..., 'diesel', 'volkswagen', 'nein'],
        ['test', 'kombi', 2002, ..., 'diesel', 'volkswagen', 'nein'],
        ['control', 'limousine', 2013, ..., 'benzin', 'bmw', 'nein']],
       dtype=object),
 array([  480, 18300,  9800, ...,  9200,  3400, 28990], dtype=int64))

In [11]:
# preprocessing 
onehotencoder = ColumnTransformer(transformers=[("OneHot", OneHotEncoder(), [0, 1, 3, 5, 8, 9, 10])], remainder='passthrough')
X = onehotencoder.fit_transform(X).toarray()
X.shape

(359291, 316)

In [12]:
def create_net(meta=None):
    """Build and compile the feed-forward network used for the regression.

    The network has two hidden ReLU layers of 158 units each and a single
    linear output unit. It is compiled with the Adam optimizer and mean
    absolute error as both loss and reported metric.

    Parameters
    ----------
    meta : dict, optional
        Metadata that SciKeras passes to a model-building function declaring
        a ``meta`` parameter. ``meta["n_features_in_"]`` sets the input width,
        so the network follows the number of encoded features automatically.
        When ``meta`` is omitted (direct call) or lacks that key, the width
        falls back to 316, the value that used to be hard-coded.

    Returns
    -------
    The compiled ``Sequential`` model.
    """
    # Reset Keras' global state so models from earlier builds do not pile up.
    k.clear_session()
    n_features = (meta or {}).get("n_features_in_", 316)
    regressor = Sequential([
        # Input(shape=...) replaces InputLayer(input_shape=...): the
        # `input_shape` keyword is deprecated in Keras 3.
        tf.keras.Input(shape=(n_features,)),
        Dense(units=158, activation='relu'),
        Dense(units=158, activation='relu'),
        Dense(units=1, activation='linear'),
    ])
    regressor.compile(loss='mean_absolute_error', optimizer='adam', metrics=['mean_absolute_error'])
    return regressor

In [13]:
# Wrap the network builder in a scikit-learn compatible estimator; each fit
# runs 100 epochs with batches of 300 samples.
regressor = KerasRegressor(
    model=create_net,
    epochs=100,
    batch_size=300,
)

In [14]:
# 10-fold cross-validation is used here (a common choice; scikit-learn's own default is 5 folds).
# Lower `cv` (e.g. to 5) to shorten the run, since the estimator is fitted once per fold.

results = cross_val_score(estimator=regressor, X=X, y=y, cv=10, scoring='neg_mean_absolute_error')






Epoch 1/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 4402.4399 - mean_absolute_error: 4402.4399
Epoch 2/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3485.8892 - mean_absolute_error: 3485.8892
Epoch 3/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3300.0037 - mean_absolute_error: 3300.0037
Epoch 4/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3086.2073 - mean_absolute_error: 3086.2073
Epoch 5/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2934.8198 - mean_absolute_error: 2934.8198
Epoch 6/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2804.9893 - mean_absolute_error: 2804.9893
Epoch 7/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2756.2888 - mean_absolute_error: 2756.2888
Epoch 8/100
[1m1078



Epoch 1/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 4270.7344 - mean_absolute_error: 4270.7344
Epoch 2/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3502.4102 - mean_absolute_error: 3502.4102
Epoch 3/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3244.4670 - mean_absolute_error: 3244.4670
Epoch 4/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3009.3579 - mean_absolute_error: 3009.3579
Epoch 5/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2882.4214 - mean_absolute_error: 2882.4214
Epoch 6/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2784.1729 - mean_absolute_error: 2784.1729
Epoch 7/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2728.1995 - mean_absolute_error: 2728.1995
Epoch 8/100
[1m1078



Epoch 1/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 4355.4648 - mean_absolute_error: 4355.4648
Epoch 2/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3448.0132 - mean_absolute_error: 3448.0132
Epoch 3/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3132.3596 - mean_absolute_error: 3132.3596
Epoch 4/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2952.0339 - mean_absolute_error: 2952.0339
Epoch 5/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2840.7241 - mean_absolute_error: 2840.7241
Epoch 6/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2779.5537 - mean_absolute_error: 2779.5537
Epoch 7/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2745.9189 - mean_absolute_error: 2745.9189
Epoch 8/100
[1m1078



Epoch 1/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 4332.4326 - mean_absolute_error: 4332.4326
Epoch 2/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3404.3601 - mean_absolute_error: 3404.3599
Epoch 3/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3148.8748 - mean_absolute_error: 3148.8748
Epoch 4/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3004.6926 - mean_absolute_error: 3004.6926
Epoch 5/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2916.1545 - mean_absolute_error: 2916.1545
Epoch 6/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2834.2439 - mean_absolute_error: 2834.2439
Epoch 7/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2782.4780 - mean_absolute_error: 2782.4780
Epoch 8/100
[1m1078



Epoch 1/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 4291.5181 - mean_absolute_error: 4291.5181
Epoch 2/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3481.5969 - mean_absolute_error: 3481.5969
Epoch 3/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3286.2310 - mean_absolute_error: 3286.2310
Epoch 4/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2977.6655 - mean_absolute_error: 2977.6655
Epoch 5/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2868.4443 - mean_absolute_error: 2868.4443
Epoch 6/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2786.7429 - mean_absolute_error: 2786.7429
Epoch 7/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2736.9622 - mean_absolute_error: 2736.9624
Epoch 8/100
[1m1078



Epoch 1/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 4525.3657 - mean_absolute_error: 4525.3657
Epoch 2/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3447.6252 - mean_absolute_error: 3447.6252
Epoch 3/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3136.4424 - mean_absolute_error: 3136.4424
Epoch 4/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2933.3513 - mean_absolute_error: 2933.3513
Epoch 5/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2841.4810 - mean_absolute_error: 2841.4810
Epoch 6/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2786.0300 - mean_absolute_error: 2786.0300
Epoch 7/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2740.2561 - mean_absolute_error: 2740.2561
Epoch 8/100
[1m1078



Epoch 1/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 4271.6055 - mean_absolute_error: 4271.6055
Epoch 2/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3472.1064 - mean_absolute_error: 3472.1064
Epoch 3/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3218.2156 - mean_absolute_error: 3218.2156
Epoch 4/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2964.9663 - mean_absolute_error: 2964.9663
Epoch 5/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2863.0852 - mean_absolute_error: 2863.0852
Epoch 6/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2771.2156 - mean_absolute_error: 2771.2156
Epoch 7/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2743.9648 - mean_absolute_error: 2743.9651
Epoch 8/100
[1m1078



Epoch 1/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 4352.6577 - mean_absolute_error: 4352.6577
Epoch 2/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3497.0081 - mean_absolute_error: 3497.0081
Epoch 3/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3193.0449 - mean_absolute_error: 3193.0449
Epoch 4/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2947.8586 - mean_absolute_error: 2947.8586
Epoch 5/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2841.7617 - mean_absolute_error: 2841.7617
Epoch 6/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2800.0339 - mean_absolute_error: 2800.0339
Epoch 7/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2757.7349 - mean_absolute_error: 2757.7349
Epoch 8/100
[1m1078



Epoch 1/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 4367.0615 - mean_absolute_error: 4367.0620
Epoch 2/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3482.5513 - mean_absolute_error: 3482.5513
Epoch 3/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3299.6760 - mean_absolute_error: 3299.6760
Epoch 4/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3127.6899 - mean_absolute_error: 3127.6899
Epoch 5/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2972.6631 - mean_absolute_error: 2972.6631
Epoch 6/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2869.2947 - mean_absolute_error: 2869.2947
Epoch 7/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2802.9753 - mean_absolute_error: 2802.9753
Epoch 8/100
[1m1078



Epoch 1/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 4246.7285 - mean_absolute_error: 4246.7285
Epoch 2/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3404.8875 - mean_absolute_error: 3404.8875
Epoch 3/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3200.7798 - mean_absolute_error: 3200.7798
Epoch 4/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3038.5154 - mean_absolute_error: 3038.5154
Epoch 5/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2905.5063 - mean_absolute_error: 2905.5063
Epoch 6/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2839.7917 - mean_absolute_error: 2839.7917
Epoch 7/100
[1m1078/1078[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2787.2087 - mean_absolute_error: 2787.2087
Epoch 8/100
[1m1078

In [15]:
# Timestamp taken once cross-validation has finished.
end = time.time()

In [16]:
# Elapsed time between `beginning` and `end`, converted from seconds to hours.
(end - beginning)/60/60

0.5629446274704403

In [17]:
# Scores were computed with the 'neg_mean_absolute_error' scorer, i.e. as negated
# errors; abs() displays the per-fold mean absolute error as positive numbers.
abs(results)

array([2257.31140453, 2238.62131299, 2288.52792696, 2294.02055023,
       2153.5439918 , 2269.15986366, 2215.93252983, 2320.64426387,
       2192.66446237, 2167.28911678])

In [18]:
# Mean and standard deviation of the per-fold scores, shown as positive numbers.
abs(results.mean()), abs(results.std())

(2239.771542300928, 53.394239573946834)