# Project 9: Cross Validation

# Step 1 - Libraries

In [1]:
!pip install skorch



In [2]:
import pandas as pd
import torch.nn as nn        
from skorch import NeuralNetRegressor
import torch
from sklearn.model_selection import cross_val_score
import time
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
torch.__version__

'2.3.0+cu121'

# Step 2 - Dataset

In [3]:
# Seed torch's global random number generator before anything else runs.
torch.manual_seed(123)

# Read the raw listings and remove, in one call, every column that is
# discarded before the feature matrix is built.
unused_columns = ['dateCrawled', 'dateCreated', 'nrOfPictures', 'postalCode',
                  'lastSeen', 'name', 'seller', 'offerType']
df = pd.read_csv('Datasets/autos.csv', encoding='ISO-8859-1').drop(columns=unused_columns)

# Keep only the rows whose price lies strictly between 10 and 350000.
df = df[(df.price > 10) & (df.price < 350000)]

# Fill missing entries in the listed columns with one fixed value per column.
values = {
    'vehicleType': 'limousine',
    'gearbox': 'manuell',
    'model': 'golf',
    'fuelType': 'benzin',
    'notRepairedDamage': 'nein',
}
df = df.fillna(value=values)

# Column 0 is the regression target (as a column vector); the columns after it
# are the predictors.
real_price = df.iloc[:, 0].values.reshape(-1, 1)
forecasters = df.iloc[:, 1:13].values

# One-hot encode the predictors at these positions (indices are relative to
# `forecasters`, not to `df`); every other column is passed through unchanged.
categorical_positions = [0, 1, 3, 5, 8, 9, 10]
onehotencoder = ColumnTransformer(
    transformers=[("OneHot", OneHotEncoder(), categorical_positions)],
    remainder='passthrough',
)
forecasters = onehotencoder.fit_transform(forecasters).toarray()


In [4]:
# Cast both the feature matrix and the target vector to 32-bit floats.
forecasters, real_price = (
    array.astype('float32') for array in (forecasters, real_price)
)

# Step 3 - Model building

In [5]:
class torch_regressor(nn.Module):
    """Feed-forward regression network with two hidden ReLU layers.

    Architecture: ``n_features -> n_hidden -> n_hidden -> 1``. The output
    layer has no activation, so the network emits an unbounded real value
    per input row.

    Args:
        n_features: Width of the input layer, i.e. the number of columns in
            the feature matrix. Defaults to 316, the value this notebook
            originally hard-coded.
        n_hidden: Number of units in each of the two hidden layers.
            Defaults to 158, the value this notebook originally hard-coded.
    """

    def __init__(self, n_features=316, n_hidden=158):
        super().__init__()

        # Layers are created in the same order as before so that, for the
        # default sizes, parameter initialisation consumes the RNG identically.
        self.dense0 = nn.Linear(n_features, n_hidden)
        self.dense1 = nn.Linear(n_hidden, n_hidden)
        self.dense2 = nn.Linear(n_hidden, 1)
        self.activation = nn.ReLU()

    def forward(self, X):
        """Run a forward pass.

        Args:
            X: Tensor whose last dimension equals ``n_features``.

        Returns:
            Tensor with the same leading dimensions as ``X`` and a last
            dimension of size 1.
        """
        X = self.dense0(X)
        X = self.activation(X)
        X = self.dense1(X)
        X = self.activation(X)
        X = self.dense2(X)

        return X
    
    

In [6]:
# Wrap the torch module in skorch's scikit-learn-style regressor: L1 loss,
# Adam optimiser, 5 epochs with mini-batches of 300 rows.
# train_split=False: no internal validation split is requested from skorch.
sklearn_regressor = NeuralNetRegressor(
    module=torch_regressor,
    criterion=nn.L1Loss,
    optimizer=torch.optim.Adam,
    max_epochs=5,
    batch_size=300,
    train_split=False,
)

In [7]:
# 5-fold cross-validation of the wrapped network. The scorer is the negated
# mean absolute error, so each entry of `results` is <= 0.
results = cross_val_score(
    estimator=sklearn_regressor,
    X=forecasters,
    y=real_price,
    scoring='neg_mean_absolute_error',
    cv=5,
)

  epoch    train_loss      dur
-------  ------------  -------
      1     [36m3597.2495[0m  13.5989
      2     [36m2986.3451[0m  11.2029
      3     [36m2867.4092[0m  10.6106
      4     [36m2812.8217[0m  9.1390
      5     [36m2762.9784[0m  8.9674
  epoch    train_loss      dur
-------  ------------  -------
      1     [36m3611.5526[0m  12.9866
      2     [36m3034.2670[0m  13.9211
      3     [36m2919.6809[0m  12.6361
      4     [36m2805.2190[0m  11.4106
      5     [36m2766.0469[0m  11.1422
  epoch    train_loss     dur
-------  ------------  ------
      1     [36m3584.1112[0m  9.0174
      2     [36m3004.4322[0m  12.6906
      3     [36m2887.9448[0m  11.9188
      4     [36m2805.6345[0m  11.9294
      5     [36m2803.3715[0m  17.2611
  epoch    train_loss      dur
-------  ------------  -------
      1     [36m3605.8809[0m  15.1826
      2     [36m3002.0815[0m  13.8799
      3     [36m2882.4005[0m  9.3142
      4     [36m2837.8237[0m  13.740

In [9]:
# Summarise the per-fold scores by their average and their spread.
mean, std = results.mean(), results.std()

for label, value in (('Mean:', mean), ('Std:', std)):
    print(label, value)

Mean: -2891.467919921875
Std: 128.18281714206665
