# Project 10 - Parameter tuning

# Step 1 - Libraries

In [2]:
import pandas as pd
import torch.nn as nn        
from skorch import NeuralNetRegressor
import torch
from sklearn.model_selection import cross_val_score
import time
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
torch.__version__
from sklearn.model_selection import GridSearchCV


# Step 2 - Data

In [3]:
# Seed torch's RNG first so the runs below start from a fixed state.
torch.manual_seed(123)

# Columns discarded right after loading; none of them is used further down.
unused_columns = ['dateCrawled', 'dateCreated', 'nrOfPictures', 'postalCode',
                  'lastSeen', 'name', 'seller', 'offerType']
df = (
    pd.read_csv('Datasets/autos.csv', encoding='ISO-8859-1')
    .drop(columns=unused_columns)
)

# Keep only rows whose price lies strictly between 10 and 350000.
df = df[(df.price > 10) & (df.price < 350000)]

# Replacement value for missing entries, per column.
values = {
    'vehicleType': 'limousine',
    'gearbox': 'manuell',
    'model': 'golf',
    'fuelType': 'benzin',
    'notRepairedDamage': 'nein',
}
df = df.fillna(value=values)

# Column 0 becomes the regression target, the following columns the inputs.
# NOTE(review): assumes the first remaining column is `price` - confirm
# against the CSV layout.
real_price = df.iloc[:, 0].values.reshape(-1, 1)
forecasters = df.iloc[:, 1:13].values

# Positions (within `forecasters`) that get one-hot encoded; every other
# column is passed through unchanged.
categorical_positions = [0, 1, 3, 5, 8, 9, 10]
onehotencoder = ColumnTransformer(
    transformers=[("OneHot", OneHotEncoder(), categorical_positions)],
    remainder='passthrough',
)
encoded = onehotencoder.fit_transform(forecasters)
forecasters = encoded.toarray()


In [4]:
# Cast both arrays to float32 before they are handed to the network.
forecasters, real_price = (
    array.astype('float32') for array in (forecasters, real_price)
)

# Step 3 - Model

In [5]:
class torch_regressor(nn.Module):
    """Feed-forward regressor: two ReLU hidden layers and one linear output unit.

    The layer widths used to be hard-coded (316 inputs, 158 hidden units),
    which ties the network to one specific encoded feature matrix. They are
    now constructor arguments whose defaults reproduce the previous
    architecture, so ``torch_regressor()`` builds exactly the same network.

    Args:
        n_features: Number of input features per sample. Defaults to 316.
        n_hidden: Number of units in each of the two hidden layers.
            Defaults to 158.
    """

    def __init__(self, n_features=316, n_hidden=158):
        super().__init__()

        # Attribute names are kept as-is so existing state_dict keys still match.
        self.dense0 = nn.Linear(n_features, n_hidden)
        self.dense1 = nn.Linear(n_hidden, n_hidden)
        self.dense2 = nn.Linear(n_hidden, 1)
        self.activation = nn.ReLU()

    def forward(self, X):
        """Map a batch ``X`` of shape (batch, n_features) to shape (batch, 1)."""
        X = self.dense0(X)
        X = self.activation(X)
        X = self.dense1(X)
        X = self.activation(X)
        # No activation on the last layer: the output is an unbounded real value.
        X = self.dense2(X)

        return X
    
    

In [6]:
# Wrap the torch module in skorch's scikit-learn compatible regressor so it
# can be handed to scikit-learn tooling such as GridSearchCV.
sklearn_regressor = NeuralNetRegressor(
    module=torch_regressor,
    criterion=nn.L1Loss,          # baseline loss; the grid search overrides it
    optimizer=torch.optim.Adam,
    max_epochs=5,
    batch_size=300,
    train_split=False,            # no internal validation split requested
)

# Step 4 - Tuning

In [10]:
# Loss functions to compare; each one is evaluated with 5-fold cross-validation.
candidate_losses = [nn.MSELoss, nn.L1Loss, nn.SmoothL1Loss]
params = {'criterion': candidate_losses}

grid_search = GridSearchCV(
    estimator=sklearn_regressor,
    param_grid=params,
    cv=5,
)

In [11]:
# Run the search: every candidate in the parameter grid is trained once per fold.
grid_search = grid_search.fit(X=forecasters, y=real_price)

  epoch     train_loss      dur
-------  -------------  -------
      1  [36m62427937.5376[0m  17.9123
      2  [36m44466315.4628[0m  17.1412
      3  [36m39284693.3322[0m  15.6215
      4  [36m38200367.0622[0m  16.9720
      5  [36m37677253.7721[0m  19.2151
  epoch     train_loss      dur
-------  -------------  -------
      1  [36m60002797.9192[0m  13.3847
      2  [36m43165346.1270[0m  12.1271
      3  [36m38161119.5439[0m  11.8339
      4  38624869.4058  11.3734
      5  [36m36536506.4313[0m  10.7214
  epoch     train_loss     dur
-------  -------------  ------
      1  [36m63011249.6521[0m  9.7365
      2  [36m46249467.3639[0m  10.2188
      3  [36m42884953.6316[0m  10.6032
      4  [36m42311073.9710[0m  9.9232
      5  [36m36870809.0721[0m  10.3619
  epoch     train_loss      dur
-------  -------------  -------
      1  [36m62175218.9911[0m  10.0849
      2  [36m44726153.2354[0m  9.9349
      3  [36m38401668.8965[0m  10.0690
      4  [36m371111

In [12]:
# Pull the winning parameter combination and its score off the fitted search.
best_params, best_results = grid_search.best_params_, grid_search.best_score_

# One value per line: parameters first, then the score.
print(best_params, best_results, sep='\n')

{'criterion': <class 'torch.nn.modules.loss.SmoothL1Loss'>}
0.5225125277428214
