# Train a Multi-Layer Perceptron

In [1]:
import os
import random

import cloudpickle
import numpy as np
import torch
from sklearn.base import clone
from sklearn.model_selection import KFold
from sklearn.pipeline import make_pipeline
from skopt import BayesSearchCV
from skorch import dataset
from skorch.callbacks import EarlyStopping, LRScheduler, EpochScoring
from torch import nn, optim

import salary
from multi_layer_perceptron import Model, TensorTransformer, CustomNeuralNetRegressor

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Seed every global RNG the training stack may draw from (PyTorch, the
# stdlib `random` module and NumPy) so reruns are as repeatable as the
# libraries allow.
SEED = 42
torch.manual_seed(SEED)
random.seed(SEED)
np.random.seed(SEED)  # NumPy's global RNG was previously left unseeded

In [3]:
# Use the GPU when PyTorch reports CUDA as available; otherwise run on the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'

In [4]:
# Load the training features and targets with the
# `include_extracted_salaries` option switched on.
X_train, y_train = salary.get_train_dataset(
    include_extracted_salaries=True,
)

In [5]:
# Unfitted preprocessing template; it is cloned wherever it needs fitting.
preprocessor = salary.get_preprocessor(DEVICE)

# Fit a throwaway clone only to learn the transformed matrix dimensions,
# leaving `preprocessor` itself unfitted.
train_size, num_features = (
    clone(preprocessor)
    .fit_transform(X_train, y_train)
    .shape
)
train_size, num_features

(32103, 3670)

## Train & Tune Model

In [6]:
# Targets as a float32 column tensor of shape (n_samples, 1).
y_train_tensor = torch.tensor(np.array(y_train).reshape(-1, 1), dtype=torch.float32)

# Set to True to run the full Bayesian hyperparameter search. With False the
# search is pinned to the previously tuned configuration and evaluated once.
TUNE_HYPERPARAMETERS = False

if TUNE_HYPERPARAMETERS:
    # skopt search-space syntax: tuples are (low, high, prior) ranges,
    # lists are categorical choices.
    search_space = {
        'lambda1': (1e-4, 1e-1, 'log-uniform'),
        'lr': (1e-4, 1e-1, 'log-uniform'),
        'batch_size': [32, 64, 128, 256],
        'module__num_hidden_layers': [1, 2, 3, 4],
        'module__n_units_last': [16, 32, 64, 128, 256],
        'module__dropout_rate': (0.1, 0.5, 'uniform'),
    }
    n_search_iterations = 50
else:
    # Single-candidate space holding the tuned hyperparameters.
    search_space = {
        'lambda1': [0.0001],
        'lr': [0.00447],
        'batch_size': [128],
        'module__num_hidden_layers': [3],
        'module__n_units_last': [256],
        'module__dropout_rate': [0.5],
    }
    n_search_iterations = 1

# Pipeline: preprocessing -> tensor conversion -> cross-validated search over
# the neural-net regressor. The search object is the final pipeline step.
model = make_pipeline(
    clone(preprocessor),
    TensorTransformer(),
    BayesSearchCV(
        CustomNeuralNetRegressor(
            Model,
            max_epochs=100,
            torch_load_kwargs={'weights_only': True},
            criterion=nn.MSELoss,
            optimizer=optim.AdamW,
            iterator_train__shuffle=True,
            iterator_train__drop_last=True,
            # Internal validation split that produces `valid_loss` for the
            # callbacks below.
            train_split=dataset.ValidSplit(cv=5),
            callbacks=[
                # Stop after 10 epochs without `valid_loss` improvement and
                # reload the best weights seen.
                EarlyStopping(patience=10, monitor='valid_loss', load_best=True),
                # Halve the learning rate when `valid_loss` plateaus for 5 epochs.
                LRScheduler(policy=optim.lr_scheduler.ReduceLROnPlateau, patience=5, factor=0.5, monitor='valid_loss'),  # type: ignore
                # Report R2 on the validation split each epoch.
                EpochScoring(scoring='r2', on_train=False),
            ],
            device=DEVICE,
        ),
        search_space,
        verbose=3,
        scoring='r2',
        n_iter=n_search_iterations,
        random_state=42,
        cv=KFold(n_splits=5, shuffle=True, random_state=42)
    )
).fit(X_train, y_train_tensor)


Fitting 5 folds for each of 1 candidates, totalling 5 fits
  epoch       r2        train_loss        valid_loss      lr     dur
-------  -------  ----------------  ----------------  ------  ------
      1  [36m-2.4578[0m  [32m12863843545.6000[0m  [35m12859947844.6222[0m  0.0045  3.9418
      2  -2.3892  [32m12686213203.2000[0m  [35m12605016614.7713[0m  0.0045  3.9084
      3  -2.2711  [32m12311866124.8000[0m  [35m12165454866.5384[0m  0.0045  3.6980
      4  -2.1129  [32m11772134051.2000[0m  [35m11577183091.1676[0m  0.0045  3.4839
      5  -1.9216  [32m11093737558.4000[0m  [35m10865966323.3919[0m  0.0045  3.7609
      6  -1.7518  [32m10332289120.0000[0m  [35m10234334876.3808[0m  0.0045  4.0624
      7  -1.4998  [32m9504962732.8000[0m  [35m9297213277.6391[0m  0.0045  3.6652
      8  -1.3160  [32m8629168220.8000[0m  [35m8613590567.7680[0m  0.0045  3.7678
      9  -1.1393  [32m7745812745.6000[0m  [35m7956450287.4549[0m  0.0045  5.3044
     10  -0.8829 

In [7]:
# The fitted search object is the estimator of the final pipeline step;
# display its cross-validation results.
search = model.steps[-1][1]
search.cv_results_

{'mean_fit_time': array([240.29413304]),
 'std_fit_time': array([12.30270861]),
 'mean_score_time': array([0.24128766]),
 'std_score_time': array([0.07713372]),
 'param_batch_size': masked_array(data=[128],
              mask=[False],
        fill_value=999999),
 'param_lambda1': masked_array(data=[0.0001],
              mask=[False],
        fill_value=1e+20),
 'param_lr': masked_array(data=[0.00447],
              mask=[False],
        fill_value=1e+20),
 'param_module__dropout_rate': masked_array(data=[0.5],
              mask=[False],
        fill_value=1e+20),
 'param_module__n_units_last': masked_array(data=[256],
              mask=[False],
        fill_value=999999),
 'param_module__num_hidden_layers': masked_array(data=[3],
              mask=[False],
        fill_value=999999),
 'params': [OrderedDict([('batch_size', 128),
               ('lambda1', 0.0001),
               ('lr', 0.00447),
               ('module__dropout_rate', 0.5),
               ('module__n_units_last', 2

In [8]:
# Hyperparameter combination selected by the search.
search.best_params_

OrderedDict([('batch_size', 128),
             ('lambda1', 0.0001),
             ('lr', 0.00447),
             ('module__dropout_rate', 0.5),
             ('module__n_units_last', 256),
             ('module__num_hidden_layers', 3)])

In [9]:
# Predict on the training features with the fitted pipeline, then score the
# predictions against the training targets.
train_predictions = model.predict(X_train)
result_train = salary.evaluate_train_predictions(train_predictions, y_train)

Train size: 32103
Train R2: 0.9043
Train RMSE: 18642.3884
Train MAE: 8384.3884


## Evaluate on Test Set

In [10]:
# Load the held-out test features and targets.
X_test, y_test = salary.get_test_dataset()

In [11]:
# Predict on the test features and hand the predictions to the evaluation helper.
# NOTE(review): `y_test` is not passed here — presumably the helper loads the
# test targets itself; confirm against `salary.evaluate_test_predictions`.
test_predictions = model.predict(X_test)
result_test = salary.evaluate_test_predictions(test_predictions)

Test size: 10000
Test R2: 0.6765
Test RMSE: 34085.9679
Test MAE: 19781.8583


## Export Model

In [12]:
# Persist the fitted preprocessor (first pipeline step) with cloudpickle.
PREPROCESSOR_PATH = 'models/preprocessor.cloudpickle'
trained_preprocessor = model[0]
# Create the output directory up front so the export cannot fail with
# FileNotFoundError on a fresh checkout after the long training run.
os.makedirs(os.path.dirname(PREPROCESSOR_PATH), exist_ok=True)
with open(PREPROCESSOR_PATH, 'wb') as f:
    cloudpickle.dump(trained_preprocessor, f)

In [13]:
# Take the best estimator from the search (final pipeline step) and save its
# parameters to disk.
net = model.steps[-1][1].best_estimator_
net.save_params(
    f_params='models/mlp_params.pkl',
)