## Quickstart : Training a model

- https://skorch.readthedocs.io/en/stable/user/quickstart.html

<div style="text-align: right"> <b>Author : Kwang Myung Yu</b></div>
<div style="text-align: right"> Initial upload: 2023. 7. 10</div>
<div style="text-align: right"> Last update: 2023. 7. 10</div>

In [1]:
import os
import sys
import time
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns
from scipy import stats
import warnings; warnings.filterwarnings('ignore')
#plt.style.use('ggplot')
plt.style.use('seaborn-whitegrid')
%matplotlib inline

In [2]:
from sklearn.datasets import make_classification
from torch import nn

from skorch import NeuralNetClassifier

In [3]:
# Synthetic classification set: 1000 samples, 20 features (10 informative),
# generated with a fixed random_state.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=10,
    random_state=10,
)

# Cast features to float32 and labels to int64 before handing them to the net.
X, y = X.astype(np.float32), y.astype(np.int64)

In [4]:
class MyModule(nn.Module):
    """Small fully connected classifier over 20 input features.

    Layers: Linear(20, num_units) -> nonlin -> Dropout(0.5)
    -> Linear(num_units, num_units) -> nonlin -> Linear(num_units, 2).

    Parameters
    ----------
    num_units : int, default 10
        Width of both hidden layers.
    nonlin : callable or nn.Module, optional
        Activation applied after each hidden layer. When omitted, a new
        ``nn.ReLU()`` is created for this instance.
    """

    def __init__(self, num_units=10, nonlin=None):
        super().__init__()

        # A module instance used as the signature default would be built once
        # at definition time and shared by every MyModule; build one per
        # instance instead.
        if nonlin is None:
            nonlin = nn.ReLU()

        self.dense0 = nn.Linear(20, num_units)
        self.nonlin = nonlin
        self.dropout = nn.Dropout(0.5)
        self.dense1 = nn.Linear(num_units, num_units)
        self.output = nn.Linear(num_units, 2)

    def forward(self, X, **kwargs):
        """Return the output of the final linear layer (no softmax applied).

        Extra keyword arguments are accepted and ignored.
        """
        X = self.nonlin(self.dense0(X))
        X = self.dropout(X)
        X = self.nonlin(self.dense1(X))
        X = self.output(X)
        return X

In [5]:
# Wrap MyModule in a scikit-learn compatible classifier trained with
# cross-entropy loss for 10 epochs.
net = NeuralNetClassifier(
    MyModule,
    max_epochs=10,
    criterion=nn.CrossEntropyLoss(),
    lr=0.1,
    # Shuffle training data on each epoch
    iterator_train__shuffle=True,
)

In [6]:
net

<class 'skorch.classifier.NeuralNetClassifier'>[uninitialized](
  module=<class '__main__.MyModule'>,
)

In [7]:
net.fit(X, y)

  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m0.7343[0m       [32m0.5050[0m        [35m0.6814[0m  0.1546
      2        [36m0.6827[0m       [32m0.6300[0m        [35m0.6561[0m  0.0077
      3        [36m0.6581[0m       [32m0.7600[0m        [35m0.6284[0m  0.0062
      4        [36m0.6406[0m       0.7500        [35m0.5970[0m  0.0074
      5        [36m0.6193[0m       [32m0.7750[0m        [35m0.5692[0m  0.0057
      6        [36m0.5955[0m       [32m0.8050[0m        [35m0.5357[0m  0.0073
      7        [36m0.5766[0m       [32m0.8100[0m        [35m0.5074[0m  0.0055
      8        0.5790       [32m0.8250[0m        [35m0.4898[0m  0.0081
      9        [36m0.5468[0m       0.8050        [35m0.4682[0m  0.0057
     10        [36m0.5324[0m       0.8200        [35m0.4468[0m  0.0102


<class 'skorch.classifier.NeuralNetClassifier'>[initialized](
  module_=MyModule(
    (dense0): Linear(in_features=20, out_features=10, bias=True)
    (nonlin): ReLU()
    (dropout): Dropout(p=0.5, inplace=False)
    (dense1): Linear(in_features=10, out_features=10, bias=True)
    (output): Linear(in_features=10, out_features=2, bias=True)
  ),
)

In [8]:
y_proba = net.predict_proba(X)

In [9]:
y_proba

array([[0.21821484, 0.7817852 ],
       [0.7066223 , 0.29337773],
       [0.5109845 , 0.48901558],
       ...,
       [0.509295  , 0.49070507],
       [0.78450555, 0.21549445],
       [0.13525324, 0.86474675]], dtype=float32)

### Sklearn pipeline 사용하기

In [10]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

In [11]:
# Standardise the features, then feed them to the net, as one estimator.
pipe = Pipeline(
    steps=[
        ('scale', StandardScaler()),
        ('net', net),
    ]
)
pipe.fit(X, y)

# Class probabilities for the same samples the pipeline was fitted on.
y_proba = pipe.predict_proba(X)

Re-initializing module.
Re-initializing criterion.
Re-initializing optimizer.
  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m0.6903[0m       [32m0.5450[0m        [35m0.6886[0m  0.0080
      2        [36m0.6852[0m       [32m0.5750[0m        [35m0.6840[0m  0.0093
      3        [36m0.6788[0m       [32m0.6150[0m        [35m0.6787[0m  0.0080
      4        [36m0.6773[0m       [32m0.6250[0m        [35m0.6734[0m  0.0073
      5        [36m0.6658[0m       [32m0.6500[0m        [35m0.6656[0m  0.0109
      6        [36m0.6621[0m       [32m0.6650[0m        [35m0.6574[0m  0.0073


      7        [36m0.6526[0m       [32m0.6850[0m        [35m0.6447[0m  0.0067
      8        [36m0.6415[0m       [32m0.7250[0m        [35m0.6303[0m  0.0088
      9        [36m0.6293[0m       [32m0.7450[0m        [35m0.6121[0m  0.0066
     10        [36m0.6099[0m       [32m0.7750[0m        [35m0.5915[0m  0.0065


In [12]:
y_proba

array([[0.4202679 , 0.57973206],
       [0.63516164, 0.36483833],
       [0.48618823, 0.5138117 ],
       ...,
       [0.44923657, 0.5507635 ],
       [0.56587446, 0.4341256 ],
       [0.35765237, 0.64234763]], dtype=float32)

### Grid search

In [13]:
from sklearn.model_selection import GridSearchCV

# Turn off the net's internal train/validation split and its per-epoch log
# for the duration of the search (this modifies `net` in place).
net.set_params(train_split=False, verbose=0)

# Candidate values; every combination is evaluated with 3-fold CV.
params = dict(
    lr=[0.01, 0.02],
    max_epochs=[10, 20],
    module__num_units=[10, 20],
)
gs = GridSearchCV(
    net,
    params,
    refit=False,
    cv=3,
    scoring='accuracy',
)

gs.fit(X, y)
print(gs.best_score_, gs.best_params_)

0.7340124555693418 {'lr': 0.02, 'max_epochs': 20, 'module__num_units': 20}


### Regression

In [14]:
import torch
from torch import nn
import torch.nn.functional as F

In [15]:
torch.cuda.is_available()

True

In [16]:
from sklearn.datasets import make_regression

# Synthetic regression set: 1000 samples, 20 features (10 informative).
X_regr, y_regr = make_regression(
    n_samples=1000,
    n_features=20,
    n_informative=10,
    random_state=0,
)
X_regr = X_regr.astype(np.float32)
# Divide the target by 100 and reshape it into a single column, shape (n, 1).
y_regr = (y_regr.astype(np.float32) / 100).reshape(-1, 1)

In [17]:
X_regr.shape, y_regr.shape, y_regr.min(), y_regr.max()

((1000, 20), (1000, 1), -6.4901485, 6.154505)

In [18]:
# Use the GPU when torch reports one as available, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

'cuda'

In [19]:
from skorch import NeuralNetRegressor

In [20]:
class RegressorModule(nn.Module):
    """Fully connected regressor mapping 20 input features to one output.

    Layers: Linear(20, num_units) -> nonlin -> Linear(num_units, 10)
    -> ReLU -> Linear(10, 1).

    Parameters
    ----------
    num_units : int, default 10
        Width of the first hidden layer.
    nonlin : callable, default F.relu
        Activation applied after the first hidden layer only.
    """

    def __init__(
        self,
        num_units=10,
        nonlin=F.relu,
    ):
        super().__init__()
        # Stored as plain attributes so the configuration can be read back.
        self.num_units = num_units
        self.nonlin = nonlin

        self.dense0 = nn.Linear(20, num_units)
        self.dense1 = nn.Linear(num_units, 10)
        self.output = nn.Linear(10, 1)

    def forward(self, X, **kwargs):
        """Return predictions with a trailing dimension of size 1.

        Extra keyword arguments are accepted and ignored.
        """
        X = self.nonlin(self.dense0(X))
        # The second hidden layer always uses ReLU, regardless of `nonlin`.
        X = F.relu(self.dense1(X))
        X = self.output(X)
        return X

In [21]:
# Regressor wrapper around RegressorModule, placed on the device chosen earlier.
regressor_settings = dict(max_epochs=20, lr=0.1, device=device)
net_regr = NeuralNetRegressor(RegressorModule, **regressor_settings)

In [22]:
net_regr.fit(X_regr, y_regr)

  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m4.5306[0m        [32m3.7274[0m  0.8688
      2        [36m3.3071[0m        [32m1.5840[0m  0.0112
      3        [36m0.9924[0m        [32m0.9718[0m  0.0120
      4        1.2342        [32m0.6791[0m  0.0122
      5        [36m0.3962[0m        [32m0.1578[0m  0.0122
      6        [36m0.1068[0m        [32m0.0900[0m  0.0114
      7        [36m0.0672[0m        [32m0.0694[0m  0.0115
      8        [36m0.0514[0m        [32m0.0566[0m  0.0113
      9        [36m0.0398[0m        [32m0.0457[0m  0.0109
     10        [36m0.0310[0m        [32m0.0374[0m  0.0110
     11        [36m0.0244[0m        [32m0.0315[0m  0.0113
     12        [36m0.0198[0m        [32m0.0271[0m  0.0105
     13        [36m0.0165[0m        [32m0.0239[0m  0.0102
     14        [36m0.0141[0m        [32m0.0215[0m  0.0117
     15        [36m0.0123[0m        [32m0.0196[0m

<class 'skorch.regressor.NeuralNetRegressor'>[initialized](
  module_=RegressorModule(
    (dense0): Linear(in_features=20, out_features=10, bias=True)
    (dense1): Linear(in_features=10, out_features=10, bias=True)
    (output): Linear(in_features=10, out_features=1, bias=True)
  ),
)

In [23]:
# Making prediction for first 5 data points of X
y_pred = net_regr.predict(X_regr[:5])
y_pred

array([[ 0.60129064],
       [-1.447263  ],
       [-0.5105232 ],
       [-0.25952503],
       [-0.7679593 ]], dtype=float32)

전체 모델 저장

In [24]:
import pickle

In [25]:
file_name = "../model/skorch_reg.pkl"

# Create the target directory first so a checkout without ../model does not
# fail with FileNotFoundError when the file is opened for writing.
os.makedirs(os.path.dirname(file_name), exist_ok=True)

# Serialise the whole fitted net object.
with open(file_name, "wb") as f:
    pickle.dump(net_regr, f)

In [26]:
# Restore the complete net object from the pickle file at `file_name`.
# NOTE(review): pickle.load can execute arbitrary code, so only load files
# that you created yourself or otherwise trust.
with open(file_name, "rb") as f:
    new_net = pickle.load(f)

In [27]:
new_net.predict(X_regr[:5])

array([[ 0.60129064],
       [-1.447263  ],
       [-0.5105232 ],
       [-0.25952503],
       [-0.7679593 ]], dtype=float32)

파라미터만 저장하기

- 이 방법은 모델의 파라미터(가중치)만 저장함  
- 따라서 lr, max_epochs 같은 하이퍼파라미터는 저장되지 않으므로, 불러올 때 다시 지정해야 함

In [28]:
param_name = "../model/skorch_reg_params.pkl"

# Make sure the output directory exists before writing the parameter file.
os.makedirs(os.path.dirname(param_name), exist_ok=True)

new_net.save_params(f_params=param_name)

In [29]:
# Build a new net with explicit hyperparameters and initialize it, ready for
# load_params.
# NOTE(review): unlike net_regr, no `device` is passed here - confirm that
# running the reloaded net on the default device is intended.
new_net = NeuralNetRegressor(RegressorModule, max_epochs=20, lr=0.1).initialize()

In [30]:
new_net.load_params(param_name)

In [31]:
new_net.predict(X_regr[:5])

array([[ 0.60129064],
       [-1.447263  ],
       [-0.5105232 ],
       [-0.25952503],
       [-0.76795924]], dtype=float32)

사이킷런 파이프라인

In [32]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

In [33]:
# Standardise the features, then pass them to the regression net.
pipe = Pipeline(steps=[('scale', StandardScaler()), ('net', net_regr)])

In [34]:
pipe

In [35]:
pipe.fit(X_regr, y_regr)

Re-initializing module.
Re-initializing criterion.
Re-initializing optimizer.
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m4.6679[0m        [32m3.9519[0m  0.0158
      2        [36m4.4123[0m        [32m3.4078[0m  0.0115
      3        [36m2.8493[0m        [32m1.2331[0m  0.0123
      4        [36m0.9822[0m        [32m0.5511[0m  0.0130
      5        [36m0.4767[0m        [32m0.3299[0m  0.0126
      6        [36m0.3146[0m        0.7192  0.0218
      7        1.1051        [32m0.2318[0m  0.0148
      8        0.5604        0.4048  0.0145
      9        0.4989        0.4355  0.0147
     10        0.5613        [32m0.2157[0m  0.0119
     11        [36m0.2372[0m        0.2630  0.0130
     12        0.3281        [32m0.1639[0m  0.0115
     13        [36m0.1639[0m        0.1855  0.0124
     14        0.2158        [32m0.1202[0m  0.0131
     15        [36m0.1103[0m        0.1403  0.0127
     16     

In [36]:
# The pipeline ends in a regressor, so ask for predictions with `predict`;
# `predict_proba` is the classification-style API.
pipe.predict(X_regr[:5])

array([[ 0.6341337 ],
       [-1.2901615 ],
       [-0.42953563],
       [-0.1663003 ],
       [-0.5268229 ]], dtype=float32)

### Callbacks

여기서는 ROC 곡선 아래 면적(AUC) 점수를 계산하는 새 콜백을 추가하는 방법을 보여줍니다.

In [37]:
from skorch.callbacks import EpochScoring

EpochScoring을 사용하여 계산할 점수를 지정해야 합니다.  
3가지 선택지가 있습니다:   
- 문자열 전달 : sklearn에서 유효한 메트릭이어야 함
- None 전달 : 신경망(net)에 .score 메서드를 직접 구현한 경우, scoring=None을 전달하면 skorch가 그 메서드를 사용함
- 함수 또는 callable 전달하기

sklearn에는 이미 'roc_auc'가 있기 때문에 이를 사용한다.

In [38]:
# Epoch-level callback that scores with sklearn's 'roc_auc' metric, where
# larger values are treated as better.
auc = EpochScoring(
    scoring='roc_auc',
    lower_is_better=False,
)

In [39]:
class ClassifierModule(nn.Module):
    """Fully connected two-class classifier over 20 input features.

    Layers: Linear(20, num_units) -> nonlin -> Dropout(dropout)
    -> Linear(num_units, 10) -> ReLU -> Linear(10, 2) -> softmax.

    Parameters
    ----------
    num_units : int, default 10
        Width of the first hidden layer.
    nonlin : callable, default F.relu
        Activation applied after the first hidden layer only.
    dropout : float, default 0.5
        Drop probability of the dropout layer; afterwards ``self.dropout``
        is the ``nn.Dropout`` layer itself, not the float.
    """

    def __init__(
            self,
            num_units=10,
            nonlin=F.relu,
            dropout=0.5,
    ):
        super().__init__()
        # Stored as plain attributes so the configuration can be read back.
        self.num_units = num_units
        self.nonlin = nonlin

        self.dense0 = nn.Linear(20, num_units)
        self.dropout = nn.Dropout(dropout)
        self.dense1 = nn.Linear(num_units, 10)
        self.output = nn.Linear(10, 2)

    def forward(self, X, **kwargs):
        """Return class probabilities (softmax over the last dimension).

        Extra keyword arguments are accepted and ignored.
        """
        X = self.nonlin(self.dense0(X))
        X = self.dropout(X)
        # The second hidden layer always uses ReLU, regardless of `nonlin`.
        X = F.relu(self.dense1(X))
        X = F.softmax(self.output(X), dim=-1)
        return X

In [40]:
# Classifier built on ClassifierModule with the `auc` EpochScoring callback
# attached through `callbacks`.
net = NeuralNetClassifier(
    ClassifierModule,
    max_epochs=20,
    lr=0.1,
    callbacks=[auc],
)

In [41]:
net.fit(X, y)

  epoch    roc_auc    train_loss    valid_acc    valid_loss     dur
-------  ---------  ------------  -----------  ------------  ------
      1     [36m0.4850[0m        [32m0.7101[0m       [35m0.4950[0m        [31m0.7039[0m  0.0112
      2     [36m0.5474[0m        [32m0.6893[0m       [35m0.5100[0m        [31m0.6957[0m  0.0085
      3     [36m0.6027[0m        [32m0.6874[0m       [35m0.5250[0m        [31m0.6887[0m  0.0067
      4     [36m0.6564[0m        [32m0.6696[0m       [35m0.5500[0m        [31m0.6800[0m  0.0087
      5     [36m0.7237[0m        [32m0.6635[0m       [35m0.6400[0m        [31m0.6653[0m  0.0078
      6     [36m0.7883[0m        [32m0.6470[0m       [35m0.6850[0m        [31m0.6454[0m  0.0072
      7     [36m0.8426[0m        [32m0.6421[0m       [35m0.7150[0m        [31m0.6324[0m  0.0111
      8     [36m0.8723[0m        [32m0.6268[0m       [35m0.7700[0m        [31m0.6066[0m  0.0073
      9     [36m0.9070[0m     

     14     [36m0.9449[0m        0.5296       0.8600        [31m0.4302[0m  0.0087
     15     0.9428        [32m0.4848[0m       0.8500        [31m0.4049[0m  0.0089
     16     [36m0.9454[0m        0.4881       0.8600        [31m0.3891[0m  0.0102
     17     [36m0.9458[0m        0.4850       0.8550        [31m0.3836[0m  0.0121
     18     [36m0.9491[0m        [32m0.4749[0m       0.8550        [31m0.3773[0m  0.0136
     19     [36m0.9514[0m        [32m0.4598[0m       0.8700        [31m0.3684[0m  0.0167
     20     0.9506        0.4796       0.8650        [31m0.3676[0m  0.0067


<class 'skorch.classifier.NeuralNetClassifier'>[initialized](
  module_=ClassifierModule(
    (dense0): Linear(in_features=20, out_features=10, bias=True)
    (dropout): Dropout(p=0.5, inplace=False)
    (dense1): Linear(in_features=10, out_features=10, bias=True)
    (output): Linear(in_features=10, out_features=2, bias=True)
  ),
)

### Usage with sklearn GridSearchCV

The NeuralNet class allows you to directly access parameters of the PyTorch module by using the module__ prefix. For example, if you defined the module to have a num_units parameter, you can set it via the module__num_units argument. This is exactly the same logic that gives access to estimator parameters in sklearn Pipelines and FeatureUnions.

This feature is useful in several ways. For one, it allows you to set those parameters in the model definition. Furthermore, it allows you to set parameters in an sklearn GridSearchCV as shown below.

In addition to the parameters prefixed by module__, you may access a couple of other attributes, such as those of the optimizer by using the optimizer__ prefix (again, see below). All those special prefixes are stored in the prefixes_ attribute:

In [42]:
net.prefixes_

['iterator_train',
 'iterator_valid',
 'callbacks',
 'dataset',
 'compile',
 'module',
 'criterion',
 'optimizer']

In [43]:
net.module_

ClassifierModule(
  (dense0): Linear(in_features=20, out_features=10, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
  (dense1): Linear(in_features=10, out_features=10, bias=True)
  (output): Linear(in_features=10, out_features=2, bias=True)
)

In [44]:
net.module_.num_units

10

In [45]:
from sklearn.model_selection import GridSearchCV

In [46]:
# Base estimator for the grid search below. `optimizer__momentum` uses the
# optimizer__ prefix to pass `momentum` on to the optimizer; `verbose=0` and
# `train_split=False` are also set here.
net = NeuralNetClassifier(
    ClassifierModule,
    max_epochs=20,
    lr=0.1,
    optimizer__momentum=0.9,
    verbose=0,
    train_split=False,
)

In [47]:
# Search space: two values for each of four settings, reaching the module and
# the optimizer through their double-underscore prefixes.
params = dict(
    lr=[0.05, 0.1],
    module__num_units=[10, 20],
    module__dropout=[0, 0.5],
    optimizer__nesterov=[False, True],
)

In [48]:
# 3-fold grid search scored on accuracy; refit=False skips retraining a final
# model on the full data, and verbose=2 logs each fit.
gs = GridSearchCV(
    net,
    params,
    refit=False,
    cv=3,
    scoring='accuracy',
    verbose=2,
)

In [49]:
gs.fit(X, y)

Fitting 3 folds for each of 16 candidates, totalling 48 fits
[CV] END lr=0.05, module__dropout=0, module__num_units=10, optimizer__nesterov=False; total time=   0.1s
[CV] END lr=0.05, module__dropout=0, module__num_units=10, optimizer__nesterov=False; total time=   0.1s
[CV] END lr=0.05, module__dropout=0, module__num_units=10, optimizer__nesterov=False; total time=   0.1s
[CV] END lr=0.05, module__dropout=0, module__num_units=10, optimizer__nesterov=True; total time=   0.1s
[CV] END lr=0.05, module__dropout=0, module__num_units=10, optimizer__nesterov=True; total time=   0.1s
[CV] END lr=0.05, module__dropout=0, module__num_units=10, optimizer__nesterov=True; total time=   0.1s
[CV] END lr=0.05, module__dropout=0, module__num_units=20, optimizer__nesterov=False; total time=   0.1s
[CV] END lr=0.05, module__dropout=0, module__num_units=20, optimizer__nesterov=False; total time=   0.1s
[CV] END lr=0.05, module__dropout=0, module__num_units=20, optimizer__nesterov=False; total time=   0.

In [50]:
print(gs.best_score_)

0.9099728470985956


In [51]:
print(gs.best_params_)

{'lr': 0.05, 'module__dropout': 0, 'module__num_units': 20, 'optimizer__nesterov': False}
