## Problem with Skorch and RNN with LSTM cells

- https://discuss.pytorch.org/t/problem-with-skorch-and-rnn-with-lstm-cells/80674/1

<div style="text-align: right"> <b>Author : Kwang Myung Yu</b></div>
<div style="text-align: right"> Initial upload: 2023. 7. 10</div>
<div style="text-align: right"> Last update: 2023. 7. 10</div>

In [4]:
import os
import sys
import time
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns
from scipy import stats
import warnings; warnings.filterwarnings('ignore')
#plt.style.use('ggplot')
plt.style.use('seaborn-whitegrid')
%matplotlib inline

In [5]:
from sklearn.datasets import make_regression
from skorch import NeuralNetRegressor
import unittest

import torch
import torch.nn as nn


In [6]:
# Pick the GPU when PyTorch reports one as available, otherwise use the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

'cuda'

In [7]:
# Synthetic regression data: 1000 samples, 20 features (10 informative).
X_regr, y_regr = make_regression(
    n_samples=1000, n_features=20, n_informative=10, random_state=0
)
# Cast to float32 (the dtype of the PyTorch layers' default parameters),
# rescale the targets by 1/100 and shape them as a column vector.
X_regr = X_regr.astype(np.float32)
y_regr = (y_regr.astype(np.float32) / 100).reshape(-1, 1)

# Network dimensions: input features, outputs, hidden units, stacked cells.
ni, no, nh, nlayers = 20, 1, 10, 3

In [8]:
class MyLSTM(nn.Module):
    """Stack of ``nn.LSTMCell`` layers followed by a linear output layer.

    NOTE(review): this first version is kept as written because it reproduces
    the ``TypeError`` recorded after the ``fit`` call below. See the review
    notes inside ``forward`` for the two defects.
    """

    def __init__(
        self,
        ni = 6,
        no = 3,
        nh = 10,
        nlayers = 1
    ):
        """Build ``nlayers`` LSTM cells and the linear read-out.

        Args:
            ni: Input size of the first LSTM cell.
            no: Output size of the final linear layer.
            nh: Hidden size of every LSTM cell.
            nlayers: Number of LSTM cells iterated over in ``forward``.
        """
        super(MyLSTM, self).__init__()
        
        self.ni = ni
        self.no = no
        self.nh = nh
        self.nlayers = nlayers
        
        # First cell maps ni -> nh; the remaining nlayers - 1 cells map nh -> nh.
        self.lstms = nn.ModuleList(
            [nn.LSTMCell(self.ni, self.nh)] + [nn.LSTMCell(self.nh, self.nh) for i in range(nlayers - 1)]
        )
        self.out = nn.Linear(self.nh, self.no)
        self.actfn = nn.Tanh()  # not referenced in forward()
        # self.device = torch.device('cpu')
        self.dtype = torch.float  # not referenced in forward()
        
    def forward(self, x, h0 = None, train = False):
        """Pass ``x`` through the cells once and return ``(y, (h, c))``.

        Args:
            x: Input tensor; presumably shaped (batch, ni) -- TODO confirm.
            h0: Optional ``(h, c)`` tuple used as the initial state.
            train: When truthy, the hidden output is passed through ``self.do``
                before being fed to the next layer.

        Returns:
            Tuple ``(y, (h, c))``: the linear read-out and the last state.
        """
        hs = x # initialize hidden state
        # NOTE(review): `in` is a membership test, and None is not iterable, so
        # this line raises "TypeError: argument of type 'NoneType' is not
        # iterable" whenever h0 is None. Presumably `is` was intended.
        if h0 in None:
            # h = torch.zeros(hs.shape[0], self.nh, device=device)
            # c = torch.zeros(hs.shape[0], self.nh, device=device)
            h = torch.zeros(hs.shape[0], self.nh)
            c = torch.zeros(hs.shape[0], self.nh)
        else:
            (h, c) = h0
            
        # LSTM cells
        # The same (h, c) pair is threaded through every layer: the state
        # returned by layer i is the incoming state of layer i + 1.
        for i in range(self.nlayers):
            h, c = self.lstms[i](hs, (h, c))
            if train:
                # NOTE(review): `self.do` is never assigned in __init__ of this
                # class, so this branch would fail with AttributeError.
                hs = self.do(h)
            else:
                hs = h
        y = self.out(hs)
        return y, (h, c)

참고 : LSTM, LSTMCell의 차이
- https://discuss.pytorch.kr/t/nn-rnn-nn-rnncell/214/2

In [9]:
# Synthetic regression data: 1000 samples, 20 features (10 informative).
X_regr, y_regr = make_regression(
    n_samples=1000, n_features=20, n_informative=10, random_state=0
)
# Cast to float32 (the dtype of the PyTorch layers' default parameters),
# rescale the targets by 1/100 and shape them as a column vector.
X_regr = X_regr.astype(np.float32)
y_regr = (y_regr.astype(np.float32) / 100).reshape(-1, 1)

# Network dimensions: input features, outputs, hidden units, stacked cells.
ni, no, nh, nlayers = 20, 1, 10, 3

In [10]:
# Wrap MyLSTM in a skorch regressor. The `module__*` keyword arguments carry
# the network dimensions defined above for the MyLSTM constructor.
net_regr = NeuralNetRegressor(
    module=MyLSTM,
    # training settings
    max_epochs=20,
    lr=0.1,
    # module constructor arguments
    module__ni=ni,
    module__no=no,
    module__nh=nh,
    module__nlayers=nlayers,
    #     device='cuda',  # uncomment this to train with CUDA
)

In [11]:
# With the MyLSTM defined above this call fails: forward() evaluates
# `h0 in None`, which raises the TypeError shown below.
net_regr.fit(X_regr, y=y_regr)

TypeError: argument of type 'NoneType' is not iterable

In [15]:
class MyLSTM(nn.Module):
    """Stack of ``nn.LSTMCell`` layers followed by a linear read-out.

    The input is processed as a single step: ``x`` is fed to the first cell
    and every following cell receives the previous cell's hidden output. One
    ``(h, c)`` pair is threaded through all layers, i.e. the state returned by
    layer ``i`` is the incoming state of layer ``i + 1``.

    Args:
        ni: Input size of the first LSTM cell.
        no: Output size of the final linear layer.
        nh: Hidden size shared by every LSTM cell.
        nlayers: Number of LSTM cells applied in ``forward``.
    """

    def __init__(self, ni=6, no=3, nh=10, nlayers=1):
        super().__init__()

        self.ni = ni
        self.no = no
        self.nh = nh
        self.nlayers = nlayers

        # First cell maps ni -> nh; the remaining nlayers - 1 cells map nh -> nh.
        self.lstms = nn.ModuleList(
            [nn.LSTMCell(self.ni, self.nh)]
            + [nn.LSTMCell(self.nh, self.nh) for _ in range(nlayers - 1)]
        )
        self.out = nn.Linear(self.nh, self.no)
        self.do = nn.Dropout(p=0.2)
        # The two attributes below are not used by forward(); they are kept so
        # the module's attributes and printed representation stay unchanged.
        self.actfn = nn.Tanh()
        self.dtype = torch.float

    def forward(self, x, h0=None, train=False):
        """Run ``x`` through the stacked cells and the linear read-out.

        Args:
            x: Input tensor whose first dimension is the batch size and whose
                last dimension matches ``ni``.
            h0: Optional ``(h, c)`` tuple used as the initial state. When
                ``None``, both start as zeros of shape ``(batch, nh)``.
            train: When truthy, each cell's hidden output is passed through
                the dropout layer before being fed onward. ``nn.Dropout`` only
                drops values while the module is in training mode.

        Returns:
            Tuple ``(y, (h, c))``: the read-out of the last hidden output and
            the final hidden/cell state. Frameworks that expect a bare
            prediction must unpack the first element.
        """
        hs = x
        if h0 is None:
            # Allocate the initial state on the input's device and dtype so
            # the module keeps working after `.to('cuda')` or `.double()`.
            state_shape = (x.shape[0], self.nh)
            h = torch.zeros(state_shape, dtype=x.dtype, device=x.device)
            c = torch.zeros(state_shape, dtype=x.dtype, device=x.device)
        else:
            h, c = h0

        # LSTM cells
        for i in range(self.nlayers):
            h, c = self.lstms[i](hs, (h, c))
            hs = self.do(h) if train else h

        y = self.out(hs)
        return y, (h, c)

With the typo fixed, `forward` still returns a tuple `(y, (h, c))` rather than just the prediction. The answer from the forum thread explains why that is a problem:

Yes, skorch just passes the module's output to `get_loss` and, subsequently, to the criterion. You have several options:

- create your own simple criterion that extracts the prediction and discards the context / hidden state
- implement your own `get_loss` that extracts the prediction and discards the context / hidden state
- don't return anything except the prediction
- make a case for why we should only pass the first argument to `get_loss` / the criterion :)

In [16]:
class ContextlessMSE(torch.nn.MSELoss):
    """MSE loss for modules whose ``forward`` returns ``(prediction, context)``.

    Only the prediction is compared against the target; the second element
    (e.g. the ``(h, c)`` state of a recurrent module) is ignored.
    """

    def forward(self, y_pred, y_true):
        """Compute the MSE between the prediction part of ``y_pred`` and ``y_true``.

        Args:
            y_pred: Two-element sequence ``(prediction, context)``.
            y_true: Target tensor compared against ``prediction``.

        Returns:
            The loss computed by ``torch.nn.MSELoss.forward``.
        """
        # Unpack only the prediction; the context may be any object.
        y, _context = y_pred
        return super().forward(y, y_true)

# Same regressor as before, but with ContextlessMSE as the criterion so that
# only the prediction part of MyLSTM's tuple output reaches the loss.
net_regr = NeuralNetRegressor(
    module=MyLSTM,
    criterion=ContextlessMSE,
    # training settings
    max_epochs=20,
    lr=0.1,
    # module constructor arguments
    module__ni=ni,
    module__no=no,
    module__nh=nh,
    module__nlayers=nlayers,
)

In [17]:
# Train the regressor; the per-epoch train/validation losses are printed below.
net_regr.fit(X_regr, y=y_regr)

  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        4.6593        4.0033  0.0128
      2        4.6153        3.9702  0.0113
      3        4.5801        3.9338  0.0133
      4        4.5287        3.8749  0.0121
      5        4.4394        3.7641  0.0113
      6        4.2622        3.5232  0.0104
      7        3.8518        2.9143  0.0111
      8        2.8290        1.5763  0.0106
      9        1.2824        0.6832  0.0102
     10        0.6700        0.5607  0.0122
     11        0.5737        0.5160  0.0102
     12        0.5184        0.4735  0.0111
     13        0.4715        0.4303  0.0104
     14        0.4213        0.3977  0.0108
     15        0.3814        0.3695  0.0102
     16        0.3846        1.2951  0.0098
     17        1.4625        0.3879  0.0112
     18        0.3738        0.3934

<class 'skorch.regressor.NeuralNetRegressor'>[initialized](
  module_=MyLSTM(
    (lstms): ModuleList(
      (0): LSTMCell(20, 10)
      (1-2): 2 x LSTMCell(10, 10)
    )
    (out): Linear(in_features=10, out_features=1, bias=True)
    (do): Dropout(p=0.2, inplace=False)
    (actfn): Tanh()
  ),
)