# model.net
본 노트에서는 `model.ops.ipynb` 노트에서 구현한 `MultiChannelEmbedding`, `ConvolutionLayer`, `MaxOverTimePooling` class를 활용하여, [Convolutional Neural Networks for Sentence Classification](https://arxiv.org/abs/1408.5882)에서 제시하고 있는 전체 구조를 `SenCNN` class로 구현합니다.

![Alt text](https://raw.githubusercontent.com/aisolab/strnlp/master/materials/img/sencnn_structure.png)

### Setup

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from model.ops import MultiChannelEmbedding, ConvolutionLayer, MaxOverTimePooling
from model.utils import Vocab

### Implementation tips
**전체 구조를 구현하는 방법은 연산을 구현할 때와 마찬가지로 예제데이터를 미리 생각해놓고 확인하면서 구현하는 것입니다.** 구현해놓은 연산관련 코드들을 기반으로 전체 구조를 구현할 때 가장 간단한 미니배치(e.g. 미니배치 사이즈가 2인 경우)를 상정하고 구현합니다. 이를 위해서 `model.ops.ipynb` 노트에서 활용했던 코드들을 가져와서 가장 간단한 미니배치를 구성합니다.

In [2]:
import pickle
import torch
from pathlib import Path
from pprint import pprint
from mecab import MeCab
from torch.utils.data import DataLoader
from model.data import Corpus
from model.utils import Vocab, Tokenizer, PadSequence

# Locate the exercise data directory and list its contents for reference.
data_dir = Path.cwd() / 'data'
list_of_dataset = list(data_dir.iterdir())
pprint(list_of_dataset)

# Path.iterdir() yields entries in arbitrary, platform-dependent order, so the
# vocabulary pickle is opened by file name instead of by its list position.
with open(data_dir / 'vocab.pkl', mode='rb') as io:
    vocab = pickle.load(io)

# PadSequence is configured with length 32 and the index of the vocabulary's
# padding token as the fill value.
pad_sequence = PadSequence(length=32, pad_val=vocab.to_indices(vocab.padding_token))
# MeCab's `morphs` bound method is handed to the Tokenizer as its split function.
split_morphs = MeCab().morphs
tokenizer = Tokenizer(vocab, split_fn=split_morphs, pad_fn=pad_sequence)

[PosixPath('/root/Documents/archive/strnlp/exercise/data/.DS_Store'),
 PosixPath('/root/Documents/archive/strnlp/exercise/data/train.txt'),
 PosixPath('/root/Documents/archive/strnlp/exercise/data/morphs_vec.pkl'),
 PosixPath('/root/Documents/archive/strnlp/exercise/data/validation.txt'),
 PosixPath('/root/Documents/archive/strnlp/exercise/data/tokenizer.pkl'),
 PosixPath('/root/Documents/archive/strnlp/exercise/data/test.txt'),
 PosixPath('/root/Documents/archive/strnlp/exercise/data/vocab.pkl')]


In [3]:
# Reference the training file by name: indexing into the result of
# Path.iterdir() depends on an arbitrary directory listing order.
tr_corpus = Corpus(data_dir / 'train.txt', transform_fn=tokenizer.split_and_transform)

#### `DataLoader` class의 instance를 이용, 가장 간단한 미니배치 구성

In [4]:
# Smallest useful mini-batch for checking the model: two shuffled examples.
tr_dl = DataLoader(dataset=tr_corpus, shuffle=True, batch_size=2)

In [5]:
# Fetch a single mini-batch from the loader and unpack its two components
# (presumably padded token indices and labels — see the printed output).
batch_iter = iter(tr_dl)
mb = next(batch_iter)
x_mb, y_mb = mb

# Show the raw batch first, then its two components side by side.
print(mb)
print(x_mb, y_mb)

[tensor([[7270, 2086, 8244, 1953,   46,   66, 3289, 2424,  471, 1953, 2455, 2952,
         2251, 3231, 1916,   46,   46,    1,    1,    1,    1,    1,    1,    1,
            1,    1,    1,    1,    1,    1,    1,    1],
        [1111, 6491,    0,    0, 7593,    1,    1,    1,    1,    1,    1,    1,
            1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
            1,    1,    1,    1,    1,    1,    1,    1]]), tensor([0, 1])]
tensor([[7270, 2086, 8244, 1953,   46,   66, 3289, 2424,  471, 1953, 2455, 2952,
         2251, 3231, 1916,   46,   46,    1,    1,    1,    1,    1,    1,    1,
            1,    1,    1,    1,    1,    1,    1,    1],
        [1111, 6491,    0,    0, 7593,    1,    1,    1,    1,    1,    1,    1,
            1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
            1,    1,    1,    1,    1,    1,    1,    1]]) tensor([0, 1])


### `SenCNN` class 구현

#### Load modules

In [6]:
import torch
import torch.nn as nn
from model.ops import MultiChannelEmbedding, ConvolutionLayer, MaxOverTimePooling
from model.utils import Vocab

#### `SenCNN` class

In [7]:
# Exercise: implement this class yourself using MultiChannelEmbedding,
# ConvolutionLayer, MaxOverTimePooling, nn.Dropout and nn.Linear.
# Reference: https://github.com/aisolab/strnlp/blob/master/exercise/model/net.py
class SenCNN(nn.Module):
    """Sentence-classification CNN (exercise skeleton; no layers defined yet)."""

    def __init__(self, num_classes: int, vocab: Vocab) -> None:
        """Set up the module.

        Args:
            num_classes (int): the number of classes
            vocab (model.utils.Vocab): the instance of model.utils.Vocab

        Note:
            Placeholder only: neither argument is used and no sub-modules
            are registered until the exercise is completed.
        """
        super().__init__()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Placeholder forward pass.

        Args:
            x (torch.Tensor): input mini-batch (ignored by this skeleton)

        Returns:
            Nothing yet: the skeleton returns None despite the annotation.
        """
        return None

In [8]:
# Build the classifier for two classes with the vocabulary loaded earlier.
model = SenCNN(vocab=vocab, num_classes=2)

In [9]:
# Run the sample mini-batch through the model and print the output with its shape.
# NOTE(review): the SenCNN skeleton's forward() returns None, so the recorded
# output below requires a completed SenCNN implementation.
yhat = model(x_mb)
print(yhat, yhat.shape)

tensor([[0.3085, 0.0949],
        [0.2435, 0.0354]], grad_fn=<AddmmBackward>) torch.Size([2, 2])
