In [0]:
# !pip install hoge  # placeholder: install any extra packages here
# Mount Google Drive into the Colab runtime
from google.colab import drive
drive.mount('/content/gdrive')
# Change the current working directory to the notebook folder on Drive
import os
os.chdir('/content/gdrive/My Drive/NAL-LAB/Colab Notebooks/')
!pwd

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
/content/gdrive/My Drive/NAL-LAB/Colab Notebooks


In [0]:
# Setup for using Chainer (and related packages) on Google Colab

# Installs Chainer and a CuPy build matching Colab's CUDA version.
# NOTE(review): this pipes a remotely hosted script straight into sh and the
# packages it installs are not version-pinned — confirm this is acceptable.
!curl https://colab.chainer.org/install | sh -

# Verify the Chainer installation by printing its runtime information
!python -c "import chainer; chainer.print_runtime_info()"

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0100  1580  100  1580    0     0  11048      0 --:--:-- --:--:-- --:--:-- 11048
+ apt -y -q install cuda-libraries-dev-10-0
Reading package lists...
Building dependency tree...
Reading state information...
cuda-libraries-dev-10-0 is already the newest version (10.0.130-1).
0 upgraded, 0 newly installed, 0 to remove and 8 not upgraded.
+ pip install -q cupy-cuda100  chainer 
+ set +ex
Installation succeeded!
Platform: Linux-4.14.79+-x86_64-with-Ubuntu-18.04-bionic
Chainer: 5.0.0
NumPy: 1.14.6
CuPy:
  CuPy Version          : 5.2.0
  CUDA Root             : /usr/local/cuda
  CUDA Build Version    : 10000
  CUDA Driver Version   : 10000
  CUDA Runtime Version  : 10000
  cuDNN Build Version   : 7301
  cuDNN Version         : 7301
  NCCL Build Version    : 23

In [0]:
import re
import numpy as np
import chainer
import chainer.functions as F
import chainer.links as L
from chainer.training import extensions

In [0]:
# Report whether Chainer can see CUDA and cuDNN in this runtime.
print('GPU availability:', chainer.cuda.available)
print('cuDNN availability:', chainer.cuda.cudnn_enabled)

GPU availability: True
cuDNN availablility: True


In [0]:
import re
import numpy as np
import chainer
from chainer import ChainList, optimizers, training
from chainer.training import extensions
import chainer.functions as F
import chainer.links as L

In [0]:
# Labelled toy corpus of business-English sentences.
# Each entry is [sentence, label]. In this data every sentence labelled 1 ends
# with a question mark and every sentence labelled 0 ends with a period,
# i.e. 1 = question, 0 = statement.
data = [
    ["Could I exchange business cards, if you don’t mind?", 1],
    ["I'm calling regarding the position advertised in the newspaper.", 0],
    ["I'd like to apply for the programmer position.", 0],
    ["Could you tell me what an applicant needs to submit?", 1],
    ["Could you tell me what skills are required?", 1],
    ["We will assist employees with training and skill development.", 0],
    ["What kind of in-house training system do you have for your new recruits?", 1],
    ["For office equipment I think rental is better.", 0],
    ["Is promotion based on the seniority system?", 1],
    ["What's still pending from February?", 1],
    ["Which is better, rental or outright purchase?", 1],
    ["General Administration should do all the preparations for stockholder meetings.", 0],
    ["One of the elevators is out of order. When do you think you can have it fixed?", 1],
    ["General Administration is in charge of office building maintenance.", 0],
    ["Receptionists at the entrance hall belong to General Administration.", 0],
    ["Who is managing the office supplies inventory?", 1],
    ["Is there any difference in pay between males and females?", 1],
    ["The General Administration Dept. is in charge of office maintenance.", 0],
    ["Have you issued the meeting notice to shareholders?", 1],
    ["What is an average annual income in Japan?", 1],
    ["Many Japanese companies introduced the early retirement system.", 0],
    ["How much did you pay for the office equipment?", 1],
    ["Is the employee training very popular here?", 1],
    ["What kind of amount do you have in mind?", 1],
    ["We must prepare our financial statement by next Monday.", 0],
    ["Would it be possible if we check the draft?", 1],
    ["The depreciation of fixed assets amounts to $5 million this year.", 0],
    ["Please expedite the completion of the balance sheet.", 0],
    ["Could you increase the maximum lending limit for us?", 1],
    ["We should cut down on unnecessary expenses to improve our profit ratio.", 0],
    ["What percentage of revenue are we spending for ads?", 1],
    ["One of the objectives of internal auditing is to improve business efficiency.", 0],
    ["Did you have any problems finding us?", 1],
    ["How is your business going?", 1],
    ["Not really well. I might just sell the business.", 0],
    ["What line of business are you in?", 1],
    ["He has been a valued client of our bank for many years.", 0],
    ["Would you like for me to show you around our office?", 1],
    ["It's the second door on your left down this hall.", 0],
    ["This is the … I was telling you about earlier.", 0],
    ["We would like to take you out to dinner tonight.", 0],
    ["Could you reschedule my appointment for next Wednesday?", 1],
    ["Would you like Japanese, Chinese, Italian, French or American?", 1],
    ["Is there anything you prefer not to have?", 1],
    ["Please give my regards to the staff back in San Francisco.", 0],
    ["This is a little expression of our thanks.", 0],
    ["Why don’t you come along with us to the party this evening?", 1],
    ["Unfortunately, I have a prior engagement on that day.", 0],
    ["I am very happy to see all of you today.", 0],
    ["It is a great honor to be given this opportunity to present here.", 0],
    ["The purpose of this presentation is to show you the new direction our business is taking in 2009.", 0],
    ["Could you please elaborate on that?", 1],
    ["What's your proposal?", 1]
]

In [0]:
class SentenceClassifierCNN(chainer.ChainList):
    """Sentence classifier built from parallel convolutions of several heights.

    Child links are stored positionally in the ChainList: indices
    ``0 .. convolution_num - 1`` hold one ``Convolution2D`` per filter height,
    index ``convolution_num`` holds the hidden ``Linear`` layer and index
    ``convolution_num + 1`` holds the output ``Linear`` layer.
    """

    def __init__(self, in_channel, out_channel, filter_height_list, filter_width, out_size, max_sentence_size):
        """Build the convolution, hidden and output links.

        Args:
            in_channel: Number of input channels.
            out_channel: Number of output channels of each convolution.
            filter_height_list: List of filter heights; one convolution
                is created per entry.
            filter_width: Filter width.
            out_size: Number of class labels.
            max_sentence_size: Maximum sentence length; used to size the
                pooling window in ``__call__``.
        """
        self.filter_height_list = filter_height_list
        self.max_sentence_size = max_sentence_size
        self.convolution_num = len(filter_height_list)

        # Width of the concatenated pooled features: one block of
        # out_channel values per convolution.
        hidden_units = out_channel * self.convolution_num

        # One convolution per filter height.
        conv_layers = [
            L.Convolution2D(in_channel, out_channel, (height, filter_width), pad=0)
            for height in filter_height_list
        ]
        hidden_layer = L.Linear(hidden_units, hidden_units)  # hidden layer
        output_layer = L.Linear(hidden_units, out_size)  # output layer

        # Register every link with the ChainList, in positional order.
        super(SentenceClassifierCNN, self).__init__(*(conv_layers + [hidden_layer, output_layer]))

    def __call__(self, x):
        """Run the forward pass.

        Args:
            x: Input batch. NOTE(review): presumably shaped
                (batch, in_channel, max_sentence_size, filter_width), as the
                dataset code in this notebook builds it — confirm for other
                callers.
        Returns:
            Output of the final linear layer (no softmax is applied here).
        """
        pooled = []
        for idx, filter_height in enumerate(self.filter_height_list):
            # Convolution followed by ReLU for this filter height.
            feature_map = F.relu(self[idx](x))
            # Pooling window of max_sentence_size + 1 - filter_height.
            window = self.max_sentence_size + 1 - filter_height
            pooled.append(F.max_pooling_2d(feature_map, window))

        # Join the convolution + pooling results of all filters.
        merged = F.concat(pooled, axis=2)

        hidden_layer = self[self.convolution_num]
        output_layer = self[self.convolution_num + 1]
        hidden = F.dropout(F.tanh(hidden_layer(merged)))
        return output_layer(hidden)

In [0]:
N = len(data)
# Split the [sentence, label] pairs into two parallel lists.
data_x = [pair[0] for pair in data]  # sentences
data_t = [pair[1] for pair in data]  # labels

def sentence2words(sentence):
    """Tokenise a sentence into lowercase words.

    Steps: lowercase, delete newlines, replace ASCII punctuation with spaces,
    split on whitespace, then drop tokens that contain an ASCII digit and
    tokens that are stopwords.

    Splitting uses ``str.split()`` with no separator so that runs of spaces
    (left behind where punctuation was replaced) do not yield empty-string
    tokens. Splitting on a single ``" "`` would add ``""`` after every
    punctuation mark, and that empty token would end up in the vocabulary.

    Note: only ASCII punctuation is stripped; characters such as ``’`` or
    ``…`` are kept as part of their token.

    Args:
        sentence: Input sentence (str).
    Returns:
        List of word tokens (str), in their original order.
    """
    stopwords = {"i", "a", "an", "the", "and", "or", "if", "is", "are", "am", "it", "this", "that", "of", "from", "in", "on"}
    normalized = sentence.lower()  # lowercase
    normalized = normalized.replace("\n", "")  # delete newlines
    normalized = re.sub(r"[!-\/:-@[-`{-~]", " ", normalized)  # ASCII punctuation -> space
    sentence_words = []
    for word in normalized.split():  # whitespace split; never yields ""
        if re.search(r"[0-9]", word) is not None:  # skip tokens containing a digit
            continue
        if word in stopwords:  # skip stopwords
            continue
        sentence_words.append(word)
    return sentence_words

# Tokenise every sentence once; the vocabulary and the vectors both use it.
tokenized_sentences = [sentence2words(sentence) for sentence in data_x]

# Word dictionary: token -> integer id, assigned in order of first appearance.
words = {}
for sentence_words in tokenized_sentences:
    for word in sentence_words:
        words.setdefault(word, len(words))

# Longest sentence (in tokens); shorter sentences are zero-padded to this length.
max_sentence_size = max((len(tokens) for tokens in tokenized_sentences), default=0)

# Turn each sentence into a sequence of one-hot word vectors. Rows past the end
# of a sentence stay all-zero, which is the zero padding.
data_x_vec = np.zeros(
    (len(tokenized_sentences), max_sentence_size, len(words)), dtype="float32"
)
for row, sentence_words in enumerate(tokenized_sentences):
    for col, word in enumerate(sentence_words):
        data_x_vec[row, col, words[word]] = 1

# Dataset: (input reshaped to (1, max_sentence_size, vocabulary size), label) pairs.
data_t = np.array(data_t, dtype="int32")
dataset = [
    (x.reshape(1, max_sentence_size, len(words)), t)
    for x, t in zip(data_x_vec, data_t)
]

In [0]:
# Device id: GPU 0 when CUDA is available, otherwise -1 (CPU) so the notebook
# still runs on a runtime without a GPU.
gpu = 0 if chainer.cuda.available else -1

# Constants
epoch_num = 10
batch_size = 5
out_size = 2
filter_height_list = [1,2,3]
out_channel = 32

# Model definition: the CNN wrapped in a Classifier link.
# The filter width equals the vocabulary size, so each filter spans whole
# one-hot word vectors.
model = L.Classifier(SentenceClassifierCNN(
    in_channel=1,
    out_channel=out_channel,
    filter_height_list=filter_height_list,
    filter_width=len(words),
    out_size=out_size,
    max_sentence_size=max_sentence_size
))

optimizer = chainer.optimizers.Adam()
optimizer.setup(model)

if gpu >= 0:
    # get_device_from_id replaces the deprecated chainer.cuda.get_device.
    chainer.cuda.get_device_from_id(gpu).use()
    model.to_gpu(gpu)

In [0]:
# Training

# Hold out 20 samples for validation. The split is seeded so the same samples
# are held out on every run and validation metrics are comparable across runs.
# (Only the split is seeded here; other sources of randomness are not.)
SPLIT_SEED = 0
train, test = chainer.datasets.split_dataset_random(dataset, N-20, seed=SPLIT_SEED)
train_iter = chainer.iterators.SerialIterator(train, batch_size)
# Validation iterator: a single pass in fixed order.
test_iter = chainer.iterators.SerialIterator(test, batch_size, repeat=False, shuffle=False)
updater = chainer.training.StandardUpdater(train_iter, optimizer, device=gpu)
trainer = chainer.training.Trainer(updater, (epoch_num, "epoch"), out="result")
# Evaluate on the held-out set; results are reported under "validation/...".
trainer.extend(extensions.Evaluator(test_iter, model, device=gpu))
trainer.extend(extensions.LogReport(trigger=(1, "epoch")))
trainer.extend(extensions.PrintReport( ["epoch", "main/loss", "validation/main/loss", "main/accuracy", "validation/main/accuracy", "elapsed_time"]))
#trainer.extend(extensions.ProgressBar()) # progress bar output
trainer.run()

epoch       main/loss   validation/main/loss  main/accuracy  validation/main/accuracy  elapsed_time
[J1           0.698077    0.6902                0.6            0.55                      0.0919376     
[J2           0.674164    0.686691              0.542857       0.55                      0.194717      
[J3           0.653516    0.684227              0.666667       0.55                      0.294735      
[J4           0.626934    0.682688              0.857143       0.6                       0.39747       
[J5           0.615515    0.679652              0.766667       0.65                      0.492704      
[J6           0.54016     0.674049              0.971429       0.45                      0.598433      
[J7           0.533044    0.669157              0.942857       0.45                      0.705415      
[J8           0.469153    0.66412               0.966667       0.45                      0.804957      
[J9           0.408502    0.65849               1          