In [1]:
# coding: utf-8
import sys
sys.path.append('..')
import numpy as np
from common.functions import softmax
from rnnlm import Rnnlm
from better_rnnlm import BetterRnnlm


class RnnlmGen(Rnnlm):
    """Rnnlm subclass that adds sampling-based generation and LSTM state access."""

    def generate(self, start_id, skip_ids=None, sample_size=100):
        """Generate a sequence of word IDs by repeatedly sampling the next word.

        Args:
            start_id: ID of the first word. It is always the first element
                of the returned list and the first input fed to the model.
            skip_ids: Optional iterable of word IDs that must never be
                emitted. ``None`` (the default) excludes nothing.
            sample_size: Total length of the returned list, counting
                ``start_id``.

        Returns:
            A list of word IDs of length ``sample_size`` (or ``[start_id]``
            when ``sample_size <= 1``). Every sampled entry is a plain
            Python ``int``.

        Notes:
            A draw that lands on a skipped ID is discarded and redrawn from
            the *same* distribution. The model is not stepped again for a
            rejected draw, so the recurrent state advances exactly once per
            emitted word. If practically all probability mass sits on
            skipped IDs the redraw loop can take very long.
        """
        # Normalise to a set of Python ints so membership tests are O(1) and
        # work for lists, tuples, sets and NumPy arrays alike.
        skip = frozenset() if skip_ids is None else frozenset(int(i) for i in skip_ids)

        word_ids = [start_id]

        x = start_id
        while len(word_ids) < sample_size:
            x = np.array(x).reshape(1, 1)
            score = self.predict(x)
            # p is 1-D with one entry per vocabulary word (10000 for PTB per
            # the original note); softmax is expected to make it sum to 1,
            # which np.random.choice requires.
            p = softmax(score.flatten())

            # Index the size-1 result explicitly: int() on an array with
            # ndim > 0 is deprecated in recent NumPy releases.
            sampled = int(np.random.choice(len(p), size=1, p=p)[0])
            while sampled in skip:
                sampled = int(np.random.choice(len(p), size=1, p=p)[0])

            x = sampled
            word_ids.append(sampled)

        return word_ids

    def get_state(self):
        """Return the LSTM layer's current ``(h, c)`` pair as a tuple."""
        return self.lstm_layer.h, self.lstm_layer.c

    def set_state(self, state):
        """Restore the LSTM layer's state.

        Args:
            state: Iterable unpacked positionally into
                ``self.lstm_layer.set_state`` — e.g. the ``(h, c)`` tuple
                returned by ``get_state``.
        """
        self.lstm_layer.set_state(*state)


class BetterRnnlmGen(BetterRnnlm):
    """BetterRnnlm subclass that adds sampling-based generation and state access."""

    def generate(self, start_id, skip_ids=None, sample_size=100):
        """Generate a sequence of word IDs by repeatedly sampling the next word.

        Args:
            start_id: ID of the first word. It is always the first element
                of the returned list and the first input fed to the model.
            skip_ids: Optional iterable of word IDs that must never be
                emitted. ``None`` (the default) excludes nothing.
            sample_size: Total length of the returned list, counting
                ``start_id``.

        Returns:
            A list of word IDs of length ``sample_size`` (or ``[start_id]``
            when ``sample_size <= 1``). Every sampled entry is a plain
            Python ``int``.

        Notes:
            A draw that lands on a skipped ID is discarded and redrawn from
            the *same* distribution. The model is not stepped again for a
            rejected draw, so the recurrent state advances exactly once per
            emitted word. If practically all probability mass sits on
            skipped IDs the redraw loop can take very long.
        """
        # Normalise to a set of Python ints so membership tests are O(1) and
        # work for lists, tuples, sets and NumPy arrays alike.
        skip = frozenset() if skip_ids is None else frozenset(int(i) for i in skip_ids)

        word_ids = [start_id]

        x = start_id
        while len(word_ids) < sample_size:
            x = np.array(x).reshape(1, 1)
            score = self.predict(x).flatten()
            # softmax is expected to return probabilities summing to 1,
            # which np.random.choice requires; flatten keeps p 1-D.
            p = softmax(score).flatten()

            # Index the size-1 result explicitly: int() on an array with
            # ndim > 0 is deprecated in recent NumPy releases.
            sampled = int(np.random.choice(len(p), size=1, p=p)[0])
            while sampled in skip:
                sampled = int(np.random.choice(len(p), size=1, p=p)[0])

            x = sampled
            word_ids.append(sampled)

        return word_ids

    def get_state(self):
        """Return a list of ``(h, c)`` tuples, one per entry of ``self.lstm_layers``."""
        return [(layer.h, layer.c) for layer in self.lstm_layers]

    def set_state(self, states):
        """Restore per-layer LSTM state.

        Args:
            states: Iterable of per-layer states, paired in order with
                ``self.lstm_layers``; each item is unpacked positionally
                into that layer's ``set_state``. Pairing uses ``zip``, so
                surplus layers or surplus states are silently ignored.
        """
        for layer, state in zip(self.lstm_layers, states):
            layer.set_state(*state)

In [11]:
# coding: utf-8
import sys
sys.path.append('..')
from dataset import ptb


# Load the PTB training split together with its word <-> id mappings.
corpus, word_to_id, id_to_word = ptb.load_data('train')
vocab_size, corpus_size = len(word_to_id), len(corpus)

# load_params is deliberately left commented out in this cell, so the model
# samples with whatever parameters RnnlmGen() starts with.
model = RnnlmGen()
# model.load_params('./Rnnlm.pkl')

# Choose the start word and the words that must not be generated.
start_word = 'you'
start_id = word_to_id[start_word]
skip_words = ['N', '<unk>', '$']
skip_ids = [word_to_id[word] for word in skip_words]

# Generate the text; each ' <eos>' marker becomes a full stop plus newline.
word_ids = model.generate(start_id, skip_ids)
txt = ' '.join(id_to_word[word_id] for word_id in word_ids).replace(' <eos>', '.\n')
print(txt)

you preamble duty arrogant polled effectively united deteriorating strictly roper conservatorship administrator rest miners upscale payments islands pursuing guide recalls sterling momentum werner wide giants balked failure plant behavior brink good positive southmark number current hotels listen penalty persuade accounted improving sounded called ufo crush distributes guards louis sec bullish mehl property\/casualty nonetheless anacomp herself injured milk kkr viewers rated legg retains yourself thoughts plan shaking rupert installations oppenheimer ortega currently convey enabling par coverage reduces restoring situations belongs size predict chase monitors liquidation touchy tradition pcs equipment alternatively infrastructure conditions across-the-board tourism telephone splitting hart-scott-rodino carter recovering ambitions monopoly


# 학습된 모델의 가중치 불러들이기

In [12]:
# coding: utf-8
import sys
sys.path.append('..')
from dataset import ptb


# Load the PTB training split together with its word <-> id mappings.
corpus, word_to_id, id_to_word = ptb.load_data('train')
vocab_size, corpus_size = len(word_to_id), len(corpus)

# Build the generator and load the saved parameters from disk.
model = RnnlmGen()
model.load_params('./Rnnlm.pkl')

# Choose the start word and the words that must not be generated.
start_word = 'you'
start_id = word_to_id[start_word]
skip_words = ['N', '<unk>', '$']
skip_ids = [word_to_id[word] for word in skip_words]

# Generate the text; each ' <eos>' marker becomes a full stop plus newline.
word_ids = model.generate(start_id, skip_ids)
txt = ' '.join(id_to_word[word_id] for word_id in word_ids).replace(' <eos>', '.\n')
print(txt)

you 've yet wait are going to see another member of lebanon who he is east would n't stem from a drink of the political negative.
 mr. templeton cabrera is the shortage of wyoming who forced out of the producers showing that messrs. gorbachev would n't be brought through the financial office of buick in the move that might two years.
 stephen r. breeden in the chairman of the filters showed the members of mr. davis 's findings mr. roman was thriving without the nsc 's political target of american express 's original.
 mr. roman said the


# 확률분포 기준 샘플링 예제

In [7]:
# Example discrete distribution over six outcomes (indices 0-5); sums to 1.
prob = [0.1, 0.2, 0.5, 0.02, 0.08, 0.1]

In [8]:
# Draw one index in range(len(prob)), weighted by prob; size=1 makes the
# result a one-element array rather than a scalar.
sampled = np.random.choice(len(prob), size=1, p=prob)
print(sampled)

[4]


In [9]:
# Tally how often each index comes up over 100 weighted draws.
res = {}

for i in range(100):
    sampled = np.random.choice(len(prob), size=1, p=prob)
    outcome = sampled.item()  # one-element array -> plain Python int
    res[outcome] = res.get(outcome, 0) + 1

In [10]:
# Display the {index: count} tally built from the 100 draws.
res

{2: 39, 1: 24, 4: 18, 0: 11, 5: 7, 3: 1}

# join함수의 사용예시

In [15]:
# str.join demo: the same words joined with no separator, a space, and a comma.
strlist = ['I', 'say', 'you', 'are', 'handsome']
for sep in ('', ' ', ','):
    print(sep.join(strlist))

Isayyouarehandsome
I say you are handsome
I,say,you,are,handsome


# Better RNNLM

In [16]:
# coding: utf-8
import sys
sys.path.append('..')
from common.np import *
from rnnlm_gen import BetterRnnlmGen
from dataset import ptb


# Load the PTB training split together with its word <-> id mappings.
corpus, word_to_id, id_to_word = ptb.load_data('train')
vocab_size, corpus_size = len(word_to_id), len(corpus)


model = BetterRnnlmGen()
model.load_params('BetterRnnlm.pkl')  # Downloaded from website!

# Choose the start word and the words that must not be generated.
start_word = 'you'
start_id = word_to_id[start_word]
skip_words = ['N', '<unk>', '$']
skip_ids = [word_to_id[word] for word in skip_words]

# Generate the text; each ' <eos>' marker becomes a full stop plus newline.
word_ids = model.generate(start_id, skip_ids)
txt = ' '.join(id_to_word[word_id] for word_id in word_ids).replace(' <eos>', '.\n')

print(txt)


# Clear the recurrent state before starting a second, independent sample.
model.reset_state()

start_words = 'the meaning of life is'
start_ids = [word_to_id[word] for word in start_words.split(' ')]

# Feed every prompt word except the last through the model so its state
# reflects the prompt; the predictions themselves are discarded.
for x in start_ids[:-1]:
    x = np.array(x).reshape(1, 1)
    model.predict(x)

# Continue from the last prompt word, then put the earlier prompt words back
# in front so the printed text contains the whole prompt.
word_ids = start_ids[:-1] + model.generate(start_ids[-1], skip_ids)
txt = ' '.join(id_to_word[word_id] for word_id in word_ids).replace(' <eos>', '.\n')
print('-' * 50)
print(txt)


you could have a habit of talking about the program at ok he.
 i 's not coming to car his business.
 you know how the wrong turn are n't fear ralph is just getting the bad.
 when you come with news principals and got on the courtroom playing.
 i cut over the overseeing indian boxes they are considering and shows what the shot highlight during the community or his supermarket.
 the two other highly educated campaigns though mr. sohmer sent together with the heart of the dealership representative mentioned by other assurances that i should
--------------------------------------------------
the meaning of life is deciding whether it has a long history in which the own agreed to open an indictment.
 when an asset effort to coordinate its territory as a prospectus for employees and the tabloid for any type of money and appropriate controls would act before the los angeles financial condition say.
 it is whether to recent acquisitions the industry has disrupted its rights.
 the firms do n't 

In [17]:
# coding: utf-8
import sys
sys.path.append('..')
from dataset import sequence


# Load the addition dataset as (input, target) pairs for train and test.
(x_train, t_train), (x_test, t_test) = sequence.load_data(
    'addition.txt', seed=1984
)
char_to_id, id_to_char = sequence.get_vocab()

# Array shapes of each split.
print(x_train.shape, t_train.shape)
print(x_test.shape, t_test.shape)
# (45000, 7) (45000, 5)
# (5000, 7) (5000, 5)

# First training example as raw character ids.
print(x_train[0])
print(t_train[0])
# [ 3  0  2  0  0 11  5]
# [ 6  0 11  7  5]

# The same example decoded back to characters.
print(''.join(id_to_char[char_id] for char_id in x_train[0]))
print(''.join(id_to_char[char_id] for char_id in t_train[0]))
# 71+118
# _189


(45000, 7) (45000, 5)
(5000, 7) (5000, 5)
[ 3  0  2  0  0 11  5]
[ 6  0 11  7  5]
71+118 
_189 
