<a href="https://colab.research.google.com/github/richardyy1188/Information_Extraction_from_Biographies/blob/master/dajare_model_v1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Colab related

In [0]:
# Check uptime (max 12 hr)
# Reads the first field of /proc/uptime (seconds since boot) and divides it
# by 60 twice so the printed figure is in hours.
!cat /proc/uptime | awk '{print $1 /60 /60 "hours "}'

In [28]:
# upgrade numpy to resolve error of loading npy file
# !pip install --upgrade numpy

7.91463hours 


In [0]:
# files.upload()
# files.download()

# Import & Load

In [0]:
import numpy as np
import pickle, time
from google.colab import files
from tensorflow import keras
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.layers import Dense, Dropout, Input, LSTM, Reshape, Lambda
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
from tensorflow.keras.utils import to_categorical

In [0]:
# NOTE: pickle.load can execute arbitrary code -- only load files produced by this project.

# Training bundles: every sample inside one bundle has the same length, while the
# length (Tx) varies from bundle to bundle. Samples are expanded to one-hot
# vectors of width word_voc_size later, in the training cell.
with open('X_v1.pickle', 'rb') as f:
  bundles_X = pickle.load(f)
with open('Y_v1.pickle', 'rb') as f:
  bundles_Y = pickle.load(f)

# Vocabulary lookups: i2w is indexed by word index; w2i is presumably the reverse mapping.
with open('index2word.pickle', 'rb') as f:
  i2w = pickle.load(f)
with open('word2index.pickle', 'rb') as f:
  w2i = pickle.load(f)

word_voc_size = len(i2w)

# Hyperparameters

In [0]:

# Values a reader is most likely to tune.
n_a = 128                   # size of the LSTM hidden state
learning_rate = 0.005       # passed to Adam as its learning rate
learning_rate_decay = 0.01  # passed to Adam as its decay
batch_size = 64             # mini-batch size given to model.fit

# Model Building

In [0]:
# Both layers are created once at module level so that the training model and
# the sampling model built further down share the same layer objects.
lstm = LSTM(units=n_a, return_state=True, return_sequences=True)  # activation left at the default (tanh)
densor = Dense(units=word_voc_size, activation='softmax')  # softmax over the vocabulary

In [0]:
def model(n_a, word_voc_size):
  """Build the training network from the module-level `lstm` and `densor` layers.

  Args:
    n_a: not read inside this function; the LSTM width is fixed when `lstm` is created.
    word_voc_size: length of the last axis of the input (one vector per time step).

  Returns:
    A Keras `Model` mapping the input sequence to `densor` applied to every LSTM step output.
  """
  seq_in = Input(shape=(None, word_voc_size))  # None: sequence length (Tx) may vary
  # The LSTM returns (all step outputs, last step output, last cell state);
  # only the per-step outputs are used for training.
  step_outputs, _, _ = lstm(seq_in)
  return Model(inputs=seq_in, outputs=densor(step_outputs))

In [6]:
# `model` is the builder function until this cell runs, after which the name is
# rebound to the built network. The type guard keeps the cell safe to re-run:
# without it a second run would call the built network instead of the builder.
if not isinstance(model, Model):
  model = model(n_a, word_voc_size)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, None, 45973)       0         
_________________________________________________________________
lstm (LSTM)                  [(None, None, 128), (None 23604224  
_________________________________________________________________
dense (Dense)                (None, None, 45973)       5930517   
Total params: 29,534,741
Trainable params: 29,534,741
Non-trainable params: 0
_________________________________________________________________


In [0]:
# Targets are one-hot vectors, hence categorical cross-entropy.
# NOTE(review): `lr` and `decay` are the keyword names of older Keras releases;
# confirm against the installed version before upgrading TensorFlow.
opt = Adam(
  lr=learning_rate,
  decay=learning_rate_decay,
)
model.compile(optimizer=opt, loss='categorical_crossentropy')

# Train Model

## Helper functions

In [0]:
def chunks(lst, size):
  """Lazily yield consecutive slices of `lst`, each at most `size` items long.

  Every slice holds `size` items except possibly the last one, which holds
  whatever remains.
  """
  yield from (lst[start:start + size] for start in range(0, len(lst), size))

def to_one_hot_arr(indices, classes, first_zero=False):
  """Encode class indices as one-hot rows using Keras `to_categorical`.

  Args:
    indices: class indices to encode.
    classes: number of classes, i.e. the width of each one-hot row.
    first_zero: when True, entry 0 of the result is overwritten with zeros.

  Returns:
    The array produced by `to_categorical`, with entry 0 zeroed if requested.
  """
  one_hot = to_categorical(indices, classes)
  if not first_zero:
    return one_hot
  one_hot[0] = np.zeros(classes)  # blank out the first position
  return one_hot

## Train Process & Monitoring
To use mini-batch gradient descent with sequences of varying length, samples of equal length are grouped into bundles.  
The model is then fit on one bundle at a time, so every mini-batch contains sequences of a single length.

In [0]:
# Resume training from previously saved weights; the .h5 file must already be
# available in the working directory for this call to succeed.
model.load_weights('model_v1-1_13_epochs.h5')

In [9]:
true_epochs = 1  # number of full passes over every bundle
ch_size = 6000   # max samples one-hot encoded at once (loop-invariant, so set once here)
losses = list()  # sample-weighted mean loss of each true epoch
start_time = time.time()
for i in range(true_epochs):
  print('---------TRUE EPOCH {}/{}--------------'.format(i+1, true_epochs))
  total_sample = 0
  loss_weighted_sum = 0.0
  for bundle_X, bundle_Y in zip(bundles_X, bundles_Y): # a bundle is samples with the same length
    # cut a bundle into small sub bundles, otherwise the resulting np array would be too large for memory
    for sub_bundle_X, sub_bundle_Y in zip(chunks(bundle_X, ch_size), chunks(bundle_Y, ch_size)):

      # preprocess: from indices to one-hots, stacked into np arrays for Keras;
      # the first input step of every sample is zeroed (first_zero=True)
      sub_bundle_X = np.array([to_one_hot_arr(sample, word_voc_size, True).astype('int8') for sample in sub_bundle_X])
      sub_bundle_Y = np.array([to_one_hot_arr(sample, word_voc_size).astype('int8') for sample in sub_bundle_Y])
      # train
      history = model.fit(sub_bundle_X, sub_bundle_Y, batch_size=batch_size, verbose=0) # default epochs=1
      # record loss: history.history['loss'] is a list with one entry per epoch,
      # so take the scalar; multiplying the list itself would repeat it instead
      loss = history.history['loss'][-1]
      loss_weighted_sum += loss * len(sub_bundle_X)
      total_sample += len(sub_bundle_X)

  # end of a true epoch, calculate the sample-weighted mean loss and record it
  # (nan when there was nothing to train on, instead of dividing by zero)
  losses.append(loss_weighted_sum / total_sample if total_sample else float('nan'))

# Print info of training this time
elapsed_sec = time.time() - start_time  # time.time() differences are in seconds
hr, remainder_sec = divmod(elapsed_sec, 3600)
minute = remainder_sec // 60
print("Spend time: {}hr {}minutes".format(int(hr), int(minute)))
for i, loss in enumerate(losses):
  print("True epoch {}: {}".format(i+1, loss))

---------TRUE EPOCH 1/3--------------
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
---------TRUE EPOCH 2/3--------------
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
---------TRUE EPOCH 3/3--------------
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 

In [0]:
# model.save_weights('model_v1-1_13_epochs.h5')
# files.download('model_v1-1_13_epochs.h5')

# Sample

In [0]:
def inference_model():
  """Build the sampling network on top of the shared `lstm` and `densor` layers.

  The network takes an input sequence together with explicit initial hidden and
  cell states, so a caller can carry the LSTM state from one call to the next.

  Returns:
    A Keras `Model` with inputs [x0, a0, c0] and outputs [prob, a, c]:
    `densor` applied to the last hidden state, that hidden state, and the
    last cell state.
  """
  x0 = Input(shape=(None, word_voc_size))
  a0 = Input(shape=(n_a,), name='a0')
  c0 = Input(shape=(n_a,), name='c0')

  # The per-step outputs are discarded; only the final states are needed here.
  _, a_last, c_last = lstm(x0, initial_state=[a0, c0])
  prob = densor(a_last)

  return Model(inputs=[x0, a0, c0], outputs=[prob, a_last, c_last])

# Build the sampling model once; generate_dajare calls its predict() for every generated word.
inf_model = inference_model()

In [27]:
def generate_dajare(max_len, sample_method='max'):
  """Generate a sequence of words one at a time with `inf_model`.

  Starts from an all-zero input vector and all-zero LSTM states, then feeds each
  chosen word back in as the next input until the newline word is produced or
  `max_len` words have been generated.

  Args:
    max_len: maximum number of words to generate.
    sample_method: 'max' picks the most probable word at each step; any other
      value samples a word at random according to the predicted distribution.

  Returns:
    List of generated words; the terminating newline word is included when it
    was produced.
  """
  words = list()
  word = None
  x = np.zeros((1, 1, word_voc_size)) # 1 sample, length 1
  a = np.zeros((1, n_a))
  c = np.zeros((1, n_a))
  # strict `<` so that at most max_len words are produced (`<=` yielded one extra)
  while len(words) < max_len and word != '\n':
    prob, a, c = inf_model.predict([x, a, c])
    # assert prob.shape == (1, word_voc_size)
    if sample_method == 'max':
      i = np.argmax(prob) # automatically flattened
    else:
      # np.random.choice requires p to sum to 1 within a tight tolerance, which
      # low-precision network output may miss: cast to float64 and renormalize
      p = prob.ravel().astype('float64')
      p /= p.sum()
      i = np.random.choice(a=word_voc_size, p=p) # draw an index according to p
    word = i2w[i]
    words.append(word)
    # feed the chosen word back in as a one-hot input of shape (1, 1, word_voc_size)
    x = np.array([to_one_hot_arr([i], word_voc_size)])

  return words

たんねんあるつまようじもの,言った!



In [0]:
# Sample with max_len=30 using random sampling (any method other than 'max'),
# then join the generated words into one string for display.
words = generate_dajare(30, 'random')
print(''.join(words))