In [1]:
# Google Colab only: mount the user's Google Drive at /gdrive (this prompts for
# an OAuth authorization code) and then make /gdrive the working directory.
from google.colab import drive
drive.mount('/gdrive')
%cd /gdrive

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /gdrive
/gdrive


In [2]:
# Show the current working directory. A bare `pwd` only works because IPython's
# automagic resolves it to the %pwd magic.
pwd

'/gdrive'

In [3]:
# for Google Colab only: Navigate to my snaug project folder
cd 'My Drive/Colab Notebooks/snaug'

/gdrive/My Drive/Colab Notebooks/snaug


In [4]:
# Show the interpreter's module search path before the project folder is
# appended to it in the next cell.
import sys
sys.path

['',
 '/env/python',
 '/usr/lib/python36.zip',
 '/usr/lib/python3.6',
 '/usr/lib/python3.6/lib-dynload',
 '/usr/local/lib/python3.6/dist-packages',
 '/usr/lib/python3/dist-packages',
 '/usr/local/lib/python3.6/dist-packages/IPython/extensions',
 '/root/.ipython']

In [5]:
# Make the project folder importable so that the custom modules in its `lib`
# package (e.g. lib.nlplstm_class, lib.data_common) can be loaded.
project_dir = "/gdrive/My Drive/Colab Notebooks/snaug"
# Guard the append: re-running this cell must not pile up duplicate entries.
if project_dir not in sys.path:
    sys.path.append(project_dir)
sys.path

['',
 '/env/python',
 '/usr/lib/python36.zip',
 '/usr/lib/python3.6',
 '/usr/lib/python3.6/lib-dynload',
 '/usr/local/lib/python3.6/dist-packages',
 '/usr/lib/python3/dist-packages',
 '/usr/local/lib/python3.6/dist-packages/IPython/extensions',
 '/root/.ipython',
 '/gdrive/My Drive/Colab Notebooks/snaug']

In [0]:
#
# Purpose of this notebook:
#   1. Load the saved weights of models that were previously trained on a
#      cloud platform with a GPU (e.g. Google Colab).
#   2. Save instances of the trained models for future prediction tasks.

import string
import textwrap
import pickle

from lib.nlplstm_class import (TFModelLSTMCharToken, TFModelLSTMWordToken, 
                               TFModelLSTMWord2vec) 
from lib.data_common import (load_doc, save_doc, clean_doc, prepare_char_tokens)
from lib.data_common import (build_token_lines, prepare_text_tokens, load_word2vec)
from lib.data_common import pathfinder_textfile, fixed_length_token_textfile

In [0]:
#
# LSTM model that uses character tokenisation
#
# Read the text file named by `pathfinder_textfile` (imported from
# lib.data_common) and lower-case it, so that upper- and lower-case letters
# map to the same character token.
text = load_doc(pathfinder_textfile).lower()

In [68]:
# Character-level tokenisation: split the text into LSTM features (X) and
# next-character targets (y), and build the char <-> index lookup tables.
# maxlen is the window length in characters and step the stride between
# windows; the printed counts (76633 chars -> 25531 sequences) are consistent
# with a 40-character window advanced 3 characters at a time.
maxlen, step = 40, 3
(X, y,
 char2indices, indices2char,
 num_unique_char) = prepare_char_tokens(text, maxlen, step)

corpus length: 76633
total chars: 57
number of sequences: 25531
number of next_chars: 25531


In [0]:
# Create the text-generation LSTM model that works on character tokens.
# use_gpu=True presumably selects the cuDNN-backed layers: the summary printed
# after define() lists CuDNNLSTM layers -- confirm in lib.nlplstm_class.
textgen_model_1 = TFModelLSTMCharToken(use_gpu=True)

In [70]:
# Build the character-level network from the window length (maxlen) and the
# number of distinct characters (num_unique_char) returned by
# prepare_char_tokens, then show its layer summary.
textgen_model_1.define(maxlen, num_unique_char)
# Keras' Model.summary() prints the table itself and returns None, so wrapping
# it in print() only adds a stray "None" line after the table.
# (Assumes `.model` is a Keras model, as the printed summary format indicates.)
textgen_model_1.model.summary()

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
cu_dnnlstm_3 (CuDNNLSTM)     (None, 40, 512)           1169408   
_________________________________________________________________
dropout_17 (Dropout)         (None, 40, 512)           0         
_________________________________________________________________
cu_dnnlstm_4 (CuDNNLSTM)     (None, 512)               2101248   
_________________________________________________________________
dropout_18 (Dropout)         (None, 512)               0         
_________________________________________________________________
dense_14 (Dense)             (None, 57)                29241     
_________________________________________________________________
activation_4 (Activation)    (None, 57)                0         
Total params: 3,299,897
Trainable params: 3,299,897
Non-trainable params: 0
____________________________________________

In [0]:
# Load the weights that were trained on a GPU platform (50 epochs, per the
# file name) into the character-token model defined above.
textgen_model_1.load_weights("./model/pathfinder_chartoken_model_50_epoch")

In [0]:
# Save the model, now carrying the previously trained weights, under the
# "_withcuda" file name so it can be reused for later prediction tasks.
textgen_model_1.save("./model/pathfinder_chartoken_model_50_epoch_withcuda")

In [0]:
# 
# Loading text data that uses word tokenisation
#

In [0]:
# Load the file of fixed-length token lines (path imported from
# lib.data_common) and split it into one entry per line.
doc = load_doc(fixed_length_token_textfile)
lines = doc.split('\n')
# Optional sanity check on the number of lines read:
#print('Total lines: %d' % len(lines))

In [0]:
# Tokenise the lines and separate them into features (X) and target (y); the
# call also returns the sequence length, the vocabulary size and the tokenizer.
# NOTE: this rebinds X and y, which held the character-level data until now.
X, y, seq_length, vocab_size, tokenizer = prepare_text_tokens(lines)
# Optional sanity check on the feature array:
#print(X.shape)

In [0]:
#
# Word tokenization with word embedding model
#

In [0]:
# Create the text-generation LSTM model that uses word tokens together with a
# word-embedding layer.
# use_gpu=False: this model does not use the cuDNN library (the summary printed
# after define() lists plain LSTM layers rather than CuDNNLSTM).
textgen_model_2 = TFModelLSTMWordToken(use_gpu=False)

In [78]:
# Build the word-token network: vocabulary size and sequence length come from
# prepare_text_tokens, and every word is embedded into 300 dimensions.
textgen_model_2.define(vocab_size=vocab_size,
                       embedding_size=300,
                       seq_length=seq_length)
# Keras' Model.summary() prints the table itself and returns None, so wrapping
# it in print() only adds a stray "None" line after the table.
# (Assumes `.model` is a Keras model, as the printed summary format indicates.)
textgen_model_2.model.summary()

Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_6 (Embedding)      (None, 50, 300)           877800    
_________________________________________________________________
lstm_15 (LSTM)               (None, 50, 300)           721200    
_________________________________________________________________
dropout_19 (Dropout)         (None, 50, 300)           0         
_________________________________________________________________
lstm_16 (LSTM)               (None, 300)               721200    
_________________________________________________________________
dropout_20 (Dropout)         (None, 300)               0         
_________________________________________________________________
dense_15 (Dense)             (None, 300)               90300     
_________________________________________________________________
dense_16 (Dense)             (None, 2926)            

In [0]:
# Load the weights that were trained on a GPU platform (200 epochs, per the
# file name) into the word-token model defined above.
textgen_model_2.load_weights("./model/pathfinder_wordtoken_model_200_epoch")

In [0]:
# Save the model, now carrying the previously trained weights, for later
# prediction tasks.
# NOTE(review): the file name ends in "_withcuda" although this model was
# created with use_gpu=False -- confirm the naming is intended.
textgen_model_2.save("./model/pathfinder_wordtoken_model_200_epoch_withcuda")

In [0]:
#
# Word2vec pre-trained model
#

In [0]:
# Load the pickled pretrained weights of the gensim Word2Vec word model.
# SECURITY: pickle.load can execute arbitrary code while unpickling -- only
# load files that were produced by this project.
# The `with` block closes the file handle as soon as the data has been read
# (the bare open() call used previously never closed it).
with open('./model/pathfinder_wordtoken_w2v_word_model_weights.pkl', 'rb') as weights_file:
    pretrained_weights = pickle.load(weights_file)
# The weight matrix is two-dimensional: its first dimension is used as the
# vocabulary size and its second as the embedding size.
# NOTE: this rebinds vocab_size; the misspelled name `emdedding_size` is kept
# because the later define() cell reads it under exactly that name.
vocab_size, emdedding_size = pretrained_weights.shape

In [0]:
# Create the text-generation LSTM model that uses word tokens with the
# pretrained Word2Vec embedding weights.
# use_gpu=False: this model does not use the cuDNN library (the summary printed
# after define() lists plain LSTM layers rather than CuDNNLSTM).
textgen_model_3 = TFModelLSTMWord2vec(use_gpu=False)

In [84]:
# Build the Word2Vec-based network: vocabulary size and embedding size are the
# two dimensions of the pretrained weight matrix, which is passed in as well.
textgen_model_3.define(vocab_size=vocab_size,
                       embedding_size=emdedding_size,
                       pretrained_weights=pretrained_weights)
# Keras' Model.summary() prints the table itself and returns None, so wrapping
# it in print() only adds a stray "None" line after the table.
# (Assumes `.model` is a Keras model, as the printed summary format indicates.)
textgen_model_3.model.summary()

Model: "sequential_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_7 (Embedding)      (None, None, 300)         877500    
_________________________________________________________________
lstm_17 (LSTM)               (None, None, 300)         721200    
_________________________________________________________________
dropout_21 (Dropout)         (None, None, 300)         0         
_________________________________________________________________
lstm_18 (LSTM)               (None, 300)               721200    
_________________________________________________________________
dropout_22 (Dropout)         (None, 300)               0         
_________________________________________________________________
dense_17 (Dense)             (None, 300)               90300     
_________________________________________________________________
dense_18 (Dense)             (None, 2926)            

In [0]:
# Load the weights that were trained on a GPU platform (50 epochs, per the
# file name) into the Word2Vec-based model defined above.
textgen_model_3.load_weights("./model/pathfinder_wordtoken_w2v_model_50_epoch")

In [0]:
# Save the model, now carrying the previously trained weights, for later
# prediction tasks.
# NOTE(review): the file name ends in "_withcuda" although this model was
# created with use_gpu=False -- confirm the naming is intended.
textgen_model_3.save("./model/pathfinder_wordtoken_w2v_model_50_epoch_withcuda")