In [1]:
import json
from pathlib import Path
from collections import Counter

import numpy as np
from tqdm.notebook import trange
import tensorflow as tf
from tensorflow.data import Dataset, AUTOTUNE
from tensorflow.keras.layers import TextVectorization, LSTM, Embedding, Dropout, Dense, LeakyReLU, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
from tensorflow.train import Checkpoint, CheckpointManager

from model_utility import get_train_val_data

In [2]:
# Only surface TensorFlow log records of level ERROR and above.
tf.get_logger().setLevel('ERROR')

gpu_devices = tf.config.list_physical_devices('GPU')

# Display the first GPU's details as the cell output. On a machine without a
# visible GPU the list is empty, so show a message instead of raising IndexError.
tf.config.experimental.get_device_details(gpu_devices[0]) if gpu_devices else "No GPU detected - running on CPU"

2022-07-31 14:47:54.764926: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:961] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-07-31 14:47:54.775597: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:961] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-07-31 14:47:54.775999: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:961] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-07-31 14:47:54.778320: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:961] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.


{'compute_capability': (7, 5), 'device_name': 'NVIDIA GeForce GTX 1650'}

In [3]:
# Shell escape: print the NVIDIA driver / GPU status table for the record.
!nvidia-smi

Sun Jul 31 14:47:55 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.57       Driver Version: 516.59       CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  On   | 00000000:01:00.0  On |                  N/A |
| N/A   58C    P8     4W /  N/A |    234MiB /  4096MiB |     26%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [4]:
# --- Reproducibility -------------------------------------------------------
RANDOM_STATE = 7

# Seed NumPy (random sample picks) and TensorFlow (weight initialisation,
# dataset shuffling, dropout) so that a fresh "Restart & Run All" is repeatable.
# NOTE(review): some GPU kernels may still be non-deterministic.
np.random.seed(RANDOM_STATE)
tf.random.set_seed(RANDOM_STATE)

# --- Data --------------------------------------------------------------------
SEQ_LEN = 512                      # passed to get_train_val_data as seq_len
VAL_SIZE = 0.05                    # passed to get_train_val_data as val_size
BATCH_SIZE = 128
SHUFFLE_BUFFER = BATCH_SIZE * 20   # shuffle buffer size, in samples

# --- Model / optimisation ------------------------------------------------------
EMBEDDING_DIM = 32
DROPOUT_RATIO = 0.2                # used by the LSTM and Dropout layers
LR = 1e-3                          # Adam learning rate

# --- Training loop -------------------------------------------------------------
EPOCHS = 10                        # epochs to run in this session
TRAIN_STEPS = 4000                 # steps_per_epoch (the training dataset repeats)
EARLY_STOP_PATIENCE = 5

# --- Text generation -----------------------------------------------------------
CHARS_TO_PREDICT = 256             # characters generated per sample

# --- Output locations (one sub-directory per model version) ---------------------
MODEL_IDENTIFIER = "V1"

TB_LOGS = Path("tb_logs") / MODEL_IDENTIFIER
TB_LOGS.mkdir(exist_ok = True, parents = True)

MODELS_DIR = Path("models") / MODEL_IDENTIFIER
MODELS_DIR.mkdir(exist_ok = True, parents = True)

CKPT_DIR = Path("ckpt") / MODEL_IDENTIFIER
CKPT_DIR.mkdir(exist_ok = True, parents = True)

In [5]:
# Load the corpus through the project helper and split it into training and
# validation inputs (X_*) and targets (y_*).
X_train, X_val, y_train, y_val = get_train_val_data(
    random_state = RANDOM_STATE,
    val_size = VAL_SIZE,
    seq_len = SEQ_LEN,
    file_pat = "*.txt",
    book_dir = Path("../Data/Text/Sherlock_Holmes/"),
)

# Sizes of the four splits, in the order train X, train y, val X, val y.
tuple(map(len, (X_train, y_train, X_val, y_val)))

(1647675, 1647675, 86720, 86720)

In [6]:
print("Sample Training data\n")

# Print five randomly drawn training inputs, each with its target value.
for sample_idx in np.random.randint(len(X_train), size = 5):
    print(f"Input: {X_train[sample_idx]!r}", f"Output: {y_train[sample_idx]!r}\n", sep = "\n")

Sample Training data

Input: 'rious,” he observed, as we drove to Scotland Yard. “These men have got hold of Melas again. He is a man of no physical courage, as they are well aware from their experience the other night. This villain was able to terrorise him the instant that he got into his presence. No doubt they want his professional services, but, having used him, they may be inclined to punish him for what they will regard as his treachery.” Our hope was that, by taking train, we might get to Beckenham as soon or sooner than the car'
Output: 'r'

Input: 'tion of nitrite of amyl, and the present seemed an admirable opportunity of testing its virtues. The bottle was downstairs in my laboratory, so leaving my patient seated in his chair, I ran down to get it. There was some little delay in finding it—five minutes, let us say—and then I returned. Imagine my amazement to find the room empty and the patient gone. “Of course, my first act was to run into the waiting-room. The son had gone

In [7]:
print("Sample Validation data\n")

# Print five randomly drawn validation inputs, each with its target value.
for sample_idx in np.random.randint(len(X_val), size = 5):
    print(f"Input: {X_val[sample_idx]!r}", f"Output: {y_val[sample_idx]!r}\n", sep = "\n")

Sample Validation data

Input: 'uplicates of the one which was destroyed in Morse Hudson’s shop?” “They were taken from the same mould.” “Such a fact must tell against the theory that the man who breaks them is influenced by any general hatred of Napoleon. Considering how many hundreds of statues of the great Emperor must exist in London, it is too much to suppose such a coincidence as that a promiscuous iconoclast should chance to begin upon three specimens of the same bust.” “Well, I thought as you do,” said Lestrade. “On the other hand'
Output: ','

Input: 'quest a description of his cabin, in which it stated that the old logbooks of his vessel were preserved in it. It struck me that if I could see what occurred in the month of August, 1883, on board the _Sea Unicorn_, I might settle the mystery of my father’s fate. I tried last night to get at these logbooks, but was unable to open the door. To-night I tried again and succeeded, but I find that the pages which deal with that month 

In [8]:
# Tally how often each target value occurs in the training targets; the counts
# are later used to derive per-class weights.
char_freq_dict = Counter()
char_freq_dict.update(y_train)

# The 20 most frequent targets, most common first.
char_freq_dict.most_common(20)

[(' ', 304968),
 ('e', 156016),
 ('t', 112813),
 ('a', 101601),
 ('o', 98866),
 ('n', 83866),
 ('h', 80282),
 ('i', 77695),
 ('s', 77640),
 ('r', 72685),
 ('d', 53723),
 ('l', 48901),
 ('u', 38652),
 ('m', 32673),
 ('c', 31331),
 ('w', 30771),
 ('f', 26089),
 ('y', 25704),
 ('g', 22231),
 (',', 21892)]

In [9]:
%%time

vectorizer = TextVectorization(standardize = None, split = "character", name = 'TextVectorizer')
vocab_json = Path("vocab.json")

if vocab_json.exists():
    with vocab_json.open("r") as vocab_file:
        vocab = json.load(vocab_file)["vocab"]
    
    vectorizer.set_vocabulary(vocab)
else:
    vectorizer.adapt(X_train)
    vocab = vectorizer.get_vocabulary()[2:]

    with vocab_json.open("w") as vocab_file:
        json.dump({"vocab": vocab}, vocab_file)

vocab = vectorizer.get_vocabulary()
char_count = len(vocab)
char_count

CPU times: user 1.21 s, sys: 336 ms, total: 1.54 s
Wall time: 2.09 s


2022-07-31 14:48:01.293672: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-07-31 14:48:01.296089: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:961] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-07-31 14:48:01.296936: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:961] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-07-31 14:48:01.297634: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:961] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built witho

106

In [10]:
total_freq = sum(char_freq_dict.values())

# weight = sqrt(total_freq / (freq * char_count)), rounded to two decimals.
# Vocabulary entries that never occur as a training target get freq = 1.
class_weight_dict = {
    class_id: round(np.sqrt(total_freq / (char_freq_dict.get(char, 1) * char_count)), 2)
    for class_id, char in enumerate(vocab)
}

# Tabulate id, character, frequency and resulting weight for inspection.
print(f'{"Class ID":12}{"Char":10}{"Freq":10}{"Class weight"}')
for class_id, char in enumerate(vocab):
    freq = char_freq_dict.get(char, 1)
    print(f"{class_id:^12}{char!r:^10}{freq:^10}{class_weight_dict[class_id]:^12}")

Class ID    Char      Freq      Class weight
     0          ''        1        124.68   
     1       '[UNK]'      1        124.68   
     2         ' '      304968      0.23    
     3         'e'      156016      0.32    
     4         't'      112813      0.37    
     5         'a'      101601      0.39    
     6         'o'      98866       0.4     
     7         'n'      83866       0.43    
     8         'h'      80282       0.44    
     9         'i'      77695       0.45    
     10        's'      77640       0.45    
     11        'r'      72685       0.46    
     12        'd'      53723       0.54    
     13        'l'      48901       0.56    
     14        'u'      38652       0.63    
     15        'm'      32673       0.69    
     16        'c'      31331       0.7     
     17        'w'      30771       0.71    
     18        'f'      26089       0.77    
     19        'y'      25704       0.78    
     20        'g'      22231       0.84    
     21   

In [11]:
def _encode_targets(targets):
    """Convert target characters to a flat array of vocabulary ids.

    The result is written back to `y_train` / `y_val`, so this cell overwrites
    its own input. If the targets are already integer ids (the cell was run
    before) they are returned unchanged instead of being pushed through the
    string vectorizer a second time.
    """
    if len(targets) > 0 and isinstance(targets[0], (int, np.integer)):
        return np.asarray(targets)
    return vectorizer(targets).numpy().flatten()

y_train = _encode_targets(y_train)
y_val = _encode_targets(y_val)
y_train.shape, y_val.shape

((1647675,), (86720,))

In [12]:
# Training pipeline, built step by step.
train_ds = Dataset.from_tensor_slices((X_train, y_train))
train_ds = train_ds.shuffle(SHUFFLE_BUFFER)
# Repeat indefinitely: the number of batches per epoch is set via
# steps_per_epoch when fitting.
train_ds = train_ds.repeat()
train_ds = train_ds.batch(BATCH_SIZE)
train_ds = train_ds.prefetch(AUTOTUNE)
train_ds

<PrefetchDataset element_spec=(TensorSpec(shape=(None,), dtype=tf.string, name=None), TensorSpec(shape=(None,), dtype=tf.int64, name=None))>

In [13]:
# Validation pipeline. The data is only evaluated, never trained on, so it is
# not shuffled: this avoids refilling a shuffle buffer on every pass and keeps
# the validation batches identical between evaluations.
val_ds = Dataset.from_tensor_slices((X_val, y_val)).batch(BATCH_SIZE).prefetch(AUTOTUNE)

# Show the dataset spec together with the number of validation batches.
val_ds, val_ds.cardinality()

(<PrefetchDataset element_spec=(TensorSpec(shape=(None,), dtype=tf.string, name=None), TensorSpec(shape=(None,), dtype=tf.int64, name=None))>,
 <tf.Tensor: shape=(), dtype=int64, numpy=678>)

In [14]:
def get_lstm_model(char_count: int, embedding_dim: int = 32):
    """Build and compile the text generation network.

    Pipeline: raw string -> `vectorizer` -> embedding -> two stacked LSTMs ->
    three dense blocks with LeakyReLU -> softmax over `char_count` classes.

    Reads the notebook-level `vectorizer`, `DROPOUT_RATIO` and `LR`.

    Args:
        char_count: Number of output classes; the embedding table is created
            with `char_count + 1` rows.
        embedding_dim: Size of each embedding vector.

    Returns:
        A compiled `tf.keras.Model` (Adam optimizer, sparse categorical
        cross-entropy loss, sparse categorical accuracy metric).
    """
    inputs = tf.keras.Input(name = 'Input', dtype = tf.string, shape = (1,))

    # Text -> token ids -> embeddings.
    x = vectorizer(inputs)
    x = Embedding(char_count + 1, embedding_dim, name = 'EmbeddingLayer')(x)

    # Recurrent stack: the first LSTM returns the full sequence for the second,
    # which returns only its final output.
    x = LSTM(512, return_sequences = True, dropout = DROPOUT_RATIO, name = 'LSTM_1')(x)
    x = LSTM(256, dropout = DROPOUT_RATIO, name = 'LSTM_2')(x)
    x = BatchNormalization(name = 'BN_1')(x)

    # Dense block 1.
    x = Dense(256, name = 'Dense_1')(x)
    x = LeakyReLU(name = 'LR_1')(x)
    x = Dropout(DROPOUT_RATIO, name = 'Dropout_1')(x)

    # Dense block 2.
    x = Dense(128, name = 'Dense_2')(x)
    x = LeakyReLU(name = 'LR_2')(x)
    x = Dropout(DROPOUT_RATIO, name = 'Dropout_2')(x)
    x = BatchNormalization(name = 'BN_2')(x)

    # Dense block 3 (no dropout / normalisation before the classifier).
    x = Dense(128, name = 'Dense_3')(x)
    x = LeakyReLU(name = 'LR_3')(x)

    outputs = Dense(char_count, activation = 'softmax', name = "Output")(x)

    net = tf.keras.Model(inputs = inputs, outputs = outputs, name = 'Text_Generation_Model')
    net.compile(
        optimizer = Adam(LR),
        loss = 'sparse_categorical_crossentropy',
        metrics = ['sparse_categorical_accuracy'],
    )
    return net

# Instantiate the network for the current vocabulary and list its layers.
model = get_lstm_model(char_count = char_count, embedding_dim = EMBEDDING_DIM)
model.summary()

Model: "Text_Generation_Model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Input (InputLayer)          [(None, 1)]               0         
                                                                 
 TextVectorizer (TextVectori  (None, None)             0         
 zation)                                                         
                                                                 
 EmbeddingLayer (Embedding)  (None, None, 32)          3424      
                                                                 
 LSTM_1 (LSTM)               (None, None, 512)         1116160   
                                                                 
 LSTM_2 (LSTM)               (None, 256)               787456    
                                                                 
 BN_1 (BatchNormalization)   (None, 256)               1024      
                                             

In [15]:
EPOCH_START = 0

# The checkpoint tracks the model together with a `step` counter that holds the
# number of epochs trained so far.
checkpoint = Checkpoint(step = tf.Variable(EPOCH_START), model = model)
ckpt_manager = CheckpointManager(checkpoint, CKPT_DIR, max_to_keep = 3)

# Resume from the most recent checkpoint when the directory contains a
# "checkpoint" file; otherwise training starts from epoch 0.
checkpoint_index = CKPT_DIR / "checkpoint"
if checkpoint_index.exists():
    checkpoint.restore(ckpt_manager.latest_checkpoint)
    EPOCH_START = checkpoint.step.numpy()

print(f"Starting training from Epoch {EPOCH_START}")

Starting training from Epoch 3


In [16]:
# Baseline before this session's training: validation loss and
# sparse categorical accuracy of the (possibly restored) model.
model.evaluate(val_ds)

2022-07-31 14:48:32.116721: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8100




[3.848475694656372, 0.04775138199329376]

In [17]:
%%time

sample_id = np.random.randint(len(X_train))
sample_input = X_train[sample_id]
print(f"Input:\n{sample_input}")

pred_output = ''

for i in trange(CHARS_TO_PREDICT, desc = "Predicting chars", unit = " char"):
    pred = model.predict([sample_input], verbose = False)
    pred_char_id = pred.argmax()
    pred_char = vocab[pred_char_id]
    pred_output += pred_char
    sample_input = sample_input[1:] + pred_char

print(f"Output:\n{pred_output}")

Input:
attacks of jealousy which have amounted to frenzy. It is conjectured that it was in one of these that she committed the terrible crime which has caused such a sensation in London. Her movements upon the Monday night have not yet been traced, but it is undoubted that a woman answering to her description attracted much attention at Charing Cross Station on Tuesday morning by the wildness of her appearance and the violence of her gestures. It is probable, therefore, that the crime was either committed when ins


Predicting chars:   0%|          | 0/256 [00:00<?, ? char/s]

Output:
iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii
CPU times: user 34.1 s, sys: 5.38 s, total: 39.5 s
Wall time: 35.7 s


In [18]:
%%time

earlystop = EarlyStopping(patience = EARLY_STOP_PATIENCE, restore_best_weights = True)
tensorboard = TensorBoard(log_dir = str(TB_LOGS))

history = model.fit(
        train_ds,
        validation_data = val_ds,
        epochs = EPOCH_START + EPOCHS,
        steps_per_epoch = TRAIN_STEPS,
        class_weight = class_weight_dict,
        initial_epoch = EPOCH_START,
        callbacks = [earlystop, tensorboard]
    )

Epoch 4/6
Epoch 5/6
Epoch 6/6
CPU times: user 4min 37s, sys: 3min 11s, total: 7min 48s
Wall time: 8min 18s


In [19]:
# Validation loss and sparse categorical accuracy after this session's training.
model.evaluate(val_ds)



[3.7088823318481445, 0.06015913188457489]

In [20]:
%%time

sample_input = X_train[sample_id]
print(f"Input:\n{sample_input}")

pred_output = ''

for i in trange(CHARS_TO_PREDICT, desc = "Predicting chars", unit = " char"):
    pred = model.predict([sample_input], verbose = False)
    pred_char_id = pred.argmax()
    pred_char = vocab[pred_char_id]
    pred_output += pred_char
    sample_input = sample_input[1:] + pred_char

print(f"Output:\n{pred_output}")

Input:
attacks of jealousy which have amounted to frenzy. It is conjectured that it was in one of these that she committed the terrible crime which has caused such a sensation in London. Her movements upon the Monday night have not yet been traced, but it is undoubted that a woman answering to her description attracted much attention at Charing Cross Station on Tuesday morning by the wildness of her appearance and the violence of her gestures. It is probable, therefore, that the crime was either committed when ins


Predicting chars:   0%|          | 0/256 [00:00<?, ? char/s]

Output:
oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo
CPU times: user 36.1 s, sys: 5.59 s, total: 41.7 s
Wall time: 37.9 s


In [21]:
# Store the absolute number of epochs trained so far. Assigning the value
# (instead of assign_add) keeps this cell idempotent: running it twice no
# longer counts the epochs of this session twice. EPOCH_START equals the step
# value at the time the checkpoint was restored.
checkpoint.step.assign(int(EPOCH_START) + len(history.epoch))

# Write the checkpoint; the returned path is shown as the cell output.
ckpt_manager.save()

'ckpt/V1/ckpt-2'

In [22]:
# Export the trained model to the per-version models directory (MODELS_DIR).
model.save(MODELS_DIR)

