# Model transforming a number into text
- input: number in range(0,DATASE_SIZE)
- output: text

Examples: 
- input: 234, output: two hundred thirty four
- input: 6, output: six

The code in file number2words.py taken from: https://www.codesansar.com/python-programming-examples/number-words-conversion-no-library-used.htm

In [2]:
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import RNN, LSTM, RepeatVector
import numpy as np
from number2words import getWords

## Model

In [3]:
OUTPUT_SEQUENCE_LEN=30

model = Sequential()
model.add(Dense(16, input_dim=1) ) 
model.add(RepeatVector(OUTPUT_SEQUENCE_LEN)) #length of the text
model.add(LSTM(128, return_sequences=True))
model.add(LSTM(128, return_sequences=True))
model.add(LSTM(128, return_sequences=True))
model.add(Dense(26,activation='softmax'))
model.summary()

model.compile(loss='categorical_crossentropy', optimizer="adam",metrics=['accuracy','mae'])
num_epochs = 0

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 16)                32        
_________________________________________________________________
repeat_vector (RepeatVector) (None, 30, 16)            0         
_________________________________________________________________
lstm (LSTM)                  (None, 30, 128)           74240     
_________________________________________________________________
lstm_1 (LSTM)                (None, 30, 128)           131584    
_________________________________________________________________
lstm_2 (LSTM)                (None, 30, 128)           131584    
_________________________________________________________________
dense_1 (Dense)              (None, 30, 26)            3354      
Total params: 340,794
Trainable params: 340,794
Non-trainable params: 0
__________________________________________________

## Dataset creation

In [4]:
DATASET_SIZE=200

samples = []
labels = []

import random

for i in range(DATASET_SIZE):
    samples.append(i)
    #words = lslownie(i)
    words = getWords(i)
    labels.append(list(words))

samples = np.array(samples)
labels = np.array(labels)

print("Sample (input):",samples[5])
print("Label",labels[5])

codes = ' abcdefghijklmnoprstuvwxyz'

nlabels = np.zeros((DATASET_SIZE,OUTPUT_SEQUENCE_LEN,len(codes)))
for i in range(DATASET_SIZE):
    for j in range(OUTPUT_SEQUENCE_LEN):
        if j>=len(labels[i]): 
                nlabels[i][j][0]=1
                continue
        x = labels[i][j]
        #print(x)
        index = codes.index(x)
        nlabels[i][j][index] = 1
print("Label encoded (output):\n",nlabels[123])
labels = nlabels
print(labels.shape)

Sample (input): 5
Label ['f', 'i', 'v', 'e', ' ', ' ']
Label encoded (output):
 [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [1. 0.

  labels = np.array(labels)


In [5]:
TRAINING_SIZE = .5
from sklearn.model_selection import train_test_split
(trainSamples, testSamples, trainLabels, testLabels) = train_test_split(samples, labels,train_size=TRAINING_SIZE)
print('Training samples:',len(trainSamples),' test samples',len(testSamples))

Training samples: 100  test samples 100


In [8]:
EPOCHS=50
BATCH_SIZE = int(len(trainSamples)/2)
print('Training with',len(trainSamples),'samples',EPOCHS,'epochs and batch_size=',BATCH_SIZE)
print("Epochs so far",num_epochs)
for x in range(10):
    H = model.fit(trainSamples, trainLabels, epochs=EPOCHS,verbose=0,batch_size=BATCH_SIZE)
    num_epochs += EPOCHS
    print()
    print("Epoch {} - loss ={:6.3f}, loss improvement ={:6.3f}".
          format(num_epochs,H.history['loss'][-1], H.history['loss'][0]-H.history['loss'][-1]))
    pred = model.predict(samples)
    res = pred.argmax(axis=2)
    c,l,p = check_model()
print("Done")


Training with 100 samples 50 epochs and batch_size= 50
Epochs so far 50

Epoch 100 - loss = 0.959, loss improvement = 0.263
80 -> one yyy
195 -> nne hundred  ittyyy
182 -> nne hundred  ittyyy
178 -> nne hundred  ittyyy
40 -> oittty  e
37 -> oittty  ee
2 -> ooe
21 -> oitttt
146 -> nne hundrdd  ittyy
155 -> nne hundred  ittyy
Correct 0 of 200  =  0.0

Epoch 150 - loss = 0.761, loss improvement = 0.191
103 -> one hundred  iee
163 -> one hundred  ietyy  oe
10 -> oiiee
132 -> one hundred fietty  ee
158 -> one hundred  ietyy  oe
132 -> one hundred fietty  ee
199 -> one hundred  iitty  oe
8 -> oiee
3 -> ooe
78 -> onetty  iee
Correct 0 of 200  =  0.0

Epoch 200 - loss = 0.615, loss improvement = 0.146
106 -> one hundred  iee
164 -> one hundred siety  oee
84 -> oieety  ie
147 -> one hundred fitty  iee
1 -> ooo
31 -> oirtty  ie
85 -> oieety  ie
97 -> oneety  iee
133 -> one hundred tietty  ie
193 -> one hundred sieety
Correct 0 of 200  =  0.0

Epoch 250 - loss = 0.554, loss improvement = 0.070
12

In [9]:
def label2words(label):
    s = ''
    for r in label:
        s+=codes[int(r)]
        #print(i,'->',s)
    return s.strip()    
    
def check_model(verbose=0,show_training=1):
    pred = model.predict(samples)
    res = pred.argmax(axis=2)
    correct = 0
    for i in range(len(pred)):
        if(not show_training and i in trainSamples): continue
        train=''
        if i in trainSamples: train='[T]'
        txt = label2words(res[i])
        txt_correct = getWords(i)
        ok=''
        if(txt==txt_correct): 
            correct+=1
            ok = "[ok]"
        if(verbose==1):
            print(i,'->',txt, ok,train)
    if verbose==0:
        for i in range(10):        
            x = random.randrange(DATASET_SIZE)
            print(x,'->',label2words(res[x]))    
    print('Correct',correct,'of',len(pred),' = ',(correct/len(pred)))
    return correct,len(pred),(correct/len(pred))
check_model(1)

0 -> trro  [T]
1 -> two  
2 -> two  [T]
3 -> three  [T]
4 -> three  
5 -> tige  
6 -> tige  
7 -> tinh  
8 -> tinh  [T]
9 -> tin  [T]
10 -> tin  [T]
11 -> tinh  
12 -> tinrt  
13 -> tinrteen  [T]
14 -> tifteenn  
15 -> tifteen  [T]
16 -> tigteen  [T]
17 -> tigttenn  
18 -> tietteen  [T]
19 -> tientye  
20 -> twenty  [T]
21 -> twenty  he  
22 -> twenty three  
23 -> twenty three  [T]
24 -> twenty sire  
25 -> twenty six  
26 -> twenty six  [T]
27 -> twenty six  
28 -> twenty nin  
29 -> twenty nin  [T]
30 -> thinty nnn  
31 -> thirty nnn  [T]
32 -> thirty nin  
33 -> thirty fiu  
34 -> thirty fiur  [T]
35 -> thirty five  [T]
36 -> thirty five  
37 -> thirty siven  [T]
38 -> fhrrty eivht  [T]
39 -> forty  
40 -> forty  [T]
41 -> forty  [T]
42 -> forty  i  
43 -> forty oige  [T]
44 -> forty oige  
45 -> forty nige  [T]
46 -> forty nige  [T]
47 -> forty nige  
48 -> forty nige  [T]
49 -> forty nige  [T]
50 -> fofty nige  [T]
51 -> fifty nige  [T]
52 -> fifty nige  
53 -> fifty nige  [T]
54

(0, 200, 0.0)

In [None]:
model.save('model_number2words.h5')

In [None]:
print(samples.shape)

In [None]:
input=188
x = model.predict(np.array([input]))
v = np.argmax(x,axis=2)
#print(v.shape)
print(label2words(v.ravel()))