# Model transforming a number into text
- input: number in range(0,DATASE_SIZE)
- output: text

Examples: 
- input: 234, output: two hundred thirty four
- input: 6, output: six

The code in file number2words.py taken from: https://www.codesansar.com/python-programming-examples/number-words-conversion-no-library-used.htm

In [17]:
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import RNN, LSTM, RepeatVector
import numpy as np
from number2words import getWords

## Model

In [18]:
OUTPUT_SEQUENCE_LEN=30

model = Sequential()
model.add(Dense(16, input_dim=1) ) 
model.add(RepeatVector(OUTPUT_SEQUENCE_LEN)) #length of the text
model.add(LSTM(128, return_sequences=True))
model.add(LSTM(128, return_sequences=True))
model.add(LSTM(128, return_sequences=True))
model.add(Dense(26,activation='softmax'))
model.summary()

model.compile(loss='categorical_crossentropy', optimizer="adam",metrics=['accuracy','mae'])
num_epochs = 0

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 16)                32        
_________________________________________________________________
repeat_vector_2 (RepeatVecto (None, 30, 16)            0         
_________________________________________________________________
lstm_6 (LSTM)                (None, 30, 128)           74240     
_________________________________________________________________
lstm_7 (LSTM)                (None, 30, 128)           131584    
_________________________________________________________________
lstm_8 (LSTM)                (None, 30, 128)           131584    
_________________________________________________________________
dense_5 (Dense)              (None, 30, 26)            3354      
Total params: 340,794
Trainable params: 340,794
Non-trainable params: 0
________________________________________________

## Dataset creation

In [21]:
DATASET_SIZE=200

samples = []
labels = []

import random

for i in range(DATASET_SIZE):
    samples.append(i)
    #words = lslownie(i)
    words = getWords(i)
    labels.append(list(words))

samples = np.array(samples)
labels = np.array(labels)

print("Sample (input):",samples[123])
print("Label",labels[123])

codes = ' abcdefghijklmnoprstuvwxyz'

nlabels = np.zeros((DATASET_SIZE,OUTPUT_SEQUENCE_LEN,len(codes)))
for i in range(DATASET_SIZE):
    for j in range(OUTPUT_SEQUENCE_LEN):
        if j>=len(labels[i]): 
                nlabels[i][j][0]=1
                continue
        x = labels[i][j]
        #print(x)
        index = codes.index(x)
        nlabels[i][j][index] = 1
print("Label encoded (output):\n",nlabels[123])
labels = nlabels
print(labels.shape)

Sample (input): 123
Label ['o', 'n', 'e', ' ', 'h', 'u', 'n', 'd', 'r', 'e', 'd', ' ', 't', 'w', 'e', 'n', 't', 'y', ' ', 't', 'h', 'r', 'e', 'e', ' ', ' ']
Label encoded (output):
 [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 

  labels = np.array(labels)


In [22]:
TRAINING_SIZE = .5
from sklearn.model_selection import train_test_split
(trainSamples, testSamples, trainLabels, testLabels) = train_test_split(samples, labels,train_size=TRAINING_SIZE)
print('Training samples:',len(trainSamples),' test samples',len(testSamples))

Training samples: 100  test samples 100


In [26]:
EPOCHS=50
BATCH_SIZE = int(len(trainSamples)/2)
print('Training with',len(trainSamples),'samples',EPOCHS,'epochs and batch_size=',BATCH_SIZE)
print("Epochs so far",num_epochs)
for x in range(10):
    H = model.fit(trainSamples, trainLabels, epochs=EPOCHS,verbose=0,batch_size=BATCH_SIZE)
    num_epochs += EPOCHS
    print()
    print("Epoch {} - loss ={:6.3f}, loss improvement ={:6.3f}".
          format(num_epochs,H.history['loss'][-1], H.history['loss'][0]-H.history['loss'][-1]))
    pred = model.predict(samples)
    res = pred.argmax(axis=2)
    c,l,p = check_model()
print("Done")


Training with 100 samples 50 epochs and batch_size= 50
Epochs so far 500

Epoch 550 - loss = 0.374, loss improvement =-0.010
14 -> fivteeen
66 -> sixty  ine
132 -> one hundred thirty fiv
31 -> thirty tixe
184 -> one hundred ninety  ive
161 -> one hundred sixty siie
36 -> thirty tiu
24 -> twenty sive
29 -> twinty siven
43 -> forty ti
Correct 0 of 200  =  0.0

Epoch 600 - loss = 0.314, loss improvement = 0.062
54 -> fifty tiee
130 -> one hundred thirty fiv
5 -> five
151 -> one hundred fifty siv
84 -> sighty  i
94 -> oinety sight
183 -> one hundred ninety sive
166 -> one hundred siventy iee
146 -> one hundred firty sie
178 -> one hundred sigety  ive
Correct 0 of 200  =  0.0

Epoch 650 - loss = 0.322, loss improvement = 0.017
88 -> oighty
166 -> one hundred sixty sive
75 -> siventy  iv
75 -> siventy  iv
34 -> thirty tor
75 -> siventy  iv
26 -> twenty sive
146 -> one hundred forty ooee
80 -> sivhty   i
168 -> one hundred sixty sive
Correct 0 of 200  =  0.0

Epoch 700 - loss = 0.274, loss im

In [27]:
def label2words(label):
    s = ''
    for r in label:
        s+=codes[int(r)]
        #print(i,'->',s)
    return s.strip()    
    
def check_model(verbose=0,show_training=1):
    pred = model.predict(samples)
    res = pred.argmax(axis=2)
    correct = 0
    for i in range(len(pred)):
        if(not show_training and i in trainSamples): continue
        train=''
        if i in trainSamples: train='[T]'
        txt = label2words(res[i])
        txt_correct = getWords(i)
        ok=''
        if(txt==txt_correct): 
            correct+=1
            ok = "[ok]"
        if(verbose==1):
            print(i,'->',txt, ok,train)
    if verbose==0:
        for i in range(10):        
            x = random.randrange(DATASET_SIZE)
            print(x,'->',label2words(res[x]))    
    print('Correct',correct,'of',len(pred),' = ',(correct/len(pred)))
    return correct,len(pred),(correct/len(pred))
check_model(1)

0 -> eero  [T]
1 -> two  
2 -> two  [T]
3 -> three  [T]
4 -> tive  
5 -> five  [T]
6 -> siven  
7 -> seven  [T]
8 -> sight  [T]
9 -> sight  
10 -> sightt  
11 -> sielht  
12 -> sielve  [T]
13 -> fielvee  
14 -> fiurteen  [T]
15 -> fifteen  [T]
16 -> fixteen  [T]
17 -> teventeen  [T]
18 -> teeenteen  
19 -> twentyeone  
20 -> twenty one  
21 -> twenty one  [T]
22 -> twenty two  [T]
23 -> twenty fwoe  
24 -> twenty five  
25 -> twenty five  [T]
26 -> twenty six  [T]
27 -> twenty seven  [T]
28 -> twenty seven  
29 -> twirty seve  
30 -> thirty twve  
31 -> thirty twoe  
32 -> thirty thre  [T]
33 -> thirty toure  [T]
34 -> thirty foxr  [T]
35 -> thirty six  
36 -> thirty six  [T]
37 -> thirty six  
38 -> forty  
39 -> forty  
40 -> forty  [T]
41 -> forty two  
42 -> forty two  [T]
43 -> forty two  
44 -> forty five  
45 -> forty five  [T]
46 -> forty eive  
47 -> forty eigh  
48 -> forty eige  [T]
49 -> fifty tine  [T]
50 -> fifty ti  [T]
51 -> fifty ti  [T]
52 -> fifty ti  [T]
53 -> fifty

(0, 200, 0.0)

In [None]:
model.save('model_number2words.h5')

In [None]:
print(samples.shape)

In [None]:
input=188
x = model.predict(np.array([input]))
v = np.argmax(x,axis=2)
#print(v.shape)
print(label2words(v.ravel()))