In [23]:
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM
from sklearn.model_selection import train_test_split

import roman_numerals as convert
import numpy as np

In [24]:
# Two stacked LSTM layers read a variable-length sequence of single numbers
# (input_shape=(None, 1)); a one-unit Dense head produces the regression output.
model = Sequential([
    LSTM(128, input_shape=(None, 1), return_sequences=True),
    LSTM(128),
    Dense(1),
])

model.compile(
    optimizer="adam",
    loss='mean_squared_error',
    metrics=['mae', 'mse'],
)
num_epochs = 0  # running total of epochs trained; incremented by the training cell
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_9 (LSTM)               (None, None, 128)         66560     
                                                                 
 lstm_10 (LSTM)              (None, 128)               131584    
                                                                 
 dense_3 (Dense)             (None, 1)                 129       
                                                                 
Total params: 198273 (774.50 KB)
Trainable params: 198273 (774.50 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [25]:
DATASET_SIZE=200

# Labels are the integers 0..DATASET_SIZE-1; each sample is the text that
# convert.convert() returns for the label at the same position.
labels = list(range(DATASET_SIZE))
samples = [convert.convert(number) for number in labels]

# The leading ' ' guarantees that the blank (used later as the padding symbol)
# is part of the vocabulary even if no sample contains it.
all_words = ' ' + ''.join(samples)
max_len = max(map(len, samples), default=0)

print('Max len of text',max_len)

vocab = sorted(set(all_words))
vocab_size = len(vocab)
print('vocabulary (used letters)',vocab)
print ('unique characters',vocab_size)

Max len of text 9
vocabulary (used letters) [' ', 'C', 'I', 'L', 'V', 'X']
unique characters 6


In [26]:
# Forward lookup (character -> integer code) and its inverse (code -> character);
# codes are the positions of the characters in the sorted vocabulary.
char2idx = dict(zip(vocab, range(len(vocab))))
print('char2idx:\n',char2idx)
idx2char = np.array(vocab)
print('idx2char\n',idx2char)

char2idx:
 {' ': 0, 'C': 1, 'I': 2, 'L': 3, 'V': 4, 'X': 5}
idx2char
 [' ' 'C' 'I' 'L' 'V' 'X']


In [27]:
# helper method, converts sequence of numbers to text
def to_text(sample, alphabet=None):
    """Decode a sequence of character codes back into a string.

    Parameters
    ----------
    sample : array-like of numbers
        Character codes. Any shape is accepted (for example ``(timesteps,)`` or
        ``(timesteps, 1)``); the values are flattened in row-major order.
        Fractional values are truncated toward zero.
    alphabet : sequence of str, optional
        Lookup table mapping code -> character. Defaults to the notebook-level
        ``idx2char`` array.

    Returns
    -------
    str
        The decoded text, one character per code.
    """
    if alphabet is None:
        alphabet = idx2char
    # Flatten first: calling int() on a one-element array (what iterating over
    # a (timesteps, 1) sample yields) is deprecated since NumPy 1.25.
    codes = np.asarray(sample).ravel().astype(int)
    return ''.join(alphabet[code] for code in codes)
# helper method, maps every character of a text to its integer code
def to_number(words):
    """Return a NumPy array holding the ``char2idx`` code of each character in ``words``."""
    codes = [char2idx[symbol] for symbol in words]
    return np.array(codes)

In [28]:
# Encode every text sample as an array of character codes.
# The arrays still have different lengths at this point.
samples_int = [to_number(text) for text in samples]
print(samples[123],' ->becomes-> ',samples_int[123])

CXXIII  ->becomes->  [1 5 5 2 2 2]


In [29]:
# Copy each encoded sample into a zero-filled row of width max_len;
# the untouched trailing positions keep 0, which is the code of ' '.
samples = np.zeros((DATASET_SIZE,max_len))
for row, encoded in enumerate(samples_int):
    samples[row, :len(encoded)] = encoded
print('SAMPLES\n\n',samples)
print(samples.shape)

SAMPLES

 [[0. 0. 0. ... 0. 0. 0.]
 [2. 0. 0. ... 0. 0. 0.]
 [2. 2. 0. ... 0. 0. 0.]
 ...
 [1. 5. 1. ... 0. 0. 0.]
 [1. 5. 1. ... 2. 0. 0.]
 [1. 5. 1. ... 0. 0. 0.]]
(200, 9)


In [30]:
# Add the trailing feature axis so the data matches the model's
# (batch, timesteps, 1) input. reshape() is used instead of expand_dims() so
# that re-running this cell is harmless: the result is (N, max_len, 1) whether
# `samples` is still 2-D or already 3-D.
samples = samples.reshape(samples.shape[0], samples.shape[1], 1)
labels = np.array(labels,dtype=float)

print("Sample (for 123):\n",samples[123])
# Decode from a 1-D view: to_text() calls int() on every element, and int()
# on a one-element array is deprecated since NumPy 1.25.
print("Sample decoded",to_text(samples[123, :, 0]))
print("Label (output):",labels[123])

print('samples shape',samples.shape)
print('labels shape',labels.shape)

Sample (for 123):
 [[1.]
 [5.]
 [5.]
 [2.]
 [2.]
 [2.]
 [0.]
 [0.]
 [0.]]
Sample decoded CXXIII   
Label (output): 123.0
samples shape (200, 9, 1)
labels shape (200,)


  return ''.join([idx2char[int(x)] for x in sample])


In [31]:
TRAINING_SIZE = .5  # fraction of the dataset that goes into the training split
# train_test_split is already imported in the notebook's import cell, so it is
# not imported again here. random_state pins the shuffle for repeatable splits.
trainSamples, testSamples, trainLabels, testLabels = train_test_split(
    samples,
    labels,
    train_size=TRAINING_SIZE,
    random_state=1,
)
print('Training samples:',len(trainSamples),' test samples',len(testSamples))

Training samples: 100  test samples 100


In [None]:
import random

def check_model(verbose=0,how_many=5):
    """Predict every sample with the global ``model`` and report absolute errors.

    Parameters
    ----------
    verbose : int
        ``1`` prints one line per sample (samples whose label is in
        ``trainLabels`` are tagged ``[T]``); any value below 1 prints only
        ``how_many`` randomly chosen samples.
    how_many : int
        Number of random samples shown when ``verbose`` is below 1.

    Returns
    -------
    The mean absolute error over all samples (training and test together).
    """
    pred = model.predict(samples)
    # Compare against the stored labels rather than the row index, so the
    # report stays correct even if labels stop coinciding with positions.
    targets = np.asarray(labels, dtype=float)
    errors = np.abs(targets - pred[:, 0])
    print('text => [predicted value] error=[error]')
    if verbose==1:
        for i in range(len(pred)):
            train = '[T]' if targets[i] in trainLabels else ''
            # np.ravel gives to_text a 1-D sequence, avoiding int() on 1-element arrays.
            print(i,to_text(np.ravel(samples[i])),'=> {:.2f} error = {:.2f}'.format(pred[i,0],errors[i]),train)
    if verbose<1: # if not verbose just display 'how_many' random samples
        for _ in range(how_many):
            x = random.randrange(len(pred))
            print(to_text(np.ravel(samples[x])),'=>  {:.2f} error = {:.2f}'.format(pred[x,0],errors[x]))
    mean_error = np.mean(errors)
    print('Mean error =',mean_error)
    return mean_error
check_model(1)

In [32]:
EPOCHS=100    # epochs per call to model.fit
ROUNDS = 100  # number of fit + check_model rounds; total epochs = ROUNDS * EPOCHS
# Four batches per epoch; never let the batch size drop to 0 for tiny training sets.
BATCH_SIZE = max(1, len(trainSamples) // 4)
print('Training with',len(trainSamples),'samples',EPOCHS,'epochs and batch_size=',BATCH_SIZE)
for round_no in range(1, ROUNDS + 1):
    H = model.fit(trainSamples, trainLabels, epochs=EPOCHS,verbose=0,batch_size=BATCH_SIZE)
    num_epochs += EPOCHS  # accumulates across re-runs of this cell (continued training)
    losses = H.history['loss']
    # The progress counter is derived from ROUNDS so it always matches the loop bound.
    print("\n{}/{} Epochs: {} - loss={:6.3f}, loss improvement={:6.3f}".
          format(round_no, ROUNDS, num_epochs, losses[-1], losses[0]-losses[-1]))
    check_model()
print("Done")

Training with 100 samples 100 epochs and batch_size= 25


I0000 00:00:1704119986.648677     599 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.



0/10 Epochs: 100 - loss=4534.612, loss improvement=7623.219
text => [predicted value] error=[error]
CLXXXI    =>  57.79 error = 123.21
XC        =>  57.79 error = 32.21
CLXXXVII  =>  57.79 error = 129.21
CXXXII    =>  57.79 error = 74.21
CX        =>  57.79 error = 52.21
LVII      =>  57.79 error = 0.79
XLVIII    =>  57.79 error = 9.79
CLXVIII   =>  57.79 error = 110.21
CXCIII    =>  57.79 error = 135.21
LXIV      =>  57.79 error = 6.21
Mean error = 58.629383


  return ''.join([idx2char[int(x)] for x in sample])



1/10 Epochs: 200 - loss=3176.837, loss improvement=1314.494
text => [predicted value] error=[error]
LVIII     =>  83.45 error = 25.45
CXLI      =>  83.45 error = 57.55
LXI       =>  83.45 error = 22.45
XCI       =>  83.45 error = 7.55
CLV       =>  83.45 error = 71.55
XLI       =>  83.45 error = 42.45
CLXXI     =>  83.45 error = 87.55
CXXIII    =>  83.45 error = 39.55
LII       =>  83.45 error = 31.45
CVI       =>  83.45 error = 22.55
Mean error = 50.870926

2/10 Epochs: 300 - loss=3015.490, loss improvement=156.992
text => [predicted value] error=[error]
CXVI      =>  93.56 error = 22.44
XXIII     =>  93.56 error = 70.56
CLXXXVI   =>  93.56 error = 92.44
XXVII     =>  93.56 error = 66.56
XI        =>  93.56 error = 82.56
CL        =>  93.56 error = 56.44
CXLIX     =>  93.56 error = 55.44
CXIX      =>  93.56 error = 25.44
CXXVII    =>  93.56 error = 33.44
CLXV      =>  93.56 error = 71.44
Mean error = 49.708847

3/10 Epochs: 400 - loss=3006.987, loss improvement= 8.078
text => [predic

In [39]:
# The training inputs were right-padded with ' ' up to max_len symbols, so the
# query is padded the same way here instead of typing the blanks by hand.
x = to_number('LXXI'.ljust(max_len))
print(x)
# Add the batch axis (front) and the feature axis (back): (timesteps,) -> (1, timesteps, 1)
x = x[np.newaxis, :, np.newaxis]
model.predict(x)

[3 5 5 2 0 0 0 0 0]


array([[71.28273]], dtype=float32)

In [40]:
# Write the trained model to disk; the '.keras' suffix requests Keras' native
# single-file format (per the Keras saving documentation).
model.save('model_words2numbers.keras')