### Deep Learning in the Eye Tracking World 
#### the tutorial presented during ETRA 2021 (https://etra.acm.org/2021/acceptedtutorials.html)
#### the code downloaded from: https://github.com/kasprowski/etra2021
@author: pawel@kasprowski.pl


# Translating a number into text using RNN
- input: number in range(0,DATASET_SIZE)
- output: text

Examples: 
- input: 234, output: two hundred thirty four
- input: 6, output: six

The code in number2word.py is taken from: https://www.codesansar.com/python-programming-examples/number-words-conversion-no-library-used.htm


In [1]:
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import RNN, LSTM, RepeatVector
import numpy as np
from number2word import getWords

## Model

In [2]:
# Number of characters the network produces for every input number.
OUTPUT_SEQUENCE_LEN=30

# A single scalar goes in; it is projected to 16 features, repeated once per
# output position, passed through three stacked LSTMs that each return the
# full sequence, and finally classified into one of 26 symbols per position.
model = Sequential([
    Dense(16, input_dim=1),
    RepeatVector(OUTPUT_SEQUENCE_LEN),  # length of the text
    LSTM(128, return_sequences=True),
    LSTM(128, return_sequences=True),
    LSTM(128, return_sequences=True),
    Dense(26, activation='softmax'),
])
model.summary()

model.compile(
    loss='categorical_crossentropy',
    optimizer="adam",
    metrics=['accuracy', 'mae'],
)

# Running total of training epochs; the training cell adds to it after each fit().
num_epochs = 0

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 16)                32        
_________________________________________________________________
repeat_vector (RepeatVector) (None, 30, 16)            0         
_________________________________________________________________
lstm (LSTM)                  (None, 30, 128)           74240     
_________________________________________________________________
lstm_1 (LSTM)                (None, 30, 128)           131584    
_________________________________________________________________
lstm_2 (LSTM)                (None, 30, 128)           131584    
_________________________________________________________________
dense_1 (Dense)              (None, 30, 26)            3354      
Total params: 340,794
Trainable params: 340,794
Non-trainable params: 0
__________________________________________________

## Dataset creation

In [3]:
DATASET_SIZE=200

import random  # used by check_model() to pick random predictions to display

# Inputs: the integers 0 .. DATASET_SIZE-1.
samples = np.arange(DATASET_SIZE)

# Targets: the spelled-out form of every number, split into single characters.
# This stays a plain list of lists on purpose: the words have different
# lengths, and calling np.array() on such ragged data emits a deprecation
# warning in older NumPy releases and raises ValueError in newer ones.
labels = [list(getWords(number)) for number in range(DATASET_SIZE)]

print("Sample (input):",samples[5])
print("Label",labels[5])

# Output alphabet: 26 symbols (the space plus the lowercase letters with 'q'
# left out), which matches the Dense(26) output layer of the model.
# Index 0 (the space) is also used to pad texts shorter than OUTPUT_SEQUENCE_LEN.
codes = ' abcdefghijklmnoprstuvwxyz'

# One-hot encode every label into shape
# (DATASET_SIZE, OUTPUT_SEQUENCE_LEN, len(codes)).
nlabels = np.zeros((DATASET_SIZE,OUTPUT_SEQUENCE_LEN,len(codes)))
for i, chars in enumerate(labels):
    for j in range(OUTPUT_SEQUENCE_LEN):
        # Positions past the end of the text are padded with the space symbol;
        # characters beyond OUTPUT_SEQUENCE_LEN are dropped.
        # codes.index() raises ValueError for a character outside the alphabet.
        index = codes.index(chars[j]) if j < len(chars) else 0
        nlabels[i, j, index] = 1
print("Label encoded (output):\n",nlabels[123])
labels = nlabels
print(labels.shape)

Sample (input): 5
Label ['f', 'i', 'v', 'e', ' ', ' ']
Label encoded (output):
 [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]
 [1. 0.

  labels = np.array(labels)


In [4]:
from sklearn.model_selection import train_test_split

# Fraction of the dataset used for training; the rest becomes the test set.
TRAINING_SIZE = .5

# Split inputs and one-hot labels together so that each sample keeps its label.
trainSamples, testSamples, trainLabels, testLabels = train_test_split(
    samples,
    labels,
    train_size=TRAINING_SIZE,
)
print('Training samples:',len(trainSamples),' test samples',len(testSamples))

Training samples: 100  test samples 100


In [6]:
# Epochs per fit() call; the loop below runs 10 such calls.
EPOCHS=50
# Two mini-batches per epoch; never let the batch size fall to 0 for a tiny training set.
BATCH_SIZE = max(1, len(trainSamples)//2)
print('Training with',len(trainSamples),'samples',EPOCHS,'epochs and batch_size=',BATCH_SIZE)
print("Epochs so far",num_epochs)
# NOTE: check_model() must already be defined when this cell runs; in this
# notebook its definition sits in a later cell, so run that cell first.
for training_round in range(10):
    H = model.fit(trainSamples, trainLabels, epochs=EPOCHS,verbose=0,batch_size=BATCH_SIZE)
    num_epochs += EPOCHS
    losses = H.history['loss']
    print()
    print("Epoch {} - loss ={:6.3f}, loss improvement ={:6.3f}".
          format(num_epochs,losses[-1], losses[0]-losses[-1]))
    # check_model() runs its own prediction over all samples and prints a few
    # random results plus the exact-match count, so no separate
    # model.predict() call is needed here.
    check_model()
print("Done")


Training with 100 samples 50 epochs and batch_size= 50
Epochs so far 0

Epoch 50 - loss = 1.269, loss improvement = 1.819
127 -> nnhhhundddd   ttt
167 -> nnhhhunddddde  tttt
183 -> nnhhhunddddde  tttt
163 -> nnhhhunddddde  tttt
188 -> nnhhhunddddde  tttt
36 -> nn tttt
68 -> nnn
45 -> nn  tt
62 -> nn
190 -> nnhhhuuddddde  tttt
Correct 0 of 200  =  0.0

Epoch 100 - loss = 0.993, loss improvement = 0.282
190 -> nee hundred  iittt
84 -> nne tyy
146 -> nee hundred  itttt
73 -> nnetty
119 -> nee hundred  tttt
29 -> nntttt
105 -> nne huuddd
89 -> nne  yy
72 -> nnetty
66 -> nnetty
Correct 0 of 200  =  0.0

Epoch 150 - loss = 0.778, loss improvement = 0.212
41 -> oiity   ee
181 -> one hundred sietty
158 -> one hundred fiett
30 -> oiitty  iee
9 -> oieee
100 -> one hyn e
185 -> one hundred sietty
156 -> one hundred fiett
46 -> oiity   ee
184 -> one hundred sietty
Correct 0 of 200  =  0.0

Epoch 200 - loss = 0.642, loss improvement = 0.134
157 -> one hundred fihty   e
175 -> one hundred sieety
132

In [7]:
def label2words(label):
    """Decode a sequence of class indices into text.

    Every element of `label` is converted to int and used as an index into
    the module-level `codes` string. The decoded characters are concatenated
    and surrounding whitespace is stripped from the result.
    """
    return ''.join(codes[int(idx)] for idx in label).strip()
    
def check_model(verbose=0,show_training=1):
    """Predict the text for every sample and report the exact-match rate.

    Parameters:
        verbose: 1 prints one line per evaluated sample (prediction, "[ok]"
            when it matches exactly, "[T]" when the sample is in the
            training set); 0 prints 10 randomly chosen predictions instead.
        show_training: when falsy, samples that belong to `trainSamples` are
            left out of the evaluation, including the reported totals.

    Returns:
        (correct, evaluated, accuracy) - the number of exact matches, the
        number of samples evaluated and their ratio (0.0 when nothing was
        evaluated).
    """
    pred = model.predict(samples)
    res = pred.argmax(axis=2)  # most probable symbol index per output position
    # Build the membership set once instead of scanning the array per sample.
    train_numbers = set(int(s) for s in trainSamples)
    correct = 0
    evaluated = 0
    for i in range(len(res)):
        # Use the actual input value rather than assuming it equals its index.
        number = int(samples[i])
        is_train = number in train_numbers
        if not show_training and is_train:
            continue
        evaluated += 1
        train = '[T]' if is_train else ''
        txt = label2words(res[i])
        txt_correct = getWords(number)
        ok = ''
        if txt == txt_correct:
            correct += 1
            ok = "[ok]"
        if verbose == 1:
            print(number,'->',txt, ok,train)
    if verbose == 0:
        for _shown in range(10):
            x = random.randrange(len(res))
            print(int(samples[x]),'->',label2words(res[x]))
    # Divide by the number of samples actually evaluated so that the rate is
    # not understated when training samples were skipped.
    accuracy = (correct/evaluated) if evaluated else 0.0
    print('Correct',correct,'of',evaluated,' = ',accuracy)
    return correct,evaluated,accuracy
# Print every prediction; exact matches are flagged [ok], training samples [T].
check_model(verbose=1)

0 -> oon  
1 -> one  [T]
2 -> swo  [T]
3 -> swo  
4 -> ewvn  
5 -> eevn  
6 -> eive  
7 -> eiven  [T]
8 -> sige  [T]
9 -> sive  [T]
10 -> tiee  
11 -> tieee  
12 -> tielee  [T]
13 -> tieteeen  [T]
14 -> tieteen  
15 -> tieteen  
16 -> tweteen  [T]
17 -> tweteen  
18 -> tweteen  
19 -> twetteen  [T]
20 -> tweete nn  
21 -> twenty on  [T]
22 -> twenty two  [T]
23 -> twenty tw  
24 -> thenty six  
25 -> thenty six  
26 -> thenty sev  [T]
27 -> thenty sev  [T]
28 -> thenty  e  
29 -> thirty  
30 -> thirty  [T]
31 -> thirty  [T]
32 -> thirty   ee  
33 -> thirty  ive  [T]
34 -> thiry   eee  
35 -> thrty   eee  [T]
36 -> thrty   vee  
37 -> thrty  ive  [T]
38 -> thrty  ive  
39 -> thrty  ive  [T]
40 -> thrty sive  [T]
41 -> thrty sive  
42 -> thrty sive  
43 -> tirty sive  [T]
44 -> firty sive  
45 -> firty sive  [T]
46 -> firty  ive  
47 -> fifty  ive  [T]
48 -> fifty  hve  
49 -> fifty  h e  
50 -> fifty  h  [T]
51 -> fifty  h  
52 -> fifty  h  
53 -> fifty  h  [T]
54 -> sifty  h  [T]
55 ->

(0, 200, 0.0)

In [None]:
# Persist the trained model to 'model_number2words.h5' (path relative to the working directory).
model.save('model_number2words.h5')

In [None]:
# Sanity check: show the shape of the input array.
print(samples.shape)

In [None]:
# Number to translate. Named `number` rather than `input` so that the Python
# builtin input() is not shadowed for the rest of the kernel session.
number = 188
# The model expects a batch, so wrap the single value in a one-element array.
x = model.predict(np.array([number]))
# Most probable symbol index for each output position.
v = np.argmax(x,axis=2)
print(label2words(v.ravel()))