##### Import the libraries

In [4]:
# Take both preprocessing helpers from the same Keras distribution
# (tensorflow.keras) instead of mixing standalone `keras` with `tensorflow.keras`.
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [5]:
# Toy corpus: four short English sentences used by every later cell.
# The misspelling 'beatiful' is kept as-is because the recorded outputs contain it.
lines = [
    'It was a nice rainy day.',
    'The things are so beatiful in his point.',
    'When your focus is clear, you won.',
    'Many many happy returns of the day.',
]

In [6]:
# Echo the corpus to confirm the four sentences were stored as written.
lines

['It was a nice rainy day.',
 'The things are so beatiful in his point.',
 'When your focus is clear, you won.',
 'Many many happy returns of the day.']

In [7]:
# Create a tokenizer with its default settings and build its vocabulary from
# the corpus. The outputs of the following cells show tokens lower-cased and
# without punctuation (e.g. 'It' -> 'it', 'day.' -> 'day').
tokenizer = Tokenizer()
tokenizer.fit_on_texts(lines)

In [8]:
# word_docs: for each token, the number of sentences that contain it.
# 'many' occurs twice in one sentence yet is counted once; 'day' and 'the'
# each appear in two different sentences.
tokenizer.word_docs

defaultdict(int,
            {'was': 1,
             'day': 2,
             'rainy': 1,
             'it': 1,
             'nice': 1,
             'a': 1,
             'his': 1,
             'point': 1,
             'in': 1,
             'things': 1,
             'the': 2,
             'beatiful': 1,
             'are': 1,
             'so': 1,
             'you': 1,
             'clear': 1,
             'focus': 1,
             'your': 1,
             'won': 1,
             'when': 1,
             'is': 1,
             'returns': 1,
             'many': 1,
             'of': 1,
             'happy': 1})

In [9]:
# word_index: token -> integer id, starting at 1. In the output below the
# tokens that occur twice in the corpus ('day', 'the', 'many') get the
# smallest ids; the remaining tokens follow in order of first appearance.
tokenizer.word_index

{'day': 1,
 'the': 2,
 'many': 3,
 'it': 4,
 'was': 5,
 'a': 6,
 'nice': 7,
 'rainy': 8,
 'things': 9,
 'are': 10,
 'so': 11,
 'beatiful': 12,
 'in': 13,
 'his': 14,
 'point': 15,
 'when': 16,
 'your': 17,
 'focus': 18,
 'is': 19,
 'clear': 20,
 'you': 21,
 'won': 22,
 'happy': 23,
 'returns': 24,
 'of': 25}

In [10]:
# Encode each sentence as one row with a column per token id.
# mode="binary" (the default) marks presence only, so the repeated 'many'
# in the last sentence still yields a single 1.
mat = tokenizer.texts_to_matrix(lines, mode="binary")
mat

array([[0., 1., 0., 0., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        1., 1., 1., 1., 1., 1., 1., 0., 0., 0.],
       [0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 1., 1., 1.]])

In [11]:
# 4 sentences x 26 columns: the 25 token ids plus column 0, which is all
# zeros here because ids start at 1.
mat.shape

(4, 26)

In [12]:
# Replace every word with its word_index id, keeping word order.
seq = tokenizer.texts_to_sequences(lines)

In [13]:
# One list of token ids per sentence; the lists have different lengths
# (6, 8, 7 and 7 ids), which is why padding is applied next.
seq

[[4, 5, 6, 7, 8, 1],
 [2, 9, 10, 11, 12, 13, 14, 15],
 [16, 17, 18, 19, 20, 21, 22],
 [3, 3, 23, 24, 25, 2, 1]]

In [14]:
# Bring every sequence to length 10 by appending zeros after the token ids.
padded = pad_sequences(seq, maxlen=10, padding="post")

In [15]:
# Post-padded result: real token ids first, zeros filling the tail of each row.
padded

array([[ 4,  5,  6,  7,  8,  1,  0,  0,  0,  0],
       [ 2,  9, 10, 11, 12, 13, 14, 15,  0,  0],
       [16, 17, 18, 19, 20, 21, 22,  0,  0,  0],
       [ 3,  3, 23, 24, 25,  2,  1,  0,  0,  0]], dtype=int32)

In [16]:
# Same target length, but the zeros now go in front of the token ids.
# NOTE: this rebinds `padded`, replacing the post-padded array created earlier.
padded = pad_sequences(seq, maxlen=10, padding="pre")
padded

array([[ 0,  0,  0,  0,  4,  5,  6,  7,  8,  1],
       [ 0,  0,  2,  9, 10, 11, 12, 13, 14, 15],
       [ 0,  0,  0, 16, 17, 18, 19, 20, 21, 22],
       [ 0,  0,  0,  3,  3, 23, 24, 25,  2,  1]], dtype=int32)

In [17]:
# Shape of the most recently assigned `padded` (the pre-padded array):
# 4 sentences x maxlen of 10.
padded.shape

(4, 10)