In [101]:
text = """This course covers all the steps that one should take to create an image classification model using Convolutional Neural Networks. 
Most courses only focus on teaching how to run the analysis but we believe that having a strong theoretical understanding of the concepts enables us to create a good model . 
And after running the analysis, one should be able to judge how good the model is and interpret the results to actually be able to help the business."""

In [102]:
import tensorflow as tf 
from tensorflow.keras.preprocessing.text import Tokenizer 


In [103]:
#Initiate the tokenizer 
tokenizer = Tokenizer() 
tokenizer.fit_on_texts([text])


In [104]:
len(tokenizer.word_index)

54

In [105]:
len(text)

456

In [106]:
for sentence in text.split('\n'): 
    print(sentence)


This course covers all the steps that one should take to create an image classification model using Convolutional Neural Networks. 
Most courses only focus on teaching how to run the analysis but we believe that having a strong theoretical understanding of the concepts enables us to create a good model . 
And after running the analysis, one should be able to judge how good the model is and interpret the results to actually be able to help the business.


In [107]:
for sentence in text.split('\n'): 
    print(tokenizer.texts_to_sequences([sentence])[0])


[15, 16, 17, 18, 1, 19, 4, 5, 6, 20, 2, 7, 21, 22, 23, 3, 24, 25, 26, 27]
[28, 29, 30, 31, 32, 33, 8, 2, 34, 1, 9, 35, 36, 37, 4, 38, 10, 39, 40, 41, 42, 1, 43, 44, 45, 2, 7, 10, 11, 3]
[12, 46, 47, 1, 9, 5, 6, 13, 14, 2, 48, 8, 11, 1, 3, 49, 12, 50, 1, 51, 2, 52, 13, 14, 2, 53, 1, 54]


In [108]:
input_sequence = [] 
for sentence in text.split('\n'): 
    tokenizer_sentence =tokenizer.texts_to_sequences([sentence])[0]
    for i in range(1, len(tokenizer_sentence)): 
        input_sequence.append(tokenizer_sentence[:i+1])

In [109]:
input_sequence

[[15, 16],
 [15, 16, 17],
 [15, 16, 17, 18],
 [15, 16, 17, 18, 1],
 [15, 16, 17, 18, 1, 19],
 [15, 16, 17, 18, 1, 19, 4],
 [15, 16, 17, 18, 1, 19, 4, 5],
 [15, 16, 17, 18, 1, 19, 4, 5, 6],
 [15, 16, 17, 18, 1, 19, 4, 5, 6, 20],
 [15, 16, 17, 18, 1, 19, 4, 5, 6, 20, 2],
 [15, 16, 17, 18, 1, 19, 4, 5, 6, 20, 2, 7],
 [15, 16, 17, 18, 1, 19, 4, 5, 6, 20, 2, 7, 21],
 [15, 16, 17, 18, 1, 19, 4, 5, 6, 20, 2, 7, 21, 22],
 [15, 16, 17, 18, 1, 19, 4, 5, 6, 20, 2, 7, 21, 22, 23],
 [15, 16, 17, 18, 1, 19, 4, 5, 6, 20, 2, 7, 21, 22, 23, 3],
 [15, 16, 17, 18, 1, 19, 4, 5, 6, 20, 2, 7, 21, 22, 23, 3, 24],
 [15, 16, 17, 18, 1, 19, 4, 5, 6, 20, 2, 7, 21, 22, 23, 3, 24, 25],
 [15, 16, 17, 18, 1, 19, 4, 5, 6, 20, 2, 7, 21, 22, 23, 3, 24, 25, 26],
 [15, 16, 17, 18, 1, 19, 4, 5, 6, 20, 2, 7, 21, 22, 23, 3, 24, 25, 26, 27],
 [28, 29],
 [28, 29, 30],
 [28, 29, 30, 31],
 [28, 29, 30, 31, 32],
 [28, 29, 30, 31, 32, 33],
 [28, 29, 30, 31, 32, 33, 8],
 [28, 29, 30, 31, 32, 33, 8, 2],
 [28, 29, 30, 31, 32, 33, 8,

In [110]:
[len(x) for x in input_sequence]

[2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28]

In [111]:
max([len(x) for x in input_sequence])

30

In [112]:
max_len = max([len(x) for x in input_sequence])

In [113]:
from tensorflow.keras.preprocessing.sequence import pad_sequences 
padded_input_sequences = pad_sequences(input_sequence, maxlen = max_len , padding = 'pre')

In [114]:
padded_input_sequences

array([[ 0,  0,  0, ...,  0, 15, 16],
       [ 0,  0,  0, ..., 15, 16, 17],
       [ 0,  0,  0, ..., 16, 17, 18],
       ...,
       [ 0,  0,  0, ..., 14,  2, 53],
       [ 0,  0,  0, ...,  2, 53,  1],
       [ 0,  0, 12, ..., 53,  1, 54]], dtype=int32)

In [115]:
X = padded_input_sequences[:, :-1]
y = padded_input_sequences[:,-1]

In [116]:
X

array([[ 0,  0,  0, ...,  0,  0, 15],
       [ 0,  0,  0, ...,  0, 15, 16],
       [ 0,  0,  0, ..., 15, 16, 17],
       ...,
       [ 0,  0,  0, ..., 13, 14,  2],
       [ 0,  0,  0, ..., 14,  2, 53],
       [ 0,  0, 12, ...,  2, 53,  1]], dtype=int32)

In [117]:
y

array([16, 17, 18,  1, 19,  4,  5,  6, 20,  2,  7, 21, 22, 23,  3, 24, 25,
       26, 27, 29, 30, 31, 32, 33,  8,  2, 34,  1,  9, 35, 36, 37,  4, 38,
       10, 39, 40, 41, 42,  1, 43, 44, 45,  2,  7, 10, 11,  3, 46, 47,  1,
        9,  5,  6, 13, 14,  2, 48,  8, 11,  1,  3, 49, 12, 50,  1, 51,  2,
       52, 13, 14,  2, 53,  1, 54], dtype=int32)

In [118]:
tokenizer.word_index

{'the': 1,
 'to': 2,
 'model': 3,
 'that': 4,
 'one': 5,
 'should': 6,
 'create': 7,
 'how': 8,
 'analysis': 9,
 'a': 10,
 'good': 11,
 'and': 12,
 'be': 13,
 'able': 14,
 'this': 15,
 'course': 16,
 'covers': 17,
 'all': 18,
 'steps': 19,
 'take': 20,
 'an': 21,
 'image': 22,
 'classification': 23,
 'using': 24,
 'convolutional': 25,
 'neural': 26,
 'networks': 27,
 'most': 28,
 'courses': 29,
 'only': 30,
 'focus': 31,
 'on': 32,
 'teaching': 33,
 'run': 34,
 'but': 35,
 'we': 36,
 'believe': 37,
 'having': 38,
 'strong': 39,
 'theoretical': 40,
 'understanding': 41,
 'of': 42,
 'concepts': 43,
 'enables': 44,
 'us': 45,
 'after': 46,
 'running': 47,
 'judge': 48,
 'is': 49,
 'interpret': 50,
 'results': 51,
 'actually': 52,
 'help': 53,
 'business': 54}

In [119]:
from tensorflow.keras.utils import to_categorical 
y = to_categorical(y, num_classes = 55)

In [120]:
y.shape

(75, 55)

In [121]:
X.shape

(75, 29)

Model Building

In [122]:
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Embedding, LSTM, Dense 

In [123]:
model = Sequential() 
model.add(Embedding(55, 100))
model.add(LSTM(150))
model.add(Dense(55, activation='softmax')) 

model.build(input_shape=(None, 29))

In [124]:
model.compile(loss = 'categorical_crossentropy', optimizer = 'adam', metrics =['accuracy'])

In [125]:
model.summary()

In [126]:
model.fit(X, y , epochs = 100)

Epoch 1/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 61ms/step - accuracy: 0.0133 - loss: 4.0114
Epoch 2/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step - accuracy: 0.0800 - loss: 3.9937
Epoch 3/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step - accuracy: 0.0933 - loss: 3.9771
Epoch 4/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step - accuracy: 0.0933 - loss: 3.9411
Epoch 5/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - accuracy: 0.0933 - loss: 3.8741
Epoch 6/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step - accuracy: 0.0933 - loss: 3.8467
Epoch 7/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step - accuracy: 0.0800 - loss: 3.7851 
Epoch 8/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step - accuracy: 0.1067 - loss: 3.7675 
Epoch 9/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

<keras.src.callbacks.history.History at 0x20231972650>

In [127]:
#Test the model 
text2 = "judge"
#tokenization 
token_text = tokenizer.texts_to_sequences(['text2'])[0]
#padding 
padded_text = pad_sequences([token_text], maxlen=33, padding='pre')
#model prediction 
model.predict(padded_text)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 597ms/step


array([[7.1693430e-05, 2.7341690e-02, 3.3658484e-04, 6.1493301e-05,
        8.4218191e-04, 7.4818596e-04, 4.3982064e-04, 5.1661795e-05,
        1.2584690e-03, 3.9689527e-03, 5.6075682e-05, 9.0047462e-05,
        1.0750622e-04, 3.5798189e-04, 1.4843378e-03, 7.4838797e-05,
        3.4207186e-01, 6.7092776e-02, 3.2264881e-02, 2.5783733e-03,
        3.0975151e-04, 3.6543741e-05, 7.4900054e-05, 5.7053123e-05,
        2.3078002e-05, 1.8180572e-05, 1.0635105e-04, 3.0375331e-05,
        5.8474976e-05, 2.8362632e-01, 2.0067956e-02, 7.4113542e-03,
        2.2868784e-03, 2.1746673e-03, 3.7302531e-04, 1.5817203e-04,
        8.6100648e-05, 6.6202716e-05, 8.3102415e-05, 5.4982796e-05,
        3.0228499e-05, 7.3197174e-05, 1.4033906e-04, 5.6234113e-04,
        2.8208856e-04, 2.3200501e-04, 1.6154639e-01, 3.7057601e-02,
        3.1942016e-04, 1.9799494e-04, 1.8664441e-04, 4.1993079e-04,
        1.7519230e-04, 2.4784118e-04, 1.2591678e-04]], dtype=float32)

In [128]:
import numpy as np 
np.argmax(model.predict(padded_text))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step


np.int64(16)

In [129]:
tokenizer.word_index

{'the': 1,
 'to': 2,
 'model': 3,
 'that': 4,
 'one': 5,
 'should': 6,
 'create': 7,
 'how': 8,
 'analysis': 9,
 'a': 10,
 'good': 11,
 'and': 12,
 'be': 13,
 'able': 14,
 'this': 15,
 'course': 16,
 'covers': 17,
 'all': 18,
 'steps': 19,
 'take': 20,
 'an': 21,
 'image': 22,
 'classification': 23,
 'using': 24,
 'convolutional': 25,
 'neural': 26,
 'networks': 27,
 'most': 28,
 'courses': 29,
 'only': 30,
 'focus': 31,
 'on': 32,
 'teaching': 33,
 'run': 34,
 'but': 35,
 'we': 36,
 'believe': 37,
 'having': 38,
 'strong': 39,
 'theoretical': 40,
 'understanding': 41,
 'of': 42,
 'concepts': 43,
 'enables': 44,
 'us': 45,
 'after': 46,
 'running': 47,
 'judge': 48,
 'is': 49,
 'interpret': 50,
 'results': 51,
 'actually': 52,
 'help': 53,
 'business': 54}

In [130]:
for word, index in tokenizer.word_index.items(): 
    if index == 29:
          print(word)

courses


In [133]:
#Test the model 
text3 = "This course "
#tokenization 
token_text3 = tokenizer.texts_to_sequences(['text3'])[0]
#padding 
padded_text3 = pad_sequences([token_text3], maxlen=max_len, padding='pre')
#model prediction 
model.predict(padded_text3)

import numpy as np 
pos = np.argmax(model.predict(padded_text3))

for word, index in tokenizer.word_index.items(): 
    if index == pos:
          print(word)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 604ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 132ms/step
course
