In [1]:
import numpy as np

docs = ['Stunning film.',
        'Great story, acting.',
        'Predictable plot, good.',
        'Emotional and touching.',
        'Nice direction, script.',
        'Soundtrack fits well.',
        'Outstanding cast.',
        'Slow pacing, visuals.',
        'Memorable masterpiece.',
        'Challenges norms.']

In [2]:
from keras.preprocessing.text import Tokenizer
tokenizer = Tokenizer()

In [3]:
tokenizer.fit_on_texts(docs)

In [4]:
len(tokenizer.word_index)

26

In [5]:
tokenizer.word_index

{'stunning': 1,
 'film': 2,
 'great': 3,
 'story': 4,
 'acting': 5,
 'predictable': 6,
 'plot': 7,
 'good': 8,
 'emotional': 9,
 'and': 10,
 'touching': 11,
 'nice': 12,
 'direction': 13,
 'script': 14,
 'soundtrack': 15,
 'fits': 16,
 'well': 17,
 'outstanding': 18,
 'cast': 19,
 'slow': 20,
 'pacing': 21,
 'visuals': 22,
 'memorable': 23,
 'masterpiece': 24,
 'challenges': 25,
 'norms': 26}

In [6]:
tokenizer.word_counts

OrderedDict([('stunning', 1),
             ('film', 1),
             ('great', 1),
             ('story', 1),
             ('acting', 1),
             ('predictable', 1),
             ('plot', 1),
             ('good', 1),
             ('emotional', 1),
             ('and', 1),
             ('touching', 1),
             ('nice', 1),
             ('direction', 1),
             ('script', 1),
             ('soundtrack', 1),
             ('fits', 1),
             ('well', 1),
             ('outstanding', 1),
             ('cast', 1),
             ('slow', 1),
             ('pacing', 1),
             ('visuals', 1),
             ('memorable', 1),
             ('masterpiece', 1),
             ('challenges', 1),
             ('norms', 1)])

In [7]:
sequences = tokenizer.texts_to_sequences(docs)
sequences

[[1, 2],
 [3, 4, 5],
 [6, 7, 8],
 [9, 10, 11],
 [12, 13, 14],
 [15, 16, 17],
 [18, 19],
 [20, 21, 22],
 [23, 24],
 [25, 26]]

In [12]:
from keras.utils import pad_sequences
from keras.preprocessing.text import Tokenizer
from keras.utils import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding
# Pad the sequences
max_length = max(len(seq) for seq in sequences)
sequences = pad_sequences(sequences,padding= 'post')
sequences

array([[ 1,  2,  0],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14],
       [15, 16, 17],
       [18, 19,  0],
       [20, 21, 22],
       [23, 24,  0],
       [25, 26,  0]], dtype=int32)

In [13]:
model = Sequential()
model.add(Embedding(input_dim=len(tokenizer.word_index), output_dim=2, input_length=max_length))   # max_length = 3, 26 X 2

# Print the model summary
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 3, 2)              52        
                                                                 
Total params: 52 (208.00 Byte)
Trainable params: 52 (208.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [14]:
# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [15]:
pred = model.predict(sequences)
print(pred)

[[[ 0.04131741  0.04128078]
  [-0.03770704 -0.03221248]
  [ 0.04841686  0.03058975]]

 [[ 0.00796358 -0.03325834]
  [-0.0270611  -0.04346522]
  [-0.04452697  0.02701047]]

 [[-0.0470116   0.04780244]
  [-0.01436891 -0.01018611]
  [-0.01607408  0.03352863]]

 [[ 0.04879181  0.04234275]
  [ 0.00588921  0.02602867]
  [ 0.03151019 -0.03111534]]

 [[-0.03905357  0.01963573]
  [-0.00091736 -0.03813704]
  [-0.04206625  0.03889582]]

 [[ 0.03940114  0.02034206]
  [-0.00399048  0.0419041 ]
  [-0.04233841  0.03150317]]

 [[ 0.03806144  0.04392285]
  [-0.02833618  0.0034112 ]
  [ 0.04841686  0.03058975]]

 [[ 0.0354488  -0.04583224]
  [ 0.00653279 -0.00461817]
  [-0.04331116  0.00243487]]

 [[-0.02455602 -0.02951725]
  [ 0.01435337 -0.00323582]
  [ 0.04841686  0.03058975]]

 [[-0.02269374  0.01391434]
  [ 0.          0.        ]
  [ 0.04841686  0.03058975]]]


In [16]:
from keras.datasets import imdb
from keras.preprocessing.text import Tokenizer
from keras.utils import pad_sequences
from keras import Sequential
from keras.layers import Dense,SimpleRNN,Embedding,Flatten

In [17]:
(X_train,y_train),(X_test,y_test) = imdb.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [18]:
X_train = pad_sequences(X_train,padding='post',maxlen=50)
X_test = pad_sequences(X_test,padding='post',maxlen=50)

In [19]:
X_train.shape

(25000, 50)

In [20]:
model = Sequential()
model.add(Embedding(input_dim=10000, output_dim=2, input_length=50))
model.add(SimpleRNN(32,return_sequences=False))
model.add(Dense(1, activation='sigmoid'))

model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 50, 2)             20000     
                                                                 
 simple_rnn (SimpleRNN)      (None, 32)                1120      
                                                                 
 dense (Dense)               (None, 1)                 33        
                                                                 
Total params: 21153 (82.63 KB)
Trainable params: 21153 (82.63 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [21]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
history = model.fit(X_train, y_train,epochs=5,validation_data=(X_test,y_test))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [22]:
predictions = model.predict(X_test)

# Print the first 10 predictions and their corresponding true labels
for i in range(10):
    print(f"Prediction: {predictions[i]}, True Label: {y_test[i]}")

Prediction: [0.17154767], True Label: 0
Prediction: [0.969304], True Label: 1
Prediction: [0.8653559], True Label: 1
Prediction: [0.21659173], True Label: 0
Prediction: [0.9013918], True Label: 1
Prediction: [0.12729345], True Label: 1
Prediction: [0.7791731], True Label: 1
Prediction: [0.14491707], True Label: 0
Prediction: [0.21059997], True Label: 0
Prediction: [0.94720775], True Label: 1
