In [1]:
import tensorflow 
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN, Embedding, Dropout
from keras.preprocessing.sequence import pad_sequences
from keras.datasets import imdb
from keras.regularizers import l2

In [2]:
(X_train, y_train),(X_test, y_test) = imdb.load_data(num_words=50000)

In [3]:
X_train.shape

(25000,)

In [4]:
len(X_train[0])

218

In [5]:
#padding 
X_train = pad_sequences(X_train, padding='post', maxlen=100)
X_test = pad_sequences(X_test, padding='post', maxlen=100)

In [6]:
X_train.shape

(25000, 100)

In [7]:
# Convert labels to float32
import numpy as np
y_train = np.array(y_train).astype('float32')
y_test = np.array(y_test).astype('float32')

In [8]:
model = Sequential([
    Embedding(input_dim=50000, output_dim=2, input_length=100),
    SimpleRNN(32, return_sequences=False, kernel_regularizer=l2(0.01)),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 100, 2)            100000    
                                                                 
 simple_rnn (SimpleRNN)      (None, 32)                1120      
                                                                 
 dropout (Dropout)           (None, 32)                0         
                                                                 
 dense (Dense)               (None, 1)                 33        
                                                                 
Total params: 101153 (395.13 KB)
Trainable params: 101153 (395.13 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [9]:
model.fit(X_train, y_train, epochs=5, validation_data=(X_test, y_test))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x140f7cbfb50>

In [10]:
loss, accuracy = model.evaluate(X_test, y_test)



In [11]:
X_test[0]

array([    1,   591,   202,    14,    31,     6,   717,    10,    10,
       18142, 10698,     5,     4,   360,     7,     4,   177,  5760,
         394,   354,     4,   123,     9,  1035,  1035,  1035,    10,
          10,    13,    92,   124,    89,   488,  7944,   100,    28,
        1668,    14,    31,    23,    27,  7479,    29,   220,   468,
           8,   124,    14,   286,   170,     8,   157,    46,     5,
          27,   239,    16,   179, 15387,    38,    32,    25,  7944,
         451,   202,    14,     6,   717,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0])

In [12]:
y_test[0]

0.0

In [13]:
y_test

array([0., 1., 1., ..., 0., 0., 0.], dtype=float32)

In [14]:
y_pred = model.predict(X_test)



In [15]:
y_pred  #y_pred stores the probablity values for sentiment if negative < 0.5 and if positive > 0.5

array([[0.04846063],
       [0.9842697 ],
       [0.93202406],
       ...,
       [0.03957693],
       [0.11344127],
       [0.06923461]], dtype=float32)

In [16]:
y_pred = (y_pred > 0.5).astype("int32")
y_pred

array([[0],
       [1],
       [1],
       ...,
       [0],
       [0],
       [0]])

In [17]:
from sklearn.metrics import classification_report
report = classification_report(y_test, y_pred, target_names=['Negative', 'Positive'])
print(report)

              precision    recall  f1-score   support

    Negative       0.79      0.90      0.84     12500
    Positive       0.88      0.77      0.82     12500

    accuracy                           0.83     25000
   macro avg       0.84      0.83      0.83     25000
weighted avg       0.84      0.83      0.83     25000



In [26]:
#review prediction model
from keras.preprocessing.text import Tokenizer
import numpy as np
review = ["Awesome, worth of money movie. Liked it very much. Best movie i have ever seen, it has great story , amazing plot , best character development, great budget. It has good music and vfx is actually the best. Best use of my money. Watching this movie again. Everyone should see it and hope it does well."]

In [27]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(review)

In [28]:
tokenizer.word_index

{'it': 1,
 'best': 2,
 'movie': 3,
 'of': 4,
 'money': 5,
 'has': 6,
 'great': 7,
 'and': 8,
 'awesome': 9,
 'worth': 10,
 'liked': 11,
 'very': 12,
 'much': 13,
 'i': 14,
 'have': 15,
 'ever': 16,
 'seen': 17,
 'story': 18,
 'amazing': 19,
 'plot': 20,
 'character': 21,
 'development': 22,
 'budget': 23,
 'good': 24,
 'music': 25,
 'vfx': 26,
 'is': 27,
 'actually': 28,
 'the': 29,
 'use': 30,
 'my': 31,
 'watching': 32,
 'this': 33,
 'again': 34,
 'everyone': 35,
 'should': 36,
 'see': 37,
 'hope': 38,
 'does': 39,
 'well': 40}

In [29]:
sequences = tokenizer.texts_to_sequences(review)
print(sequences)

[[9, 10, 4, 5, 3, 11, 1, 12, 13, 2, 3, 14, 15, 16, 17, 1, 6, 7, 18, 19, 20, 2, 21, 22, 7, 23, 1, 6, 24, 25, 8, 26, 27, 28, 29, 2, 2, 30, 4, 31, 5, 32, 33, 3, 34, 35, 36, 37, 1, 8, 38, 1, 39, 40]]


In [30]:
sequences = pad_sequences(sequences, padding='post', maxlen=100)
sequences

array([[ 9, 10,  4,  5,  3, 11,  1, 12, 13,  2,  3, 14, 15, 16, 17,  1,
         6,  7, 18, 19, 20,  2, 21, 22,  7, 23,  1,  6, 24, 25,  8, 26,
        27, 28, 29,  2,  2, 30,  4, 31,  5, 32, 33,  3, 34, 35, 36, 37,
         1,  8, 38,  1, 39, 40,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0]])

In [31]:
prediction = model.predict(sequences)



In [32]:
prediction

array([[0.8899512]], dtype=float32)

In [33]:
print(f'Postive' if prediction > 0.5 else 'Negative')

Postive
