In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
!pip install tensorflow



In [4]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding,SimpleRNN,Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.datasets import imdb

In [6]:
#load imdb dataset
vocab_size = 10000
maxlen = 100

(x_train,y_train),(x_test,y_test) = imdb.load_data(num_words=vocab_size)

In [7]:
x_train = pad_sequences(x_train,maxlen=maxlen)
x_test = pad_sequences(x_test,maxlen=maxlen)

In [8]:
word_to_index = imdb.get_word_index()
index_to_word = {index + 3: word for word, index in word_to_index.items()}
index_to_word[0] = "[PAD]"
index_to_word[1] = "[START]"
index_to_word[2] = "[UNK]"

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [10]:
#RNN model
model = Sequential([
    Embedding(input_dim=vocab_size,output_dim=32,input_length=maxlen),  #Embeddinglayer
    SimpleRNN(32,activation='tanh'),                                    #RNN layer
    Dense(1,activation='sigmoid')                                      #outputlayer
])



In [11]:
model.compile(optimizer = 'adam', loss = 'binary_crossentropy',metrics=['accuracy'])

In [12]:
hist = model.fit(x_train,y_train,epochs=10,batch_size=64,validation_split=0.2)

Epoch 1/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 14ms/step - accuracy: 0.5867 - loss: 0.6522 - val_accuracy: 0.8068 - val_loss: 0.4270
Epoch 2/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 11ms/step - accuracy: 0.8542 - loss: 0.3444 - val_accuracy: 0.8222 - val_loss: 0.4066
Epoch 3/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - accuracy: 0.9228 - loss: 0.2057 - val_accuracy: 0.8242 - val_loss: 0.4185
Epoch 4/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - accuracy: 0.9679 - loss: 0.1020 - val_accuracy: 0.8252 - val_loss: 0.5003
Epoch 5/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - accuracy: 0.9886 - loss: 0.0460 - val_accuracy: 0.8192 - val_loss: 0.6147
Epoch 6/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - accuracy: 0.9942 - loss: 0.0257 - val_accuracy: 0.8030 - val_loss: 0.7027
Epoch 7/10
[1m313/313[0

In [14]:
test_loss,test_accuracy = model.evaluate(x_test,y_test)
print(f"Test_accuracy :{test_accuracy:.4f}")

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.8072 - loss: 0.8508
Test_accuracy :0.8087


In [18]:
sample_review=input("Enter the review: ")

Enter the review: good


In [19]:
sample_tokens = [word_to_index.get(word,2) for word in sample_review.lower().split()]
sample_padded= pad_sequences([sample_tokens],maxlen=maxlen)
pred_sent = model.predict(sample_padded)
print(f"pred_sent:{'positive'if pred_sent > 0.5 else 'negative'}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
pred_sent:positive
