# Sentiment analysis in RNN 

In [1]:
import numpy as np 

docs = [
    'go india',
    'india india',
    'hip hip hurray',
    'jeetega bhai jeetega',
    'bharat mata ki jai',
    'kholi kohli',
    'sachin sachin',
    'dhoni dhoni',
    'modi ki jai',
    'inquilab zindabad'
]

In [2]:
from tensorflow.keras.preprocessing.text import Tokenizer
tokenizer = Tokenizer(oov_token='meet')

In [3]:
tokenizer.fit_on_texts(docs)

In [4]:
tokenizer.word_index

{'meet': 1,
 'india': 2,
 'hip': 3,
 'jeetega': 4,
 'ki': 5,
 'jai': 6,
 'sachin': 7,
 'dhoni': 8,
 'go': 9,
 'hurray': 10,
 'bhai': 11,
 'bharat': 12,
 'mata': 13,
 'kholi': 14,
 'kohli': 15,
 'modi': 16,
 'inquilab': 17,
 'zindabad': 18}

In [5]:
tokenizer.word_counts # freq counts

OrderedDict([('go', 1),
             ('india', 3),
             ('hip', 2),
             ('hurray', 1),
             ('jeetega', 2),
             ('bhai', 1),
             ('bharat', 1),
             ('mata', 1),
             ('ki', 2),
             ('jai', 2),
             ('kholi', 1),
             ('kohli', 1),
             ('sachin', 2),
             ('dhoni', 2),
             ('modi', 1),
             ('inquilab', 1),
             ('zindabad', 1)])

In [6]:
tokenizer.document_count

10

In [7]:
sequences = tokenizer.texts_to_sequences(docs)
sequences

[[9, 2],
 [2, 2],
 [3, 3, 10],
 [4, 11, 4],
 [12, 13, 5, 6],
 [14, 15],
 [7, 7],
 [8, 8],
 [16, 5, 6],
 [17, 18]]

In [8]:
from keras.utils import pad_sequences
sequences = pad_sequences(sequences, padding="post")

In [9]:
sequences

array([[ 9,  2,  0,  0],
       [ 2,  2,  0,  0],
       [ 3,  3, 10,  0],
       [ 4, 11,  4,  0],
       [12, 13,  5,  6],
       [14, 15,  0,  0],
       [ 7,  7,  0,  0],
       [ 8,  8,  0,  0],
       [16,  5,  6,  0],
       [17, 18,  0,  0]], dtype=int32)

#### 1. this is how u make your data ready for integer encoding and then train your rnn

#### 2. While for training an rnn using embeddings. U further convert integer encoded values into dense vectors, by adding embedding layer 

In [10]:
from keras.datasets import imdb
from keras import Sequential
from keras.layers import Dense, SimpleRNN, Embedding, Flatten

In [12]:
model = Sequential()
model.add(Embedding(18, output_dim=2, input_length=4))

model.summary()


In [13]:
model.compile('adam', 'accuracy')
pred = model.predict(sequences)
print(pred)

2026-01-22 13:29:00.526123: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1
2026-01-22 13:29:00.526647: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2026-01-22 13:29:00.526653: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
2026-01-22 13:29:00.527449: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2026-01-22 13:29:00.529699: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2026-01-22 13:29:00.886977: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 492ms/step
[[[ 0.02684713  0.0399184 ]
  [-0.03153639 -0.0097574 ]
  [ 0.0393797  -0.04824593]
  [ 0.0393797  -0.04824593]]

 [[-0.03153639 -0.0097574 ]
  [-0.03153639 -0.0097574 ]
  [ 0.0393797  -0.04824593]
  [ 0.0393797  -0.04824593]]

 [[-0.02192104 -0.04146969]
  [-0.02192104 -0.04146969]
  [ 0.02472583  0.04103423]
  [ 0.0393797  -0.04824593]]

 [[-0.04783223  0.03369978]
  [-0.00461657  0.04569444]
  [-0.04783223  0.03369978]
  [ 0.0393797  -0.04824593]]

 [[-0.00735049  0.00820907]
  [ 0.03929669  0.01794119]
  [ 0.01857418 -0.01082467]
  [-0.0325084   0.01685599]]

 [[ 0.01120368 -0.03041722]
  [ 0.00809804  0.0106334 ]
  [ 0.0393797  -0.04824593]
  [ 0.0393797  -0.04824593]]

 [[-0.03382367  0.01934103]
  [-0.03382367  0.01934103]
  [ 0.0393797  -0.04824593]
  [ 0.0393797  -0.04824593]]

 [[ 0.01512581  0.00248725]
  [ 0.01512581  0.00248725]
  [ 0.0393797  -0.04824593]
  [ 0.0393797  -0.04824593]]

 [[-0.01056052 

In [14]:
# imdb dataset - training using embeddings

In [15]:
from keras.datasets import imdb
from keras import Sequential
from keras.layers import Dense, SimpleRNN, Embedding, Flatten

In [16]:
(X_train, y_train), (X_test,y_test) = imdb.load_data()

In [17]:
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(25000,)
(25000,)
(25000,)
(25000,)


In [18]:
X_train

array([list([1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 22665, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 21631, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 19193, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 10311, 8, 4, 107, 117, 5952, 15, 256, 4, 31050, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 12118, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32]),
       list([1, 194, 1

In [19]:
X_train = pad_sequences(X_train, padding='post', maxlen=50)
X_test = pad_sequences(X_test, padding='post',maxlen=50)

In [20]:
X_train.shape

(25000, 50)

In [24]:
model = Sequential()
model.add(Embedding(10000,2))
model.add(SimpleRNN(32, return_sequences=False))
model.add(Dense(1,activation='sigmoid'))

model.summary()

In [None]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
history = model.fit(X_train, y_train, epochs=5, validation_data=(X_test, y_test))

##### In conclusion embeddings k sath training karne par RNNs give better results in comparison to integer encoded values. Refer to below code which uses integer encoded values for training 

# Imdb dataset - Integer encoding training 

In [None]:
from keras.datasets import imdb
from keras import Sequential
from keras.layers import Dense, SimpleRNN, Embedding, Flatten

In [None]:
(X_train, y_train), (X_test,y_test) = imdb.load_data()

In [None]:
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(25000,)
(25000,)
(25000,)
(25000,)


In [None]:
X_train

array([list([1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 22665, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 21631, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 19193, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 10311, 8, 4, 107, 117, 5952, 15, 256, 4, 31050, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 12118, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32]),
       list([1, 194, 1

In [None]:
len(X_train[2])

141

In [None]:
X_train = pad_sequences(X_train, padding='post', maxlen=50)
X_test = pad_sequences(X_test, padding='post',maxlen=50)

In [None]:
X_train.shape

(25000, 50)

In [None]:
model = Sequential() 

model.add(SimpleRNN(32, input_shape=(50,1), return_sequences=False))
model.add(Dense(1,activation='sigmoid'))
model.summary()

2026-01-22 11:07:38.821815: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1
2026-01-22 11:07:38.822018: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2026-01-22 11:07:38.822022: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
2026-01-22 11:07:38.822537: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2026-01-22 11:07:38.822997: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
  super().__init__(**kwargs)


In [None]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=5, validation_data=(X_test,y_test))