<a href="https://colab.research.google.com/github/kartikrupal/deep_learning/blob/main/p8_RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [31]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences


In [32]:
# Toy corpus for a quick smoke test (swap in a real dataset for actual use)
text_data = [
    'I love this movie',
    'This is terrible',
    'Neutral review',
    'This is the worst',
    'I Like You',
    'He says Bad about you',
]
# Sentiment class of each sentence: 0 = negative, 1 = neutral, 2 = positive
labels = [2, 0, 1, 0, 2, 0]

# Learn the vocabulary from the corpus, then encode every sentence as a list of word indices
tokenizer = Tokenizer()
tokenizer.fit_on_texts(text_data)
sequences = tokenizer.texts_to_sequences(text_data)
# Length of the longest encoded sentence
max_sequence_length = max(map(len, sequences))


In [33]:
# Display the integer-encoded sentences produced by the tokenizer
sequences

[[2, 5, 1, 6],
 [1, 3, 7],
 [8, 9],
 [1, 3, 10, 11],
 [2, 12, 4],
 [13, 14, 15, 16, 4]]

In [21]:
# Pad every encoded sentence to a common length so the batch forms a rectangular array
# (pad_sequences adds zeros at the front by default).
X = pad_sequences(sequences, maxlen=max_sequence_length)
# One-hot encode the labels. The width is pinned to the 3 sentiment classes instead of
# being inferred from the largest label present, so the target shape always matches the
# 3-unit softmax output even if some class is missing from the data.
y = tf.keras.utils.to_categorical(labels, num_classes=3)


In [22]:
# Build the RNN model
# Seed Python, NumPy and TensorFlow so that weight initialisation (and any later
# shuffling during training) is repeatable after a kernel restart.
tf.keras.utils.set_random_seed(42)

model = Sequential()
# Vocabulary size + 1 because Tokenizer word indices start at 1 and index 0 is used for padding.
# The `input_length` argument is deprecated in Keras 3 and is not needed here.
model.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=32))
# The recurrent layer returns only its final hidden state (64 units) for each sentence
model.add(SimpleRNN(64))
model.add(Dense(3, activation='softmax'))  # 3 output classes (positive, negative, neutral)




In [23]:
# Configure training: Adam optimiser, cross-entropy against the one-hot targets,
# and accuracy as the reported metric
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Fit on the padded sequences for 10 passes over the data
model.fit(X, y, batch_size=16, epochs=10)

Epoch 1/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.0000e+00 - loss: 1.1334
Epoch 2/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 669ms/step - accuracy: 0.3333 - loss: 1.1012
Epoch 3/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step - accuracy: 0.3333 - loss: 1.0704
Epoch 4/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - accuracy: 0.5000 - loss: 1.0406
Epoch 5/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - accuracy: 0.5000 - loss: 1.0116
Epoch 6/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - accuracy: 0.6667 - loss: 0.9830
Epoch 7/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step - accuracy: 0.6667 - loss: 0.9546
Epoch 8/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step - accuracy: 0.6667 - loss: 0.9262
Epoch 9/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m

<keras.src.callbacks.history.History at 0x7cf738812a50>

In [24]:
# Prepare a small held-out set (labels: 0 = negative, 1 = neutral, 2 = positive)
test_data = ['I enjoyed the movie', 'This is the worst thing ever']
test_labels = [2, 0]

# Encode the test sentences with the tokenizer that was fitted on the training text
# and pad them to the same length as the training inputs
test_sequences = tokenizer.texts_to_sequences(test_data)
X_test = pad_sequences(test_sequences, maxlen=max_sequence_length)
# Pin the one-hot width to the 3 sentiment classes. Without num_classes the width is
# inferred from the largest label, so a test set lacking class 2 would produce targets
# that no longer match the model's 3-unit output.
y_test = tf.keras.utils.to_categorical(test_labels, num_classes=3)


In [25]:
# Score the trained model on the held-out sentences; evaluate() returns the loss
# followed by the metric configured at compile time
loss, accuracy = model.evaluate(x=X_test, y=y_test)
print(f'Loss: {loss}, Accuracy: {accuracy}')


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 434ms/step - accuracy: 0.5000 - loss: 0.8331
Loss: 0.8331120610237122, Accuracy: 0.5


In [29]:
# Run the trained model on a couple of unseen sentences, encoding and padding them
# exactly like the training inputs
new_text = ["i hate this","i love this"]
new_sequences = tokenizer.texts_to_sequences(new_text)
X_new = pad_sequences(new_sequences, maxlen=max_sequence_length)
predictions = model.predict(X_new)

# Pick the most probable class in each row of the prediction matrix (one row per sentence)
predicted_labels = list(np.argmax(predictions, axis=1))
print("Predicted Labels:", predicted_labels)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
Predicted Labels: [np.int64(2), np.int64(2)]


In [30]:
# Human-readable name for each class index (0, 1, 2 in that order)
label_map = dict(enumerate(["Negative", "Neutral", "Positive"]))

# Show every input sentence next to the name of its predicted class
print("\nPredictions:")
for sentence, class_id in zip(new_text, predicted_labels):
    print(f"{sentence} → {label_map[class_id]}")


Predictions:
i hate this → Positive
i love this → Positive


In [34]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Expanded training data
text_data = [
    'I love this movie',
    'This is terrible',
    'Neutral review',
    'This is the worst',
    'I like you',
    'He says bad things about you',
    'I enjoyed the performance',
    'The experience was awful',
    # Plain ASCII apostrophe on purpose: the Tokenizer's default filters do not strip
    # apostrophes, so a typographic one would create a token that never matches
    # text typed with a regular keyboard apostrophe.
    "It's okay, not great not bad",
    'Absolutely fantastic movie!',
    'Worst film ever made',
    'Mediocre story but decent acting',
    'Loved every part of it',
    'Not good at all',
    'Just average, nothing special',
    'You are amazing',
    'Disappointing ending',
    'Could be better',
    'Really liked the characters',
    'Such a waste of time'
]

labels = [
    2, 0, 1, 0, 2, 0, 2, 0, 1, 2,
    0, 1, 2, 0, 1, 2, 0, 1, 2, 0
]  # 0 = Negative, 1 = Neutral, 2 = Positive

# Fail fast if a sentence is added or removed without updating its label
assert len(text_data) == len(labels), "text_data and labels must have the same length"

# Fit the vocabulary on the training sentences and encode them as lists of word indices
tokenizer = Tokenizer()
tokenizer.fit_on_texts(text_data)
sequences = tokenizer.texts_to_sequences(text_data)

# Pad to the length of the longest sentence and one-hot encode the three sentiment classes
max_sequence_length = max(map(len, sequences))
X = pad_sequences(sequences, maxlen=max_sequence_length)
y = tf.keras.utils.to_categorical(labels, num_classes=3)

# Build the RNN model
# Seed Python, NumPy and TensorFlow so that weight initialisation (and the shuffling
# performed during training) is repeatable after a kernel restart.
tf.keras.utils.set_random_seed(42)

model = Sequential()
# Vocabulary size + 1 because Tokenizer word indices start at 1 and index 0 is used for padding
model.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=32))
# The recurrent layer returns only its final hidden state (64 units) for each sentence
model.add(SimpleRNN(64))
# One softmax probability per sentiment class
model.add(Dense(3, activation='softmax'))

# Configure training: Adam optimiser, cross-entropy against the one-hot targets,
# and accuracy as the reported metric
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Fit in mini-batches of 4 sentences for 15 passes over the data
model.fit(X, y, batch_size=4, epochs=15)

# Expanded test data, written as (sentence, label) pairs so each sentence sits next to
# its sentiment class (0 = Negative, 1 = Neutral, 2 = Positive)
labeled_test_examples = [
    ('I enjoyed the movie', 2),
    ('This is the worst thing ever', 0),
    ('Quite boring and slow', 0),
    ('A masterpiece with brilliant direction', 2),
    ('Nothing to say, very average', 1),
    ('I really hated it', 0),
    ('This was so good!', 2),
    ('Fine but forgettable', 1),
]
# Split the pairs back into the two parallel lists used by the rest of the notebook
test_data = [sentence for sentence, _ in labeled_test_examples]
test_labels = [sentiment for _, sentiment in labeled_test_examples]

# One-hot encode the test labels over the three sentiment classes
y_test = tf.keras.utils.to_categorical(test_labels, num_classes=3)
# Encode the test sentences with the vocabulary learned from the training data and
# pad them to the same length as the training inputs
test_sequences = tokenizer.texts_to_sequences(test_data)
X_test = pad_sequences(test_sequences, maxlen=max_sequence_length)

# Measure loss and accuracy on the held-out test sentences
loss, accuracy = model.evaluate(x=X_test, y=y_test)
print("Loss:", loss, "Accuracy:", accuracy)

# Run the trained model on new sentences, encoding and padding them exactly like
# the training inputs
new_text = ["I like it", "It's not bad", "What a lovely surprise", "I hate the way it ended"]
new_sequences = tokenizer.texts_to_sequences(new_text)
X_new = pad_sequences(new_sequences, maxlen=max_sequence_length)
predictions = model.predict(X_new)

# Pick the most probable class in each row of the prediction matrix (one row per sentence)
predicted_labels = list(np.argmax(predictions, axis=1))
print("Predicted Labels:", predicted_labels)  # 0 = Negative, 1 = Neutral, 2 = Positive


Epoch 1/15
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.4417 - loss: 1.0794
Epoch 2/15
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8118 - loss: 1.0031 
Epoch 3/15
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8528 - loss: 0.9672 
Epoch 4/15
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.9458 - loss: 0.8549 
Epoch 5/15
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.8799 - loss: 0.8186 
Epoch 6/15
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.9424 - loss: 0.6882 
Epoch 7/15
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8694 - loss: 0.6468 
Epoch 8/15
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.9215 - loss: 0.5242 
Epoch 9/15
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1