<a href="https://colab.research.google.com/github/rahulku91058/Training-AIML/blob/main/SentimentAnalysis/SentimentAnalysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Import important libraries**

In [7]:
!git clone https://github.com/rahulku91058/Training-AIML.git
%cd Training-AIML/SentimentAnalysis/

Cloning into 'Training-AIML'...
remote: Enumerating objects: 153, done.[K
remote: Counting objects: 100% (153/153), done.[K
remote: Compressing objects: 100% (133/133), done.[K
remote: Total 153 (delta 55), reused 87 (delta 17), pack-reused 0 (from 0)[K
Receiving objects: 100% (153/153), 40.29 MiB | 22.59 MiB/s, done.
Resolving deltas: 100% (55/55), done.
/content/Training-AIML/SentimentAnalysis/Training-AIML/SentimentAnalysis


In [8]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense

# **Load IMDB Dataset**

In [12]:
vocab_size = 10000   #Only consider the top 10,000 words
max_length = 500     #Maximum length of each sequence
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=vocab_size)

# **Preprocess Date: Pad sequence to make them all the same length** Scaling

In [10]:
x_train = pad_sequences(X_train, maxlen=max_length)
x_test = pad_sequences(X_test, maxlen=max_length)

# **Define the model**

In [13]:
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=32, input_length=max_length),  #Embedding Layer
    SimpleRNN(units=32, return_sequences=False),   #RNN Layer
    Dense(1, activation='sigmoid') # Output Layer
])




# **Compile the model**

In [14]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# **Train the model**

In [15]:
print("Training the model...")
model.fit(x_train, y_train, epochs=3, batch_size=64, validation_data=(x_test, y_test))

Training the model...
Epoch 1/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 140ms/step - accuracy: 0.5991 - loss: 0.6381 - val_accuracy: 0.8221 - val_loss: 0.4174
Epoch 2/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 139ms/step - accuracy: 0.8286 - loss: 0.3943 - val_accuracy: 0.8348 - val_loss: 0.3885
Epoch 3/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 140ms/step - accuracy: 0.9030 - loss: 0.2557 - val_accuracy: 0.8506 - val_loss: 0.4005


<keras.src.callbacks.history.History at 0x7f6d401ca290>

# **Evaluate the model**

In [16]:
loss, accuracy = model.evaluate(x_test, y_test)
print(f"Test Loss: {loss}, Test Accuracy: {accuracy}")

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 23ms/step - accuracy: 0.8528 - loss: 0.3995
Test Loss: 0.4004603624343872, Test Accuracy: 0.850600004196167


In [17]:
print("\nEvaluating the model...")
loss, accuracy = model.evaluate(x_test, y_test)
print(f"Test Accuracy: {accuracy*100:.2f}%")


Evaluating the model...
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 23ms/step - accuracy: 0.8528 - loss: 0.3995
Test Accuracy: 85.06%


In [26]:
# test with a Custom Index
# Decode IMDB word index

word_index = imdb.get_word_index()
reverse_word_index = {value: key for key, value in word_index.items()}

def decode_review(sequence):
    return ' '.join([reverse_word_index.get(i - 3, '?') for i in sequence])

test_review = x_test[0]
test_review_padded = pad_sequences([test_review], maxlen=max_length)
prediction = model.predict(test_review_padded)
print("Sentiment Prediction:", "Positive" if prediction>0.5 else "Negative")

#print("Review: ",decode_review(test_review))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
Sentiment Prediction: Negative


In [31]:
# Step 1: Load the
word_index = imdb.get_word_index()
reverse_word_index = {value: key for key, value in word_index.items()}
# Step 2: Preprocess the Custom Review
def preprocess_review(review_test):
       # Convert the review to lowercase and split into words
  words = review_test.lower().split()
       # Converts words to integers using the IMDB word index
  tokenized_review = [word_index.get(word, 2) for word in words]
       # Pad the tokenized sequence
  return pad_sequences([tokenized_review], maxlen=max_length)

# Step 3: Test with Custom Input
custom_review = "The movie was terrible, with poor acting and a boring plot."
processed_review = preprocess_review(custom_review)
# Predict the Sentiment
prediction = model.predict(preprocessed_review)
print("\nCustom Review:", custom_review)
print("Processed Review:", processed_review)
print("Sentiment Prediction:", "Positive" if prediction > 0.5 else "Negative")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step

Custom Review: The movie was terrible, with poor acting and a boring plot.
Processed Review: [[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   