<a href="https://colab.research.google.com/github/osamagasser/20210144-Sentiment-Analysis/blob/main/Sentiment_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
import numpy as np

In [2]:
# Load IMDB dataset
num_words = 10000  # Keep only the top 10,000 words
max_length = 200  # Set max review length for padding

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=num_words)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [3]:
# Pad sequences to make all reviews the same length
x_train = pad_sequences(x_train, maxlen=max_length, padding='post')
x_test = pad_sequences(x_test, maxlen=max_length, padding='post')

In [4]:
# Define LSTM Model
model = Sequential([
    Embedding(input_dim=num_words, output_dim=32, input_length=max_length),
    LSTM(64, return_sequences=True),
    LSTM(32),
    Dense(16, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')  # Sigmoid for binary classification
])



In [5]:
# Compile the Model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [6]:
# Train the Model
model.fit(x_train, y_train, epochs=3, batch_size=128, validation_data=(x_test, y_test))

Epoch 1/3
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m122s[0m 598ms/step - accuracy: 0.5236 - loss: 0.6878 - val_accuracy: 0.5226 - val_loss: 0.6882
Epoch 2/3
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m127s[0m 524ms/step - accuracy: 0.5623 - loss: 0.6698 - val_accuracy: 0.5150 - val_loss: 0.6774
Epoch 3/3
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 516ms/step - accuracy: 0.6017 - loss: 0.6536 - val_accuracy: 0.6904 - val_loss: 0.5693


<keras.src.callbacks.history.History at 0x7e36eab12c90>

In [7]:

# Evaluate the Model
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"\nTest Accuracy: {test_acc:.2f}")

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 47ms/step - accuracy: 0.6939 - loss: 0.5663

Test Accuracy: 0.69


In [16]:
# Function to Predict Sentiment from Text
word_index = imdb.get_word_index()
reverse_word_index = {v: k for k, v in word_index.items()}  # Reverse mapping

def decode_review(encoded_review):
    return " ".join([reverse_word_index.get(i - 3, "?") for i in encoded_review])

def predict_sentiment(text):
    # Tokenize the input text
    words = text.lower().split()
    encoded_seq = [word_index.get(word, 2) + 3 for word in words]  # Use 2 for unknown words

    # Pad the sequence
    padded_seq = pad_sequences([encoded_seq], maxlen=max_length, padding='post')

    # DEBUG: Print padded_seq to check if it is properly created
    print(f"Processed Text: {text}")
    print(f"Encoded Sequence: {encoded_seq}")
    print(f"Padded Sequence Shape: {padded_seq.shape}")

    # Make prediction
    prediction = model.predict(padded_seq)
    sentiment = "Positive" if prediction > 0.5 else "Negative"

    return sentiment, float(prediction[0][0])


In [18]:
# Test on Custom Reviews
test_sentences = [
    "This movie was absolutely fantastic!",
    "I hated this film. It was so boring.",
    "The acting was great but the plot was weak.",
    "Not my favorite movie, but it was okay.",
    "One of the best films I have ever seen!",
    "Terrible storyline, I wouldn’t recommend it.",
    "I really enjoyed watching this, such a great experience!",
    "The worst performance by an actor in a long time.",
    "I loved the cinematography, but the story lacked depth.",
    "A complete waste of time. Would not watch again."
]


In [19]:
# Store predictions
output_text = f"Test Accuracy: {test_acc:.2f}\n\nSentiment Analysis Results:\n"

for sentence in test_sentences:
    sentiment, confidence = predict_sentiment(sentence)
    result = f"Text: {sentence}\nSentiment: {sentiment} (Confidence: {confidence:.2f})\n"
    print(result)  # Print in Colab
    output_text += result + "\n"

Processed Text: This movie was absolutely fantastic!
Encoded Sequence: [14, 20, 16, 427, 5]
Padded Sequence Shape: (1, 200)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 423ms/step
Text: This movie was absolutely fantastic!
Sentiment: Positive (Confidence: 0.86)

Processed Text: I hated this film. It was so boring.
Encoded Sequence: [13, 1800, 14, 5, 12, 16, 38, 5]
Padded Sequence Shape: (1, 200)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
Text: I hated this film. It was so boring.
Sentiment: Positive (Confidence: 0.86)

Processed Text: The acting was great but the plot was weak.
Encoded Sequence: [4, 116, 16, 87, 21, 4, 114, 16, 5]
Padded Sequence Shape: (1, 200)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
Text: The acting was great but the plot was weak.
Sentiment: Positive (Confidence: 0.86)

Processed Text: Not my favorite movie, but it was okay.
Encoded Sequence: [24, 61, 514, 5, 21, 12, 16, 5]
Padded Sequence

In [20]:
# Save Output to File
with open("output.txt", "w") as f:
    f.write(output_text)

In [21]:
# Download the output file for GitHub
from google.colab import files
files.download("output.txt")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>