In [None]:
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.preprocessing.sequence import pad_sequences
# import tensorflow_datasets as tfds
from tensorflow.keras.datasets import imdb
# Load the IMDB dataset from tensorflow_datasets
(ds_train, ds_test), ds_info = imdb.load_data('imdb_reviews', 
                                         split=['train', 'test'], 
                                         as_supervised=True, 
                                         with_info=True)

# Preprocess the dataset
BUFFER_SIZE = 10000
BATCH_SIZE = 64
VOCAB_SIZE = 10000
MAX_LEN = 500

# Encode the text into sequences of integers
encoder = tf.keras.layers.TextVectorization(max_tokens=VOCAB_SIZE, output_mode='int', output_sequence_length=MAX_LEN)
encoder.adapt(ds_train.map(lambda text, label: text))

# Function to preprocess the dataset
def encode_map_fn(text, label):
    encoded_text = encoder(text)
    return encoded_text, label

# Apply the encoding to the train and test datasets
ds_train = ds_train.map(encode_map_fn).cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE).prefetch(tf.data.experimental.AUTOTUNE)
ds_test = ds_test.map(encode_map_fn).batch(BATCH_SIZE).prefetch(tf.data.experimental.AUTOTUNE)

# Build the RNN model using the functional API
input_layer = layers.Input(shape=(MAX_LEN,), dtype=tf.int32)

# Add the embedding layer
embedding_layer = layers.Embedding(input_dim=VOCAB_SIZE, output_dim=128, input_length=MAX_LEN)(input_layer)

# Add the SimpleRNN layer
rnn_layer = layers.SimpleRNN(128, activation='relu')(embedding_layer)

# Add the output layer
output_layer = layers.Dense(1, activation='sigmoid')(rnn_layer)

# Create the model
model = Model(inputs=input_layer, outputs=output_layer)

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Display the model summary
model.summary()

# Train the model
history = model.fit(ds_train, epochs=10, validation_data=ds_test)

# Evaluate the model on the test dataset
test_loss, test_acc = model.evaluate(ds_test)
print(f'Test Accuracy: {test_acc}')


In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.datasets import imdb

# Load the IMDB dataset
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=10000)

# Convert data to one-hot vectors
train_data = tf.keras.utils.to_categorical(train_data, 10000)
test_data = tf.keras.utils.to_categorical(test_data, 10000)

# Create a Sequential model
model = Sequential()

# Add an embedding layer to convert word indices to dense vectors
model.add(Embedding(10000, 128))

# Add a SimpleRNN layer
model.add(SimpleRNN(128))

# Add a Dense layer for classification
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(train_data, train_labels, epochs=10, batch_size=32, validation_data=(test_data, test_labels))

# Evaluate the model
loss, accuracy = model.evaluate(test_data, test_labels)
print('Test accuracy:', accuracy)

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.layers import Input, Embedding, SimpleRNN, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping

# Load the IMDB dataset
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=10000)

# Pad sequences to a fixed length
max_length = 500
x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_length)
x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test, maxlen=max_length)

# Define the model architecture using Functional API
vocab_size = 10000  # Number of unique words to consider
embedding_dim = 16  # Dimension of the embedding layer
rnn_units = 32      # Number of RNN units

# Functional API: Define the input and the model structure
inputs = Input(shape=(max_length,))
x = Embedding(vocab_size, embedding_dim)(inputs)
x = SimpleRNN(rnn_units)(x)
outputs = Dense(1, activation='sigmoid')(x)

# Build the model
model = Model(inputs, outputs)

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

model.summary()
# Define early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train the model with early stopping
history = model.fit(
    x_train, y_train, 
    epochs=10, 
    batch_size=64, 
    validation_data=(x_test, y_test), 
    callbacks=[early_stopping]
)

# Evaluate the model
test_loss, test_acc = model.evaluate(x_test, y_test)
print('Test Loss:', test_loss)
print('Test Accuracy:', test_acc)

# Predict on a sample text
# Predict on a sample text
sample_text = "The movie was not good. The animation and the graphics were terrible. I would not recommend this movie."

# Tokenize and preprocess the sample text
word_index = imdb.get_word_index()

# Convert the sample text to integer sequences
def preprocess_text(text):
    words = text.lower().split()
    encoded_text = [word_index.get(word, 2) + 3 for word in words]  # +3 because IMDB indices are offset
    padded_text = tf.keras.preprocessing.sequence.pad_sequences([encoded_text], maxlen=max_length)
    return padded_text

sample_padded = preprocess_text(sample_text)

# Predict sentiment
predictions = model.predict(sample_padded)
print(f'Prediction: {"Positive" if predictions[0][0] > 0.5 else "Negative"} with score: {predictions[0][0]}')


In [None]:
model.save('simpleRNN_IMDB_sentiment Analysis.h5')

In [29]:
from tensorflow.keras.models import load_model
model = load_model('simpleRNN_IMDB_sentiment Analysis.h5')



In [30]:
# Predict on a sample text
# Predict on a sample text
sample_text = "And then we have Jake with his closet which totally ruins all the film! I expected to see a BOOGEYMAN similar movie, and instead i watched a drama with some meaningless thriller spots"

# Tokenize and preprocess the sample text
word_index = imdb.get_word_index()

# Convert the sample text to integer sequences
def preprocess_text(text):
    words = text.lower().split()
    encoded_text = [word_index.get(word, 2) + 3 for word in words]  # +3 because IMDB indices are offset
    padded_text = tf.keras.preprocessing.sequence.pad_sequences([encoded_text], maxlen=max_length)
    return padded_text

sample_padded = preprocess_text(sample_text)

# Predict sentiment
predictions = model.predict(sample_padded)
print(f'Prediction: {"Positive" if predictions[0][0] > 0.5 else "Negative"} with score: {predictions[0][0]}')


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 583ms/step
Prediction: Negative with score: 0.0660569965839386


In [None]:
def preprocess_text(text):
    words = text.lower().split()
    encoded_review = [word_index.get(word, 2) + 3 for word in words]
    padded_review = sequence.pad_sequences([encoded_review], maxlen=500)
    
    # Debugging prints
    print(f"Original Text: {text}")
    # print(f"Encoded Review: {encoded_review}")
    # print(f"Padded Review Shape: {padded_review.shape}")
    
    return padded_review

def predict_sentiment(review):
    preprocessed_input = preprocess_text(review)
    
    # Debugging prints
    print(f"Preprocessed Input for Prediction: {preprocessed_input}")
    
    prediction = model.predict(preprocessed_input)
    
    sentiment = 'Positive' if prediction[0][0] > 0.05 else 'Negative'
    
    # Debugging prints
    print(f"Raw Prediction Score: {prediction[0][0]}")
    
    return sentiment, prediction[0][0]

In [None]:
example_review = "That movie was fantastic ."
sentiment, score = predict_sentiment(example_review)

print(f'Review: {example_review}')
print(f'Sentiment: {sentiment}')
print(f'Prediction Score: {score}')

In [None]:
def preprocess_text(text):
    words = text.lower().split()
    encoded_text = [word_index.get(word, 2) + 3 for word in words]  # +3 because IMDB indices are offset
    padded_text = tf.keras.preprocessing.sequence.pad_sequences([encoded_text], maxlen=max_length)
    return padded_text


In [None]:
def preprocess_text(text):
    words = text.lower().split()
    encoded_text = [word_index.get(word, 2) + 3 for word in words]  # +3 because IMDB indices are offset
    padded_text = tf.keras.preprocessing.sequence.pad_sequences([encoded_text], maxlen=max_length)
    return padded_text

# Function to predict sentiment of a review
def predict_sentiment(review):
    preprocessed_input = preprocess_text(review)
    prediction = model.predict(preprocessed_input)
    sentiment = 'Positive' if prediction[0][0] > 0.5 else 'Negative'
    score = prediction[0][0]
    return sentiment, score

# Test the function on example reviews
example_reviews = [
    "The movie was fantastic, full of suspense and drama!",
    "I didn't like the movie, the plot was terrible and the acting was bad.",
    "It was an okay movie, not too bad but not great either.",
    "Absolutely loved this film, will watch it again.",
    "Worst movie I've ever seen, do not recommend!"
]

for review in example_reviews:
    sentiment, score = predict_sentiment(review)
    print(f'Review: "{review}"\nPredicted Sentiment: {sentiment}, Score: {score:.4f}\n')