# Dataset

In [None]:
from datasets import load_dataset

# Load the "imdb" dataset with `datasets.load_dataset` and print the returned
# object to inspect it. Later cells read its 'train' and 'test' splits and
# their 'text' and 'label' columns.
dataset = load_dataset("imdb")
print(dataset)


In [None]:
! pip install tensorflow


# Preprocess the Data

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Read each text column once and reuse it for fitting and encoding.
train_texts = dataset['train']['text']
test_texts = dataset['test']['text']

# The vocabulary is limited via num_words and learned from the training
# reviews only; the test reviews are merely encoded with it.
tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(train_texts)

train_sequences = tokenizer.texts_to_sequences(train_texts)
test_sequences = tokenizer.texts_to_sequences(test_texts)

# Bring every review to the same length so the reviews stack into a single
# 2-D array; padding='post' puts the fill values at the end of short reviews.
max_length = 256
train_padded = pad_sequences(
    train_sequences,
    maxlen=max_length,
    padding='post',
)
test_padded = pad_sequences(
    test_sequences,
    maxlen=max_length,
    padding='post',
)

print("Train Padded Shape:", train_padded.shape)
print("Test Padded Shape:", test_padded.shape)


# Build the RNN Model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, Input, LSTM

# Binary sentiment classifier: word indices -> embeddings -> LSTM -> dense head
# ending in a single sigmoid unit.
model = Sequential([
    # Declaring the input shape up front builds the model immediately, so
    # summary() can report output shapes and parameter counts. This replaces
    # Embedding's `input_length` argument, which Keras 3 deprecates and ignores.
    Input(shape=(max_length,)),
    # input_dim follows the tokenizer's vocabulary cap so the two cannot drift
    # apart. mask_zero=True treats index 0 (the value pad_sequences fills with)
    # as padding, so the LSTM skips the trailing pad steps of a post-padded
    # review instead of continuing to update its state on them.
    Embedding(input_dim=tokenizer.num_words, output_dim=128, mask_zero=True),
    # return_sequences=False: only the final LSTM output feeds the dense head.
    LSTM(64, return_sequences=False),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid'),
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# summary() prints the table itself and returns None; wrapping it in print()
# would add a stray "None" line to the output.
model.summary()


# Train the Model

In [14]:
import numpy as np

# Collect the training labels into a NumPy array so they can be split in step
# with `train_padded` by train_test_split.
train_labels = np.array(dataset['train']['label'])


In [15]:
from sklearn.model_selection import train_test_split

In [None]:
# Carve a validation set out of the training data: 20% is held out, and the
# fixed random_state keeps the split identical from run to run.
X_train, X_val, y_train, y_val = train_test_split(
    train_padded,
    train_labels,
    random_state=42,
    test_size=0.2,
)

for caption, split in (("Shape of X_train:", X_train), ("Shape of y_train:", y_train)):
    print(caption, split.shape)


In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Train for at most 10 epochs, but stop once validation loss has failed to
# improve for 2 consecutive epochs and roll back to the best weights seen.
# Without this, `model` keeps whatever weights the final epoch produced, even
# if validation loss had already started rising.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=2,
    restore_best_weights=True,
)

history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=10,
    batch_size=32,
    callbacks=[early_stopping],
)


# Evaluate the Model

In [None]:
# Put the evaluation inputs into NumPy arrays and print both shapes as a
# sanity check before evaluating.
test_labels = np.array(dataset['test']['label'])
test_padded = np.array(test_padded)

for array in (test_padded, test_labels):
    print(array.shape)

# The model was compiled with a single metric, so evaluate() yields the loss
# followed by the accuracy.
test_metrics = model.evaluate(test_padded, test_labels)
test_loss, test_accuracy = test_metrics
print(f"Test Accuracy: {test_accuracy:.2f}")

# Predict Sentiment

In [None]:
def predict_sentiment(review, threshold=0.5):
    """Classify a single review as "Positive" or "Negative".

    The review is encoded with the module-level ``tokenizer`` and padded to
    ``max_length`` with the same settings used for the training data, then
    scored by ``model``.

    Args:
        review: Raw review text.
        threshold: Score strictly above which the review counts as positive.
            Defaults to 0.5.

    Returns:
        "Positive" if the model's score exceeds ``threshold``, otherwise
        "Negative".
    """
    sequence = tokenizer.texts_to_sequences([review])
    padded_sequence = pad_sequences(sequence, maxlen=max_length, padding='post')
    # verbose=0 keeps the Keras progress bar out of the output for what is
    # always a one-item batch.
    prediction = model.predict(padded_sequence, verbose=0)
    # The result has one row per input and one sigmoid unit per row; pull the
    # scalar out explicitly instead of truth-testing a 1-element array.
    score = float(prediction[0][0])
    return "Positive" if score > threshold else "Negative"

# Quick check of the helper on one short example review.
print(predict_sentiment("This movie was fantastic!"))
