In [2]:
import tensorflow_datasets as tfds

import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional

import numpy as np

import matplotlib.pyplot as plt

### Data Collection

In [7]:
# Load the IMDB reviews dataset. as_supervised=True makes every example a
# (text, label) pair; with_info=True also returns the dataset metadata.
imdb, info = tfds.load(
    "imdb_reviews",
    with_info=True,
    as_supervised=True,
)

# Keep the two splits under their own names for the preprocessing step.
train_data = imdb['train']
test_data = imdb['test']

### Data Preprocessing

In [None]:
def preprocess_data(data):
    """Split an iterable of (text, label) pairs into two parallel lists.

    Args:
        data: Iterable yielding ``(text, label)`` pairs. Each element must
            expose ``.numpy()``; the text's ``.numpy()`` result must be
            bytes, which are decoded as UTF-8.

    Returns:
        A ``(sentences, labels)`` tuple: ``sentences`` is a list of decoded
        strings and ``labels`` is a list of the raw ``.numpy()`` label
        values, both in iteration order.
    """
    # Materialise each example once so text and label stay aligned.
    examples = [
        (text.numpy().decode('utf8'), label.numpy())
        for text, label in data
    ]

    sentences = [sentence for sentence, _ in examples]
    labels = [label for _, label in examples]
    return sentences, labels

# Turn both dataset splits into plain Python lists of review text and labels.
train_sentences, train_labels = preprocess_data(train_data)
test_sentences, test_labels = preprocess_data(test_data)

# Tokenization and Padding
# The vocabulary is fitted on the training reviews only; the tokenizer is
# configured with a 10000-word cap and "<OOV>" as its out-of-vocabulary token.
tokenizer = Tokenizer(num_words=10000, oov_token="<OOV>")
tokenizer.fit_on_texts(train_sentences)

# Map every review to a sequence of integer token ids.
train_sequences = tokenizer.texts_to_sequences(train_sentences)
test_sequences = tokenizer.texts_to_sequences(test_sentences)

# Bring every sequence to exactly 120 ids, padding at the end.
train_padded = pad_sequences(train_sequences, maxlen=120, padding='post')
test_padded = pad_sequences(test_sequences, maxlen=120, padding='post')

### Model Selection

In [None]:
# Binary sentiment classifier over padded token-id sequences:
# embedding -> bidirectional LSTM -> dense head with a single sigmoid output.
model = Sequential([
    # Declaring the input shape up front builds the model immediately, so
    # summary() can report output shapes and parameter counts without a
    # separate model.build() call. It also replaces Embedding's
    # `input_length` argument, which Keras 3 deprecates.
    # 120 matches the maxlen used when padding the sequences.
    tf.keras.Input(shape=(120,)),
    # input_dim matches the num_words cap given to the Tokenizer.
    Embedding(input_dim=10000, output_dim=64),
    Bidirectional(LSTM(64)),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Display the model summary
model.summary()

### Model Training

In [None]:
# Convert labels to numpy column vectors of shape (n_samples, 1), lining up
# with the model's single sigmoid output. reshape(-1, 1) is idempotent, so
# re-running this cell leaves the shape unchanged instead of stacking one
# more trailing axis on every execution (which np.expand_dims would do,
# because the names are reassigned in place).
train_labels = np.asarray(train_labels).reshape(-1, 1)
test_labels = np.asarray(test_labels).reshape(-1, 1)

# Check the shapes of the input data and labels exactly as they are passed to fit()
print("Train data shape:", train_padded.shape)
print("Train labels shape:", train_labels.shape)
print("Test data shape:", test_padded.shape)
print("Test labels shape:", test_labels.shape)

# Train the model
# NOTE(review): the test split is also used as validation data here, so the
# val_* curves are not an independent hold-out estimate.
history = model.fit(train_padded, train_labels, epochs=5, validation_data=(test_padded, test_labels))

### Evaluation

In [None]:
def plot_graphs(history, string):
    """Plot a training metric per epoch, plus its validation curve if present.

    Args:
        history: Object with a ``history`` mapping from metric name to a
            sequence of per-epoch values (e.g. the value returned by
            ``model.fit``).
        string: Name of the metric to plot, such as ``"accuracy"`` or
            ``"loss"``. The validation series is looked up under
            ``"val_" + string``.

    Raises:
        KeyError: If ``string`` itself is not a key of ``history.history``.
    """
    metrics = history.history
    val_key = 'val_' + string

    fig, ax = plt.subplots()
    ax.plot(metrics[string], label=string)
    # A run without validation data has no val_* series; skip that curve
    # instead of failing with a KeyError.
    if val_key in metrics:
        ax.plot(metrics[val_key], label=val_key)
    ax.set_xlabel("Epochs")
    ax.set_ylabel(string)
    # The legend is built from the labels of the curves actually drawn.
    ax.legend()
    plt.show()

# Draw one figure per tracked metric, in the same order as before.
for metric_name in ("accuracy", "loss"):
    plot_graphs(history, metric_name)