## Applying an RNN with an Embedding Layer

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# Baseline experiment: Embedding -> LSTM -> sigmoid classifier on IMDB reviews,
# trained on a deliberately tiny subset of the data.

# Configuration
vocab_size = 10000  # Keep only the 10,000 most frequent words
maxlen = 150  # Every review is padded/truncated to exactly 150 tokens
max_words = maxlen  # Alias of `maxlen`; single source of truth so the two cannot drift
training_samples = 100  # Restrict training samples to 100
validation_samples = 10000  # Size of the validation slice taken right after the training slice
random_seed = 42

# Seed Python, NumPy and TensorFlow so that weight initialisation and batch
# shuffling are repeatable under "Restart Kernel -> Run All".
tf.keras.utils.set_random_seed(random_seed)

# Load data, keeping only the `vocab_size` most frequent words
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)

# Preprocess data: bring every sequence to a fixed length of `maxlen`
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)

# Prepare model
embedding_dim = 32
model = Sequential()
# `input_length` is intentionally not passed: it is deprecated in Keras 3 and
# the sequence length is inferred from the padded input at fit time.
model.add(Embedding(vocab_size, embedding_dim))
model.add(LSTM(32))
model.add(Dense(1, activation='sigmoid'))  # Single probability for the binary label

model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])

# Train the model on the first `training_samples` reviews; validate on the
# next `validation_samples` reviews of the same training split.
history = model.fit(x_train[:training_samples], y_train[:training_samples],
                    epochs=10,
                    batch_size=32,
                    validation_data=(x_train[training_samples:training_samples+validation_samples],
                                     y_train[training_samples:training_samples+validation_samples]))

# Evaluate the model on the untouched test split
loss, accuracy = model.evaluate(x_test, y_test)
print("Test Accuracy:", accuracy)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Accuracy: 0.5


## Improving Performance with Limited Data

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, SpatialDropout1D, Dropout

# Regularised experiment: the Embedding -> LSTM -> sigmoid classifier with
# dropout added, trained on a deliberately tiny subset of the IMDB reviews.

# Configuration
vocab_size = 10000  # Keep only the 10,000 most frequent words
maxlen = 150  # Every review is padded/truncated to exactly 150 tokens
max_words = maxlen  # Alias of `maxlen`; single source of truth so the two cannot drift
training_samples = 100  # Restrict training samples to 100
validation_samples = 10000  # Size of the validation slice taken right after the training slice
random_seed = 42

# Seed Python, NumPy and TensorFlow so that weight initialisation, dropout
# masks and batch shuffling are repeatable under "Restart Kernel -> Run All".
tf.keras.utils.set_random_seed(random_seed)

# Load data, keeping only the `vocab_size` most frequent words
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)

# Preprocess data: bring every sequence to a fixed length of `maxlen`
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)

# Prepare model
embedding_dim = 32
model = Sequential()
# `input_length` is intentionally not passed: it is deprecated in Keras 3 and
# the sequence length is inferred from the padded input at fit time.
model.add(Embedding(vocab_size, embedding_dim))
model.add(SpatialDropout1D(0.2))  # Spatial dropout to reduce overfitting
model.add(LSTM(32))
model.add(Dropout(0.1))  # Dropout to reduce overfitting
model.add(Dense(1, activation='sigmoid'))  # Single probability for the binary label

model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])

# Train the regularised model. No data augmentation is performed here: the
# only change to the training setup is the dropout layers added above.
history = model.fit(x_train[:training_samples], y_train[:training_samples],
                    epochs=10,
                    batch_size=32,
                    validation_data=(x_train[training_samples:training_samples+validation_samples],
                                     y_train[training_samples:training_samples+validation_samples]))

# Evaluate the model on the untouched test split
loss, accuracy = model.evaluate(x_test, y_test)
print("Test Accuracy:", accuracy)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Accuracy: 0.5030400156974792
