In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report


# Load the IMDB dataset with the vocabulary limited to `max_features` entries
max_features = 10000  # Vocabulary size
(X_train, y_train), (X_test, y_test) = imdb.load_data(
    num_words=max_features,
)

# Report the shape of every split (one output line per split)
print(
    f'Training data shape: {X_train.shape}, Training labels shape: {y_train.shape}',
    f'Testing data shape: {X_test.shape}, Testing labels shape: {y_test.shape}',
    sep='\n',
)

# Take the first training example and its label for a quick sanity check
sample_review = X_train[0]
sample_label = y_train[0]

# Invert the word -> index vocabulary into an index -> word lookup
word_index = imdb.get_word_index()
reverse_word_index = dict(zip(word_index.values(), word_index.keys()))

# Every stored id is shifted down by 3 before the lookup; ids that have no
# entry in the lookup are rendered as '?'
decoded_review = ' '.join(
    reverse_word_index.get(token_id - 3, '?') for token_id in sample_review
)

# Show the readable review followed by its label
print(
    f"Sample review (decoded): {decoded_review}",
    f'Sample label: {sample_label}',
    sep='\n',
)

# Bring both splits to a uniform sequence length of `max_len`
max_len = 500  # Maximum review length
X_train, X_test = (
    sequence.pad_sequences(split, maxlen=max_len)
    for split in (X_train, X_test)
)

# Confirm the training array shape now that padding has been applied
print(f"X_train shape after padding: {X_train.shape}")

# Compute class weights to handle class imbalance.
# Each weight is keyed by its actual class label rather than by its position
# in the weight array, so the mapping stays correct even when the labels are
# not exactly 0..n-1. The raw array gets its own name so that `class_weights`
# is only ever the {label: weight} dict handed to `model.fit`.
# NOTE(review): if the training labels are evenly split, every weight comes
# out as 1.0 and the weighting is a no-op -- confirm against the printed value.
class_labels = np.unique(y_train)
class_weight_values = compute_class_weight('balanced', classes=class_labels, y=y_train)
# .tolist() yields plain Python ints/floats, which also print cleanly
class_weights = dict(zip(class_labels.tolist(), class_weight_values.tolist()))
print(f"Class weights: {class_weights}")

# Build the model.
# The input shape is declared with an explicit Input layer instead of the
# Embedding `input_length` argument, which Keras 3 deprecates (it only emits a
# warning there). Declaring the input up front also builds the model
# immediately, so it can be inspected (e.g. model.summary()) before training.
model = Sequential([
    tf.keras.Input(shape=(max_len,), dtype='int32'),  # One padded review of word ids
    Embedding(max_features, 128),  # Embedding Layer: word id -> 128-d vector
    LSTM(128, return_sequences=False, activation='tanh'),  # LSTM Layer: only the final output is kept
    Dropout(0.5),  # Dropout to prevent overfitting
    Dense(1, activation="sigmoid"),  # Output Layer: single sigmoid unit
])

# Compile the model with Adam optimizer and binary cross-entropy loss
optimizer = Adam(learning_rate=0.0001)  # Reduced learning rate
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# Stop training once validation loss has not improved for 5 epochs and
# roll the model back to the best weights observed
earlystopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# Fit on the training data, holding out 20% of it for validation
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
    callbacks=[earlystopping],
    class_weight=class_weights,  # Per-class loss weighting computed earlier
)

# Save the model.
# The file name is kept unchanged so anything that already loads this path
# keeps working; note that the saved network is an LSTM despite the
# "simple_rnn" prefix.
model.save('simple_rnn_imdb2.h5')

# Evaluate the model on the test data.
# The sigmoid output is a probability per review; threshold it at 0.5 to get
# hard 0/1 labels, flatten the (n, 1) predictions to (n,), and report
# per-class precision / recall / F1 against the true test labels.
y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype('int32').ravel()
print(classification_report(y_test, y_pred))




Training data shape: (25000,), Training labels shape: (25000,)
Testing data shape: (25000,), Testing labels shape: (25000,)
Sample review (decoded): ? this film was just brilliant casting location scenery story direction everyone's really suited the part they played and you could just imagine being there robert ? is an amazing actor and now the same being director ? father came from the same scottish island as myself so i loved the fact there was a real connection with this film the witty remarks throughout the film were great it was just brilliant so much that i bought the film as soon as it was released for ? and would recommend it to everyone to watch and the fly fishing was amazing really cried at the end it was so sad and you know what they say if you cry at a film it must have been good and this definitely was also ? to the two little boy's that played the ? of norman and paul they were just brilliant children are often left out of the ? list i think because the stars that play t



[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m339s[0m 539ms/step - accuracy: 0.6127 - loss: 0.6444 - val_accuracy: 0.8634 - val_loss: 0.3396
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m337s[0m 466ms/step - accuracy: 0.8790 - loss: 0.3040 - val_accuracy: 0.8814 - val_loss: 0.3063
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m270s[0m 432ms/step - accuracy: 0.9184 - loss: 0.2217 - val_accuracy: 0.8866 - val_loss: 0.2768
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m410s[0m 657ms/step - accuracy: 0.9345 - loss: 0.1859 - val_accuracy: 0.8846 - val_loss: 0.2802
Epoch 5/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m453s[0m 674ms/step - accuracy: 0.9521 - loss: 0.1436 - val_accuracy: 0.8828 - val_loss: 0.2904
Epoch 6/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m365s[0m 583ms/step - accuracy: 0.9625 - loss: 0.1221 - val_accuracy: 0.8822 - val_loss: 0.3578
Epoch 7/10
[1m



[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m127s[0m 162ms/step
              precision    recall  f1-score   support

           0       0.87      0.89      0.88     12500
           1       0.89      0.86      0.88     12500

    accuracy                           0.88     25000
   macro avg       0.88      0.88      0.88     25000
weighted avg       0.88      0.88      0.88     25000

