In [None]:
"""
Recurrent Neural Network (RNN) Example:

Description:
    This example demonstrates the creation and training of a Recurrent Neural Network (RNN) using TensorFlow and Keras. RNNs are a type of neural network architecture designed for sequence data, making them suitable for tasks such as time series prediction, natural language processing, and speech recognition. In this example, we'll use an RNN to perform sentiment analysis on movie reviews from the IMDB dataset.

Alternatives:
    - Long Short-Term Memory (LSTM) networks: A type of RNN with improved memory capabilities, making them better suited for capturing long-term dependencies in sequences.
    - Gated Recurrent Unit (GRU) networks: Similar to LSTMs, but with a simplified architecture, making them more computationally efficient.

Benefits:
    - Suitable for sequence data: RNNs are designed to handle sequential data such as text, time series, and audio.
    - Captures temporal dependencies: RNNs can learn patterns and relationships across different time steps in a sequence.
    - Flexibility: RNNs can be applied to various tasks, including sequence prediction, classification, and generation.

Downsides:
    - Vanishing gradients: RNNs are prone to the vanishing gradient problem, making it difficult to capture long-term dependencies in sequences.
    - Computationally intensive: Training RNNs can be computationally expensive, especially with large datasets and complex architectures.
    - Memory constraints: RNNs may struggle with long sequences due to memory limitations, leading to truncated sequences or inefficient training.

"""

import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense

# Settings shared by the rest of this cell.
max_features = 10000  # vocabulary cap: only the 10,000 most frequent words are kept
maxlen = 500  # fixed number of time steps every review is brought to
batch_size = 32

print('Loading data...')
# The loader hands back one (sequences, labels) pair per split.
train_split, test_split = imdb.load_data(num_words=max_features)
input_train, y_train = train_split
input_test, y_test = test_split
print(len(input_train), 'train sequences')
print(len(input_test), 'test sequences')

# Bring both splits to the same fixed length so each becomes a
# (samples x time) array.
print('Pad sequences (samples x time)')
input_train, input_test = (
    sequence.pad_sequences(reviews, maxlen=maxlen)
    for reviews in (input_train, input_test)
)
print('input_train shape:', input_train.shape)
print('input_test shape:', input_test.shape)

# Seed TensorFlow's global random generator before any layer is created so
# that weight initialisation (and other seeded TF ops) repeat from run to run.
# NOTE(review): some GPU kernels are still non-deterministic; treat this as
# "repeatable as far as seeded ops allow", not bit-exact.
tf.random.set_seed(42)

# Define the RNN model: integer word ids -> 32-d embeddings -> 32-unit
# SimpleRNN -> a single sigmoid unit giving the positive-sentiment probability.
model = Sequential([
    Embedding(max_features, 32),
    SimpleRNN(32),
    Dense(1, activation='sigmoid'),
])

# Compile the model for binary classification.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model; 20% of the training data is held out for validation.
print('Training...')
model.fit(input_train, y_train, epochs=10, batch_size=batch_size, validation_split=0.2)

# Evaluate the model on the untouched test split.
print('Evaluation...')
test_loss, test_acc = model.evaluate(input_test, y_test)
print('Test accuracy:', test_acc)

"""
What the Model is Trying to Learn:

The model in this example is attempting to learn sentiment analysis on movie reviews from the IMDB dataset. Specifically, it aims to predict whether a given movie review is positive or negative based on the text content of the review.

The IMDB dataset consists of movie reviews labeled as positive or negative. The reviews are represented as sequences of words, and each word is encoded as an integer index. The model processes these sequences of word indices using an Embedding layer to convert them into dense vector representations. Then, it uses a SimpleRNN layer to capture sequential dependencies in the reviews. Finally, a Dense layer with a sigmoid activation function is used to output the predicted sentiment score for each review, with values close to 0 indicating negative sentiment and values close to 1 indicating positive sentiment.

During training, the model learns to minimize the binary cross-entropy loss between the predicted sentiment scores and the true labels (positive or negative) of the reviews. Through backpropagation and gradient descent, the model adjusts its parameters to improve its predictions over time.

In summary, the model is trained to understand the sentiment expressed in movie reviews and predict whether each review is positive or negative based on its text content.
"""




Loading data...
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
25000 train sequences
25000 test sequences
Pad sequences (samples x time)
input_train shape: (25000, 500)
input_test shape: (25000, 500)
Training...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10

In [None]:
import numpy as np
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV

# Dataset and batching settings for the grid-search cell.
max_features = 10000  # keep only the 10,000 most frequent words
maxlen = 500  # target length (in tokens) for every review
batch_size = 32

print('Loading data...')
imdb_splits = imdb.load_data(num_words=max_features)
(input_train, y_train), (input_test, y_test) = imdb_splits
print(len(input_train), 'train sequences')
print(len(input_test), 'test sequences')

# Give every review the same length so the data forms a (samples x time) array.
print('Pad sequences (samples x time)')
input_train, input_test = map(
    lambda reviews: sequence.pad_sequences(reviews, maxlen=maxlen),
    (input_train, input_test),
)
print('input_train shape:', input_train.shape)
print('input_test shape:', input_test.shape)

# Define the RNN model
def create_rnn_model(optimizer='adam', units=32, embedding_dim=32):
    """Build and compile a SimpleRNN binary classifier.

    The network is Embedding -> SimpleRNN -> Dense(1, sigmoid) and is compiled
    with binary cross-entropy loss and an accuracy metric. The vocabulary size
    is read from the module-level ``max_features``.

    Args:
        optimizer: Optimizer name or instance forwarded to ``model.compile``.
        units: Number of units in the SimpleRNN layer.
        embedding_dim: Width of the learned word embeddings. Defaults to 32,
            the value that was previously hard-coded, so existing callers
            (including the grid search) get the same model as before.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential([
        Embedding(max_features, embedding_dim),
        SimpleRNN(units),
        Dense(1, activation='sigmoid'),
    ])
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Wrap the model builder in a scikit-learn style estimator so that
# GridSearchCV can construct and fit a fresh model per parameter combination.
# NOTE(review): `tensorflow.keras.wrappers.scikit_learn` appears to have been
# removed from recent TensorFlow releases (2.12+) — confirm the pinned TF
# version, or plan a migration to a maintained wrapper.
model = KerasClassifier(
    build_fn=create_rnn_model,
    epochs=10,
    batch_size=batch_size,
    verbose=0,
)

# Candidate values: 3 optimizers x 3 hidden sizes = 9 combinations.
param_grid = {
    'optimizer': ['adam', 'rmsprop', 'sgd'],
    'units': [32, 64, 128],
}

# Exhaustive search, scoring each combination with 3-fold cross-validation.
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=3)
grid_result = grid.fit(input_train, y_train)

# Report the winner first, then one line per combination tried.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
cv_results = grid_result.cv_results_
means, stds, params = (
    cv_results['mean_test_score'],
    cv_results['std_test_score'],
    cv_results['params'],
)
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

"""
Explanation of Layers in the RNN Model:

The RNN (Recurrent Neural Network) model in this example consists of several layers, each serving a specific purpose in the architecture. Here's an explanation of each layer:

1. Embedding Layer:
   - Purpose: The Embedding layer converts integer-encoded words into dense vector representations. Each word is represented by a vector of fixed size, where similar words have similar vector representations.
   - Practices: The size of the embedding dimension (e.g., 32 in this example) is a hyperparameter that can be tuned based on the vocabulary size and complexity of the task. It's common to use pre-trained word embeddings for better performance, especially in NLP tasks with large vocabularies.
   - Rule of Thumb: Choose the embedding dimension large enough to capture semantic relationships between words but small enough to prevent overfitting. Pre-trained embeddings such as Word2Vec, GloVe, or FastText can provide useful initializations.

2. SimpleRNN Layer:
   - Purpose: The SimpleRNN (Simple Recurrent Neural Network) layer processes sequential data by maintaining a hidden state that captures information from previous time steps. It applies a simple transformation to the input at each time step and passes the hidden state to the next time step.
   - Practices: The number of units in the SimpleRNN layer (e.g., 32 in this example) determines the dimensionality of the hidden state and the capacity of the model to capture temporal dependencies. Increasing the number of units can enhance the model's ability to learn complex patterns but may also increase computational complexity and risk overfitting.
   - Rule of Thumb: Start with a moderate number of units and adjust based on the complexity of the data and the task. Consider using more advanced recurrent units such as LSTM or GRU if the SimpleRNN struggles to capture long-term dependencies.

3. Dense Layer:
   - Purpose: The Dense layer is a fully connected layer that transforms the output of the RNN layer into a single output value. In this binary classification task, a sigmoid activation function is used to produce a probability score indicating the sentiment (positive or negative) of the input sequence.
   - Practices: The number of units in the Dense layer is typically small, as it only needs to map the features learned by the preceding layers to the final output. The activation function (sigmoid in this case) ensures that the output is in the range [0, 1], representing the probability of the positive class.
   - Rule of Thumb: Use the sigmoid activation function for binary classification tasks. For multi-class classification tasks, consider using softmax activation with multiple output units corresponding to each class.

These layers collectively form the RNN model, which is trained to learn the sentiment expressed in movie reviews and predict whether each review is positive or negative based on its text content.
"""


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense, Dropout
from tensorflow.keras.optimizers import Adam

# Dataset settings for the fine-tuning cell.
max_features = 10000  # vocabulary cap passed to the loader
maxlen = 500  # fixed sequence length used when padding
print('Loading data...')
train_split, test_split = imdb.load_data(num_words=max_features)
input_train, y_train = train_split
input_test, y_test = test_split
print(len(input_train), 'train sequences')
print(len(input_test), 'test sequences')

# Pad both splits to the same fixed length.
input_train, input_test = [
    sequence.pad_sequences(reviews, maxlen=maxlen)
    for reviews in (input_train, input_test)
]

# Load pre-trained RNN model.
# NOTE(review): no visible cell writes 'pretrained_rnn_model.h5'; the file must
# already exist in the working directory — confirm where it is produced.
pretrained_model = load_model('pretrained_rnn_model.h5')

# Freeze pre-trained layers so only the newly added head is updated.
for layer in pretrained_model.layers:
    layer.trainable = False

# Add new top layers for fine-tuning: the frozen model feeds a small
# Dense/Dropout head that ends in a sigmoid unit for binary sentiment.
fine_tuned_model = Sequential()
fine_tuned_model.add(pretrained_model)
fine_tuned_model.add(Dense(128, activation='relu'))
fine_tuned_model.add(Dropout(0.5))
fine_tuned_model.add(Dense(1, activation='sigmoid'))

# Compile the fine-tuned model with a small step size.
# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and newer Keras optimizers reject it as an unrecognised argument.
fine_tuned_model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Fine-tune the model on the IMDB dataset.
# NOTE(review): the test split doubles as validation data here, so the
# reported validation metrics are not an untouched hold-out estimate.
fine_tuned_model.fit(input_train, y_train, batch_size=32, epochs=5, validation_data=(input_test, y_test))
