<a href="https://colab.research.google.com/github/shradhautk/AI-MICROSCOPY-WORKSHOP/blob/main/GC04A_RNN_Sequence_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Sequence Models (IMDB Sentiment Analysis using SimpleRNN)

In [None]:
%pip install -qqq keras scikit-learn

In [None]:
from keras.datasets import imdb
from keras.preprocessing import sequence
from sklearn.model_selection import train_test_split
import numpy as np
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.optimizers import RMSprop

**Loading the IMDB data**

In [None]:
# Vocabulary cap handed to imdb.load_data as `num_words`
max_features = 10000

print('Loading data...')
train_split, heldout_split = imdb.load_data(num_words=max_features)
input_train, y_train = train_split

# Carve the official IMDB test split into a validation part and a final test
# part; test_size=0.6 leaves 60% for testing and 40% for validation.
input_val, input_test, y_val, y_test = train_test_split(
    *heldout_split, test_size=0.6, random_state=42
)

# Report how many reviews ended up in each split
for split_sequences, split_label in (
    (input_train, 'train sequences'),
    (input_val, 'validation sequences'),
    (input_test, 'test sequences'),
):
    print(len(split_sequences), split_label)

**Preparing the data**

In [None]:
# Every review is padded/truncated to exactly this many tokens
max_sequence_length = 1000

# Randomize training data.
# The raw reviews and their labels are reindexed together, in one statement and
# *before* padding, so the two arrays stay aligned even if this cell is re-run.
# (Shuffling X_train / y_train after padding would rebuild X_train from the
# unshuffled reviews on a re-run while permuting y_train a second time.)
# Side effect: input_train is left in shuffled order.
np.random.seed(42)  # Set seed for reproducibility
shuffle_indices = np.random.permutation(len(input_train))
input_train, y_train = input_train[shuffle_indices], y_train[shuffle_indices]

# Padding sequences
print('Pad sequences (samples x time)')
X_train = sequence.pad_sequences(input_train, maxlen=max_sequence_length)
X_val   = sequence.pad_sequences(input_val,   maxlen=max_sequence_length)
X_test  = sequence.pad_sequences(input_test,  maxlen=max_sequence_length)

print('input_train shape:', X_train.shape)
print('input_val shape:', X_val.shape)
print('input_test shape:', X_test.shape)


**Training the model with Embedding and SimpleRNN layers**

In [None]:
# Hyperparameters selected by an Optuna search (reference run)
# Test loss:     0.29567480087280273
# Test accuracy: 0.8817333579063416
# Parameters:
#     embedding_dim: 128
#     num_units:     64
#     bidirectional: False
#     learning_rate: 0.00014503438093039025
#     optimizer:     RMSprop

In [None]:
# Callbacks
# Stop training once val_loss has gone 4 epochs without improving, and roll
# the model back to the weights of its best epoch.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=4,
    restore_best_weights=True,
)
# Write the model to disk only when val_loss reaches a new minimum.
model_checkpoint = ModelCheckpoint(
    'best_model.keras',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
)
callbacks = [early_stopping, model_checkpoint]

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Embedding, SimpleRNN

# Embedding -> SimpleRNN -> single sigmoid unit (binary sentiment classifier).
model = Sequential([
    # `input_length` is deliberately not passed: it triggers a deprecation warning.
    Embedding(input_dim=max_features, output_dim=128),
    # NOTE(review): unroll=True on 1000-step sequences -- the Keras docs describe
    # unrolling as suited to short sequences; confirm this is intended.
    SimpleRNN(64, unroll=True),
    Dense(1, activation='sigmoid'),
])

opt = RMSprop(learning_rate=0.000145)
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['acc'])

# Train with the held-out validation split; the callbacks decide when to stop.
fit_options = dict(
    epochs=30,
    batch_size=128,
    validation_data=(X_val, y_val),
    callbacks=callbacks,
    verbose=1,
)
history1 = model.fit(X_train, y_train, **fit_options)

Epoch 1/30


**Plotting results**

In [None]:
import matplotlib.pyplot as plt
plt.style.use('ggplot')

# Per-epoch metrics recorded in history1 by model.fit
acc, val_acc, loss, val_loss = (
    history1.history[key] for key in ('acc', 'val_acc', 'loss', 'val_loss')
)
epochs = range(1, len(acc) + 1)

# First two colors of the 'tab10' colormap: training vs. validation
colors = plt.cm.tab10(range(2))

# One figure, two side-by-side panels: accuracy (left) and loss (right)
fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(11, 4))

# Accuracy panel
ax_acc.plot(epochs, acc, linestyle='-', color=colors[0], label='Training Accuracy')
ax_acc.plot(epochs, val_acc, linestyle='-', color=colors[1], label='Validation Accuracy')
ax_acc.set_title('RNN: Training and Validation Accuracy', fontsize=15)
ax_acc.set_xlabel('Epochs', fontsize=12)
ax_acc.set_ylabel('Accuracy', fontsize=12)
ax_acc.grid(axis='y', linestyle='--')
ax_acc.legend(loc='lower right')

# Loss panel
ax_loss.plot(epochs, loss, linestyle='-', color=colors[0], label='Training Loss')
ax_loss.plot(epochs, val_loss, linestyle='-', color=colors[1], label='Validation Loss')
ax_loss.set_title('RNN: Training and Validation Loss', fontsize=15)
ax_loss.set_xlabel('Epochs', fontsize=12)
ax_loss.set_ylabel('Loss', fontsize=12)
ax_loss.grid(axis='y', linestyle='--')
ax_loss.legend(loc='upper right')

fig.tight_layout()
plt.show()


**Test Accuracy & Confusion Matrix**

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score, recall_score

# The sigmoid output is a probability per review; threshold at 0.5 and
# flatten to a 1-D vector of 0/1 labels for the sklearn metrics.
y_pred_probs = model.predict(X_test)
y_pred = (y_pred_probs > 0.5).astype(int).flatten()

test_acc = accuracy_score(y_test, y_pred)
# Fixed the stray comma that used to be printed after "Test accuracy:".
print(f'Simple RNN: Test accuracy: {test_acc:3.3f}')
print(f'Simple RNN: F1 score: {f1_score(y_test, y_pred):3.3f}')

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

def plot_cm(model, y_test, y_pred, class_labels=None, title='Simple RNN: Confusion Matrix'):
    """Draw the confusion matrix of `y_pred` against `y_test` as an annotated heatmap.

    Args:
        model: Unused; kept so existing positional calls keep working.
        y_test: True class labels.
        y_pred: Predicted class labels.
        class_labels: Tick labels used on both axes. Defaults to
            ["negative", "positive"].
        title: Title shown above the heatmap.
    """
    # Build the default inside the function instead of using a shared mutable default.
    if class_labels is None:
        class_labels = ["negative", "positive"]

    cm = confusion_matrix(y_test, y_pred)
    sns.heatmap(cm, annot=True, fmt="d", cmap=plt.cm.Blues, xticklabels=class_labels, yticklabels=class_labels)
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.title(title)
    plt.show()

plot_cm(model, y_test, y_pred, title='Simple RNN: Confusion Matrix')


**Bi-Directional RNN**

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Embedding, SimpleRNN, Bidirectional

# Same architecture as the SimpleRNN model, with the recurrent layer wrapped in
# Bidirectional. `input_length` is not passed to Embedding (deprecated argument,
# also omitted by the first model).
model2 = Sequential()
model2.add(Embedding(input_dim=max_features, output_dim=128))
model2.add(Bidirectional(SimpleRNN(64)))
model2.add(Dense(1, activation='sigmoid'))

opt = RMSprop(learning_rate=0.000145)
model2.compile(optimizer=opt, loss='binary_crossentropy', metrics=['acc'])

# Fresh callbacks for this model: reusing the ModelCheckpoint instance from the
# first model would carry over its best-so-far val_loss and write to the same
# file, so this model's checkpoints would be judged against (and could overwrite)
# the SimpleRNN's best model.
callbacks_bidirectional = [
    EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True),
    ModelCheckpoint('best_bidirectional_model.keras', save_best_only=True,
                    monitor='val_loss', mode='min'),
]

history2 = model2.fit(X_train, y_train,
                      epochs=30,
                      batch_size=128,
                      validation_data=(X_val, y_val),
                      callbacks=callbacks_bidirectional)

In [None]:
import matplotlib.pyplot as plt
plt.style.use('ggplot')

# Per-epoch metrics recorded in history2 by model2.fit
acc = history2.history['acc']
val_acc = history2.history['val_acc']
loss = history2.history['loss']
val_loss = history2.history['val_loss']
epochs = range(1, len(acc) + 1)

# First two colors of the 'tab10' colormap: training vs. validation
colors = plt.cm.tab10(range(2))

# One figure, two side-by-side panels: accuracy (left) and loss (right)
fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(11, 4))

# Accuracy panel (title spelling corrected: "Bidirectional")
ax_acc.plot(epochs, acc, linestyle='-', color=colors[0], label='Training Accuracy')
ax_acc.plot(epochs, val_acc, linestyle='-', color=colors[1], label='Validation Accuracy')
ax_acc.set_title('Bidirectional RNN: Training and Validation Accuracy', fontsize=15)
ax_acc.set_xlabel('Epochs', fontsize=12)
ax_acc.set_ylabel('Accuracy', fontsize=12)
ax_acc.grid(axis='y', linestyle='--')
ax_acc.legend(loc='lower right')

# Loss panel
ax_loss.plot(epochs, loss, linestyle='-', color=colors[0], label='Training Loss')
ax_loss.plot(epochs, val_loss, linestyle='-', color=colors[1], label='Validation Loss')
ax_loss.set_title('Bidirectional RNN: Training and Validation Loss', fontsize=15)
ax_loss.set_xlabel('Epochs', fontsize=12)
ax_loss.set_ylabel('Loss', fontsize=12)
ax_loss.grid(axis='y', linestyle='--')
ax_loss.legend(loc='upper right')

fig.tight_layout()
plt.show()


**Test Accuracy & Confusion Matrix**

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score, recall_score

# The sigmoid output is a probability per review; threshold at 0.5 and
# flatten to a 1-D vector of 0/1 labels for the sklearn metrics.
y_pred_probs = model2.predict(X_test)
y_pred = (y_pred_probs > 0.5).astype(int).flatten()

test_acc = accuracy_score(y_test, y_pred)
# Output strings cleaned up: no doubled colon, stray comma or double space,
# and "Bidirectional" spelled correctly.
print(f'Bidirectional RNN: Test accuracy: {test_acc:3.3f}')
print(f'Bidirectional RNN: F1 score: {f1_score(y_test, y_pred):3.3f}')

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

def plot_cm(model, y_test, y_pred, class_labels=None, title='Bidirectional RNN: Confusion Matrix'):
    """Draw the confusion matrix of `y_pred` against `y_test` as an annotated heatmap.

    Args:
        model: Unused; kept so existing positional calls keep working.
        y_test: True class labels.
        y_pred: Predicted class labels.
        class_labels: Tick labels used on both axes. Defaults to
            ["negative", "positive"].
        title: Title shown above the heatmap.
    """
    # Build the default inside the function instead of using a shared mutable default.
    if class_labels is None:
        class_labels = ["negative", "positive"]

    cm = confusion_matrix(y_test, y_pred)
    sns.heatmap(cm, annot=True, fmt="d", cmap=plt.cm.Blues, xticklabels=class_labels, yticklabels=class_labels)
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.title(title)
    plt.show()

plot_cm(model2, y_test, y_pred, title='Bidirectional RNN: Confusion Matrix')
