In [None]:
import numpy as np
from keras import backend as K
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.datasets import imdb
from keras.layers import Input, Embedding, Dense, Dropout, Conv1D, Bidirectional, GRU, MaxPooling1D
from keras.models import Model
from keras.optimizers import Adam
from keras.utils import pad_sequences
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold

# Fetch the IMDb training split, keeping only the 10,000 most frequent words.
# The second (test) split returned by load_data is discarded here.
(X, y), (_, _) = imdb.load_data(num_words=10000)

# Bring every review to exactly 200 tokens.
X = pad_sequences(X, maxlen=200)

# Turn the label vector into an (n_samples, 1) column (2-D, not 1-D).
y = y[:, np.newaxis]

# Shuffled, stratified 5-fold splitter with a fixed seed.
kf = StratifiedKFold(random_state=42, shuffle=True, n_splits=5)

# One validation accuracy per fold gets collected here.
val_accuracies = list()

def build_model(sequence_length, vocab_size=10000):
    """Build the Conv1D + stacked BiGRU sentiment classifier.

    Args:
        sequence_length: Number of tokens in each padded input sequence.
        vocab_size: Size of the embedding vocabulary; must match the
            ``num_words`` value used when the dataset was loaded.

    Returns:
        A ``(classifier, feature_extractor)`` pair. ``classifier`` is the
        compiled end-to-end model emitting one sigmoid probability per review.
        ``feature_extractor`` shares the same layers and weights but stops at
        the output of the second Bidirectional GRU.
    """
    input_layer = Input(shape=(sequence_length,))
    embedding_layer = Embedding(input_dim=vocab_size, output_dim=128)(input_layer)

    # Conv1D layer followed by temporal down-sampling
    conv1d_layer = Conv1D(64, kernel_size=5, activation='relu')(embedding_layer)
    maxpool_layer = MaxPooling1D(pool_size=4)(conv1d_layer)

    # First Bidirectional GRU layer with dropout (keeps the full sequence so
    # the second recurrent layer has something to iterate over)
    bi_gru_layer1 = Bidirectional(GRU(64, return_sequences=True))(maxpool_layer)
    dropout_layer1 = Dropout(0.5)(bi_gru_layer1)

    # Second Bidirectional GRU layer with dropout. It returns only its final
    # state, so the Dense head below yields ONE probability per review instead
    # of one per time step.
    bi_gru_layer2 = Bidirectional(GRU(64))(dropout_layer1)
    dropout_layer2 = Dropout(0.5)(bi_gru_layer2)

    # Output layer for binary classification
    output_layer = Dense(1, activation='sigmoid')(dropout_layer2)

    classifier = Model(inputs=input_layer, outputs=output_layer)
    classifier.compile(loss='binary_crossentropy', optimizer=Adam(), metrics=['accuracy'])

    # Second view on the same graph: exposes the BiGRU representation so it
    # can be fed to a downstream classifier.
    feature_extractor = Model(inputs=input_layer, outputs=bi_gru_layer2)
    return classifier, feature_extractor


for train_index, val_index in kf.split(X, y):
    # Drop Keras state left over from the previous fold's model so memory use
    # does not grow with every fold.
    K.clear_session()

    X_train, X_val = X[train_index], X[val_index]
    y_train, y_val = y[train_index], y[val_index]

    model, feature_extractor = build_model(sequence_length=X.shape[1])

    # Define early stopping and learning rate scheduling callbacks
    early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_lr=1e-6)

    # Train the model with early stopping and learning rate scheduling.
    # NOTE: the fold that drives early stopping is the same fold scored below,
    # so the reported accuracy is mildly optimistic.
    model.fit(X_train, y_train, epochs=20, batch_size=64, validation_data=(X_val, y_val), callbacks=[early_stopping, reduce_lr])

    # Extract features from the second Bidirectional GRU layer (not from the
    # sigmoid output of the full model)
    bi_gru_features = feature_extractor.predict(X_train)

    # Flatten the features to 2D (a no-op when they already are 2D)
    bi_gru_features_2d = bi_gru_features.reshape(len(bi_gru_features), -1)

    # Create and fit the logistic regression model; scikit-learn expects a
    # 1-D target, so the labels are flattened whatever their shape.
    logistic_regression = LogisticRegression(max_iter=1000)
    logistic_regression.fit(bi_gru_features_2d, y_train.ravel())

    # Extract validation features and make predictions using logistic regression
    val_bi_gru_features = feature_extractor.predict(X_val)
    val_bi_gru_features_2d = val_bi_gru_features.reshape(len(val_bi_gru_features), -1)
    y_val_pred = logistic_regression.predict(val_bi_gru_features_2d)

    # Calculate accuracy
    accuracy_lr = accuracy_score(y_val.ravel(), y_val_pred)
    val_accuracies.append(accuracy_lr)

# Calculate and print the mean validation accuracy
mean_val_accuracy = np.mean(val_accuracies)
print(f"Mean Validation Accuracy: {mean_val_accuracy:.4f}")

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20


  y = column_or_1d(y, warn=True)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20


  y = column_or_1d(y, warn=True)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20


  y = column_or_1d(y, warn=True)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20


  y = column_or_1d(y, warn=True)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20


  y = column_or_1d(y, warn=True)


Mean Validation Accuracy: 0.8885


In [None]:
# Evaluate on the official IMDb test split rather than on the last fold's
# validation data, which was also used for early stopping during training.
# The preprocessing (vocabulary size, padded length, column-shaped labels)
# must stay in sync with the training cell.
(_, _), (X_test, y_test) = imdb.load_data(num_words=10000)
X_test = pad_sequences(X_test, maxlen=200)
y_test = y_test.reshape(-1, 1)

# NOTE: `model` is the network trained in the final cross-validation fold.
print("Model Performance on test set")
result = model.evaluate(X_test, y_test)
print(dict(zip(model.metrics_names, result)))

Model Performance on test set
{'loss': 0.2774365544319153, 'accuracy': 0.8858940601348877}
