In [8]:
import matplotlib.pyplot as plt
import numpy as np
import spacy
import tensorflow as tf
from sklearn.datasets import fetch_20newsgroups
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import Embedding, GRU, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer


In [9]:
# Load spaCy's small English pipeline once at module level; preprocess_data reuses it.
nlp = spacy.load("en_core_web_sm")

def preprocess_data(data, show_samples=5):
    """Clean raw documents with spaCy and return them as space-joined token strings.

    Every document is streamed through ``nlp.pipe`` with the ``ner``, ``parser``
    and ``attribute_ruler`` components disabled. Tokens flagged as stop words or
    as non-alphabetic are dropped; the surviving tokens are lower-cased and
    joined with single spaces.

    Args:
        data: Indexable sequence of raw text strings. It is indexed by position
            to echo the original text of the sample documents.
        show_samples: Number of leading documents for which the original text
            (first 200 characters) and the cleaned text are printed.

    Returns:
        list: One cleaned string per input document, in input order.
    """
    cleaned_texts = []
    parsed_docs = nlp.pipe(data, disable=["ner", "parser", "attribute_ruler"], batch_size=100)

    for position, parsed in enumerate(parsed_docs):
        kept_words = []
        for tok in parsed:
            if tok.is_alpha and not tok.is_stop:
                kept_words.append(tok.text.lower())
        cleaned = " ".join(kept_words)
        cleaned_texts.append(cleaned)

        # Echo the first few documents so the effect of the cleaning is visible.
        if position < show_samples:
            print(f"Original Text: {data[position][:200]}")
            print(f"Preprocessed Text: {cleaned}")
            print("-" * 50)

    return cleaned_texts


In [10]:
# Size limits for the experiment, gathered in one place so they are easy to tune.
MAX_DOCUMENTS = 5000       # number of newsgroup posts kept from the full dataset
VOCAB_SIZE = 20000         # Tokenizer num_words; must match the Embedding input_dim (20000) in train_and_evaluate
MAX_SEQUENCE_LENGTH = 100  # maxlen handed to pad_sequences

# Loading dataset
newsgroups = fetch_20newsgroups(subset='all', remove=('headers', 'footers', 'quotes'))
X = newsgroups.data[:MAX_DOCUMENTS]
y = newsgroups.target[:MAX_DOCUMENTS]

# Preprocessing the text data
print("Preprocessing the dataset...")
X_processed = preprocess_data(X, show_samples=3)

# Tokenizing and padding sequences
# (Tokenizer and pad_sequences are imported in the notebook's import cell.)
tokenizer = Tokenizer(num_words=VOCAB_SIZE)
tokenizer.fit_on_texts(X_processed)
X_sequences = tokenizer.texts_to_sequences(X_processed)
X_pad = pad_sequences(X_sequences, maxlen=MAX_SEQUENCE_LENGTH)

# Training and testing split
X_train, X_test, y_train, y_test = train_test_split(X_pad, y, test_size=0.2, random_state=42)

# Encoding target labels.
# The encoder is fit on the full label set rather than on y_train alone: a class
# that happens to land only in the test split would otherwise make
# transform(y_test) raise on an unseen label.
label_encoder = LabelEncoder()
label_encoder.fit(y)
y_train = label_encoder.transform(y_train)
y_test = label_encoder.transform(y_test)

# One output unit per class known to the encoder.
num_classes = len(label_encoder.classes_)
print(f"Dataset ready. X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")


In [11]:
# Hyperparameter presets for the shallow, medium and deep GRU stacks.
# Note that the presets differ in more than depth: width, dropout, learning
# rate and epoch count change together with the number of GRU layers.
shallow_config = dict(
    gru_layers=1,
    hidden_units=16,
    dropout_rate=0.2,
    learning_rate=0.01,
    epochs=5,
)
medium_config = dict(
    gru_layers=2,
    hidden_units=32,
    dropout_rate=0.3,
    learning_rate=0.01,
    epochs=10,
)
deep_config = dict(
    gru_layers=3,
    hidden_units=64,
    dropout_rate=0.4,
    learning_rate=0.001,
    epochs=15,
)

# Order matters only for the order in which the presets are trained and reported.
all_configs = [shallow_config, medium_config, deep_config]


In [13]:
def _build_activation_layer(activation_func):
    """Return a fresh Keras layer for the named activation, or None for an unrecognised name."""
    if activation_func == 'ReLU':
        return tf.keras.layers.ReLU()
    if activation_func == 'Leaky ReLU':
        return tf.keras.layers.LeakyReLU(alpha=0.01)
    if activation_func == 'Swish':
        return tf.keras.layers.Activation('swish')
    return None


def train_and_evaluate(config, activation_func):
    """Build, train and score one GRU classifier.

    The network is an Embedding layer followed by ``config['gru_layers']``
    repetitions of GRU -> optional activation layer -> Dropout, and a softmax
    Dense layer with ``num_classes`` units. Training and evaluation use the
    module-level ``X_train``, ``y_train``, ``X_test`` and ``y_test``; the test
    split is also passed to ``fit`` as validation data.

    Args:
        config: Mapping with the keys 'gru_layers', 'hidden_units',
            'dropout_rate', 'learning_rate' and 'epochs'.
        activation_func: 'ReLU', 'Leaky ReLU' or 'Swish'. Any other value adds
            no activation layer after the GRU layers.

    Returns:
        tuple: ``(history, test_loss, test_accuracy, model)``.
    """
    depth = config['gru_layers']

    network = Sequential()
    network.add(Embedding(input_dim=20000, output_dim=128))

    # Stack the recurrent blocks; every GRU except the last one must emit the
    # full sequence so that the next GRU has a sequence to consume.
    for layer_idx in range(depth):
        is_last_gru = layer_idx == depth - 1
        network.add(GRU(config['hidden_units'], return_sequences=not is_last_gru))
        extra_activation = _build_activation_layer(activation_func)
        if extra_activation is not None:
            network.add(extra_activation)
        network.add(Dropout(config['dropout_rate']))

    # Classification head
    network.add(Dense(num_classes, activation='softmax'))

    network.compile(
        optimizer=Adam(learning_rate=config['learning_rate']),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )

    # Fit silently, then score on the held-out split
    fit_history = network.fit(
        X_train,
        y_train,
        epochs=config['epochs'],
        batch_size=128,
        validation_data=(X_test, y_test),
        verbose=0,
    )
    test_loss, test_accuracy = network.evaluate(X_test, y_test, verbose=0)

    return fit_history, test_loss, test_accuracy, network


In [14]:
def display_predictions(model, X_test, y_test, original_texts, label_encoder, num_samples=5):
    """Display actual vs. predicted categories for a subset of the test set.

    Args:
        model: Object whose ``predict(X_test)`` returns one row of per-class
            scores per sample; the predicted label is the argmax of each row.
        X_test: Model inputs passed to ``model.predict`` unchanged.
        y_test: Encoded true labels, aligned row-for-row with ``X_test``.
        original_texts: Raw texts aligned row-for-row with ``X_test``; the first
            200 characters of each displayed text are printed.
        label_encoder: Object whose ``inverse_transform`` maps encoded labels
            back to the values shown as categories.
        num_samples: Maximum number of rows to print. The count is clamped to
            the shortest of the inputs, so asking for more rows than exist
            prints what is available instead of raising ``IndexError``.

    Returns:
        None. The samples are written to standard output.
    """
    predictions = model.predict(X_test)
    predicted_labels = np.argmax(predictions, axis=1)

    # Never index past the end of any of the aligned inputs.
    n_show = max(0, min(num_samples, len(predicted_labels), len(y_test), len(original_texts)))

    print("\nSample Predictions:")
    if n_show == 0:
        return

    # Decode every displayed label in two batched calls instead of two calls per row.
    actual_categories = label_encoder.inverse_transform(y_test[:n_show])
    predicted_categories = label_encoder.inverse_transform(predicted_labels[:n_show])

    for row in range(n_show):
        print(f"Text: {original_texts[row][:200]}")  # Show the first 200 characters of the text
        print(f"Actual Category: {actual_categories[row]}")
        print(f"Predicted Category: {predicted_categories[row]}")
        print("-" * 50)


In [15]:
def plot_accuracy_loss(history, title):
    """Draw side-by-side training/validation curves for accuracy and loss.

    Args:
        history: Object with a ``history`` mapping that contains the series
            'accuracy', 'val_accuracy', 'loss' and 'val_loss'.
        title: Prefix used in both panel titles.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    # (train key, validation key, train legend, validation legend, metric name)
    panel_specs = (
        ('accuracy', 'val_accuracy', 'Train Accuracy', 'Validation Accuracy', 'Accuracy'),
        ('loss', 'val_loss', 'Train Loss', 'Validation Loss', 'Loss'),
    )

    fig, axes = plt.subplots(1, 2, figsize=(12, 6))

    # Left panel: accuracy, right panel: loss
    for ax, (train_key, val_key, train_label, val_label, metric_name) in zip(axes, panel_specs):
        ax.plot(history.history[train_key], label=train_label)
        ax.plot(history.history[val_key], label=val_label)
        ax.set_title(f'{title} - {metric_name}')
        ax.set_xlabel('Epochs')
        ax.set_ylabel(metric_name)
        ax.legend()

    fig.tight_layout()
    plt.show()


In [16]:
# Recover the raw text that belongs to each test row.
# train_test_split shuffles before splitting, so X[:len(X_test)] is NOT the test
# text. Replaying the split on row indices with the same sample count, test_size
# and random_state as the split that produced X_test yields the matching rows.
_, test_indices = train_test_split(np.arange(len(X)), test_size=0.2, random_state=42)
assert np.array_equal(
    np.asarray(y)[test_indices], label_encoder.inverse_transform(y_test)
), "Replayed split does not line up with y_test; keep its arguments in sync with the original split."
X_test_texts = [X[idx] for idx in test_indices]

# For each activation, train every configuration and keep the one with the
# highest test accuracy.
# NOTE: the same test split is used for validation curves and for picking the
# best configuration, so the reported best accuracy is optimistically biased.
for activation_func in ['ReLU', 'Leaky ReLU', 'Swish']:
    print(f"\nTraining models with {activation_func} activation...\n")
    best_accuracy = 0
    best_model = None
    best_history = None

    for config in all_configs:
        print(f"Training with configuration: {config}")
        history, loss, accuracy, model = train_and_evaluate(config, activation_func)
        print(f"Config: {config}, Test Accuracy: {accuracy:.4f}, Test Loss: {loss:.4f}")

        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_model = model
            best_history = history

    # Plotting accuracy and loss for the best configuration
    if best_history is not None:
        plot_accuracy_loss(best_history, f"Best Model - {activation_func}")

    # Displaying predictions for the best model, using the texts of the test rows
    if best_model is not None:
        display_predictions(best_model, X_test, y_test, X_test_texts, label_encoder, num_samples=5)

    print(f"Best Test Accuracy for {activation_func}: {best_accuracy:.4f}")
