In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score

tf.random.set_seed(1234)
np.random.seed(1234)
# -------------------------------
# Original MLP Class Definition
# -------------------------------
class MLP:
    """Multi-layer perceptron with two ReLU hidden layers and a logits output.

    The parameters are plain ``tf.Variable`` objects, gathered in
    ``self.variables`` so they can be differentiated and updated together.
    """

    def __init__(self, size_input, size_hidden1, size_hidden2, size_hidden3, size_output, device=None):
        """
        Record the layer sizes and create the trainable parameters.

        size_input: int, size of input layer
        size_hidden1: int, size of the 1st hidden layer
        size_hidden2: int, size of the 2nd hidden layer
        size_hidden3: int, size of the 3rd hidden layer (stored only; no
            parameters are created for it and compute_output ignores it)
        size_output: int, size of output layer
        device: str or None, either 'cpu' or 'gpu' or None.
        """
        self.size_input = size_input
        self.size_hidden1 = size_hidden1
        self.size_hidden2 = size_hidden2
        self.size_hidden3 = size_hidden3  # kept for interface compatibility
        self.size_output = size_output
        self.device = device

        # (fan_in, fan_out) of each dense layer, in forward order.
        layer_dims = [
            (self.size_input, self.size_hidden1),
            (self.size_hidden1, self.size_hidden2),
            (self.size_hidden2, self.size_output),
        ]
        weights, biases = [], []
        for fan_in, fan_out in layer_dims:
            # Weights: normal draws with stddev 0.1; biases: zeros of shape (1, fan_out).
            weights.append(tf.Variable(tf.random.normal([fan_in, fan_out], stddev=0.1)))
            biases.append(tf.Variable(tf.zeros([1, fan_out])))

        self.W1, self.W2, self.W3 = weights
        self.b1, self.b2, self.b3 = biases

        # Variables updated during backpropagation (all weights, then all biases).
        self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]

    def forward(self, X):
        """
        Run the forward pass and cache the result on ``self.y``.

        X: Tensor, inputs.
        Returns the output of compute_output(X).
        """
        if self.device is None:
            # No explicit placement requested.
            self.y = self.compute_output(X)
            return self.y

        # Any value other than 'gpu' falls back to the CPU.
        placement = 'gpu:0' if self.device == 'gpu' else 'cpu'
        with tf.device(placement):
            self.y = self.compute_output(X)
        return self.y

    def loss(self, y_pred, y_true):
        """
        Categorical cross-entropy between logits and targets.

        y_pred: Tensor of shape (batch_size, size_output), treated as logits
        y_true: Tensor of shape (batch_size, size_output)
        """
        targets = tf.cast(y_true, dtype=tf.float32)
        logits = tf.cast(y_pred, dtype=tf.float32)
        criterion = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
        return criterion(targets, logits)

    def backward(self, X_train, y_train):
        """
        Return the gradients of the loss with respect to ``self.variables``.

        The forward pass and the loss are recorded on a gradient tape; the
        gradients come back in the same order as ``self.variables``.
        """
        with tf.GradientTape() as tape:
            current_loss = self.loss(self.forward(X_train), y_train)
        return tape.gradient(current_loss, self.variables)

    def compute_output(self, X):
        """
        Compute the output logits for X (cast to float32 first).
        """
        activations = tf.cast(X, dtype=tf.float32)
        # Two hidden layers: affine transform followed by ReLU.
        for weight, bias in ((self.W1, self.b1), (self.W2, self.b2)):
            activations = tf.nn.relu(tf.matmul(activations, weight) + bias)
        # Output layer: affine transform only, so the result is raw logits.
        return tf.matmul(activations, self.W3) + self.b3

# -------------------------------
# Character-Level Tokenizer and Preprocessing Functions
# -------------------------------
def char_level_tokenizer(texts, num_words=None):
    """
    Build a lower-casing, character-level Keras Tokenizer and fit it.

    Args:
        texts (list of str): Texts the tokenizer is fitted on.
        num_words (int or None): Passed through as the tokenizer's
            ``num_words`` limit.

    Returns:
        The Tokenizer instance after ``fit_on_texts(texts)``.
    """
    fitted = tf.keras.preprocessing.text.Tokenizer(
        char_level=True,
        lower=True,
        num_words=num_words,
    )
    fitted.fit_on_texts(texts)
    return fitted

def texts_to_bow(tokenizer, texts):
    """
    Vectorize texts as a bag of characters.

    Args:
        tokenizer: A fitted character-level Tokenizer.
        texts (list of str): Texts to vectorize.

    Returns:
        Whatever ``tokenizer.texts_to_matrix`` produces in 'binary' mode:
        one fixed-length row per text.
    """
    return tokenizer.texts_to_matrix(texts, mode='binary')

def one_hot_encode(labels, num_classes=2):
    """
    Map integer class labels to one-hot rows.

    Each label selects the matching row of a ``num_classes`` x ``num_classes``
    identity matrix.
    """
    identity = np.eye(num_classes)
    return identity[labels]

# -------------------------------
# Load and Prepare the IMDB Dataset
# -------------------------------
print("Loading IMDB dataset...")
# 'as_supervised' yields (text, label) pairs; 'with_info' also returns metadata.
(ds_train, ds_test), ds_info = tfds.load(
    'imdb_reviews',
    split=['train', 'test'],
    as_supervised=True,
    with_info=True,
)

# Materialise the training split as Python lists, decoding each byte string.
train_texts, train_labels = [], []
for text, label in tfds.as_numpy(ds_train):
    train_labels.append(label)
    train_texts.append(text.decode('utf-8'))
train_labels = np.asarray(train_labels)

# Hold out 20% of the training data for validation (fixed split seed).
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_texts,
    train_labels,
    test_size=0.2,
    random_state=42,
)

# Same conversion for the test split.
test_texts, test_labels = [], []
for text, label in tfds.as_numpy(ds_test):
    test_labels.append(label)
    test_texts.append(text.decode('utf-8'))
test_labels = np.asarray(test_labels)

print(f"Train samples: {len(train_texts)}, Validation samples: {len(val_texts)}, Test samples: {len(test_texts)}")

# -------------------------------
# Preprocessing: Tokenization and Vectorization
# -------------------------------
# The tokenizer is fitted on the training texts only.
tokenizer = char_level_tokenizer(train_texts)
print("Tokenizer vocabulary size:", len(tokenizer.word_index) + 1)

# Bag-of-characters features for every split, using the same tokenizer.
X_train, X_val, X_test = (
    texts_to_bow(tokenizer, split_texts)
    for split_texts in (train_texts, val_texts, test_texts)
)

# One-hot targets for every split.
y_train, y_val, y_test = map(one_hot_encode, (train_labels, val_labels, test_labels))

# -------------------------------
# Model Setup
# -------------------------------
# The input size is determined by the dimension of the bag-of-characters vector.
size_input = X_train.shape[1]
# Set hidden layer sizes as desired.
size_hidden1 = 128
size_hidden2 = 64
size_hidden3 = 32  # Placeholder (not used in the forward pass)
size_output  = 2

# Instantiate the MLP model.
model = MLP(size_input, size_hidden1, size_hidden2, size_hidden3, size_output, device=None)

# Define the optimizer.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

# -------------------------------
# Training Parameters and Loop
# -------------------------------
batch_size = 128
epochs = 10
num_train = X_train.shape[0]
num_batches = int(np.ceil(num_train / batch_size))

# The validation targets never change, so decode them once up front
# instead of recomputing the argmax every epoch.
true_val = np.argmax(y_val, axis=1)

print("\nStarting training...\n")
for epoch in range(epochs):
    # Shuffle training data at the start of each epoch.
    indices = np.arange(num_train)
    np.random.shuffle(indices)
    X_train = X_train[indices]
    y_train = y_train[indices]

    epoch_loss = 0
    for i in range(num_batches):
        start = i * batch_size
        end = min((i+1) * batch_size, num_train)
        X_batch = X_train[start:end]
        y_batch = y_train[start:end]

        # Record a single forward pass and reuse its loss for both the
        # gradients and the logged value. Calling model.forward/model.loss
        # and then model.backward would run the forward pass twice per batch.
        with tf.GradientTape() as tape:
            predictions = model.forward(X_batch)
            loss_value = model.loss(predictions, y_batch)
        grads = tape.gradient(loss_value, model.variables)
        optimizer.apply_gradients(zip(grads, model.variables))

        # Weight by batch size so a smaller final batch is averaged correctly.
        epoch_loss += loss_value.numpy() * (end - start)

    epoch_loss /= num_train

    # Evaluate on validation set.
    val_logits = model.forward(X_val)
    val_loss = model.loss(val_logits, y_val).numpy()
    val_preds = np.argmax(val_logits.numpy(), axis=1)
    accuracy = np.mean(val_preds == true_val)
    precision = precision_score(true_val, val_preds)
    recall = recall_score(true_val, val_preds)

    print(f"Epoch {epoch+1:02d} | Training Loss: {epoch_loss:.4f} | Val Loss: {val_loss:.4f} | "
          f"Accuracy: {accuracy:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f}")

# -------------------------------
# Final Evaluation on Test Set
# -------------------------------
print("\nEvaluating on test set...")
test_logits = model.forward(X_test)
test_loss = model.loss(test_logits, y_test).numpy()

# Reduce logits and one-hot targets to class indices before scoring.
test_preds = test_logits.numpy().argmax(axis=1)
true_test = y_test.argmax(axis=1)

test_accuracy = (test_preds == true_test).mean()
test_precision = precision_score(true_test, test_preds)
test_recall = recall_score(true_test, test_preds)

print(f"Test Loss: {test_loss:.4f} | Test Accuracy: {test_accuracy:.4f} | "
      f"Test Precision: {test_precision:.4f} | Test Recall: {test_recall:.4f}")


Loading IMDB dataset...
Downloading and preparing dataset 80.23 MiB (download: 80.23 MiB, generated: Unknown size, total: 80.23 MiB) to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Generating splits...:   0%|          | 0/3 [00:00<?, ? splits/s]

Generating train examples...:   0%|          | 0/25000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/incomplete.6JTSFB_1.0.0/imdb_reviews-train.tfrecor…

Generating test examples...:   0%|          | 0/25000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/incomplete.6JTSFB_1.0.0/imdb_reviews-test.tfrecord…

Generating unsupervised examples...:   0%|          | 0/50000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/incomplete.6JTSFB_1.0.0/imdb_reviews-unsupervised.…

Dataset imdb_reviews downloaded and prepared to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0. Subsequent calls will reuse this data.
Train samples: 20000, Validation samples: 5000, Test samples: 25000
Tokenizer vocabulary size: 134

Starting training...

Epoch 01 | Training Loss: 0.6715 | Val Loss: 0.6636 | Accuracy: 0.6058 | Precision: 0.5822 | Recall: 0.6621
Epoch 02 | Training Loss: 0.6625 | Val Loss: 0.6634 | Accuracy: 0.6056 | Precision: 0.5792 | Recall: 0.6819
Epoch 03 | Training Loss: 0.6604 | Val Loss: 0.6619 | Accuracy: 0.6032 | Precision: 0.5742 | Recall: 0.7021
Epoch 04 | Training Loss: 0.6580 | Val Loss: 0.6625 | Accuracy: 0.6030 | Precision: 0.5717 | Recall: 0.7224
Epoch 05 | Training Loss: 0.6582 | Val Loss: 0.6606 | Accuracy: 0.6084 | Precision: 0.5822 | Recall: 0.6811
Epoch 06 | Training Loss: 0.6546 | Val Loss: 0.6683 | Accuracy: 0.5986 | Precision: 0.5630 | Recall: 0.7690
Epoch 07 | Training Loss: 0.6536 | Val Loss: 0.6748 | Accuracy: 0.5810 | Precision: 0.

In [None]:
# Seed: 655
# Learning rate 0.0001, tanh activation function, Adam optimizer

In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score

tf.random.set_seed(655)
np.random.seed(655)
# -------------------------------
# Original MLP Class Definition
# -------------------------------
class MLP:
    """Multi-layer perceptron with two tanh hidden layers and a logits output.

    The parameters are plain ``tf.Variable`` objects, gathered in
    ``self.variables`` so they can be differentiated and updated together.
    """

    def __init__(self, size_input, size_hidden1, size_hidden2, size_hidden3, size_output, device=None):
        """
        Record the layer sizes and create the trainable parameters.

        size_input: int, size of input layer
        size_hidden1: int, size of the 1st hidden layer
        size_hidden2: int, size of the 2nd hidden layer
        size_hidden3: int, size of the 3rd hidden layer (stored only; no
            parameters are created for it and compute_output ignores it)
        size_output: int, size of output layer
        device: str or None, either 'cpu' or 'gpu' or None.
        """
        self.size_input = size_input
        self.size_hidden1 = size_hidden1
        self.size_hidden2 = size_hidden2
        self.size_hidden3 = size_hidden3  # kept for interface compatibility
        self.size_output = size_output
        self.device = device

        # (fan_in, fan_out) of each dense layer, in forward order.
        layer_dims = [
            (self.size_input, self.size_hidden1),
            (self.size_hidden1, self.size_hidden2),
            (self.size_hidden2, self.size_output),
        ]
        weights, biases = [], []
        for fan_in, fan_out in layer_dims:
            # Weights: normal draws with stddev 0.1; biases: zeros of shape (1, fan_out).
            weights.append(tf.Variable(tf.random.normal([fan_in, fan_out], stddev=0.1)))
            biases.append(tf.Variable(tf.zeros([1, fan_out])))

        self.W1, self.W2, self.W3 = weights
        self.b1, self.b2, self.b3 = biases

        # Variables updated during backpropagation (all weights, then all biases).
        self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]

    def forward(self, X):
        """
        Run the forward pass and cache the result on ``self.y``.

        X: Tensor, inputs.
        Returns the output of compute_output(X).
        """
        if self.device is None:
            # No explicit placement requested.
            self.y = self.compute_output(X)
            return self.y

        # Any value other than 'gpu' falls back to the CPU.
        placement = 'gpu:0' if self.device == 'gpu' else 'cpu'
        with tf.device(placement):
            self.y = self.compute_output(X)
        return self.y

    def loss(self, y_pred, y_true):
        """
        Categorical cross-entropy between logits and targets.

        y_pred: Tensor of shape (batch_size, size_output), treated as logits
        y_true: Tensor of shape (batch_size, size_output)
        """
        targets = tf.cast(y_true, dtype=tf.float32)
        logits = tf.cast(y_pred, dtype=tf.float32)
        criterion = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
        return criterion(targets, logits)

    def backward(self, X_train, y_train):
        """
        Return the gradients of the loss with respect to ``self.variables``.

        The forward pass and the loss are recorded on a gradient tape; the
        gradients come back in the same order as ``self.variables``.
        """
        with tf.GradientTape() as tape:
            current_loss = self.loss(self.forward(X_train), y_train)
        return tape.gradient(current_loss, self.variables)

    def compute_output(self, X):
        """
        Compute the output logits for X (cast to float32 first).
        """
        activations = tf.cast(X, dtype=tf.float32)
        # Two hidden layers: affine transform followed by tanh.
        for weight, bias in ((self.W1, self.b1), (self.W2, self.b2)):
            activations = tf.nn.tanh(tf.matmul(activations, weight) + bias)
        # Output layer: affine transform only, so the result is raw logits.
        return tf.matmul(activations, self.W3) + self.b3

# -------------------------------
# Character-Level Tokenizer and Preprocessing Functions
# -------------------------------
def char_level_tokenizer(texts, num_words=None):
    """
    Build a lower-casing, character-level Keras Tokenizer and fit it.

    Args:
        texts (list of str): Texts the tokenizer is fitted on.
        num_words (int or None): Passed through as the tokenizer's
            ``num_words`` limit.

    Returns:
        The Tokenizer instance after ``fit_on_texts(texts)``.
    """
    fitted = tf.keras.preprocessing.text.Tokenizer(
        char_level=True,
        lower=True,
        num_words=num_words,
    )
    fitted.fit_on_texts(texts)
    return fitted

def texts_to_bow(tokenizer, texts):
    """
    Vectorize texts as a bag of characters.

    Args:
        tokenizer: A fitted character-level Tokenizer.
        texts (list of str): Texts to vectorize.

    Returns:
        Whatever ``tokenizer.texts_to_matrix`` produces in 'binary' mode:
        one fixed-length row per text.
    """
    return tokenizer.texts_to_matrix(texts, mode='binary')

def one_hot_encode(labels, num_classes=2):
    """
    Map integer class labels to one-hot rows.

    Each label selects the matching row of a ``num_classes`` x ``num_classes``
    identity matrix.
    """
    identity = np.eye(num_classes)
    return identity[labels]

# -------------------------------
# Load and Prepare the IMDB Dataset
# -------------------------------
print("Loading IMDB dataset...")
# 'as_supervised' yields (text, label) pairs; 'with_info' also returns metadata.
(ds_train, ds_test), ds_info = tfds.load(
    'imdb_reviews',
    split=['train', 'test'],
    as_supervised=True,
    with_info=True,
)

# Materialise the training split as Python lists, decoding each byte string.
train_texts, train_labels = [], []
for text, label in tfds.as_numpy(ds_train):
    train_labels.append(label)
    train_texts.append(text.decode('utf-8'))
train_labels = np.asarray(train_labels)

# Hold out 20% of the training data for validation (fixed split seed).
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_texts,
    train_labels,
    test_size=0.2,
    random_state=42,
)

# Same conversion for the test split.
test_texts, test_labels = [], []
for text, label in tfds.as_numpy(ds_test):
    test_labels.append(label)
    test_texts.append(text.decode('utf-8'))
test_labels = np.asarray(test_labels)

print(f"Train samples: {len(train_texts)}, Validation samples: {len(val_texts)}, Test samples: {len(test_texts)}")

# -------------------------------
# Preprocessing: Tokenization and Vectorization
# -------------------------------
# The tokenizer is fitted on the training texts only.
tokenizer = char_level_tokenizer(train_texts)
print("Tokenizer vocabulary size:", len(tokenizer.word_index) + 1)

# Bag-of-characters features for every split, using the same tokenizer.
X_train, X_val, X_test = (
    texts_to_bow(tokenizer, split_texts)
    for split_texts in (train_texts, val_texts, test_texts)
)

# One-hot targets for every split.
y_train, y_val, y_test = map(one_hot_encode, (train_labels, val_labels, test_labels))

# -------------------------------
# Model Setup
# -------------------------------
# The input size is determined by the dimension of the bag-of-characters vector.
size_input = X_train.shape[1]
# Set hidden layer sizes as desired.
size_hidden1 = 128
size_hidden2 = 64
size_hidden3 = 32  # Placeholder (not used in the forward pass)
size_output  = 2

# Instantiate the MLP model.
model = MLP(size_input, size_hidden1, size_hidden2, size_hidden3, size_output, device=None)

# Define the optimizer.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)

# -------------------------------
# Training Parameters and Loop
# -------------------------------
batch_size = 128
epochs = 10
num_train = X_train.shape[0]
num_batches = int(np.ceil(num_train / batch_size))

# The validation targets never change, so decode them once up front
# instead of recomputing the argmax every epoch.
true_val = np.argmax(y_val, axis=1)

print("\nStarting training...\n")
for epoch in range(epochs):
    # Shuffle training data at the start of each epoch.
    indices = np.arange(num_train)
    np.random.shuffle(indices)
    X_train = X_train[indices]
    y_train = y_train[indices]

    epoch_loss = 0
    for i in range(num_batches):
        start = i * batch_size
        end = min((i+1) * batch_size, num_train)
        X_batch = X_train[start:end]
        y_batch = y_train[start:end]

        # Record a single forward pass and reuse its loss for both the
        # gradients and the logged value. Calling model.forward/model.loss
        # and then model.backward would run the forward pass twice per batch.
        with tf.GradientTape() as tape:
            predictions = model.forward(X_batch)
            loss_value = model.loss(predictions, y_batch)
        grads = tape.gradient(loss_value, model.variables)
        optimizer.apply_gradients(zip(grads, model.variables))

        # Weight by batch size so a smaller final batch is averaged correctly.
        epoch_loss += loss_value.numpy() * (end - start)

    epoch_loss /= num_train

    # Evaluate on validation set.
    val_logits = model.forward(X_val)
    val_loss = model.loss(val_logits, y_val).numpy()
    val_preds = np.argmax(val_logits.numpy(), axis=1)
    accuracy = np.mean(val_preds == true_val)
    precision = precision_score(true_val, val_preds)
    recall = recall_score(true_val, val_preds)

    print(f"Epoch {epoch+1:02d} | Training Loss: {epoch_loss:.4f} | Val Loss: {val_loss:.4f} | "
          f"Accuracy: {accuracy:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f}")

# -------------------------------
# Final Evaluation on Test Set
# -------------------------------
print("\nEvaluating on test set...")
test_logits = model.forward(X_test)
test_loss = model.loss(test_logits, y_test).numpy()

# Reduce logits and one-hot targets to class indices before scoring.
test_preds = test_logits.numpy().argmax(axis=1)
true_test = y_test.argmax(axis=1)

test_accuracy = (test_preds == true_test).mean()
test_precision = precision_score(true_test, test_preds)
test_recall = recall_score(true_test, test_preds)

print(f"Test Loss: {test_loss:.4f} | Test Accuracy: {test_accuracy:.4f} | "
      f"Test Precision: {test_precision:.4f} | Test Recall: {test_recall:.4f}")


Loading IMDB dataset...
Train samples: 20000, Validation samples: 5000, Test samples: 25000
Tokenizer vocabulary size: 134

Starting training...

Epoch 01 | Training Loss: 0.6832 | Val Loss: 0.6752 | Accuracy: 0.5882 | Precision: 0.5774 | Recall: 0.5615
Epoch 02 | Training Loss: 0.6698 | Val Loss: 0.6681 | Accuracy: 0.6044 | Precision: 0.5820 | Recall: 0.6531
Epoch 03 | Training Loss: 0.6646 | Val Loss: 0.6655 | Accuracy: 0.6060 | Precision: 0.5795 | Recall: 0.6823
Epoch 04 | Training Loss: 0.6626 | Val Loss: 0.6640 | Accuracy: 0.6108 | Precision: 0.5876 | Recall: 0.6613
Epoch 05 | Training Loss: 0.6614 | Val Loss: 0.6634 | Accuracy: 0.6120 | Precision: 0.5880 | Recall: 0.6671
Epoch 06 | Training Loss: 0.6608 | Val Loss: 0.6652 | Accuracy: 0.6072 | Precision: 0.5750 | Recall: 0.7277
Epoch 07 | Training Loss: 0.6608 | Val Loss: 0.6625 | Accuracy: 0.6088 | Precision: 0.5901 | Recall: 0.6324
Epoch 08 | Training Loss: 0.6599 | Val Loss: 0.6620 | Accuracy: 0.6080 | Precision: 0.5897 | Recal

In [None]:
# Seed: 655
# Learning rate 0.0005, ReLU activation function, Adam optimizer, batch size = 256

In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score

tf.random.set_seed(655)
np.random.seed(655)
# -------------------------------
# Original MLP Class Definition
# -------------------------------
class MLP:
    """Multi-layer perceptron with two ReLU hidden layers and a logits output.

    The parameters are plain ``tf.Variable`` objects, gathered in
    ``self.variables`` so they can be differentiated and updated together.
    """

    def __init__(self, size_input, size_hidden1, size_hidden2, size_hidden3, size_output, device=None):
        """
        Record the layer sizes and create the trainable parameters.

        size_input: int, size of input layer
        size_hidden1: int, size of the 1st hidden layer
        size_hidden2: int, size of the 2nd hidden layer
        size_hidden3: int, size of the 3rd hidden layer (stored only; no
            parameters are created for it and compute_output ignores it)
        size_output: int, size of output layer
        device: str or None, either 'cpu' or 'gpu' or None.
        """
        self.size_input = size_input
        self.size_hidden1 = size_hidden1
        self.size_hidden2 = size_hidden2
        self.size_hidden3 = size_hidden3  # kept for interface compatibility
        self.size_output = size_output
        self.device = device

        # (fan_in, fan_out) of each dense layer, in forward order.
        layer_dims = [
            (self.size_input, self.size_hidden1),
            (self.size_hidden1, self.size_hidden2),
            (self.size_hidden2, self.size_output),
        ]
        weights, biases = [], []
        for fan_in, fan_out in layer_dims:
            # Weights: normal draws with stddev 0.1; biases: zeros of shape (1, fan_out).
            weights.append(tf.Variable(tf.random.normal([fan_in, fan_out], stddev=0.1)))
            biases.append(tf.Variable(tf.zeros([1, fan_out])))

        self.W1, self.W2, self.W3 = weights
        self.b1, self.b2, self.b3 = biases

        # Variables updated during backpropagation (all weights, then all biases).
        self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]

    def forward(self, X):
        """
        Run the forward pass and cache the result on ``self.y``.

        X: Tensor, inputs.
        Returns the output of compute_output(X).
        """
        if self.device is None:
            # No explicit placement requested.
            self.y = self.compute_output(X)
            return self.y

        # Any value other than 'gpu' falls back to the CPU.
        placement = 'gpu:0' if self.device == 'gpu' else 'cpu'
        with tf.device(placement):
            self.y = self.compute_output(X)
        return self.y

    def loss(self, y_pred, y_true):
        """
        Categorical cross-entropy between logits and targets.

        y_pred: Tensor of shape (batch_size, size_output), treated as logits
        y_true: Tensor of shape (batch_size, size_output)
        """
        targets = tf.cast(y_true, dtype=tf.float32)
        logits = tf.cast(y_pred, dtype=tf.float32)
        criterion = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
        return criterion(targets, logits)

    def backward(self, X_train, y_train):
        """
        Return the gradients of the loss with respect to ``self.variables``.

        The forward pass and the loss are recorded on a gradient tape; the
        gradients come back in the same order as ``self.variables``.
        """
        with tf.GradientTape() as tape:
            current_loss = self.loss(self.forward(X_train), y_train)
        return tape.gradient(current_loss, self.variables)

    def compute_output(self, X):
        """
        Compute the output logits for X (cast to float32 first).
        """
        activations = tf.cast(X, dtype=tf.float32)
        # Two hidden layers: affine transform followed by ReLU.
        for weight, bias in ((self.W1, self.b1), (self.W2, self.b2)):
            activations = tf.nn.relu(tf.matmul(activations, weight) + bias)
        # Output layer: affine transform only, so the result is raw logits.
        return tf.matmul(activations, self.W3) + self.b3

# -------------------------------
# Character-Level Tokenizer and Preprocessing Functions
# -------------------------------
def char_level_tokenizer(texts, num_words=None):
    """
    Build a lower-casing, character-level Keras Tokenizer and fit it.

    Args:
        texts (list of str): Texts the tokenizer is fitted on.
        num_words (int or None): Passed through as the tokenizer's
            ``num_words`` limit.

    Returns:
        The Tokenizer instance after ``fit_on_texts(texts)``.
    """
    fitted = tf.keras.preprocessing.text.Tokenizer(
        char_level=True,
        lower=True,
        num_words=num_words,
    )
    fitted.fit_on_texts(texts)
    return fitted

def texts_to_bow(tokenizer, texts):
    """
    Vectorize texts as a bag of characters.

    Args:
        tokenizer: A fitted character-level Tokenizer.
        texts (list of str): Texts to vectorize.

    Returns:
        Whatever ``tokenizer.texts_to_matrix`` produces in 'binary' mode:
        one fixed-length row per text.
    """
    return tokenizer.texts_to_matrix(texts, mode='binary')

def one_hot_encode(labels, num_classes=2):
    """
    Map integer class labels to one-hot rows.

    Each label selects the matching row of a ``num_classes`` x ``num_classes``
    identity matrix.
    """
    identity = np.eye(num_classes)
    return identity[labels]

# -------------------------------
# Load and Prepare the IMDB Dataset
# -------------------------------
print("Loading IMDB dataset...")
# 'as_supervised' yields (text, label) pairs; 'with_info' also returns metadata.
(ds_train, ds_test), ds_info = tfds.load(
    'imdb_reviews',
    split=['train', 'test'],
    as_supervised=True,
    with_info=True,
)

# Materialise the training split as Python lists, decoding each byte string.
train_texts, train_labels = [], []
for text, label in tfds.as_numpy(ds_train):
    train_labels.append(label)
    train_texts.append(text.decode('utf-8'))
train_labels = np.asarray(train_labels)

# Hold out 20% of the training data for validation (fixed split seed).
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_texts,
    train_labels,
    test_size=0.2,
    random_state=42,
)

# Same conversion for the test split.
test_texts, test_labels = [], []
for text, label in tfds.as_numpy(ds_test):
    test_labels.append(label)
    test_texts.append(text.decode('utf-8'))
test_labels = np.asarray(test_labels)

print(f"Train samples: {len(train_texts)}, Validation samples: {len(val_texts)}, Test samples: {len(test_texts)}")

# -------------------------------
# Preprocessing: Tokenization and Vectorization
# -------------------------------
# The tokenizer is fitted on the training texts only.
tokenizer = char_level_tokenizer(train_texts)
print("Tokenizer vocabulary size:", len(tokenizer.word_index) + 1)

# Bag-of-characters features for every split, using the same tokenizer.
X_train, X_val, X_test = (
    texts_to_bow(tokenizer, split_texts)
    for split_texts in (train_texts, val_texts, test_texts)
)

# One-hot targets for every split.
y_train, y_val, y_test = map(one_hot_encode, (train_labels, val_labels, test_labels))

# -------------------------------
# Model Setup
# -------------------------------
# The input size is determined by the dimension of the bag-of-characters vector.
size_input = X_train.shape[1]
# Set hidden layer sizes as desired.
size_hidden1 = 128
size_hidden2 = 64
size_hidden3 = 32  # Placeholder (not used in the forward pass)
size_output  = 2

# Instantiate the MLP model.
model = MLP(size_input, size_hidden1, size_hidden2, size_hidden3, size_output, device=None)

# Define the optimizer.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)

# -------------------------------
# Training Parameters and Loop
# -------------------------------
batch_size = 256
epochs = 10
num_train = X_train.shape[0]
num_batches = int(np.ceil(num_train / batch_size))

# The validation targets never change, so decode them once up front
# instead of recomputing the argmax every epoch.
true_val = np.argmax(y_val, axis=1)

print("\nStarting training...\n")
for epoch in range(epochs):
    # Shuffle training data at the start of each epoch.
    indices = np.arange(num_train)
    np.random.shuffle(indices)
    X_train = X_train[indices]
    y_train = y_train[indices]

    epoch_loss = 0
    for i in range(num_batches):
        start = i * batch_size
        end = min((i+1) * batch_size, num_train)
        X_batch = X_train[start:end]
        y_batch = y_train[start:end]

        # Record a single forward pass and reuse its loss for both the
        # gradients and the logged value. Calling model.forward/model.loss
        # and then model.backward would run the forward pass twice per batch.
        with tf.GradientTape() as tape:
            predictions = model.forward(X_batch)
            loss_value = model.loss(predictions, y_batch)
        grads = tape.gradient(loss_value, model.variables)
        optimizer.apply_gradients(zip(grads, model.variables))

        # Weight by batch size so a smaller final batch is averaged correctly.
        epoch_loss += loss_value.numpy() * (end - start)

    epoch_loss /= num_train

    # Evaluate on validation set.
    val_logits = model.forward(X_val)
    val_loss = model.loss(val_logits, y_val).numpy()
    val_preds = np.argmax(val_logits.numpy(), axis=1)
    accuracy = np.mean(val_preds == true_val)
    precision = precision_score(true_val, val_preds)
    recall = recall_score(true_val, val_preds)

    print(f"Epoch {epoch+1:02d} | Training Loss: {epoch_loss:.4f} | Val Loss: {val_loss:.4f} | "
          f"Accuracy: {accuracy:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f}")

# -------------------------------
# Final Evaluation on Test Set
# -------------------------------
print("\nEvaluating on test set...")
test_logits = model.forward(X_test)
test_loss = model.loss(test_logits, y_test).numpy()

# Reduce logits and one-hot targets to class indices before scoring.
test_preds = test_logits.numpy().argmax(axis=1)
true_test = y_test.argmax(axis=1)

test_accuracy = (test_preds == true_test).mean()
test_precision = precision_score(true_test, test_preds)
test_recall = recall_score(true_test, test_preds)

print(f"Test Loss: {test_loss:.4f} | Test Accuracy: {test_accuracy:.4f} | "
      f"Test Precision: {test_precision:.4f} | Test Recall: {test_recall:.4f}")


Loading IMDB dataset...
Train samples: 20000, Validation samples: 5000, Test samples: 25000
Tokenizer vocabulary size: 134

Starting training...

Epoch 01 | Training Loss: 0.6776 | Val Loss: 0.6699 | Accuracy: 0.5930 | Precision: 0.6149 | Recall: 0.4295
Epoch 02 | Training Loss: 0.6644 | Val Loss: 0.6634 | Accuracy: 0.6110 | Precision: 0.5905 | Recall: 0.6448
Epoch 03 | Training Loss: 0.6602 | Val Loss: 0.6624 | Accuracy: 0.6084 | Precision: 0.5805 | Recall: 0.6931
Epoch 04 | Training Loss: 0.6582 | Val Loss: 0.6616 | Accuracy: 0.6122 | Precision: 0.5882 | Recall: 0.6671
Epoch 05 | Training Loss: 0.6563 | Val Loss: 0.6607 | Accuracy: 0.6100 | Precision: 0.5913 | Recall: 0.6333
Epoch 06 | Training Loss: 0.6552 | Val Loss: 0.6623 | Accuracy: 0.6036 | Precision: 0.5744 | Recall: 0.7038
Epoch 07 | Training Loss: 0.6542 | Val Loss: 0.6607 | Accuracy: 0.6108 | Precision: 0.5935 | Recall: 0.6258
Epoch 08 | Training Loss: 0.6516 | Val Loss: 0.6599 | Accuracy: 0.6082 | Precision: 0.5890 | Recal

In [None]:
# Seed number: 655.
# Learning rate of 0.0001, ReLU activation function, Adam optimizer, batch size = 256.
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score

tf.random.set_seed(655)
np.random.seed(655)
# -------------------------------
# Original MLP Class Definition
# -------------------------------
class MLP(object):
    """Feed-forward network with two ReLU hidden layers and a linear (logit) output layer.

    All parameters are plain ``tf.Variable`` objects collected in
    ``self.variables`` so that an external optimizer can update them.
    """

    def __init__(self, size_input, size_hidden1, size_hidden2, size_hidden3, size_output, device=None):
        """
        Record the layer sizes and create the trainable parameters.

        size_input: int, number of input features
        size_hidden1: int, width of the 1st hidden layer
        size_hidden2: int, width of the 2nd hidden layer
        size_hidden3: int, only stored on the instance; no layer is built from it
        size_output: int, number of output units (logits)
        device: str or None. 'gpu' runs the forward pass under 'gpu:0', any other
            non-None value runs it under 'cpu', None applies no device scope.
        """
        self.size_input = size_input
        self.size_hidden1 = size_hidden1
        self.size_hidden2 = size_hidden2
        self.size_hidden3 = size_hidden3  # kept for interface compatibility; unused below
        self.size_output = size_output
        self.device = device

        # Layer 1: input -> hidden1 (weights ~ N(0, 0.1^2), biases start at zero)
        self.W1 = tf.Variable(
            tf.random.normal([size_input, size_hidden1], stddev=0.1))
        self.b1 = tf.Variable(tf.zeros([1, size_hidden1]))

        # Layer 2: hidden1 -> hidden2
        self.W2 = tf.Variable(
            tf.random.normal([size_hidden1, size_hidden2], stddev=0.1))
        self.b2 = tf.Variable(tf.zeros([1, size_hidden2]))

        # Layer 3: hidden2 -> output logits
        self.W3 = tf.Variable(
            tf.random.normal([size_hidden2, size_output], stddev=0.1))
        self.b3 = tf.Variable(tf.zeros([1, size_output]))

        # Everything the optimizer is allowed to update (weights first, then biases).
        self.variables = [
            self.W1, self.W2, self.W3,
            self.b1, self.b2, self.b3,
        ]

    def forward(self, X):
        """
        Run the network on X and return the logits.

        The result is also cached on ``self.y``.
        X: Tensor (or array-like), inputs.
        """
        if self.device is None:
            self.y = self.compute_output(X)
            return self.y

        device_name = 'gpu:0' if self.device == 'gpu' else 'cpu'
        with tf.device(device_name):
            self.y = self.compute_output(X)
        return self.y

    def loss(self, y_pred, y_true):
        """
        Categorical cross-entropy between logits and one-hot targets.

        y_pred: Tensor of shape (batch_size, size_output), raw logits
        y_true: Tensor of shape (batch_size, size_output)
        """
        targets = tf.cast(y_true, dtype=tf.float32)
        logits = tf.cast(y_pred, dtype=tf.float32)
        # from_logits=True: the softmax is applied inside the loss, not by the network.
        criterion = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
        return criterion(targets, logits)

    def backward(self, X_train, y_train):
        """
        Return the gradients of the loss w.r.t. ``self.variables`` for one batch.

        The forward pass is re-run under a gradient tape; no parameters are updated here.
        """
        with tf.GradientTape() as tape:
            current_loss = self.loss(self.forward(X_train), y_train)
        return tape.gradient(current_loss, self.variables)

    def compute_output(self, X):
        """
        Compute the logits for X: two affine+ReLU layers followed by an affine layer.
        """
        inputs = tf.cast(X, dtype=tf.float32)
        # Hidden layer 1
        act1 = tf.nn.relu(tf.matmul(inputs, self.W1) + self.b1)
        # Hidden layer 2
        act2 = tf.nn.relu(tf.matmul(act1, self.W2) + self.b2)
        # Output layer: no activation, these are logits
        logits = tf.matmul(act2, self.W3) + self.b3
        return logits

# -------------------------------
# Character-Level Tokenizer and Preprocessing Functions
# -------------------------------
def char_level_tokenizer(texts, num_words=None):
    """
    Build a lower-casing, character-level Keras Tokenizer and fit it on `texts`.

    Args:
        texts (list of str): Texts used to build the character vocabulary.
        num_words (int or None): Forwarded to the Tokenizer as its `num_words` limit.

    Returns:
        The fitted Tokenizer instance.
    """
    char_tokenizer = tf.keras.preprocessing.text.Tokenizer(
        num_words=num_words,
        char_level=True,
        lower=True,
    )
    char_tokenizer.fit_on_texts(texts)
    return char_tokenizer

def texts_to_bow(tokenizer, texts):
    """
    Encode each text as a binary bag-of-characters vector.

    Args:
        tokenizer: A fitted tokenizer exposing `texts_to_matrix`.
        texts (list of str): Texts to encode.

    Returns:
        Whatever `tokenizer.texts_to_matrix(texts, mode='binary')` returns:
        one fixed-length presence/absence row per text.
    """
    # 'binary' mode marks presence/absence of each token rather than its count.
    return tokenizer.texts_to_matrix(texts, mode='binary')

def one_hot_encode(labels, num_classes=2):
    """
    Map integer class labels to one-hot rows.

    Row `i` of the result is row `labels[i]` of a `num_classes` x `num_classes`
    identity matrix.
    """
    identity_rows = np.eye(num_classes)
    return identity_rows[labels]

# -------------------------------
# Load and Prepare the IMDB Dataset
# -------------------------------
print("Loading IMDB dataset...")
# as_supervised=True makes every example a (text, label) pair.
(ds_train, ds_test), ds_info = tfds.load(
    'imdb_reviews',
    split=['train', 'test'],
    as_supervised=True,
    with_info=True,
)

# Materialise the training split: reviews arrive as byte strings and are decoded to str.
train_examples = list(tfds.as_numpy(ds_train))
train_texts = [raw_text.decode('utf-8') for raw_text, _ in train_examples]
train_labels = np.array([target for _, target in train_examples])

# Hold out 20% of the training data for validation (fixed split seed).
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_texts,
    train_labels,
    test_size=0.2,
    random_state=42,
)

# Materialise the test split the same way.
test_examples = list(tfds.as_numpy(ds_test))
test_texts = [raw_text.decode('utf-8') for raw_text, _ in test_examples]
test_labels = np.array([target for _, target in test_examples])

print(f"Train samples: {len(train_texts)}, Validation samples: {len(val_texts)}, Test samples: {len(test_texts)}")

# -------------------------------
# Preprocessing: Tokenization and Vectorization
# -------------------------------
# The character vocabulary is fitted on the training texts only.
tokenizer = char_level_tokenizer(train_texts)
print("Tokenizer vocabulary size:", len(tokenizer.word_index) + 1)

# Bag-of-characters features for the train / validation / test splits.
X_train, X_val, X_test = (
    texts_to_bow(tokenizer, split_texts)
    for split_texts in (train_texts, val_texts, test_texts)
)

# One-hot targets for the same three splits.
y_train, y_val, y_test = (
    one_hot_encode(split_labels)
    for split_labels in (train_labels, val_labels, test_labels)
)

# -------------------------------
# Model Setup
# -------------------------------
# Input width = length of one bag-of-characters vector.
size_input = X_train.shape[1]
# Hidden layer widths.
size_hidden1, size_hidden2 = 128, 64
size_hidden3 = 32  # Required by the MLP signature; no layer is built from it
size_output = 2

# Build the network (no explicit device placement).
model = MLP(
    size_input,
    size_hidden1,
    size_hidden2,
    size_hidden3,
    size_output,
    device=None,
)

# Adam optimizer used by the training loop.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)

# -------------------------------
# Training Parameters and Loop
# -------------------------------
batch_size = 256
epochs = 10
# ceil so that a final, smaller batch is still visited.
num_batches = int(np.ceil(X_train.shape[0] / batch_size))

print("\nStarting training...\n")
for epoch in range(epochs):
    # Shuffle training data at the start of each epoch (features and labels together).
    indices = np.arange(X_train.shape[0])
    np.random.shuffle(indices)
    X_train = X_train[indices]
    y_train = y_train[indices]

    epoch_loss = 0
    for i in range(num_batches):
        start = i * batch_size
        end = min((i+1) * batch_size, X_train.shape[0])
        X_batch = X_train[start:end]
        y_batch = y_train[start:end]

        # One forward pass per batch: the loss recorded on the tape is used both
        # for the gradients and for the running epoch loss. (Calling
        # model.backward here would run the same forward pass a second time.)
        with tf.GradientTape() as tape:
            predictions = model.forward(X_batch)
            loss_value = model.loss(predictions, y_batch)
        grads = tape.gradient(loss_value, model.variables)
        optimizer.apply_gradients(zip(grads, model.variables))
        # Weight by batch size so the smaller last batch does not skew the mean.
        epoch_loss += loss_value.numpy() * (end - start)

    epoch_loss /= X_train.shape[0]

    # Evaluate on validation set (full split in one forward pass).
    val_logits = model.forward(X_val)
    val_loss = model.loss(val_logits, y_val).numpy()
    val_preds = np.argmax(val_logits.numpy(), axis=1)
    true_val = np.argmax(y_val, axis=1)
    accuracy = np.mean(val_preds == true_val)
    precision = precision_score(true_val, val_preds)
    recall = recall_score(true_val, val_preds)

    print(f"Epoch {epoch+1:02d} | Training Loss: {epoch_loss:.4f} | Val Loss: {val_loss:.4f} | "
          f"Accuracy: {accuracy:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f}")

# -------------------------------
# Final Evaluation on Test Set
# -------------------------------
print("\nEvaluating on test set...")
# Single forward pass over the whole test matrix; the logits feed both the loss and the metrics.
test_logits = model.forward(X_test)
test_loss = model.loss(test_logits, y_test).numpy()

# Collapse logits / one-hot targets to class ids.
test_preds = test_logits.numpy().argmax(axis=1)
true_test = y_test.argmax(axis=1)

# Accuracy is the fraction of matching class ids; precision/recall come from scikit-learn.
test_accuracy = (test_preds == true_test).mean()
test_precision = precision_score(true_test, test_preds)
test_recall = recall_score(true_test, test_preds)

print(f"Test Loss: {test_loss:.4f} | Test Accuracy: {test_accuracy:.4f} | "
      f"Test Precision: {test_precision:.4f} | Test Recall: {test_recall:.4f}")


Loading IMDB dataset...
Train samples: 20000, Validation samples: 5000, Test samples: 25000
Tokenizer vocabulary size: 134

Starting training...

Epoch 01 | Training Loss: 0.6904 | Val Loss: 0.6850 | Accuracy: 0.5576 | Precision: 0.5347 | Recall: 0.6737
Epoch 02 | Training Loss: 0.6790 | Val Loss: 0.6770 | Accuracy: 0.5872 | Precision: 0.5687 | Recall: 0.6147
Epoch 03 | Training Loss: 0.6718 | Val Loss: 0.6720 | Accuracy: 0.5988 | Precision: 0.5709 | Recall: 0.6939
Epoch 04 | Training Loss: 0.6672 | Val Loss: 0.6683 | Accuracy: 0.6044 | Precision: 0.5779 | Recall: 0.6823
Epoch 05 | Training Loss: 0.6635 | Val Loss: 0.6669 | Accuracy: 0.6036 | Precision: 0.5744 | Recall: 0.7038
Epoch 06 | Training Loss: 0.6617 | Val Loss: 0.6666 | Accuracy: 0.6018 | Precision: 0.5709 | Recall: 0.7195
Epoch 07 | Training Loss: 0.6603 | Val Loss: 0.6648 | Accuracy: 0.6068 | Precision: 0.5792 | Recall: 0.6910
Epoch 08 | Training Loss: 0.6595 | Val Loss: 0.6641 | Accuracy: 0.6100 | Precision: 0.5832 | Recal

In [None]:
# Seed number: 655.
# Learning rate of 0.001, ReLU activation function, Adam optimizer, batch size = 256.

import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score

tf.random.set_seed(655)
np.random.seed(655)
# -------------------------------
# Original MLP Class Definition
# -------------------------------
class MLP(object):
    """Feed-forward network with two ReLU hidden layers and a linear (logit) output layer.

    All parameters are plain ``tf.Variable`` objects collected in
    ``self.variables`` so that an external optimizer can update them.
    """

    def __init__(self, size_input, size_hidden1, size_hidden2, size_hidden3, size_output, device=None):
        """
        Record the layer sizes and create the trainable parameters.

        size_input: int, number of input features
        size_hidden1: int, width of the 1st hidden layer
        size_hidden2: int, width of the 2nd hidden layer
        size_hidden3: int, only stored on the instance; no layer is built from it
        size_output: int, number of output units (logits)
        device: str or None. 'gpu' runs the forward pass under 'gpu:0', any other
            non-None value runs it under 'cpu', None applies no device scope.
        """
        self.size_input = size_input
        self.size_hidden1 = size_hidden1
        self.size_hidden2 = size_hidden2
        self.size_hidden3 = size_hidden3  # kept for interface compatibility; unused below
        self.size_output = size_output
        self.device = device

        # Layer 1: input -> hidden1 (weights ~ N(0, 0.1^2), biases start at zero)
        self.W1 = tf.Variable(
            tf.random.normal([size_input, size_hidden1], stddev=0.1))
        self.b1 = tf.Variable(tf.zeros([1, size_hidden1]))

        # Layer 2: hidden1 -> hidden2
        self.W2 = tf.Variable(
            tf.random.normal([size_hidden1, size_hidden2], stddev=0.1))
        self.b2 = tf.Variable(tf.zeros([1, size_hidden2]))

        # Layer 3: hidden2 -> output logits
        self.W3 = tf.Variable(
            tf.random.normal([size_hidden2, size_output], stddev=0.1))
        self.b3 = tf.Variable(tf.zeros([1, size_output]))

        # Everything the optimizer is allowed to update (weights first, then biases).
        self.variables = [
            self.W1, self.W2, self.W3,
            self.b1, self.b2, self.b3,
        ]

    def forward(self, X):
        """
        Run the network on X and return the logits.

        The result is also cached on ``self.y``.
        X: Tensor (or array-like), inputs.
        """
        if self.device is None:
            self.y = self.compute_output(X)
            return self.y

        device_name = 'gpu:0' if self.device == 'gpu' else 'cpu'
        with tf.device(device_name):
            self.y = self.compute_output(X)
        return self.y

    def loss(self, y_pred, y_true):
        """
        Categorical cross-entropy between logits and one-hot targets.

        y_pred: Tensor of shape (batch_size, size_output), raw logits
        y_true: Tensor of shape (batch_size, size_output)
        """
        targets = tf.cast(y_true, dtype=tf.float32)
        logits = tf.cast(y_pred, dtype=tf.float32)
        # from_logits=True: the softmax is applied inside the loss, not by the network.
        criterion = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
        return criterion(targets, logits)

    def backward(self, X_train, y_train):
        """
        Return the gradients of the loss w.r.t. ``self.variables`` for one batch.

        The forward pass is re-run under a gradient tape; no parameters are updated here.
        """
        with tf.GradientTape() as tape:
            current_loss = self.loss(self.forward(X_train), y_train)
        return tape.gradient(current_loss, self.variables)

    def compute_output(self, X):
        """
        Compute the logits for X: two affine+ReLU layers followed by an affine layer.
        """
        inputs = tf.cast(X, dtype=tf.float32)
        # Hidden layer 1
        act1 = tf.nn.relu(tf.matmul(inputs, self.W1) + self.b1)
        # Hidden layer 2
        act2 = tf.nn.relu(tf.matmul(act1, self.W2) + self.b2)
        # Output layer: no activation, these are logits
        logits = tf.matmul(act2, self.W3) + self.b3
        return logits

# -------------------------------
# Character-Level Tokenizer and Preprocessing Functions
# -------------------------------
def char_level_tokenizer(texts, num_words=None):
    """
    Build a lower-casing, character-level Keras Tokenizer and fit it on `texts`.

    Args:
        texts (list of str): Texts used to build the character vocabulary.
        num_words (int or None): Forwarded to the Tokenizer as its `num_words` limit.

    Returns:
        The fitted Tokenizer instance.
    """
    char_tokenizer = tf.keras.preprocessing.text.Tokenizer(
        num_words=num_words,
        char_level=True,
        lower=True,
    )
    char_tokenizer.fit_on_texts(texts)
    return char_tokenizer

def texts_to_bow(tokenizer, texts):
    """
    Encode each text as a binary bag-of-characters vector.

    Args:
        tokenizer: A fitted tokenizer exposing `texts_to_matrix`.
        texts (list of str): Texts to encode.

    Returns:
        Whatever `tokenizer.texts_to_matrix(texts, mode='binary')` returns:
        one fixed-length presence/absence row per text.
    """
    # 'binary' mode marks presence/absence of each token rather than its count.
    return tokenizer.texts_to_matrix(texts, mode='binary')

def one_hot_encode(labels, num_classes=2):
    """
    Map integer class labels to one-hot rows.

    Row `i` of the result is row `labels[i]` of a `num_classes` x `num_classes`
    identity matrix.
    """
    identity_rows = np.eye(num_classes)
    return identity_rows[labels]

# -------------------------------
# Load and Prepare the IMDB Dataset
# -------------------------------
print("Loading IMDB dataset...")
# as_supervised=True makes every example a (text, label) pair.
(ds_train, ds_test), ds_info = tfds.load(
    'imdb_reviews',
    split=['train', 'test'],
    as_supervised=True,
    with_info=True,
)

# Materialise the training split: reviews arrive as byte strings and are decoded to str.
train_examples = list(tfds.as_numpy(ds_train))
train_texts = [raw_text.decode('utf-8') for raw_text, _ in train_examples]
train_labels = np.array([target for _, target in train_examples])

# Hold out 20% of the training data for validation (fixed split seed).
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_texts,
    train_labels,
    test_size=0.2,
    random_state=42,
)

# Materialise the test split the same way.
test_examples = list(tfds.as_numpy(ds_test))
test_texts = [raw_text.decode('utf-8') for raw_text, _ in test_examples]
test_labels = np.array([target for _, target in test_examples])

print(f"Train samples: {len(train_texts)}, Validation samples: {len(val_texts)}, Test samples: {len(test_texts)}")

# -------------------------------
# Preprocessing: Tokenization and Vectorization
# -------------------------------
# The character vocabulary is fitted on the training texts only.
tokenizer = char_level_tokenizer(train_texts)
print("Tokenizer vocabulary size:", len(tokenizer.word_index) + 1)

# Bag-of-characters features for the train / validation / test splits.
X_train, X_val, X_test = (
    texts_to_bow(tokenizer, split_texts)
    for split_texts in (train_texts, val_texts, test_texts)
)

# One-hot targets for the same three splits.
y_train, y_val, y_test = (
    one_hot_encode(split_labels)
    for split_labels in (train_labels, val_labels, test_labels)
)

# -------------------------------
# Model Setup
# -------------------------------
# Input width = length of one bag-of-characters vector.
size_input = X_train.shape[1]
# Hidden layer widths.
size_hidden1, size_hidden2 = 128, 64
size_hidden3 = 32  # Required by the MLP signature; no layer is built from it
size_output = 2

# Build the network (no explicit device placement).
model = MLP(
    size_input,
    size_hidden1,
    size_hidden2,
    size_hidden3,
    size_output,
    device=None,
)

# Adam optimizer used by the training loop.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

# -------------------------------
# Training Parameters and Loop
# -------------------------------
batch_size = 256
epochs = 10
# ceil so that a final, smaller batch is still visited.
num_batches = int(np.ceil(X_train.shape[0] / batch_size))

print("\nStarting training...\n")
for epoch in range(epochs):
    # Shuffle training data at the start of each epoch (features and labels together).
    indices = np.arange(X_train.shape[0])
    np.random.shuffle(indices)
    X_train = X_train[indices]
    y_train = y_train[indices]

    epoch_loss = 0
    for i in range(num_batches):
        start = i * batch_size
        end = min((i+1) * batch_size, X_train.shape[0])
        X_batch = X_train[start:end]
        y_batch = y_train[start:end]

        # One forward pass per batch: the loss recorded on the tape is used both
        # for the gradients and for the running epoch loss. (Calling
        # model.backward here would run the same forward pass a second time.)
        with tf.GradientTape() as tape:
            predictions = model.forward(X_batch)
            loss_value = model.loss(predictions, y_batch)
        grads = tape.gradient(loss_value, model.variables)
        optimizer.apply_gradients(zip(grads, model.variables))
        # Weight by batch size so the smaller last batch does not skew the mean.
        epoch_loss += loss_value.numpy() * (end - start)

    epoch_loss /= X_train.shape[0]

    # Evaluate on validation set (full split in one forward pass).
    val_logits = model.forward(X_val)
    val_loss = model.loss(val_logits, y_val).numpy()
    val_preds = np.argmax(val_logits.numpy(), axis=1)
    true_val = np.argmax(y_val, axis=1)
    accuracy = np.mean(val_preds == true_val)
    precision = precision_score(true_val, val_preds)
    recall = recall_score(true_val, val_preds)

    print(f"Epoch {epoch+1:02d} | Training Loss: {epoch_loss:.4f} | Val Loss: {val_loss:.4f} | "
          f"Accuracy: {accuracy:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f}")

# -------------------------------
# Final Evaluation on Test Set
# -------------------------------
print("\nEvaluating on test set...")
# Single forward pass over the whole test matrix; the logits feed both the loss and the metrics.
test_logits = model.forward(X_test)
test_loss = model.loss(test_logits, y_test).numpy()

# Collapse logits / one-hot targets to class ids.
test_preds = test_logits.numpy().argmax(axis=1)
true_test = y_test.argmax(axis=1)

# Accuracy is the fraction of matching class ids; precision/recall come from scikit-learn.
test_accuracy = (test_preds == true_test).mean()
test_precision = precision_score(true_test, test_preds)
test_recall = recall_score(true_test, test_preds)

print(f"Test Loss: {test_loss:.4f} | Test Accuracy: {test_accuracy:.4f} | "
      f"Test Precision: {test_precision:.4f} | Test Recall: {test_recall:.4f}")


Loading IMDB dataset...
Train samples: 20000, Validation samples: 5000, Test samples: 25000
Tokenizer vocabulary size: 134

Starting training...

Epoch 01 | Training Loss: 0.6728 | Val Loss: 0.6675 | Accuracy: 0.5964 | Precision: 0.5943 | Recall: 0.5276
Epoch 02 | Training Loss: 0.6619 | Val Loss: 0.6676 | Accuracy: 0.5958 | Precision: 0.5615 | Recall: 0.7587
Epoch 03 | Training Loss: 0.6595 | Val Loss: 0.6613 | Accuracy: 0.6098 | Precision: 0.5867 | Recall: 0.6605
Epoch 04 | Training Loss: 0.6582 | Val Loss: 0.6617 | Accuracy: 0.6052 | Precision: 0.5751 | Recall: 0.7112
Epoch 05 | Training Loss: 0.6548 | Val Loss: 0.6610 | Accuracy: 0.6080 | Precision: 0.6058 | Recall: 0.5479
Epoch 06 | Training Loss: 0.6517 | Val Loss: 0.6627 | Accuracy: 0.6038 | Precision: 0.5774 | Recall: 0.6819
Epoch 07 | Training Loss: 0.6507 | Val Loss: 0.6596 | Accuracy: 0.6114 | Precision: 0.5977 | Recall: 0.6068
Epoch 08 | Training Loss: 0.6478 | Val Loss: 0.6582 | Accuracy: 0.6086 | Precision: 0.5862 | Recal

In [None]:
# Seed number: 655, with 4 weight layers (3 hidden layers plus the output layer).
# Learning rate of 0.001, ReLU activation function, Adam optimizer, batch size = 256.

import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score

tf.random.set_seed(655)
np.random.seed(655)
# -------------------------------
# Original MLP Class Definition
# -------------------------------
class MLP(object):
    """Feed-forward network with three ReLU hidden layers and a linear (logit) output layer.

    All parameters are plain ``tf.Variable`` objects collected in
    ``self.variables`` so that an external optimizer can update them.
    """

    def __init__(self, size_input, size_hidden1, size_hidden2, size_hidden3, size_hidden4, size_output, device=None):
        """
        Record the layer sizes and create the trainable parameters.

        size_input: int, number of input features
        size_hidden1: int, width of the 1st hidden layer
        size_hidden2: int, width of the 2nd hidden layer
        size_hidden3: int, width of the 3rd hidden layer
        size_hidden4: int, only stored on the instance; no layer is built from it
        size_output: int, number of output units (logits)
        device: str or None. 'gpu' runs the forward pass under 'gpu:0', any other
            non-None value runs it under 'cpu', None applies no device scope.
        """
        self.size_input = size_input
        self.size_hidden1 = size_hidden1
        self.size_hidden2 = size_hidden2
        self.size_hidden3 = size_hidden3
        self.size_hidden4 = size_hidden4  # kept for interface compatibility; unused below
        self.size_output = size_output
        self.device = device

        # Layer 1: input -> hidden1 (weights ~ N(0, 0.1^2), biases start at zero)
        self.W1 = tf.Variable(
            tf.random.normal([size_input, size_hidden1], stddev=0.1))
        self.b1 = tf.Variable(tf.zeros([1, size_hidden1]))

        # Layer 2: hidden1 -> hidden2
        self.W2 = tf.Variable(
            tf.random.normal([size_hidden1, size_hidden2], stddev=0.1))
        self.b2 = tf.Variable(tf.zeros([1, size_hidden2]))

        # Layer 3: hidden2 -> hidden3
        self.W3 = tf.Variable(
            tf.random.normal([size_hidden2, size_hidden3], stddev=0.1))
        self.b3 = tf.Variable(tf.zeros([1, size_hidden3]))

        # Layer 4: hidden3 -> output logits
        self.W4 = tf.Variable(
            tf.random.normal([size_hidden3, size_output], stddev=0.1))
        self.b4 = tf.Variable(tf.zeros([1, size_output]))

        # Everything the optimizer is allowed to update (weights first, then biases).
        self.variables = [
            self.W1, self.W2, self.W3, self.W4,
            self.b1, self.b2, self.b3, self.b4,
        ]

    def forward(self, X):
        """
        Run the network on X and return the logits.

        The result is also cached on ``self.y``.
        X: Tensor (or array-like), inputs.
        """
        if self.device is None:
            self.y = self.compute_output(X)
            return self.y

        device_name = 'gpu:0' if self.device == 'gpu' else 'cpu'
        with tf.device(device_name):
            self.y = self.compute_output(X)
        return self.y

    def loss(self, y_pred, y_true):
        """
        Categorical cross-entropy between logits and one-hot targets.

        y_pred: Tensor of shape (batch_size, size_output), raw logits
        y_true: Tensor of shape (batch_size, size_output)
        """
        targets = tf.cast(y_true, dtype=tf.float32)
        logits = tf.cast(y_pred, dtype=tf.float32)
        # from_logits=True: the softmax is applied inside the loss, not by the network.
        criterion = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
        return criterion(targets, logits)

    def backward(self, X_train, y_train):
        """
        Return the gradients of the loss w.r.t. ``self.variables`` for one batch.

        The forward pass is re-run under a gradient tape; no parameters are updated here.
        """
        with tf.GradientTape() as tape:
            current_loss = self.loss(self.forward(X_train), y_train)
        return tape.gradient(current_loss, self.variables)

    def compute_output(self, X):
        """
        Compute the logits for X: three affine+ReLU layers followed by an affine layer.
        """
        inputs = tf.cast(X, dtype=tf.float32)
        # Hidden layer 1
        act1 = tf.nn.relu(tf.matmul(inputs, self.W1) + self.b1)
        # Hidden layer 2
        act2 = tf.nn.relu(tf.matmul(act1, self.W2) + self.b2)
        # Hidden layer 3
        act3 = tf.nn.relu(tf.matmul(act2, self.W3) + self.b3)
        # Output layer: no activation, these are logits
        logits = tf.matmul(act3, self.W4) + self.b4
        return logits

# -------------------------------
# Character-Level Tokenizer and Preprocessing Functions
# -------------------------------
def char_level_tokenizer(texts, num_words=None):
    """
    Build a lower-casing, character-level Keras Tokenizer and fit it on `texts`.

    Args:
        texts (list of str): Texts used to build the character vocabulary.
        num_words (int or None): Forwarded to the Tokenizer as its `num_words` limit.

    Returns:
        The fitted Tokenizer instance.
    """
    char_tokenizer = tf.keras.preprocessing.text.Tokenizer(
        num_words=num_words,
        char_level=True,
        lower=True,
    )
    char_tokenizer.fit_on_texts(texts)
    return char_tokenizer

def texts_to_bow(tokenizer, texts):
    """
    Encode each text as a binary bag-of-characters vector.

    Args:
        tokenizer: A fitted tokenizer exposing `texts_to_matrix`.
        texts (list of str): Texts to encode.

    Returns:
        Whatever `tokenizer.texts_to_matrix(texts, mode='binary')` returns:
        one fixed-length presence/absence row per text.
    """
    # 'binary' mode marks presence/absence of each token rather than its count.
    return tokenizer.texts_to_matrix(texts, mode='binary')

def one_hot_encode(labels, num_classes=2):
    """
    Map integer class labels to one-hot rows.

    Row `i` of the result is row `labels[i]` of a `num_classes` x `num_classes`
    identity matrix.
    """
    identity_rows = np.eye(num_classes)
    return identity_rows[labels]

# -------------------------------
# Load and Prepare the IMDB Dataset
# -------------------------------
print("Loading IMDB dataset...")
# as_supervised=True makes every example a (text, label) pair.
(ds_train, ds_test), ds_info = tfds.load(
    'imdb_reviews',
    split=['train', 'test'],
    as_supervised=True,
    with_info=True,
)

# Materialise the training split: reviews arrive as byte strings and are decoded to str.
train_examples = list(tfds.as_numpy(ds_train))
train_texts = [raw_text.decode('utf-8') for raw_text, _ in train_examples]
train_labels = np.array([target for _, target in train_examples])

# Hold out 20% of the training data for validation (fixed split seed).
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_texts,
    train_labels,
    test_size=0.2,
    random_state=42,
)

# Materialise the test split the same way.
test_examples = list(tfds.as_numpy(ds_test))
test_texts = [raw_text.decode('utf-8') for raw_text, _ in test_examples]
test_labels = np.array([target for _, target in test_examples])

print(f"Train samples: {len(train_texts)}, Validation samples: {len(val_texts)}, Test samples: {len(test_texts)}")

# -------------------------------
# Preprocessing: Tokenization and Vectorization
# -------------------------------
# The character vocabulary is fitted on the training texts only.
tokenizer = char_level_tokenizer(train_texts)
print("Tokenizer vocabulary size:", len(tokenizer.word_index) + 1)

# Bag-of-characters features for the train / validation / test splits.
X_train, X_val, X_test = (
    texts_to_bow(tokenizer, split_texts)
    for split_texts in (train_texts, val_texts, test_texts)
)

# One-hot targets for the same three splits.
y_train, y_val, y_test = (
    one_hot_encode(split_labels)
    for split_labels in (train_labels, val_labels, test_labels)
)

# -------------------------------
# Model Setup
# -------------------------------
# Input width = length of one bag-of-characters vector.
size_input = X_train.shape[1]
# Hidden layer widths.
size_hidden1, size_hidden2, size_hidden3 = 128, 64, 32
size_hidden4 = 16  # Required by the MLP signature; no layer is built from it
size_output = 2

# Build the network (no explicit device placement).
model = MLP(
    size_input,
    size_hidden1,
    size_hidden2,
    size_hidden3,
    size_hidden4,
    size_output,
    device=None,
)

# Adam optimizer used by the training loop.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

# -------------------------------
# Training Parameters and Loop
# -------------------------------
batch_size = 256
epochs = 10
# ceil so that a final, smaller batch is still visited.
num_batches = int(np.ceil(X_train.shape[0] / batch_size))

print("\nStarting training...\n")
for epoch in range(epochs):
    # Shuffle training data at the start of each epoch (features and labels together).
    indices = np.arange(X_train.shape[0])
    np.random.shuffle(indices)
    X_train = X_train[indices]
    y_train = y_train[indices]

    epoch_loss = 0
    for i in range(num_batches):
        start = i * batch_size
        end = min((i+1) * batch_size, X_train.shape[0])
        X_batch = X_train[start:end]
        y_batch = y_train[start:end]

        # One forward pass per batch: the loss recorded on the tape is used both
        # for the gradients and for the running epoch loss. (Calling
        # model.backward here would run the same forward pass a second time.)
        with tf.GradientTape() as tape:
            predictions = model.forward(X_batch)
            loss_value = model.loss(predictions, y_batch)
        grads = tape.gradient(loss_value, model.variables)
        optimizer.apply_gradients(zip(grads, model.variables))
        # Weight by batch size so the smaller last batch does not skew the mean.
        epoch_loss += loss_value.numpy() * (end - start)

    epoch_loss /= X_train.shape[0]

    # Evaluate on validation set (full split in one forward pass).
    val_logits = model.forward(X_val)
    val_loss = model.loss(val_logits, y_val).numpy()
    val_preds = np.argmax(val_logits.numpy(), axis=1)
    true_val = np.argmax(y_val, axis=1)
    accuracy = np.mean(val_preds == true_val)
    precision = precision_score(true_val, val_preds)
    recall = recall_score(true_val, val_preds)

    print(f"Epoch {epoch+1:02d} | Training Loss: {epoch_loss:.4f} | Val Loss: {val_loss:.4f} | "
          f"Accuracy: {accuracy:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f}")

# -------------------------------
# Final Evaluation on Test Set
# -------------------------------
print("\nEvaluating on test set...")
# Single forward pass over the whole test matrix; the logits feed both the loss and the metrics.
test_logits = model.forward(X_test)
test_loss = model.loss(test_logits, y_test).numpy()

# Collapse logits / one-hot targets to class ids.
test_preds = test_logits.numpy().argmax(axis=1)
true_test = y_test.argmax(axis=1)

# Accuracy is the fraction of matching class ids; precision/recall come from scikit-learn.
test_accuracy = (test_preds == true_test).mean()
test_precision = precision_score(true_test, test_preds)
test_recall = recall_score(true_test, test_preds)

print(f"Test Loss: {test_loss:.4f} | Test Accuracy: {test_accuracy:.4f} | "
      f"Test Precision: {test_precision:.4f} | Test Recall: {test_recall:.4f}")


Loading IMDB dataset...
Train samples: 20000, Validation samples: 5000, Test samples: 25000
Tokenizer vocabulary size: 134

Starting training...

Epoch 01 | Training Loss: 0.6744 | Val Loss: 0.6652 | Accuracy: 0.6034 | Precision: 0.6011 | Recall: 0.5408
Epoch 02 | Training Loss: 0.6627 | Val Loss: 0.6638 | Accuracy: 0.6048 | Precision: 0.5721 | Recall: 0.7331
Epoch 03 | Training Loss: 0.6605 | Val Loss: 0.6603 | Accuracy: 0.6088 | Precision: 0.5816 | Recall: 0.6881
Epoch 04 | Training Loss: 0.6580 | Val Loss: 0.6601 | Accuracy: 0.6102 | Precision: 0.5827 | Recall: 0.6906
Epoch 05 | Training Loss: 0.6550 | Val Loss: 0.6598 | Accuracy: 0.6086 | Precision: 0.6063 | Recall: 0.5495
Epoch 06 | Training Loss: 0.6522 | Val Loss: 0.6623 | Accuracy: 0.6086 | Precision: 0.5825 | Recall: 0.6799
Epoch 07 | Training Loss: 0.6513 | Val Loss: 0.6587 | Accuracy: 0.6142 | Precision: 0.6047 | Recall: 0.5895
Epoch 08 | Training Loss: 0.6469 | Val Loss: 0.6569 | Accuracy: 0.6074 | Precision: 0.5858 | Recal

In [None]:
# Seed 655; MLP with 4 weight layers (3 hidden ReLU layers + output; size_hidden4 is accepted but unused).
# Learning rate 0.0001, ReLU activation, Adam optimizer, batch size = 256.

import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score

tf.random.set_seed(655)
np.random.seed(655)
# -------------------------------
# Original MLP Class Definition
# -------------------------------
class MLP(object):
    """Fully connected network: three ReLU hidden layers followed by a linear logits layer.

    All parameters are plain ``tf.Variable`` objects collected in ``self.variables``.
    """

    def __init__(self, size_input, size_hidden1, size_hidden2, size_hidden3, size_hidden4, size_output, device=None):
        """
        size_input: int, size of input layer
        size_hidden1: int, size of the 1st hidden layer
        size_hidden2: int, size of the 2nd hidden layer
        size_hidden3: int, size of the 3rd hidden layer
        size_hidden4: int, stored on the instance but no layer is built from it
        size_output: int, size of output layer
        device: str or None, either 'cpu' or 'gpu' or None.
        """
        self.device = device
        self.size_input = size_input
        self.size_hidden1 = size_hidden1
        self.size_hidden2 = size_hidden2
        self.size_hidden3 = size_hidden3
        self.size_hidden4 = size_hidden4  # kept for callers; unused by the network
        self.size_output = size_output

        # Weights are drawn from N(0, 0.1^2); biases start at zero.
        # The random draws happen in the order W1, W2, W3, W4.

        # Input -> hidden 1
        self.W1 = tf.Variable(tf.random.normal([size_input, size_hidden1], stddev=0.1))
        self.b1 = tf.Variable(tf.zeros([1, size_hidden1]))

        # Hidden 1 -> hidden 2
        self.W2 = tf.Variable(tf.random.normal([size_hidden1, size_hidden2], stddev=0.1))
        self.b2 = tf.Variable(tf.zeros([1, size_hidden2]))

        # Hidden 2 -> hidden 3
        self.W3 = tf.Variable(tf.random.normal([size_hidden2, size_hidden3], stddev=0.1))
        self.b3 = tf.Variable(tf.zeros([1, size_hidden3]))

        # Hidden 3 -> output logits
        self.W4 = tf.Variable(tf.random.normal([size_hidden3, size_output], stddev=0.1))
        self.b4 = tf.Variable(tf.zeros([1, size_output]))

        # Trainable parameters, in the order gradients are returned by backward().
        self.variables = [self.W1, self.W2, self.W3, self.W4,
                          self.b1, self.b2, self.b3, self.b4]

    def forward(self, X):
        """
        Run the forward pass and cache the result on ``self.y``.
        X: Tensor or array, inputs.
        Returns the logits produced by compute_output.
        """
        if self.device is None:
            # No explicit placement requested: let TensorFlow choose.
            self.y = self.compute_output(X)
            return self.y

        # Anything other than 'gpu' falls back to the CPU.
        placement = 'gpu:0' if self.device == 'gpu' else 'cpu'
        with tf.device(placement):
            self.y = self.compute_output(X)
        return self.y

    def loss(self, y_pred, y_true):
        """
        Categorical cross-entropy between logits and one-hot targets.
        y_pred: Tensor of shape (batch_size, size_output), raw logits
        y_true: Tensor of shape (batch_size, size_output)
        Returns the value produced by the Keras loss object (default reduction).
        """
        criterion = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
        targets = tf.cast(y_true, dtype=tf.float32)
        logits = tf.cast(y_pred, dtype=tf.float32)
        return criterion(targets, logits)

    def backward(self, X_train, y_train):
        """
        Backward pass: gradients of the loss with respect to ``self.variables``.
        Runs its own taped forward pass on X_train and returns the gradient list.
        """
        with tf.GradientTape() as tape:
            batch_loss = self.loss(self.forward(X_train), y_train)
        return tape.gradient(batch_loss, self.variables)

    def compute_output(self, X):
        """
        Compute the output logits for X: three affine+ReLU stages, then a linear layer.
        """
        # Inputs are cast to float32 to match the variables.
        activations = tf.cast(X, dtype=tf.float32)
        hidden_params = ((self.W1, self.b1), (self.W2, self.b2), (self.W3, self.b3))
        for weights, bias in hidden_params:
            activations = tf.nn.relu(tf.matmul(activations, weights) + bias)
        # Output layer (logits): no activation here, softmax is applied inside the loss.
        return tf.matmul(activations, self.W4) + self.b4

# -------------------------------
# Character-Level Tokenizer and Preprocessing Functions
# -------------------------------
def char_level_tokenizer(texts, num_words=None):
    """
    Build a lower-casing, character-level Keras Tokenizer and fit it on ``texts``.

    Args:
        texts (list of str): Texts used to fit the tokenizer.
        num_words (int or None): Passed through to the Tokenizer as ``num_words``.

    Returns:
        The fitted Tokenizer instance.
    """
    options = dict(num_words=num_words, char_level=True, lower=True)
    fitted = tf.keras.preprocessing.text.Tokenizer(**options)
    fitted.fit_on_texts(texts)
    return fitted

def texts_to_bow(tokenizer, texts):
    """
    Turn texts into a bag-of-characters matrix.

    Args:
        tokenizer: A fitted character-level Tokenizer.
        texts (list of str): Texts to vectorize.

    Returns:
        Whatever ``tokenizer.texts_to_matrix(texts, mode='binary')`` returns:
        one fixed-length presence/absence row per text.
    """
    # 'binary' mode marks presence only; character counts are discarded.
    return tokenizer.texts_to_matrix(texts, mode='binary')

def one_hot_encode(labels, num_classes=2):
    """
    Map integer class labels to one-hot rows.

    Each label selects the matching row of a ``num_classes`` x ``num_classes``
    identity matrix, so the result has one float row per label.
    """
    identity = np.identity(num_classes)
    return identity[labels]

# -------------------------------
# Load and Prepare the IMDB Dataset
# -------------------------------
print("Loading IMDB dataset...")
# 'as_supervised' yields (text, label) pairs; 'with_info' additionally returns dataset metadata.
imdb_splits, ds_info = tfds.load('imdb_reviews',
                                 split=['train', 'test'],
                                 as_supervised=True,
                                 with_info=True)
ds_train, ds_test = imdb_splits

# Materialize the training split as Python lists.
train_texts, train_labels = [], []
for text, label in tfds.as_numpy(ds_train):
    # Reviews arrive as byte strings; decode them to str.
    train_texts.append(text.decode('utf-8'))
    train_labels.append(label)
train_labels = np.array(train_labels)

# Hold out 20% of the training data for validation (fixed split seed).
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_texts, train_labels, test_size=0.2, random_state=42)

# Materialize the test split the same way.
test_texts, test_labels = [], []
for text, label in tfds.as_numpy(ds_test):
    test_texts.append(text.decode('utf-8'))
    test_labels.append(label)
test_labels = np.array(test_labels)

print(f"Train samples: {len(train_texts)}, Validation samples: {len(val_texts)}, Test samples: {len(test_texts)}")

# -------------------------------
# Preprocessing: Tokenization and Vectorization
# -------------------------------
# Build the character-level tokenizer on the training texts.
# Fit the character vocabulary on the training texts only.
tokenizer = char_level_tokenizer(train_texts)
print("Tokenizer vocabulary size:", len(tokenizer.word_index) + 1)

# Bag-of-characters features for each split (train, validation, test).
X_train, X_val, X_test = (
    texts_to_bow(tokenizer, split_texts)
    for split_texts in (train_texts, val_texts, test_texts)
)

# One-hot targets for each split.
y_train, y_val, y_test = (
    one_hot_encode(split_labels)
    for split_labels in (train_labels, val_labels, test_labels)
)

# -------------------------------
# Model Setup
# -------------------------------
# Input width follows the bag-of-characters vector length.
size_input = X_train.shape[1]
# Layer widths. size_hidden4 is passed to MLP but no layer is built from it.
size_hidden1, size_hidden2, size_hidden3, size_hidden4 = 128, 64, 32, 16
size_output = 2

# Instantiate the MLP model.
model = MLP(size_input, size_hidden1, size_hidden2, size_hidden3, size_hidden4, size_output, device=None)

# Define the optimizer.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)

# -------------------------------
# Training Parameters and Loop
# -------------------------------
batch_size = 256
epochs = 10
num_batches = int(np.ceil(X_train.shape[0] / batch_size))

print("\nStarting training...\n")
for epoch in range(epochs):
    # Reshuffle once per epoch. X_train / y_train are rebound to permuted
    # copies using the same index array, so rows and labels stay aligned.
    indices = np.arange(X_train.shape[0])
    np.random.shuffle(indices)
    X_train = X_train[indices]
    y_train = y_train[indices]

    epoch_loss = 0
    for i in range(num_batches):
        start = i * batch_size
        end = min((i+1) * batch_size, X_train.shape[0])  # final batch may be short
        X_batch = X_train[start:end]
        y_batch = y_train[start:end]

        # Record ONE forward pass and reuse it for both the reported loss and
        # the gradients. Previously the batch was pushed through the network
        # twice (once for loss_value, once more inside model.backward).
        with tf.GradientTape() as tape:
            predictions = model.forward(X_batch)
            loss_value = model.loss(predictions, y_batch)
        grads = tape.gradient(loss_value, model.variables)
        optimizer.apply_gradients(zip(grads, model.variables))
        # Weight by batch size so the epoch figure is a per-sample average
        # (model.loss uses the Keras loss object's default reduction).
        epoch_loss += loss_value.numpy() * (end - start)

    epoch_loss /= X_train.shape[0]

    # Evaluate on the validation set with a single full-batch forward pass.
    val_logits = model.forward(X_val)
    val_loss = model.loss(val_logits, y_val).numpy()
    val_preds = np.argmax(val_logits.numpy(), axis=1)
    true_val = np.argmax(y_val, axis=1)
    accuracy = np.mean(val_preds == true_val)
    precision = precision_score(true_val, val_preds)
    recall = recall_score(true_val, val_preds)

    print(f"Epoch {epoch+1:02d} | Training Loss: {epoch_loss:.4f} | Val Loss: {val_loss:.4f} | "
          f"Accuracy: {accuracy:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f}")

# -------------------------------
# Final Evaluation on Test Set
# -------------------------------
print("\nEvaluating on test set...")
# One forward pass over the full test matrix; the logits feed both the loss
# and the arg-max class predictions.
test_logits = model.forward(X_test)
test_loss = model.loss(test_logits, y_test).numpy()

# Collapse logits and one-hot targets to class indices for the metrics.
test_preds = np.argmax(test_logits.numpy(), axis=1)
true_test = np.argmax(y_test, axis=1)

test_accuracy = np.equal(test_preds, true_test).mean()
test_precision = precision_score(true_test, test_preds)
test_recall = recall_score(true_test, test_preds)

test_report = (
    f"Test Loss: {test_loss:.4f} | Test Accuracy: {test_accuracy:.4f} | "
    f"Test Precision: {test_precision:.4f} | Test Recall: {test_recall:.4f}"
)
print(test_report)


Loading IMDB dataset...
Train samples: 20000, Validation samples: 5000, Test samples: 25000
Tokenizer vocabulary size: 134

Starting training...

Epoch 01 | Training Loss: 0.6894 | Val Loss: 0.6860 | Accuracy: 0.5674 | Precision: 0.5399 | Recall: 0.7277
Epoch 02 | Training Loss: 0.6819 | Val Loss: 0.6786 | Accuracy: 0.5900 | Precision: 0.5714 | Recall: 0.6176
Epoch 03 | Training Loss: 0.6745 | Val Loss: 0.6727 | Accuracy: 0.5956 | Precision: 0.5691 | Recall: 0.6832
Epoch 04 | Training Loss: 0.6688 | Val Loss: 0.6685 | Accuracy: 0.6018 | Precision: 0.5736 | Recall: 0.6964
Epoch 05 | Training Loss: 0.6643 | Val Loss: 0.6660 | Accuracy: 0.6042 | Precision: 0.5758 | Recall: 0.6972
Epoch 06 | Training Loss: 0.6620 | Val Loss: 0.6652 | Accuracy: 0.6052 | Precision: 0.5743 | Recall: 0.7174
Epoch 07 | Training Loss: 0.6601 | Val Loss: 0.6635 | Accuracy: 0.6104 | Precision: 0.5832 | Recall: 0.6885
Epoch 08 | Training Loss: 0.6591 | Val Loss: 0.6629 | Accuracy: 0.6116 | Precision: 0.5847 | Recal

In [None]:
# Seed 655; MLP with 4 weight layers (3 hidden ReLU layers + output; size_hidden4 is accepted but unused).
# Learning rate 0.0005, ReLU activation, Adam optimizer, batch size = 256.

import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score

tf.random.set_seed(655)
np.random.seed(655)
# -------------------------------
# Original MLP Class Definition
# -------------------------------
class MLP(object):
    """Fully connected network: three ReLU hidden layers followed by a linear logits layer.

    All parameters are plain ``tf.Variable`` objects collected in ``self.variables``.
    """

    def __init__(self, size_input, size_hidden1, size_hidden2, size_hidden3, size_hidden4, size_output, device=None):
        """
        size_input: int, size of input layer
        size_hidden1: int, size of the 1st hidden layer
        size_hidden2: int, size of the 2nd hidden layer
        size_hidden3: int, size of the 3rd hidden layer
        size_hidden4: int, stored on the instance but no layer is built from it
        size_output: int, size of output layer
        device: str or None, either 'cpu' or 'gpu' or None.
        """
        self.device = device
        self.size_input = size_input
        self.size_hidden1 = size_hidden1
        self.size_hidden2 = size_hidden2
        self.size_hidden3 = size_hidden3
        self.size_hidden4 = size_hidden4  # kept for callers; unused by the network
        self.size_output = size_output

        # Weights are drawn from N(0, 0.1^2); biases start at zero.
        # The random draws happen in the order W1, W2, W3, W4.

        # Input -> hidden 1
        self.W1 = tf.Variable(tf.random.normal([size_input, size_hidden1], stddev=0.1))
        self.b1 = tf.Variable(tf.zeros([1, size_hidden1]))

        # Hidden 1 -> hidden 2
        self.W2 = tf.Variable(tf.random.normal([size_hidden1, size_hidden2], stddev=0.1))
        self.b2 = tf.Variable(tf.zeros([1, size_hidden2]))

        # Hidden 2 -> hidden 3
        self.W3 = tf.Variable(tf.random.normal([size_hidden2, size_hidden3], stddev=0.1))
        self.b3 = tf.Variable(tf.zeros([1, size_hidden3]))

        # Hidden 3 -> output logits
        self.W4 = tf.Variable(tf.random.normal([size_hidden3, size_output], stddev=0.1))
        self.b4 = tf.Variable(tf.zeros([1, size_output]))

        # Trainable parameters, in the order gradients are returned by backward().
        self.variables = [self.W1, self.W2, self.W3, self.W4,
                          self.b1, self.b2, self.b3, self.b4]

    def forward(self, X):
        """
        Run the forward pass and cache the result on ``self.y``.
        X: Tensor or array, inputs.
        Returns the logits produced by compute_output.
        """
        if self.device is None:
            # No explicit placement requested: let TensorFlow choose.
            self.y = self.compute_output(X)
            return self.y

        # Anything other than 'gpu' falls back to the CPU.
        placement = 'gpu:0' if self.device == 'gpu' else 'cpu'
        with tf.device(placement):
            self.y = self.compute_output(X)
        return self.y

    def loss(self, y_pred, y_true):
        """
        Categorical cross-entropy between logits and one-hot targets.
        y_pred: Tensor of shape (batch_size, size_output), raw logits
        y_true: Tensor of shape (batch_size, size_output)
        Returns the value produced by the Keras loss object (default reduction).
        """
        criterion = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
        targets = tf.cast(y_true, dtype=tf.float32)
        logits = tf.cast(y_pred, dtype=tf.float32)
        return criterion(targets, logits)

    def backward(self, X_train, y_train):
        """
        Backward pass: gradients of the loss with respect to ``self.variables``.
        Runs its own taped forward pass on X_train and returns the gradient list.
        """
        with tf.GradientTape() as tape:
            batch_loss = self.loss(self.forward(X_train), y_train)
        return tape.gradient(batch_loss, self.variables)

    def compute_output(self, X):
        """
        Compute the output logits for X: three affine+ReLU stages, then a linear layer.
        """
        # Inputs are cast to float32 to match the variables.
        activations = tf.cast(X, dtype=tf.float32)
        hidden_params = ((self.W1, self.b1), (self.W2, self.b2), (self.W3, self.b3))
        for weights, bias in hidden_params:
            activations = tf.nn.relu(tf.matmul(activations, weights) + bias)
        # Output layer (logits): no activation here, softmax is applied inside the loss.
        return tf.matmul(activations, self.W4) + self.b4

# -------------------------------
# Character-Level Tokenizer and Preprocessing Functions
# -------------------------------
def char_level_tokenizer(texts, num_words=None):
    """
    Build a lower-casing, character-level Keras Tokenizer and fit it on ``texts``.

    Args:
        texts (list of str): Texts used to fit the tokenizer.
        num_words (int or None): Passed through to the Tokenizer as ``num_words``.

    Returns:
        The fitted Tokenizer instance.
    """
    options = dict(num_words=num_words, char_level=True, lower=True)
    fitted = tf.keras.preprocessing.text.Tokenizer(**options)
    fitted.fit_on_texts(texts)
    return fitted

def texts_to_bow(tokenizer, texts):
    """
    Turn texts into a bag-of-characters matrix.

    Args:
        tokenizer: A fitted character-level Tokenizer.
        texts (list of str): Texts to vectorize.

    Returns:
        Whatever ``tokenizer.texts_to_matrix(texts, mode='binary')`` returns:
        one fixed-length presence/absence row per text.
    """
    # 'binary' mode marks presence only; character counts are discarded.
    return tokenizer.texts_to_matrix(texts, mode='binary')

def one_hot_encode(labels, num_classes=2):
    """
    Map integer class labels to one-hot rows.

    Each label selects the matching row of a ``num_classes`` x ``num_classes``
    identity matrix, so the result has one float row per label.
    """
    identity = np.identity(num_classes)
    return identity[labels]

# -------------------------------
# Load and Prepare the IMDB Dataset
# -------------------------------
print("Loading IMDB dataset...")
# 'as_supervised' yields (text, label) pairs; 'with_info' additionally returns dataset metadata.
imdb_splits, ds_info = tfds.load('imdb_reviews',
                                 split=['train', 'test'],
                                 as_supervised=True,
                                 with_info=True)
ds_train, ds_test = imdb_splits

# Materialize the training split as Python lists.
train_texts, train_labels = [], []
for text, label in tfds.as_numpy(ds_train):
    # Reviews arrive as byte strings; decode them to str.
    train_texts.append(text.decode('utf-8'))
    train_labels.append(label)
train_labels = np.array(train_labels)

# Hold out 20% of the training data for validation (fixed split seed).
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_texts, train_labels, test_size=0.2, random_state=42)

# Materialize the test split the same way.
test_texts, test_labels = [], []
for text, label in tfds.as_numpy(ds_test):
    test_texts.append(text.decode('utf-8'))
    test_labels.append(label)
test_labels = np.array(test_labels)

print(f"Train samples: {len(train_texts)}, Validation samples: {len(val_texts)}, Test samples: {len(test_texts)}")

# -------------------------------
# Preprocessing: Tokenization and Vectorization
# -------------------------------
# Build the character-level tokenizer on the training texts.
# Fit the character vocabulary on the training texts only.
tokenizer = char_level_tokenizer(train_texts)
print("Tokenizer vocabulary size:", len(tokenizer.word_index) + 1)

# Bag-of-characters features for each split (train, validation, test).
X_train, X_val, X_test = (
    texts_to_bow(tokenizer, split_texts)
    for split_texts in (train_texts, val_texts, test_texts)
)

# One-hot targets for each split.
y_train, y_val, y_test = (
    one_hot_encode(split_labels)
    for split_labels in (train_labels, val_labels, test_labels)
)

# -------------------------------
# Model Setup
# -------------------------------
# Input width follows the bag-of-characters vector length.
size_input = X_train.shape[1]
# Layer widths. size_hidden4 is passed to MLP but no layer is built from it.
size_hidden1, size_hidden2, size_hidden3, size_hidden4 = 128, 64, 32, 16
size_output = 2

# Instantiate the MLP model.
model = MLP(size_input, size_hidden1, size_hidden2, size_hidden3, size_hidden4, size_output, device=None)

# Define the optimizer.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)

# -------------------------------
# Training Parameters and Loop
# -------------------------------
batch_size = 256
epochs = 10
num_batches = int(np.ceil(X_train.shape[0] / batch_size))

print("\nStarting training...\n")
for epoch in range(epochs):
    # Reshuffle once per epoch. X_train / y_train are rebound to permuted
    # copies using the same index array, so rows and labels stay aligned.
    indices = np.arange(X_train.shape[0])
    np.random.shuffle(indices)
    X_train = X_train[indices]
    y_train = y_train[indices]

    epoch_loss = 0
    for i in range(num_batches):
        start = i * batch_size
        end = min((i+1) * batch_size, X_train.shape[0])  # final batch may be short
        X_batch = X_train[start:end]
        y_batch = y_train[start:end]

        # Record ONE forward pass and reuse it for both the reported loss and
        # the gradients. Previously the batch was pushed through the network
        # twice (once for loss_value, once more inside model.backward).
        with tf.GradientTape() as tape:
            predictions = model.forward(X_batch)
            loss_value = model.loss(predictions, y_batch)
        grads = tape.gradient(loss_value, model.variables)
        optimizer.apply_gradients(zip(grads, model.variables))
        # Weight by batch size so the epoch figure is a per-sample average
        # (model.loss uses the Keras loss object's default reduction).
        epoch_loss += loss_value.numpy() * (end - start)

    epoch_loss /= X_train.shape[0]

    # Evaluate on the validation set with a single full-batch forward pass.
    val_logits = model.forward(X_val)
    val_loss = model.loss(val_logits, y_val).numpy()
    val_preds = np.argmax(val_logits.numpy(), axis=1)
    true_val = np.argmax(y_val, axis=1)
    accuracy = np.mean(val_preds == true_val)
    precision = precision_score(true_val, val_preds)
    recall = recall_score(true_val, val_preds)

    print(f"Epoch {epoch+1:02d} | Training Loss: {epoch_loss:.4f} | Val Loss: {val_loss:.4f} | "
          f"Accuracy: {accuracy:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f}")

# -------------------------------
# Final Evaluation on Test Set
# -------------------------------
print("\nEvaluating on test set...")
# One forward pass over the full test matrix; the logits feed both the loss
# and the arg-max class predictions.
test_logits = model.forward(X_test)
test_loss = model.loss(test_logits, y_test).numpy()

# Collapse logits and one-hot targets to class indices for the metrics.
test_preds = np.argmax(test_logits.numpy(), axis=1)
true_test = np.argmax(y_test, axis=1)

test_accuracy = np.equal(test_preds, true_test).mean()
test_precision = precision_score(true_test, test_preds)
test_recall = recall_score(true_test, test_preds)

test_report = (
    f"Test Loss: {test_loss:.4f} | Test Accuracy: {test_accuracy:.4f} | "
    f"Test Precision: {test_precision:.4f} | Test Recall: {test_recall:.4f}"
)
print(test_report)


Loading IMDB dataset...
Train samples: 20000, Validation samples: 5000, Test samples: 25000
Tokenizer vocabulary size: 134

Starting training...

Epoch 01 | Training Loss: 0.6791 | Val Loss: 0.6699 | Accuracy: 0.5862 | Precision: 0.6099 | Recall: 0.4064
Epoch 02 | Training Loss: 0.6647 | Val Loss: 0.6628 | Accuracy: 0.6104 | Precision: 0.5885 | Recall: 0.6531
Epoch 03 | Training Loss: 0.6605 | Val Loss: 0.6611 | Accuracy: 0.6124 | Precision: 0.5894 | Recall: 0.6609
Epoch 04 | Training Loss: 0.6583 | Val Loss: 0.6605 | Accuracy: 0.6092 | Precision: 0.5880 | Recall: 0.6477
Epoch 05 | Training Loss: 0.6557 | Val Loss: 0.6591 | Accuracy: 0.6100 | Precision: 0.5925 | Recall: 0.6262
Epoch 06 | Training Loss: 0.6541 | Val Loss: 0.6612 | Accuracy: 0.6056 | Precision: 0.5776 | Recall: 0.6943
Epoch 07 | Training Loss: 0.6532 | Val Loss: 0.6589 | Accuracy: 0.6098 | Precision: 0.5971 | Recall: 0.5998
Epoch 08 | Training Loss: 0.6496 | Val Loss: 0.6591 | Accuracy: 0.6048 | Precision: 0.5871 | Recal

In [None]:
# Seed 655.
# Learning rate 0.001, ReLU activation, Adam optimizer, batch size = 512.

import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score

tf.random.set_seed(655)
np.random.seed(655)
# -------------------------------
# Original MLP Class Definition
# -------------------------------
class MLP(object):
    """Fully connected network: two ReLU hidden layers followed by a linear logits layer.

    All parameters are plain ``tf.Variable`` objects collected in ``self.variables``.
    """

    def __init__(self, size_input, size_hidden1, size_hidden2, size_hidden3, size_output, device=None):
        """
        size_input: int, size of input layer
        size_hidden1: int, size of the 1st hidden layer
        size_hidden2: int, size of the 2nd hidden layer
        size_hidden3: int, stored on the instance but no layer is built from it
        size_output: int, size of output layer
        device: str or None, either 'cpu' or 'gpu' or None.
        """
        self.device = device
        self.size_input = size_input
        self.size_hidden1 = size_hidden1
        self.size_hidden2 = size_hidden2
        self.size_hidden3 = size_hidden3  # kept for callers; unused by the network
        self.size_output = size_output

        # Weights are drawn from N(0, 0.1^2); biases start at zero.
        # The random draws happen in the order W1, W2, W3.

        # Input -> hidden 1
        self.W1 = tf.Variable(tf.random.normal([size_input, size_hidden1], stddev=0.1))
        self.b1 = tf.Variable(tf.zeros([1, size_hidden1]))

        # Hidden 1 -> hidden 2
        self.W2 = tf.Variable(tf.random.normal([size_hidden1, size_hidden2], stddev=0.1))
        self.b2 = tf.Variable(tf.zeros([1, size_hidden2]))

        # Hidden 2 -> output logits
        self.W3 = tf.Variable(tf.random.normal([size_hidden2, size_output], stddev=0.1))
        self.b3 = tf.Variable(tf.zeros([1, size_output]))

        # Trainable parameters, in the order gradients are returned by backward().
        self.variables = [self.W1, self.W2, self.W3,
                          self.b1, self.b2, self.b3]

    def forward(self, X):
        """
        Run the forward pass and cache the result on ``self.y``.
        X: Tensor or array, inputs.
        Returns the logits produced by compute_output.
        """
        if self.device is None:
            # No explicit placement requested: let TensorFlow choose.
            self.y = self.compute_output(X)
            return self.y

        # Anything other than 'gpu' falls back to the CPU.
        placement = 'gpu:0' if self.device == 'gpu' else 'cpu'
        with tf.device(placement):
            self.y = self.compute_output(X)
        return self.y

    def loss(self, y_pred, y_true):
        """
        Categorical cross-entropy between logits and one-hot targets.
        y_pred: Tensor of shape (batch_size, size_output), raw logits
        y_true: Tensor of shape (batch_size, size_output)
        Returns the value produced by the Keras loss object (default reduction).
        """
        criterion = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
        targets = tf.cast(y_true, dtype=tf.float32)
        logits = tf.cast(y_pred, dtype=tf.float32)
        return criterion(targets, logits)

    def backward(self, X_train, y_train):
        """
        Backward pass: gradients of the loss with respect to ``self.variables``.
        Runs its own taped forward pass on X_train and returns the gradient list.
        """
        with tf.GradientTape() as tape:
            batch_loss = self.loss(self.forward(X_train), y_train)
        return tape.gradient(batch_loss, self.variables)

    def compute_output(self, X):
        """
        Compute the output logits for X: two affine+ReLU stages, then a linear layer.
        """
        # Inputs are cast to float32 to match the variables.
        activations = tf.cast(X, dtype=tf.float32)
        hidden_params = ((self.W1, self.b1), (self.W2, self.b2))
        for weights, bias in hidden_params:
            activations = tf.nn.relu(tf.matmul(activations, weights) + bias)
        # Output layer (logits): no activation here, softmax is applied inside the loss.
        return tf.matmul(activations, self.W3) + self.b3

# -------------------------------
# Character-Level Tokenizer and Preprocessing Functions
# -------------------------------
def char_level_tokenizer(texts, num_words=None):
    """
    Build a lower-casing, character-level Keras Tokenizer and fit it on ``texts``.

    Args:
        texts (list of str): Texts used to fit the tokenizer.
        num_words (int or None): Passed through to the Tokenizer as ``num_words``.

    Returns:
        The fitted Tokenizer instance.
    """
    options = dict(num_words=num_words, char_level=True, lower=True)
    fitted = tf.keras.preprocessing.text.Tokenizer(**options)
    fitted.fit_on_texts(texts)
    return fitted

def texts_to_bow(tokenizer, texts):
    """
    Turn texts into a bag-of-characters matrix.

    Args:
        tokenizer: A fitted character-level Tokenizer.
        texts (list of str): Texts to vectorize.

    Returns:
        Whatever ``tokenizer.texts_to_matrix(texts, mode='binary')`` returns:
        one fixed-length presence/absence row per text.
    """
    # 'binary' mode marks presence only; character counts are discarded.
    return tokenizer.texts_to_matrix(texts, mode='binary')

def one_hot_encode(labels, num_classes=2):
    """
    Map integer class labels to one-hot rows.

    Each label selects the matching row of a ``num_classes`` x ``num_classes``
    identity matrix, so the result has one float row per label.
    """
    identity = np.identity(num_classes)
    return identity[labels]

# -------------------------------
# Load and Prepare the IMDB Dataset
# -------------------------------
print("Loading IMDB dataset...")
# 'as_supervised' yields (text, label) pairs; 'with_info' additionally returns dataset metadata.
imdb_splits, ds_info = tfds.load('imdb_reviews',
                                 split=['train', 'test'],
                                 as_supervised=True,
                                 with_info=True)
ds_train, ds_test = imdb_splits

# Materialize the training split as Python lists.
train_texts, train_labels = [], []
for text, label in tfds.as_numpy(ds_train):
    # Reviews arrive as byte strings; decode them to str.
    train_texts.append(text.decode('utf-8'))
    train_labels.append(label)
train_labels = np.array(train_labels)

# Hold out 20% of the training data for validation (fixed split seed).
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_texts, train_labels, test_size=0.2, random_state=42)

# Materialize the test split the same way.
test_texts, test_labels = [], []
for text, label in tfds.as_numpy(ds_test):
    test_texts.append(text.decode('utf-8'))
    test_labels.append(label)
test_labels = np.array(test_labels)

print(f"Train samples: {len(train_texts)}, Validation samples: {len(val_texts)}, Test samples: {len(test_texts)}")

# -------------------------------
# Preprocessing: Tokenization and Vectorization
# -------------------------------
# Build the character-level tokenizer on the training texts.
# Fit the character vocabulary on the training texts only.
tokenizer = char_level_tokenizer(train_texts)
print("Tokenizer vocabulary size:", len(tokenizer.word_index) + 1)

# Bag-of-characters features for each split (train, validation, test).
X_train, X_val, X_test = (
    texts_to_bow(tokenizer, split_texts)
    for split_texts in (train_texts, val_texts, test_texts)
)

# One-hot targets for each split.
y_train, y_val, y_test = (
    one_hot_encode(split_labels)
    for split_labels in (train_labels, val_labels, test_labels)
)

# -------------------------------
# Model Setup
# -------------------------------
# Input width follows the bag-of-characters vector length.
size_input = X_train.shape[1]
# Layer widths. size_hidden3 is a placeholder: MLP stores it but builds no layer from it.
size_hidden1, size_hidden2, size_hidden3 = 128, 64, 32
size_output = 2

# Instantiate the MLP model.
model = MLP(size_input, size_hidden1, size_hidden2, size_hidden3, size_output, device=None)

# Define the optimizer.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

# -------------------------------
# Training Parameters and Loop
# -------------------------------
batch_size = 512
epochs = 10
num_batches = int(np.ceil(X_train.shape[0] / batch_size))

print("\nStarting training...\n")
for epoch in range(epochs):
    # Reshuffle once per epoch. X_train / y_train are rebound to permuted
    # copies using the same index array, so rows and labels stay aligned.
    indices = np.arange(X_train.shape[0])
    np.random.shuffle(indices)
    X_train = X_train[indices]
    y_train = y_train[indices]

    epoch_loss = 0
    for i in range(num_batches):
        start = i * batch_size
        end = min((i+1) * batch_size, X_train.shape[0])  # final batch may be short
        X_batch = X_train[start:end]
        y_batch = y_train[start:end]

        # Record ONE forward pass and reuse it for both the reported loss and
        # the gradients. Previously the batch was pushed through the network
        # twice (once for loss_value, once more inside model.backward).
        with tf.GradientTape() as tape:
            predictions = model.forward(X_batch)
            loss_value = model.loss(predictions, y_batch)
        grads = tape.gradient(loss_value, model.variables)
        optimizer.apply_gradients(zip(grads, model.variables))
        # Weight by batch size so the epoch figure is a per-sample average
        # (model.loss uses the Keras loss object's default reduction).
        epoch_loss += loss_value.numpy() * (end - start)

    epoch_loss /= X_train.shape[0]

    # Evaluate on the validation set with a single full-batch forward pass.
    val_logits = model.forward(X_val)
    val_loss = model.loss(val_logits, y_val).numpy()
    val_preds = np.argmax(val_logits.numpy(), axis=1)
    true_val = np.argmax(y_val, axis=1)
    accuracy = np.mean(val_preds == true_val)
    precision = precision_score(true_val, val_preds)
    recall = recall_score(true_val, val_preds)

    print(f"Epoch {epoch+1:02d} | Training Loss: {epoch_loss:.4f} | Val Loss: {val_loss:.4f} | "
          f"Accuracy: {accuracy:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f}")

# -------------------------------
# Final Evaluation on Test Set
# -------------------------------
print("\nEvaluating on test set...")
# One forward pass over the full test matrix; the logits feed both the loss
# and the arg-max class predictions.
test_logits = model.forward(X_test)
test_loss = model.loss(test_logits, y_test).numpy()

# Collapse logits and one-hot targets to class indices for the metrics.
test_preds = np.argmax(test_logits.numpy(), axis=1)
true_test = np.argmax(y_test, axis=1)

test_accuracy = np.equal(test_preds, true_test).mean()
test_precision = precision_score(true_test, test_preds)
test_recall = recall_score(true_test, test_preds)

test_report = (
    f"Test Loss: {test_loss:.4f} | Test Accuracy: {test_accuracy:.4f} | "
    f"Test Precision: {test_precision:.4f} | Test Recall: {test_recall:.4f}"
)
print(test_report)


Loading IMDB dataset...
Train samples: 20000, Validation samples: 5000, Test samples: 25000
Tokenizer vocabulary size: 134

Starting training...

Epoch 01 | Training Loss: 0.6792 | Val Loss: 0.6682 | Accuracy: 0.5958 | Precision: 0.6053 | Recall: 0.4777
Epoch 02 | Training Loss: 0.6665 | Val Loss: 0.6636 | Accuracy: 0.6098 | Precision: 0.5943 | Recall: 0.6151
Epoch 03 | Training Loss: 0.6591 | Val Loss: 0.6617 | Accuracy: 0.6144 | Precision: 0.5899 | Recall: 0.6712
Epoch 04 | Training Loss: 0.6578 | Val Loss: 0.6609 | Accuracy: 0.6140 | Precision: 0.5890 | Recall: 0.6741
Epoch 05 | Training Loss: 0.6554 | Val Loss: 0.6603 | Accuracy: 0.6122 | Precision: 0.5937 | Recall: 0.6337
Epoch 06 | Training Loss: 0.6576 | Val Loss: 0.6603 | Accuracy: 0.6068 | Precision: 0.5821 | Recall: 0.6700
Epoch 07 | Training Loss: 0.6522 | Val Loss: 0.6605 | Accuracy: 0.6104 | Precision: 0.5840 | Recall: 0.6828
Epoch 08 | Training Loss: 0.6508 | Val Loss: 0.6595 | Accuracy: 0.6120 | Precision: 0.5975 | Recal

In [None]:
# Seed number: 655
# Learning rate of 0.0001, ReLU activation function, Adam optimizer, batch size = 512

import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score

tf.random.set_seed(655)
np.random.seed(655)
# -------------------------------
# Original MLP Class Definition
# -------------------------------
class MLP(object):
    def __init__(self, size_input, size_hidden1, size_hidden2, size_hidden3, size_output, device=None):
        """
        Set up a perceptron with two hidden layers and hand-managed parameters.

        size_input: int, width of the input layer
        size_hidden1: int, width of the 1st hidden layer
        size_hidden2: int, width of the 2nd hidden layer
        size_hidden3: int, recorded on the instance but no layer is built from it
        size_output: int, width of the output layer
        device: 'cpu', 'gpu' or None; with None, forward() opens no tf.device scope
        """
        self.device = device
        self.size_input = size_input
        self.size_hidden1 = size_hidden1
        self.size_hidden2 = size_hidden2
        self.size_hidden3 = size_hidden3  # stored only; the forward pass never reads it
        self.size_output = size_output

        # NOTE: keep the layers in this order (W1, W2, W3). With a global seed set,
        # the initial values depend on the order of the tf.random.normal calls.
        self.W1, self.b1 = self._dense_params(self.size_input, self.size_hidden1)
        self.W2, self.b2 = self._dense_params(self.size_hidden1, self.size_hidden2)
        self.W3, self.b3 = self._dense_params(self.size_hidden2, self.size_output)

        # Trainable parameters, in the order gradients are requested and applied.
        self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]

    @staticmethod
    def _dense_params(n_in, n_out):
        """Create one layer's parameters: N(0, 0.1) weights of shape [n_in, n_out] and a zero bias row."""
        weights = tf.Variable(tf.random.normal([n_in, n_out], stddev=0.1))
        bias = tf.Variable(tf.zeros([1, n_out]))
        return weights, bias

    def forward(self, X):
        """
        Run compute_output on X, inside a tf.device scope when a device was given.

        X: Tensor (or array), inputs.
        The result is also stored on self.y before being returned.
        """
        if self.device is None:
            self.y = self.compute_output(X)
            return self.y

        target = 'gpu:0' if self.device == 'gpu' else 'cpu'
        with tf.device(target):
            self.y = self.compute_output(X)
        return self.y

    def loss(self, y_pred, y_true):
        """
        Categorical cross-entropy between logits and targets.

        y_pred: Tensor of shape (batch_size, size_output), treated as logits
        y_true: Tensor of shape (batch_size, size_output)
        Returns the loss tensor produced by the Keras loss object with its default reduction.
        """
        targets = tf.cast(y_true, dtype=tf.float32)
        logits = tf.cast(y_pred, dtype=tf.float32)
        criterion = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
        return criterion(targets, logits)

    def backward(self, X_train, y_train):
        """
        Return the gradients of the loss with respect to self.variables.

        The forward pass and the loss are recorded on a GradientTape; the
        gradients come back in the same order as self.variables.
        """
        with tf.GradientTape() as tape:
            current_loss = self.loss(self.forward(X_train), y_train)
        return tape.gradient(current_loss, self.variables)

    def compute_output(self, X):
        """
        Compute the output logits: two ReLU hidden layers followed by a linear layer.
        """
        # Inputs are cast to float32 before the first matmul.
        activations = tf.cast(X, dtype=tf.float32)
        for weights, bias in ((self.W1, self.b1), (self.W2, self.b2)):
            activations = tf.nn.relu(tf.matmul(activations, weights) + bias)
        # No activation on the last layer: the loss expects raw logits.
        return tf.matmul(activations, self.W3) + self.b3

# -------------------------------
# Character-Level Tokenizer and Preprocessing Functions
# -------------------------------
def char_level_tokenizer(texts, num_words=None):
    """
    Fit a Keras Tokenizer that treats every character as a token.

    Args:
        texts (list of str): Corpus the tokenizer is fitted on.
        num_words (int or None): Forwarded to Tokenizer as its num_words limit.

    Returns:
        The fitted Tokenizer (char_level=True, lower=True).
    """
    tokenizer_options = dict(num_words=num_words, char_level=True, lower=True)
    fitted = tf.keras.preprocessing.text.Tokenizer(**tokenizer_options)
    fitted.fit_on_texts(texts)
    return fitted

def texts_to_bow(tokenizer, texts):
    """
    Vectorise texts as a bag-of-characters matrix.

    Args:
        tokenizer: A fitted character-level Tokenizer.
        texts (list of str): Texts to convert.

    Returns:
        Whatever ``tokenizer.texts_to_matrix`` yields in 'binary' mode
        (one fixed-length presence vector per text).
    """
    return tokenizer.texts_to_matrix(texts, mode='binary')

def one_hot_encode(labels, num_classes=2):
    """
    Convert integer class labels to one-hot encoded rows.

    Args:
        labels: Integer class ids (scalar, list, tuple or numpy array).
        num_classes (int): Number of classes, i.e. the width of each one-hot row.

    Returns:
        Float numpy array; for integer labels its shape is
        ``labels.shape + (num_classes,)``.

    Raises:
        IndexError: If an integer label lies outside ``[0, num_classes)``.
    """
    # Normalise to an array first: indexing with a raw tuple would be read as a
    # multi-axis index (e.g. (0, 1) -> eye[0, 1]) instead of a list of labels.
    label_ids = np.asarray(labels)

    if label_ids.size == 0:
        # An empty array defaults to a float dtype, which is not a valid index.
        return np.zeros(label_ids.shape + (num_classes,))

    if np.issubdtype(label_ids.dtype, np.integer):
        lowest, highest = label_ids.min(), label_ids.max()
        if lowest < 0 or highest >= num_classes:
            # Negative ids would otherwise wrap around to the last classes silently.
            raise IndexError(
                f"labels must be in [0, {num_classes}); got range [{lowest}, {highest}]"
            )

    return np.eye(num_classes)[label_ids]

# -------------------------------
# Load and Prepare the IMDB Dataset
# -------------------------------
print("Loading IMDB dataset...")
# as_supervised=True makes each example a (text, label) pair; with_info=True also returns metadata.
(ds_train, ds_test), ds_info = tfds.load(
    'imdb_reviews',
    split=['train', 'test'],
    as_supervised=True,
    with_info=True,
)

# Materialise the training split: reviews are decoded from bytes to str, labels go into an array.
train_pairs = [(raw.decode('utf-8'), target) for raw, target in tfds.as_numpy(ds_train)]
train_texts = [review for review, _ in train_pairs]
train_labels = np.array([target for _, target in train_pairs])

# Hold out 20% of the training reviews for validation (fixed split seed).
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_texts, train_labels, test_size=0.2, random_state=42)

# Materialise the test split the same way.
test_pairs = [(raw.decode('utf-8'), target) for raw, target in tfds.as_numpy(ds_test)]
test_texts = [review for review, _ in test_pairs]
test_labels = np.array([target for _, target in test_pairs])

print(f"Train samples: {len(train_texts)}, Validation samples: {len(val_texts)}, Test samples: {len(test_texts)}")

# -------------------------------
# Preprocessing: Tokenization and Vectorization
# -------------------------------
# Fit the character vocabulary on the training reviews only.
tokenizer = char_level_tokenizer(train_texts)
print("Tokenizer vocabulary size:", len(tokenizer.word_index) + 1)

# Vectorise every split as a bag-of-characters matrix (train, validation, test).
X_train, X_val, X_test = (
    texts_to_bow(tokenizer, split_texts)
    for split_texts in (train_texts, val_texts, test_texts)
)

# One-hot encode the labels of every split (train, validation, test).
y_train, y_val, y_test = (
    one_hot_encode(split_labels)
    for split_labels in (train_labels, val_labels, test_labels)
)

# -------------------------------
# Model Setup
# -------------------------------
# Input width equals the length of one bag-of-characters vector.
size_input = X_train.shape[1]
size_hidden1, size_hidden2 = 128, 64
size_hidden3 = 32  # placeholder: handed to MLP, but no layer is built from it
size_output = 2

# Build the network, then the optimizer that will update its variables.
model = MLP(size_input, size_hidden1, size_hidden2, size_hidden3, size_output, device=None)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)

# -------------------------------
# Training Parameters and Loop
# -------------------------------
batch_size = 512
epochs = 10
# Round up so a final, smaller batch is still visited.
num_batches = int(np.ceil(X_train.shape[0] / batch_size))

print("\nStarting training...\n")
for epoch in range(epochs):
    # Shuffle training data at the start of each epoch (features and labels together).
    indices = np.arange(X_train.shape[0])
    np.random.shuffle(indices)
    X_train = X_train[indices]
    y_train = y_train[indices]

    epoch_loss = 0
    for i in range(num_batches):
        start = i * batch_size
        end = min((i+1) * batch_size, X_train.shape[0])
        X_batch = X_train[start:end]
        y_batch = y_train[start:end]

        # Run the forward pass and the loss once, under the tape, so the logged
        # loss and the gradients come from the same computation. Calling
        # model.forward/model.loss and then model.backward would evaluate the
        # network twice per batch.
        with tf.GradientTape() as tape:
            predictions = model.forward(X_batch)
            loss_value = model.loss(predictions, y_batch)
        grads = tape.gradient(loss_value, model.variables)
        optimizer.apply_gradients(zip(grads, model.variables))

        # Weight the batch loss by the batch size so the division below yields a
        # per-sample average (assumes model.loss returns a per-batch mean).
        epoch_loss += loss_value.numpy() * (end - start)

    epoch_loss /= X_train.shape[0]

    # Evaluate on validation set.
    val_logits = model.forward(X_val)
    val_loss = model.loss(val_logits, y_val).numpy()
    # Class ids are the positions of the largest logit / one-hot entry in each row.
    val_preds = np.argmax(val_logits.numpy(), axis=1)
    true_val = np.argmax(y_val, axis=1)
    accuracy = np.mean(val_preds == true_val)
    precision = precision_score(true_val, val_preds)
    recall = recall_score(true_val, val_preds)

    print(f"Epoch {epoch+1:02d} | Training Loss: {epoch_loss:.4f} | Val Loss: {val_loss:.4f} | "
          f"Accuracy: {accuracy:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f}")

# -------------------------------
# Final Evaluation on Test Set
# -------------------------------
print("\nEvaluating on test set...")
# One forward pass over the whole test matrix; the loss is read back as a numpy value.
test_logits = model.forward(X_test)
test_loss = model.loss(test_logits, y_test).numpy()

# Class ids are the positions of the largest logit / one-hot entry in each row.
test_preds = test_logits.numpy().argmax(axis=1)
true_test = np.argmax(y_test, axis=1)

test_accuracy = (test_preds == true_test).mean()
test_precision = precision_score(true_test, test_preds)
test_recall = recall_score(true_test, test_preds)

test_report = (f"Test Loss: {test_loss:.4f} | Test Accuracy: {test_accuracy:.4f} | "
               f"Test Precision: {test_precision:.4f} | Test Recall: {test_recall:.4f}")
print(test_report)


Loading IMDB dataset...
Train samples: 20000, Validation samples: 5000, Test samples: 25000
Tokenizer vocabulary size: 134

Starting training...

Epoch 01 | Training Loss: 0.6946 | Val Loss: 0.6866 | Accuracy: 0.5476 | Precision: 0.5359 | Recall: 0.4992
Epoch 02 | Training Loss: 0.6835 | Val Loss: 0.6830 | Accuracy: 0.5714 | Precision: 0.5571 | Recall: 0.5656
Epoch 03 | Training Loss: 0.6791 | Val Loss: 0.6798 | Accuracy: 0.5788 | Precision: 0.5536 | Recall: 0.6770
Epoch 04 | Training Loss: 0.6749 | Val Loss: 0.6751 | Accuracy: 0.5910 | Precision: 0.5675 | Recall: 0.6572
Epoch 05 | Training Loss: 0.6705 | Val Loss: 0.6722 | Accuracy: 0.5990 | Precision: 0.5713 | Recall: 0.6922
Epoch 06 | Training Loss: 0.6674 | Val Loss: 0.6694 | Accuracy: 0.6010 | Precision: 0.5745 | Recall: 0.6828
Epoch 07 | Training Loss: 0.6648 | Val Loss: 0.6686 | Accuracy: 0.6022 | Precision: 0.5722 | Recall: 0.7108
Epoch 08 | Training Loss: 0.6630 | Val Loss: 0.6668 | Accuracy: 0.6056 | Precision: 0.5785 | Recal

In [None]:
# Seed number: 655
# Learning rate of 0.0001, LeakyReLU activation function, Adam optimizer, batch size = 512

import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score

tf.random.set_seed(655)
np.random.seed(655)
# -------------------------------
# Original MLP Class Definition
# -------------------------------
class MLP(object):
    def __init__(self, size_input, size_hidden1, size_hidden2, size_hidden3, size_output, device=None):
        """
        Set up a perceptron with two hidden layers and hand-managed parameters.

        size_input: int, width of the input layer
        size_hidden1: int, width of the 1st hidden layer
        size_hidden2: int, width of the 2nd hidden layer
        size_hidden3: int, recorded on the instance but no layer is built from it
        size_output: int, width of the output layer
        device: 'cpu', 'gpu' or None; with None, forward() opens no tf.device scope
        """
        self.device = device
        self.size_input = size_input
        self.size_hidden1 = size_hidden1
        self.size_hidden2 = size_hidden2
        self.size_hidden3 = size_hidden3  # stored only; the forward pass never reads it
        self.size_output = size_output

        # NOTE: keep the layers in this order (W1, W2, W3). With a global seed set,
        # the initial values depend on the order of the tf.random.normal calls.
        self.W1, self.b1 = self._dense_params(self.size_input, self.size_hidden1)
        self.W2, self.b2 = self._dense_params(self.size_hidden1, self.size_hidden2)
        self.W3, self.b3 = self._dense_params(self.size_hidden2, self.size_output)

        # Trainable parameters, in the order gradients are requested and applied.
        self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]

    @staticmethod
    def _dense_params(n_in, n_out):
        """Create one layer's parameters: N(0, 0.1) weights of shape [n_in, n_out] and a zero bias row."""
        weights = tf.Variable(tf.random.normal([n_in, n_out], stddev=0.1))
        bias = tf.Variable(tf.zeros([1, n_out]))
        return weights, bias

    def forward(self, X):
        """
        Run compute_output on X, inside a tf.device scope when a device was given.

        X: Tensor (or array), inputs.
        The result is also stored on self.y before being returned.
        """
        if self.device is None:
            self.y = self.compute_output(X)
            return self.y

        target = 'gpu:0' if self.device == 'gpu' else 'cpu'
        with tf.device(target):
            self.y = self.compute_output(X)
        return self.y

    def loss(self, y_pred, y_true):
        """
        Categorical cross-entropy between logits and targets.

        y_pred: Tensor of shape (batch_size, size_output), treated as logits
        y_true: Tensor of shape (batch_size, size_output)
        Returns the loss tensor produced by the Keras loss object with its default reduction.
        """
        targets = tf.cast(y_true, dtype=tf.float32)
        logits = tf.cast(y_pred, dtype=tf.float32)
        criterion = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
        return criterion(targets, logits)

    def backward(self, X_train, y_train):
        """
        Return the gradients of the loss with respect to self.variables.

        The forward pass and the loss are recorded on a GradientTape; the
        gradients come back in the same order as self.variables.
        """
        with tf.GradientTape() as tape:
            current_loss = self.loss(self.forward(X_train), y_train)
        return tape.gradient(current_loss, self.variables)

    def compute_output(self, X):
        """
        Compute the output logits: two leaky-ReLU hidden layers followed by a linear layer.
        """
        # Inputs are cast to float32 before the first matmul.
        activations = tf.cast(X, dtype=tf.float32)
        for weights, bias in ((self.W1, self.b1), (self.W2, self.b2)):
            activations = tf.nn.leaky_relu(tf.matmul(activations, weights) + bias)
        # No activation on the last layer: the loss expects raw logits.
        return tf.matmul(activations, self.W3) + self.b3

# -------------------------------
# Character-Level Tokenizer and Preprocessing Functions
# -------------------------------
def char_level_tokenizer(texts, num_words=None):
    """
    Fit a Keras Tokenizer that treats every character as a token.

    Args:
        texts (list of str): Corpus the tokenizer is fitted on.
        num_words (int or None): Forwarded to Tokenizer as its num_words limit.

    Returns:
        The fitted Tokenizer (char_level=True, lower=True).
    """
    tokenizer_options = dict(num_words=num_words, char_level=True, lower=True)
    fitted = tf.keras.preprocessing.text.Tokenizer(**tokenizer_options)
    fitted.fit_on_texts(texts)
    return fitted

def texts_to_bow(tokenizer, texts):
    """
    Vectorise texts as a bag-of-characters matrix.

    Args:
        tokenizer: A fitted character-level Tokenizer.
        texts (list of str): Texts to convert.

    Returns:
        Whatever ``tokenizer.texts_to_matrix`` yields in 'binary' mode
        (one fixed-length presence vector per text).
    """
    return tokenizer.texts_to_matrix(texts, mode='binary')

def one_hot_encode(labels, num_classes=2):
    """
    Convert integer class labels to one-hot encoded rows.

    Args:
        labels: Integer class ids (scalar, list, tuple or numpy array).
        num_classes (int): Number of classes, i.e. the width of each one-hot row.

    Returns:
        Float numpy array; for integer labels its shape is
        ``labels.shape + (num_classes,)``.

    Raises:
        IndexError: If an integer label lies outside ``[0, num_classes)``.
    """
    # Normalise to an array first: indexing with a raw tuple would be read as a
    # multi-axis index (e.g. (0, 1) -> eye[0, 1]) instead of a list of labels.
    label_ids = np.asarray(labels)

    if label_ids.size == 0:
        # An empty array defaults to a float dtype, which is not a valid index.
        return np.zeros(label_ids.shape + (num_classes,))

    if np.issubdtype(label_ids.dtype, np.integer):
        lowest, highest = label_ids.min(), label_ids.max()
        if lowest < 0 or highest >= num_classes:
            # Negative ids would otherwise wrap around to the last classes silently.
            raise IndexError(
                f"labels must be in [0, {num_classes}); got range [{lowest}, {highest}]"
            )

    return np.eye(num_classes)[label_ids]

# -------------------------------
# Load and Prepare the IMDB Dataset
# -------------------------------
print("Loading IMDB dataset...")
# as_supervised=True makes each example a (text, label) pair; with_info=True also returns metadata.
(ds_train, ds_test), ds_info = tfds.load(
    'imdb_reviews',
    split=['train', 'test'],
    as_supervised=True,
    with_info=True,
)

# Materialise the training split: reviews are decoded from bytes to str, labels go into an array.
train_pairs = [(raw.decode('utf-8'), target) for raw, target in tfds.as_numpy(ds_train)]
train_texts = [review for review, _ in train_pairs]
train_labels = np.array([target for _, target in train_pairs])

# Hold out 20% of the training reviews for validation (fixed split seed).
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_texts, train_labels, test_size=0.2, random_state=42)

# Materialise the test split the same way.
test_pairs = [(raw.decode('utf-8'), target) for raw, target in tfds.as_numpy(ds_test)]
test_texts = [review for review, _ in test_pairs]
test_labels = np.array([target for _, target in test_pairs])

print(f"Train samples: {len(train_texts)}, Validation samples: {len(val_texts)}, Test samples: {len(test_texts)}")

# -------------------------------
# Preprocessing: Tokenization and Vectorization
# -------------------------------
# Fit the character vocabulary on the training reviews only.
tokenizer = char_level_tokenizer(train_texts)
print("Tokenizer vocabulary size:", len(tokenizer.word_index) + 1)

# Vectorise every split as a bag-of-characters matrix (train, validation, test).
X_train, X_val, X_test = (
    texts_to_bow(tokenizer, split_texts)
    for split_texts in (train_texts, val_texts, test_texts)
)

# One-hot encode the labels of every split (train, validation, test).
y_train, y_val, y_test = (
    one_hot_encode(split_labels)
    for split_labels in (train_labels, val_labels, test_labels)
)

# -------------------------------
# Model Setup
# -------------------------------
# Input width equals the length of one bag-of-characters vector.
size_input = X_train.shape[1]
size_hidden1, size_hidden2 = 128, 64
size_hidden3 = 32  # placeholder: handed to MLP, but no layer is built from it
size_output = 2

# Build the network, then the optimizer that will update its variables.
model = MLP(size_input, size_hidden1, size_hidden2, size_hidden3, size_output, device=None)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)

# -------------------------------
# Training Parameters and Loop
# -------------------------------
batch_size = 512
epochs = 10
# Round up so a final, smaller batch is still visited.
num_batches = int(np.ceil(X_train.shape[0] / batch_size))

print("\nStarting training...\n")
for epoch in range(epochs):
    # Shuffle training data at the start of each epoch (features and labels together).
    indices = np.arange(X_train.shape[0])
    np.random.shuffle(indices)
    X_train = X_train[indices]
    y_train = y_train[indices]

    epoch_loss = 0
    for i in range(num_batches):
        start = i * batch_size
        end = min((i+1) * batch_size, X_train.shape[0])
        X_batch = X_train[start:end]
        y_batch = y_train[start:end]

        # Run the forward pass and the loss once, under the tape, so the logged
        # loss and the gradients come from the same computation. Calling
        # model.forward/model.loss and then model.backward would evaluate the
        # network twice per batch.
        with tf.GradientTape() as tape:
            predictions = model.forward(X_batch)
            loss_value = model.loss(predictions, y_batch)
        grads = tape.gradient(loss_value, model.variables)
        optimizer.apply_gradients(zip(grads, model.variables))

        # Weight the batch loss by the batch size so the division below yields a
        # per-sample average (assumes model.loss returns a per-batch mean).
        epoch_loss += loss_value.numpy() * (end - start)

    epoch_loss /= X_train.shape[0]

    # Evaluate on validation set.
    val_logits = model.forward(X_val)
    val_loss = model.loss(val_logits, y_val).numpy()
    # Class ids are the positions of the largest logit / one-hot entry in each row.
    val_preds = np.argmax(val_logits.numpy(), axis=1)
    true_val = np.argmax(y_val, axis=1)
    accuracy = np.mean(val_preds == true_val)
    precision = precision_score(true_val, val_preds)
    recall = recall_score(true_val, val_preds)

    print(f"Epoch {epoch+1:02d} | Training Loss: {epoch_loss:.4f} | Val Loss: {val_loss:.4f} | "
          f"Accuracy: {accuracy:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f}")

# -------------------------------
# Final Evaluation on Test Set
# -------------------------------
print("\nEvaluating on test set...")
# One forward pass over the whole test matrix; the loss is read back as a numpy value.
test_logits = model.forward(X_test)
test_loss = model.loss(test_logits, y_test).numpy()

# Class ids are the positions of the largest logit / one-hot entry in each row.
test_preds = test_logits.numpy().argmax(axis=1)
true_test = np.argmax(y_test, axis=1)

test_accuracy = (test_preds == true_test).mean()
test_precision = precision_score(true_test, test_preds)
test_recall = recall_score(true_test, test_preds)

test_report = (f"Test Loss: {test_loss:.4f} | Test Accuracy: {test_accuracy:.4f} | "
               f"Test Precision: {test_precision:.4f} | Test Recall: {test_recall:.4f}")
print(test_report)


Loading IMDB dataset...
Train samples: 20000, Validation samples: 5000, Test samples: 25000
Tokenizer vocabulary size: 134

Starting training...

Epoch 01 | Training Loss: 0.6935 | Val Loss: 0.6872 | Accuracy: 0.5514 | Precision: 0.5326 | Recall: 0.6101
Epoch 02 | Training Loss: 0.6828 | Val Loss: 0.6817 | Accuracy: 0.5712 | Precision: 0.5582 | Recall: 0.5536
Epoch 03 | Training Loss: 0.6780 | Val Loss: 0.6786 | Accuracy: 0.5800 | Precision: 0.5553 | Recall: 0.6716
Epoch 04 | Training Loss: 0.6738 | Val Loss: 0.6744 | Accuracy: 0.5890 | Precision: 0.5645 | Recall: 0.6663
Epoch 05 | Training Loss: 0.6702 | Val Loss: 0.6716 | Accuracy: 0.5968 | Precision: 0.5690 | Recall: 0.6939
Epoch 06 | Training Loss: 0.6675 | Val Loss: 0.6689 | Accuracy: 0.6026 | Precision: 0.5748 | Recall: 0.6927
Epoch 07 | Training Loss: 0.6650 | Val Loss: 0.6679 | Accuracy: 0.6044 | Precision: 0.5741 | Recall: 0.7125
Epoch 08 | Training Loss: 0.6633 | Val Loss: 0.6658 | Accuracy: 0.6048 | Precision: 0.5787 | Recal

In [None]:
# Seed number: 655
# Learning rate of 0.0001, tanh activation function, Adam optimizer, batch size = 512

import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score

tf.random.set_seed(655)
np.random.seed(655)
# -------------------------------
# Original MLP Class Definition
# -------------------------------
class MLP(object):
    def __init__(self, size_input, size_hidden1, size_hidden2, size_hidden3, size_output, device=None):
        """
        Set up a perceptron with two hidden layers and hand-managed parameters.

        size_input: int, width of the input layer
        size_hidden1: int, width of the 1st hidden layer
        size_hidden2: int, width of the 2nd hidden layer
        size_hidden3: int, recorded on the instance but no layer is built from it
        size_output: int, width of the output layer
        device: 'cpu', 'gpu' or None; with None, forward() opens no tf.device scope
        """
        self.device = device
        self.size_input = size_input
        self.size_hidden1 = size_hidden1
        self.size_hidden2 = size_hidden2
        self.size_hidden3 = size_hidden3  # stored only; the forward pass never reads it
        self.size_output = size_output

        # NOTE: keep the layers in this order (W1, W2, W3). With a global seed set,
        # the initial values depend on the order of the tf.random.normal calls.
        self.W1, self.b1 = self._dense_params(self.size_input, self.size_hidden1)
        self.W2, self.b2 = self._dense_params(self.size_hidden1, self.size_hidden2)
        self.W3, self.b3 = self._dense_params(self.size_hidden2, self.size_output)

        # Trainable parameters, in the order gradients are requested and applied.
        self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]

    @staticmethod
    def _dense_params(n_in, n_out):
        """Create one layer's parameters: N(0, 0.1) weights of shape [n_in, n_out] and a zero bias row."""
        weights = tf.Variable(tf.random.normal([n_in, n_out], stddev=0.1))
        bias = tf.Variable(tf.zeros([1, n_out]))
        return weights, bias

    def forward(self, X):
        """
        Run compute_output on X, inside a tf.device scope when a device was given.

        X: Tensor (or array), inputs.
        The result is also stored on self.y before being returned.
        """
        if self.device is None:
            self.y = self.compute_output(X)
            return self.y

        target = 'gpu:0' if self.device == 'gpu' else 'cpu'
        with tf.device(target):
            self.y = self.compute_output(X)
        return self.y

    def loss(self, y_pred, y_true):
        """
        Categorical cross-entropy between logits and targets.

        y_pred: Tensor of shape (batch_size, size_output), treated as logits
        y_true: Tensor of shape (batch_size, size_output)
        Returns the loss tensor produced by the Keras loss object with its default reduction.
        """
        targets = tf.cast(y_true, dtype=tf.float32)
        logits = tf.cast(y_pred, dtype=tf.float32)
        criterion = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
        return criterion(targets, logits)

    def backward(self, X_train, y_train):
        """
        Return the gradients of the loss with respect to self.variables.

        The forward pass and the loss are recorded on a GradientTape; the
        gradients come back in the same order as self.variables.
        """
        with tf.GradientTape() as tape:
            current_loss = self.loss(self.forward(X_train), y_train)
        return tape.gradient(current_loss, self.variables)

    def compute_output(self, X):
        """
        Compute the output logits: two tanh hidden layers followed by a linear layer.
        """
        # Inputs are cast to float32 before the first matmul.
        activations = tf.cast(X, dtype=tf.float32)
        for weights, bias in ((self.W1, self.b1), (self.W2, self.b2)):
            activations = tf.nn.tanh(tf.matmul(activations, weights) + bias)
        # No activation on the last layer: the loss expects raw logits.
        return tf.matmul(activations, self.W3) + self.b3

# -------------------------------
# Character-Level Tokenizer and Preprocessing Functions
# -------------------------------
def char_level_tokenizer(texts, num_words=None):
    """
    Fit a Keras Tokenizer that treats every character as a token.

    Args:
        texts (list of str): Corpus the tokenizer is fitted on.
        num_words (int or None): Forwarded to Tokenizer as its num_words limit.

    Returns:
        The fitted Tokenizer (char_level=True, lower=True).
    """
    tokenizer_options = dict(num_words=num_words, char_level=True, lower=True)
    fitted = tf.keras.preprocessing.text.Tokenizer(**tokenizer_options)
    fitted.fit_on_texts(texts)
    return fitted

def texts_to_bow(tokenizer, texts):
    """
    Vectorise texts as a bag-of-characters matrix.

    Args:
        tokenizer: A fitted character-level Tokenizer.
        texts (list of str): Texts to convert.

    Returns:
        Whatever ``tokenizer.texts_to_matrix`` yields in 'binary' mode
        (one fixed-length presence vector per text).
    """
    return tokenizer.texts_to_matrix(texts, mode='binary')

def one_hot_encode(labels, num_classes=2):
    """
    Convert integer class labels to one-hot encoded rows.

    Args:
        labels: Integer class ids (scalar, list, tuple or numpy array).
        num_classes (int): Number of classes, i.e. the width of each one-hot row.

    Returns:
        Float numpy array; for integer labels its shape is
        ``labels.shape + (num_classes,)``.

    Raises:
        IndexError: If an integer label lies outside ``[0, num_classes)``.
    """
    # Normalise to an array first: indexing with a raw tuple would be read as a
    # multi-axis index (e.g. (0, 1) -> eye[0, 1]) instead of a list of labels.
    label_ids = np.asarray(labels)

    if label_ids.size == 0:
        # An empty array defaults to a float dtype, which is not a valid index.
        return np.zeros(label_ids.shape + (num_classes,))

    if np.issubdtype(label_ids.dtype, np.integer):
        lowest, highest = label_ids.min(), label_ids.max()
        if lowest < 0 or highest >= num_classes:
            # Negative ids would otherwise wrap around to the last classes silently.
            raise IndexError(
                f"labels must be in [0, {num_classes}); got range [{lowest}, {highest}]"
            )

    return np.eye(num_classes)[label_ids]

# -------------------------------
# Load and Prepare the IMDB Dataset
# -------------------------------
print("Loading IMDB dataset...")
# as_supervised=True makes each example a (text, label) pair; with_info=True also returns metadata.
(ds_train, ds_test), ds_info = tfds.load(
    'imdb_reviews',
    split=['train', 'test'],
    as_supervised=True,
    with_info=True,
)

# Materialise the training split: reviews are decoded from bytes to str, labels go into an array.
train_pairs = [(raw.decode('utf-8'), target) for raw, target in tfds.as_numpy(ds_train)]
train_texts = [review for review, _ in train_pairs]
train_labels = np.array([target for _, target in train_pairs])

# Hold out 20% of the training reviews for validation (fixed split seed).
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_texts, train_labels, test_size=0.2, random_state=42)

# Materialise the test split the same way.
test_pairs = [(raw.decode('utf-8'), target) for raw, target in tfds.as_numpy(ds_test)]
test_texts = [review for review, _ in test_pairs]
test_labels = np.array([target for _, target in test_pairs])

print(f"Train samples: {len(train_texts)}, Validation samples: {len(val_texts)}, Test samples: {len(test_texts)}")

# -------------------------------
# Preprocessing: Tokenization and Vectorization
# -------------------------------
# Fit the character vocabulary on the training reviews only.
tokenizer = char_level_tokenizer(train_texts)
print("Tokenizer vocabulary size:", len(tokenizer.word_index) + 1)

# Vectorise every split as a bag-of-characters matrix (train, validation, test).
X_train, X_val, X_test = (
    texts_to_bow(tokenizer, split_texts)
    for split_texts in (train_texts, val_texts, test_texts)
)

# One-hot encode the labels of every split (train, validation, test).
y_train, y_val, y_test = (
    one_hot_encode(split_labels)
    for split_labels in (train_labels, val_labels, test_labels)
)

# -------------------------------
# Model Setup
# -------------------------------
# Input width equals the length of one bag-of-characters vector.
size_input = X_train.shape[1]
size_hidden1, size_hidden2 = 128, 64
size_hidden3 = 32  # placeholder: handed to MLP, but no layer is built from it
size_output = 2

# Build the network, then the optimizer that will update its variables.
model = MLP(size_input, size_hidden1, size_hidden2, size_hidden3, size_output, device=None)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)

# -------------------------------
# Training Parameters and Loop
# -------------------------------
batch_size = 512
epochs = 10
# Round up so a final, smaller batch is still visited.
num_batches = int(np.ceil(X_train.shape[0] / batch_size))

print("\nStarting training...\n")
for epoch in range(epochs):
    # Shuffle training data at the start of each epoch (features and labels together).
    indices = np.arange(X_train.shape[0])
    np.random.shuffle(indices)
    X_train = X_train[indices]
    y_train = y_train[indices]

    epoch_loss = 0
    for i in range(num_batches):
        start = i * batch_size
        end = min((i+1) * batch_size, X_train.shape[0])
        X_batch = X_train[start:end]
        y_batch = y_train[start:end]

        # Run the forward pass and the loss once, under the tape, so the logged
        # loss and the gradients come from the same computation. Calling
        # model.forward/model.loss and then model.backward would evaluate the
        # network twice per batch.
        with tf.GradientTape() as tape:
            predictions = model.forward(X_batch)
            loss_value = model.loss(predictions, y_batch)
        grads = tape.gradient(loss_value, model.variables)
        optimizer.apply_gradients(zip(grads, model.variables))

        # Weight the batch loss by the batch size so the division below yields a
        # per-sample average (assumes model.loss returns a per-batch mean).
        epoch_loss += loss_value.numpy() * (end - start)

    epoch_loss /= X_train.shape[0]

    # Evaluate on validation set.
    val_logits = model.forward(X_val)
    val_loss = model.loss(val_logits, y_val).numpy()
    # Class ids are the positions of the largest logit / one-hot entry in each row.
    val_preds = np.argmax(val_logits.numpy(), axis=1)
    true_val = np.argmax(y_val, axis=1)
    accuracy = np.mean(val_preds == true_val)
    precision = precision_score(true_val, val_preds)
    recall = recall_score(true_val, val_preds)

    print(f"Epoch {epoch+1:02d} | Training Loss: {epoch_loss:.4f} | Val Loss: {val_loss:.4f} | "
          f"Accuracy: {accuracy:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f}")

# -------------------------------
# Final Evaluation on Test Set
# -------------------------------
print("\nEvaluating on test set...")
# One forward pass over the whole test matrix; the loss is read back as a numpy value.
test_logits = model.forward(X_test)
test_loss = model.loss(test_logits, y_test).numpy()

# Class ids are the positions of the largest logit / one-hot entry in each row.
test_preds = test_logits.numpy().argmax(axis=1)
true_test = np.argmax(y_test, axis=1)

test_accuracy = (test_preds == true_test).mean()
test_precision = precision_score(true_test, test_preds)
test_recall = recall_score(true_test, test_preds)

test_report = (f"Test Loss: {test_loss:.4f} | Test Accuracy: {test_accuracy:.4f} | "
               f"Test Precision: {test_precision:.4f} | Test Recall: {test_recall:.4f}")
print(test_report)


In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score

tf.random.set_seed(655)
np.random.seed(655)
# -------------------------------
# Original MLP Class Definition
# -------------------------------
class MLP(object):
    """
    Multi-layer perceptron with two hidden layers, built directly on tf.Variable.

    Inputs pass through two tanh hidden layers and a linear output layer that
    returns un-normalised logits. size_hidden3 is stored on the instance but no
    layer is created for it.
    """

    def __init__(self, size_input, size_hidden1, size_hidden2, size_hidden3, size_output, device=None):
        """
        Record the layer sizes and create the trainable parameters.

        size_input: int, size of input layer
        size_hidden1: int, size of the 1st hidden layer
        size_hidden2: int, size of the 2nd hidden layer
        size_hidden3: int, size of the 3rd hidden layer (stored only; no weights are built for it)
        size_output: int, size of output layer
        device: str or None. 'gpu' selects 'gpu:0', any other non-None value selects 'cpu',
                None runs without an explicit tf.device scope.
        """
        self.size_input = size_input
        self.size_hidden1, self.size_hidden2, self.size_hidden3 = size_hidden1, size_hidden2, size_hidden3
        self.size_output = size_output
        self.device = device

        def make_layer(fan_in, fan_out):
            # Weights are drawn from a normal distribution with stddev 0.1; biases start at zero.
            weights = tf.Variable(tf.random.normal([fan_in, fan_out], stddev=0.1))
            bias = tf.Variable(tf.zeros([1, fan_out]))
            return weights, bias

        # Layers are created in network order: hidden 1, hidden 2, output.
        self.W1, self.b1 = make_layer(self.size_input, self.size_hidden1)
        self.W2, self.b2 = make_layer(self.size_hidden1, self.size_hidden2)
        self.W3, self.b3 = make_layer(self.size_hidden2, self.size_output)

        # Trainable variables, in the order backward() returns their gradients.
        self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]

    def forward(self, X):
        """
        Run the forward pass, cache the result on self.y and return it.

        X: Tensor or array, inputs.
        """
        if self.device is None:
            self.y = self.compute_output(X)
            return self.y

        device_name = 'gpu:0' if self.device == 'gpu' else 'cpu'
        with tf.device(device_name):
            self.y = self.compute_output(X)
        return self.y

    def loss(self, y_pred, y_true):
        """
        Categorical cross-entropy between logits and one-hot targets.

        y_pred: Tensor of shape (batch_size, size_output), raw logits
        y_true: Tensor of shape (batch_size, size_output)
        """
        targets = tf.cast(y_true, dtype=tf.float32)
        logits = tf.cast(y_pred, dtype=tf.float32)
        # from_logits=True: the loss object is given the raw output-layer values.
        criterion = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
        return criterion(targets, logits)

    def backward(self, X_train, y_train):
        """
        Return the gradients of the loss with respect to self.variables.

        The forward pass and the loss are re-evaluated under a GradientTape here.
        """
        with tf.GradientTape() as tape:
            batch_loss = self.loss(self.forward(X_train), y_train)
        return tape.gradient(batch_loss, self.variables)

    def compute_output(self, X):
        """
        Compute the output logits for inputs X (cast to float32 first).
        """
        hidden = tf.cast(X, dtype=tf.float32)
        # Both hidden layers share the same affine-then-tanh form.
        for weights, bias in ((self.W1, self.b1), (self.W2, self.b2)):
            hidden = tf.nn.tanh(tf.matmul(hidden, weights) + bias)
        # Output layer (logits): affine only, no activation.
        return tf.matmul(hidden, self.W3) + self.b3

# -------------------------------
# Character-Level Tokenizer and Preprocessing Functions
# -------------------------------
def char_level_tokenizer(texts, num_words=None):
    """
    Build a lower-casing, character-level Keras Tokenizer and fit it on texts.

    Args:
        texts (list of str): Texts used to build the vocabulary.
        num_words (int or None): Passed through to Tokenizer as the cap on kept tokens.

    Returns:
        The fitted Tokenizer instance.
    """
    char_tokenizer = tf.keras.preprocessing.text.Tokenizer(
        num_words=num_words,
        char_level=True,
        lower=True,
    )
    char_tokenizer.fit_on_texts(texts)
    return char_tokenizer

def texts_to_bow(tokenizer, texts):
    """
    Encode texts as a bag-of-characters matrix.

    Args:
        tokenizer: A fitted character-level Tokenizer.
        texts (list of str): Texts to encode.

    Returns:
        Whatever tokenizer.texts_to_matrix returns in 'binary' mode
        (one fixed-length binary vector per text for a Keras Tokenizer).
    """
    return tokenizer.texts_to_matrix(texts, mode='binary')

def one_hot_encode(labels, num_classes=2):
    """
    Turn integer class labels into one-hot rows.

    Each label selects the matching row of a num_classes x num_classes identity matrix.
    """
    identity_rows = np.eye(num_classes)
    return identity_rows[labels]

# -------------------------------
# Load and Prepare the IMDB Dataset
# -------------------------------
print("Loading IMDB dataset...")
# as_supervised=True yields (text, label) pairs; with_info=True also returns the dataset info object.
(ds_train, ds_test), ds_info = tfds.load(
    'imdb_reviews',
    split=['train', 'test'],
    as_supervised=True,
    with_info=True,
)

# Materialise the training split, decoding each review from bytes to a utf-8 string.
train_pairs = [(raw.decode('utf-8'), lbl) for raw, lbl in tfds.as_numpy(ds_train)]
train_texts = [review for review, _ in train_pairs]
train_labels = np.array([lbl for _, lbl in train_pairs])

# Hold out 20% of the training data for validation (fixed random_state for a repeatable split).
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_texts,
    train_labels,
    test_size=0.2,
    random_state=42,
)

# Materialise the test split the same way.
test_pairs = [(raw.decode('utf-8'), lbl) for raw, lbl in tfds.as_numpy(ds_test)]
test_texts = [review for review, _ in test_pairs]
test_labels = np.array([lbl for _, lbl in test_pairs])

print(f"Train samples: {len(train_texts)}, Validation samples: {len(val_texts)}, Test samples: {len(test_texts)}")

# -------------------------------
# Preprocessing: Tokenization and Vectorization
# -------------------------------
# Fit the character-level tokenizer on the training texts only.
tokenizer = char_level_tokenizer(train_texts)
print("Tokenizer vocabulary size:", len(tokenizer.word_index) + 1)

# Encode every split as a bag-of-characters matrix using the same tokenizer.
X_train, X_val, X_test = [
    texts_to_bow(tokenizer, split_texts)
    for split_texts in (train_texts, val_texts, test_texts)
]

# One-hot encode the labels of every split.
y_train, y_val, y_test = [
    one_hot_encode(split_labels)
    for split_labels in (train_labels, val_labels, test_labels)
]

# -------------------------------
# Model Setup
# -------------------------------
# Input width equals the number of columns of the bag-of-characters matrix.
size_input = X_train.shape[-1]
# Hidden layer widths; size_hidden3 is a placeholder (not used in the forward pass).
size_hidden1, size_hidden2, size_hidden3 = 128, 64, 32
size_output = 2

# Build the MLP without an explicit device.
model = MLP(
    size_input,
    size_hidden1,
    size_hidden2,
    size_hidden3,
    size_output,
    device=None,
)

# Adam optimizer with a learning rate of 0.0001.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)

# -------------------------------
# Training Parameters and Loop
# -------------------------------
batch_size = 128
epochs = 10
# Round up so a final, smaller batch still gets processed.
num_batches = int(np.ceil(X_train.shape[0] / batch_size))

print("\nStarting training...\n")
for epoch in range(epochs):
    # Shuffle training data at the start of each epoch (inputs and targets with the same permutation).
    indices = np.arange(X_train.shape[0])
    np.random.shuffle(indices)
    X_train = X_train[indices]
    y_train = y_train[indices]

    epoch_loss = 0
    for i in range(num_batches):
        start = i * batch_size
        end = min((i+1) * batch_size, X_train.shape[0])
        X_batch = X_train[start:end]
        y_batch = y_train[start:end]

        # One taped forward pass yields both the logged loss and the gradients;
        # calling model.backward here would repeat the forward pass and loss.
        with tf.GradientTape() as tape:
            predictions = model.forward(X_batch)
            loss_value = model.loss(predictions, y_batch)
        grads = tape.gradient(loss_value, model.variables)
        optimizer.apply_gradients(zip(grads, model.variables))
        # Weight the batch-mean loss by the batch size so the epoch average is per-sample.
        epoch_loss += loss_value.numpy() * (end - start)

    epoch_loss /= X_train.shape[0]

    # Evaluate on validation set.
    val_logits = model.forward(X_val)
    val_loss = model.loss(val_logits, y_val).numpy()
    val_preds = np.argmax(val_logits.numpy(), axis=1)
    true_val = np.argmax(y_val, axis=1)
    accuracy = np.mean(val_preds == true_val)
    precision = precision_score(true_val, val_preds)
    recall = recall_score(true_val, val_preds)

    print(f"Epoch {epoch+1:02d} | Training Loss: {epoch_loss:.4f} | Val Loss: {val_loss:.4f} | "
          f"Accuracy: {accuracy:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f}")

# -------------------------------
# Final Evaluation on Test Set
# -------------------------------
# Score the trained model once on the held-out test split.
print("\nEvaluating on test set...")
test_logits = model.forward(X_test)
test_loss = model.loss(test_logits, y_test).numpy()

# Collapse logits and one-hot targets to class indices before computing metrics.
test_preds = test_logits.numpy().argmax(axis=1)
true_test = y_test.argmax(axis=1)
test_accuracy = (test_preds == true_test).mean()
test_precision = precision_score(true_test, test_preds)
test_recall = recall_score(true_test, test_preds)

print(f"Test Loss: {test_loss:.4f} | Test Accuracy: {test_accuracy:.4f} | "
      f"Test Precision: {test_precision:.4f} | Test Recall: {test_recall:.4f}")


Loading IMDB dataset...
Downloading and preparing dataset 80.23 MiB (download: 80.23 MiB, generated: Unknown size, total: 80.23 MiB) to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Generating splits...:   0%|          | 0/3 [00:00<?, ? splits/s]

Generating train examples...:   0%|          | 0/25000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/incomplete.2N0P8Y_1.0.0/imdb_reviews-train.tfrecor…

Generating test examples...:   0%|          | 0/25000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/incomplete.2N0P8Y_1.0.0/imdb_reviews-test.tfrecord…

Generating unsupervised examples...:   0%|          | 0/50000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/incomplete.2N0P8Y_1.0.0/imdb_reviews-unsupervised.…

Dataset imdb_reviews downloaded and prepared to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0. Subsequent calls will reuse this data.
Train samples: 20000, Validation samples: 5000, Test samples: 25000
Tokenizer vocabulary size: 134

Starting training...

Epoch 01 | Training Loss: 0.6832 | Val Loss: 0.6752 | Accuracy: 0.5882 | Precision: 0.5774 | Recall: 0.5615
Epoch 02 | Training Loss: 0.6698 | Val Loss: 0.6681 | Accuracy: 0.6044 | Precision: 0.5820 | Recall: 0.6531
Epoch 03 | Training Loss: 0.6646 | Val Loss: 0.6655 | Accuracy: 0.6060 | Precision: 0.5795 | Recall: 0.6823
Epoch 04 | Training Loss: 0.6626 | Val Loss: 0.6640 | Accuracy: 0.6108 | Precision: 0.5876 | Recall: 0.6613
Epoch 05 | Training Loss: 0.6614 | Val Loss: 0.6634 | Accuracy: 0.6120 | Precision: 0.5880 | Recall: 0.6671
Epoch 06 | Training Loss: 0.6608 | Val Loss: 0.6652 | Accuracy: 0.6072 | Precision: 0.5750 | Recall: 0.7277
Epoch 07 | Training Loss: 0.6608 | Val Loss: 0.6625 | Accuracy: 0.6088 | Precision: 0.

In [2]:
# Seed number: 655
# Learning rate of 0.0001, ReLU activation function, SGD optimizer, batch size = 256
# NOTE: the code below actually sets batch_size = 128.
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score

# Seed TensorFlow's and NumPy's global random number generators. The code below
# draws from them for weight initialisation (tf.random.normal) and for the
# per-epoch shuffle of the training data (np.random.shuffle).
tf.random.set_seed(655)
np.random.seed(655)
# -------------------------------
# Original MLP Class Definition
# -------------------------------
class MLP(object):
    """
    Multi-layer perceptron with two hidden layers, built directly on tf.Variable.

    Inputs pass through two ReLU hidden layers and a linear output layer that
    returns un-normalised logits. size_hidden3 is stored on the instance but no
    layer is created for it.
    """

    def __init__(self, size_input, size_hidden1, size_hidden2, size_hidden3, size_output, device=None):
        """
        Record the layer sizes and create the trainable parameters.

        size_input: int, size of input layer
        size_hidden1: int, size of the 1st hidden layer
        size_hidden2: int, size of the 2nd hidden layer
        size_hidden3: int, size of the 3rd hidden layer (stored only; no weights are built for it)
        size_output: int, size of output layer
        device: str or None. 'gpu' selects 'gpu:0', any other non-None value selects 'cpu',
                None runs without an explicit tf.device scope.
        """
        self.size_input = size_input
        self.size_hidden1, self.size_hidden2, self.size_hidden3 = size_hidden1, size_hidden2, size_hidden3
        self.size_output = size_output
        self.device = device

        def make_layer(fan_in, fan_out):
            # Weights are drawn from a normal distribution with stddev 0.1; biases start at zero.
            weights = tf.Variable(tf.random.normal([fan_in, fan_out], stddev=0.1))
            bias = tf.Variable(tf.zeros([1, fan_out]))
            return weights, bias

        # Layers are created in network order: hidden 1, hidden 2, output.
        self.W1, self.b1 = make_layer(self.size_input, self.size_hidden1)
        self.W2, self.b2 = make_layer(self.size_hidden1, self.size_hidden2)
        self.W3, self.b3 = make_layer(self.size_hidden2, self.size_output)

        # Trainable variables, in the order backward() returns their gradients.
        self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]

    def forward(self, X):
        """
        Run the forward pass, cache the result on self.y and return it.

        X: Tensor or array, inputs.
        """
        if self.device is None:
            self.y = self.compute_output(X)
            return self.y

        device_name = 'gpu:0' if self.device == 'gpu' else 'cpu'
        with tf.device(device_name):
            self.y = self.compute_output(X)
        return self.y

    def loss(self, y_pred, y_true):
        """
        Categorical cross-entropy between logits and one-hot targets.

        y_pred: Tensor of shape (batch_size, size_output), raw logits
        y_true: Tensor of shape (batch_size, size_output)
        """
        targets = tf.cast(y_true, dtype=tf.float32)
        logits = tf.cast(y_pred, dtype=tf.float32)
        # from_logits=True: the loss object is given the raw output-layer values.
        criterion = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
        return criterion(targets, logits)

    def backward(self, X_train, y_train):
        """
        Return the gradients of the loss with respect to self.variables.

        The forward pass and the loss are re-evaluated under a GradientTape here.
        """
        with tf.GradientTape() as tape:
            batch_loss = self.loss(self.forward(X_train), y_train)
        return tape.gradient(batch_loss, self.variables)

    def compute_output(self, X):
        """
        Compute the output logits for inputs X (cast to float32 first).
        """
        hidden = tf.cast(X, dtype=tf.float32)
        # Both hidden layers share the same affine-then-ReLU form.
        for weights, bias in ((self.W1, self.b1), (self.W2, self.b2)):
            hidden = tf.nn.relu(tf.matmul(hidden, weights) + bias)
        # Output layer (logits): affine only, no activation.
        return tf.matmul(hidden, self.W3) + self.b3

# -------------------------------
# Character-Level Tokenizer and Preprocessing Functions
# -------------------------------
def char_level_tokenizer(texts, num_words=None):
    """
    Build a lower-casing, character-level Keras Tokenizer and fit it on texts.

    Args:
        texts (list of str): Texts used to build the vocabulary.
        num_words (int or None): Passed through to Tokenizer as the cap on kept tokens.

    Returns:
        The fitted Tokenizer instance.
    """
    char_tokenizer = tf.keras.preprocessing.text.Tokenizer(
        num_words=num_words,
        char_level=True,
        lower=True,
    )
    char_tokenizer.fit_on_texts(texts)
    return char_tokenizer

def texts_to_bow(tokenizer, texts):
    """
    Encode texts as a bag-of-characters matrix.

    Args:
        tokenizer: A fitted character-level Tokenizer.
        texts (list of str): Texts to encode.

    Returns:
        Whatever tokenizer.texts_to_matrix returns in 'binary' mode
        (one fixed-length binary vector per text for a Keras Tokenizer).
    """
    return tokenizer.texts_to_matrix(texts, mode='binary')

def one_hot_encode(labels, num_classes=2):
    """
    Turn integer class labels into one-hot rows.

    Each label selects the matching row of a num_classes x num_classes identity matrix.
    """
    identity_rows = np.eye(num_classes)
    return identity_rows[labels]

# -------------------------------
# Load and Prepare the IMDB Dataset
# -------------------------------
print("Loading IMDB dataset...")
# as_supervised=True yields (text, label) pairs; with_info=True also returns the dataset info object.
(ds_train, ds_test), ds_info = tfds.load(
    'imdb_reviews',
    split=['train', 'test'],
    as_supervised=True,
    with_info=True,
)

# Materialise the training split, decoding each review from bytes to a utf-8 string.
train_pairs = [(raw.decode('utf-8'), lbl) for raw, lbl in tfds.as_numpy(ds_train)]
train_texts = [review for review, _ in train_pairs]
train_labels = np.array([lbl for _, lbl in train_pairs])

# Hold out 20% of the training data for validation (fixed random_state for a repeatable split).
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_texts,
    train_labels,
    test_size=0.2,
    random_state=42,
)

# Materialise the test split the same way.
test_pairs = [(raw.decode('utf-8'), lbl) for raw, lbl in tfds.as_numpy(ds_test)]
test_texts = [review for review, _ in test_pairs]
test_labels = np.array([lbl for _, lbl in test_pairs])

print(f"Train samples: {len(train_texts)}, Validation samples: {len(val_texts)}, Test samples: {len(test_texts)}")

# -------------------------------
# Preprocessing: Tokenization and Vectorization
# -------------------------------
# Fit the character-level tokenizer on the training texts only.
tokenizer = char_level_tokenizer(train_texts)
print("Tokenizer vocabulary size:", len(tokenizer.word_index) + 1)

# Encode every split as a bag-of-characters matrix using the same tokenizer.
X_train, X_val, X_test = [
    texts_to_bow(tokenizer, split_texts)
    for split_texts in (train_texts, val_texts, test_texts)
]

# One-hot encode the labels of every split.
y_train, y_val, y_test = [
    one_hot_encode(split_labels)
    for split_labels in (train_labels, val_labels, test_labels)
]

# -------------------------------
# Model Setup
# -------------------------------
# Input width equals the number of columns of the bag-of-characters matrix.
size_input = X_train.shape[-1]
# Hidden layer widths; size_hidden3 is a placeholder (not used in the forward pass).
size_hidden1, size_hidden2, size_hidden3 = 128, 64, 32
size_output = 2

# Build the MLP without an explicit device.
model = MLP(
    size_input,
    size_hidden1,
    size_hidden2,
    size_hidden3,
    size_output,
    device=None,
)

# Plain SGD optimizer with a learning rate of 0.0001.
optimizer = tf.keras.optimizers.SGD(learning_rate=0.0001)

# -------------------------------
# Training Parameters and Loop
# -------------------------------
batch_size = 128
epochs = 10
# Round up so a final, smaller batch still gets processed.
num_batches = int(np.ceil(X_train.shape[0] / batch_size))

print("\nStarting training...\n")
for epoch in range(epochs):
    # Shuffle training data at the start of each epoch (inputs and targets with the same permutation).
    indices = np.arange(X_train.shape[0])
    np.random.shuffle(indices)
    X_train = X_train[indices]
    y_train = y_train[indices]

    epoch_loss = 0
    for i in range(num_batches):
        start = i * batch_size
        end = min((i+1) * batch_size, X_train.shape[0])
        X_batch = X_train[start:end]
        y_batch = y_train[start:end]

        # One taped forward pass yields both the logged loss and the gradients;
        # calling model.backward here would repeat the forward pass and loss.
        with tf.GradientTape() as tape:
            predictions = model.forward(X_batch)
            loss_value = model.loss(predictions, y_batch)
        grads = tape.gradient(loss_value, model.variables)
        optimizer.apply_gradients(zip(grads, model.variables))
        # Weight the batch-mean loss by the batch size so the epoch average is per-sample.
        epoch_loss += loss_value.numpy() * (end - start)

    epoch_loss /= X_train.shape[0]

    # Evaluate on validation set.
    val_logits = model.forward(X_val)
    val_loss = model.loss(val_logits, y_val).numpy()
    val_preds = np.argmax(val_logits.numpy(), axis=1)
    true_val = np.argmax(y_val, axis=1)
    accuracy = np.mean(val_preds == true_val)
    precision = precision_score(true_val, val_preds)
    recall = recall_score(true_val, val_preds)

    print(f"Epoch {epoch+1:02d} | Training Loss: {epoch_loss:.4f} | Val Loss: {val_loss:.4f} | "
          f"Accuracy: {accuracy:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f}")

# -------------------------------
# Final Evaluation on Test Set
# -------------------------------
# Score the trained model once on the held-out test split.
print("\nEvaluating on test set...")
test_logits = model.forward(X_test)
test_loss = model.loss(test_logits, y_test).numpy()

# Collapse logits and one-hot targets to class indices before computing metrics.
test_preds = test_logits.numpy().argmax(axis=1)
true_test = y_test.argmax(axis=1)
test_accuracy = (test_preds == true_test).mean()
test_precision = precision_score(true_test, test_preds)
test_recall = recall_score(true_test, test_preds)

print(f"Test Loss: {test_loss:.4f} | Test Accuracy: {test_accuracy:.4f} | "
      f"Test Precision: {test_precision:.4f} | Test Recall: {test_recall:.4f}")


Loading IMDB dataset...
Train samples: 20000, Validation samples: 5000, Test samples: 25000
Tokenizer vocabulary size: 134

Starting training...

Epoch 01 | Training Loss: 0.7191 | Val Loss: 0.7246 | Accuracy: 0.4852 | Precision: 0.4850 | Recall: 1.0000
Epoch 02 | Training Loss: 0.7102 | Val Loss: 0.7157 | Accuracy: 0.4848 | Precision: 0.4848 | Recall: 0.9967
Epoch 03 | Training Loss: 0.7039 | Val Loss: 0.7091 | Accuracy: 0.4872 | Precision: 0.4858 | Recall: 0.9909
Epoch 04 | Training Loss: 0.6994 | Val Loss: 0.7043 | Accuracy: 0.4902 | Precision: 0.4871 | Recall: 0.9707
Epoch 05 | Training Loss: 0.6963 | Val Loss: 0.7007 | Accuracy: 0.4962 | Precision: 0.4898 | Recall: 0.9439
Epoch 06 | Training Loss: 0.6940 | Val Loss: 0.6981 | Accuracy: 0.5002 | Precision: 0.4916 | Recall: 0.9055
Epoch 07 | Training Loss: 0.6924 | Val Loss: 0.6961 | Accuracy: 0.5068 | Precision: 0.4951 | Recall: 0.8721
Epoch 08 | Training Loss: 0.6913 | Val Loss: 0.6947 | Accuracy: 0.5142 | Precision: 0.4994 | Recal

In [4]:
# Seed number: 655
# Learning rate of 0.0001, ReLU activation function, RMSprop optimizer, batch size = 128
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score

# Seed TensorFlow's and NumPy's global random number generators. The code below
# draws from them for weight initialisation (tf.random.normal) and for the
# per-epoch shuffle of the training data (np.random.shuffle).
tf.random.set_seed(655)
np.random.seed(655)
# -------------------------------
# Original MLP Class Definition
# -------------------------------
class MLP(object):
    """
    Multi-layer perceptron with two hidden layers, built directly on tf.Variable.

    Inputs pass through two ReLU hidden layers and a linear output layer that
    returns un-normalised logits. size_hidden3 is stored on the instance but no
    layer is created for it.
    """

    def __init__(self, size_input, size_hidden1, size_hidden2, size_hidden3, size_output, device=None):
        """
        Record the layer sizes and create the trainable parameters.

        size_input: int, size of input layer
        size_hidden1: int, size of the 1st hidden layer
        size_hidden2: int, size of the 2nd hidden layer
        size_hidden3: int, size of the 3rd hidden layer (stored only; no weights are built for it)
        size_output: int, size of output layer
        device: str or None. 'gpu' selects 'gpu:0', any other non-None value selects 'cpu',
                None runs without an explicit tf.device scope.
        """
        self.size_input = size_input
        self.size_hidden1, self.size_hidden2, self.size_hidden3 = size_hidden1, size_hidden2, size_hidden3
        self.size_output = size_output
        self.device = device

        def make_layer(fan_in, fan_out):
            # Weights are drawn from a normal distribution with stddev 0.1; biases start at zero.
            weights = tf.Variable(tf.random.normal([fan_in, fan_out], stddev=0.1))
            bias = tf.Variable(tf.zeros([1, fan_out]))
            return weights, bias

        # Layers are created in network order: hidden 1, hidden 2, output.
        self.W1, self.b1 = make_layer(self.size_input, self.size_hidden1)
        self.W2, self.b2 = make_layer(self.size_hidden1, self.size_hidden2)
        self.W3, self.b3 = make_layer(self.size_hidden2, self.size_output)

        # Trainable variables, in the order backward() returns their gradients.
        self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]

    def forward(self, X):
        """
        Run the forward pass, cache the result on self.y and return it.

        X: Tensor or array, inputs.
        """
        if self.device is None:
            self.y = self.compute_output(X)
            return self.y

        device_name = 'gpu:0' if self.device == 'gpu' else 'cpu'
        with tf.device(device_name):
            self.y = self.compute_output(X)
        return self.y

    def loss(self, y_pred, y_true):
        """
        Categorical cross-entropy between logits and one-hot targets.

        y_pred: Tensor of shape (batch_size, size_output), raw logits
        y_true: Tensor of shape (batch_size, size_output)
        """
        targets = tf.cast(y_true, dtype=tf.float32)
        logits = tf.cast(y_pred, dtype=tf.float32)
        # from_logits=True: the loss object is given the raw output-layer values.
        criterion = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
        return criterion(targets, logits)

    def backward(self, X_train, y_train):
        """
        Return the gradients of the loss with respect to self.variables.

        The forward pass and the loss are re-evaluated under a GradientTape here.
        """
        with tf.GradientTape() as tape:
            batch_loss = self.loss(self.forward(X_train), y_train)
        return tape.gradient(batch_loss, self.variables)

    def compute_output(self, X):
        """
        Compute the output logits for inputs X (cast to float32 first).
        """
        hidden = tf.cast(X, dtype=tf.float32)
        # Both hidden layers share the same affine-then-ReLU form.
        for weights, bias in ((self.W1, self.b1), (self.W2, self.b2)):
            hidden = tf.nn.relu(tf.matmul(hidden, weights) + bias)
        # Output layer (logits): affine only, no activation.
        return tf.matmul(hidden, self.W3) + self.b3

# -------------------------------
# Character-Level Tokenizer and Preprocessing Functions
# -------------------------------
def char_level_tokenizer(texts, num_words=None):
    """
    Build a lower-casing, character-level Keras Tokenizer and fit it on texts.

    Args:
        texts (list of str): Texts used to build the vocabulary.
        num_words (int or None): Passed through to Tokenizer as the cap on kept tokens.

    Returns:
        The fitted Tokenizer instance.
    """
    char_tokenizer = tf.keras.preprocessing.text.Tokenizer(
        num_words=num_words,
        char_level=True,
        lower=True,
    )
    char_tokenizer.fit_on_texts(texts)
    return char_tokenizer

def texts_to_bow(tokenizer, texts):
    """
    Encode texts as a bag-of-characters matrix.

    Args:
        tokenizer: A fitted character-level Tokenizer.
        texts (list of str): Texts to encode.

    Returns:
        Whatever tokenizer.texts_to_matrix returns in 'binary' mode
        (one fixed-length binary vector per text for a Keras Tokenizer).
    """
    return tokenizer.texts_to_matrix(texts, mode='binary')

def one_hot_encode(labels, num_classes=2):
    """
    Turn integer class labels into one-hot rows.

    Each label selects the matching row of a num_classes x num_classes identity matrix.
    """
    identity_rows = np.eye(num_classes)
    return identity_rows[labels]

# -------------------------------
# Load and Prepare the IMDB Dataset
# -------------------------------
print("Loading IMDB dataset...")
# as_supervised=True yields (text, label) pairs; with_info=True also returns the dataset info object.
(ds_train, ds_test), ds_info = tfds.load(
    'imdb_reviews',
    split=['train', 'test'],
    as_supervised=True,
    with_info=True,
)

# Materialise the training split, decoding each review from bytes to a utf-8 string.
train_pairs = [(raw.decode('utf-8'), lbl) for raw, lbl in tfds.as_numpy(ds_train)]
train_texts = [review for review, _ in train_pairs]
train_labels = np.array([lbl for _, lbl in train_pairs])

# Hold out 20% of the training data for validation (fixed random_state for a repeatable split).
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_texts,
    train_labels,
    test_size=0.2,
    random_state=42,
)

# Materialise the test split the same way.
test_pairs = [(raw.decode('utf-8'), lbl) for raw, lbl in tfds.as_numpy(ds_test)]
test_texts = [review for review, _ in test_pairs]
test_labels = np.array([lbl for _, lbl in test_pairs])

print(f"Train samples: {len(train_texts)}, Validation samples: {len(val_texts)}, Test samples: {len(test_texts)}")

# -------------------------------
# Preprocessing: Tokenization and Vectorization
# -------------------------------
# Fit the character-level tokenizer on the training texts only.
tokenizer = char_level_tokenizer(train_texts)
print("Tokenizer vocabulary size:", len(tokenizer.word_index) + 1)

# Encode every split as a bag-of-characters matrix using the same tokenizer.
X_train, X_val, X_test = [
    texts_to_bow(tokenizer, split_texts)
    for split_texts in (train_texts, val_texts, test_texts)
]

# One-hot encode the labels of every split.
y_train, y_val, y_test = [
    one_hot_encode(split_labels)
    for split_labels in (train_labels, val_labels, test_labels)
]

# -------------------------------
# Model Setup
# -------------------------------
# Input width equals the number of columns of the bag-of-characters matrix.
size_input = X_train.shape[-1]
# Hidden layer widths; size_hidden3 is a placeholder (not used in the forward pass).
size_hidden1, size_hidden2, size_hidden3 = 128, 64, 32
size_output = 2

# Build the MLP without an explicit device.
model = MLP(
    size_input,
    size_hidden1,
    size_hidden2,
    size_hidden3,
    size_output,
    device=None,
)

# RMSprop optimizer with a learning rate of 0.0001.
optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.0001)

# -------------------------------
# Training Parameters and Loop
# -------------------------------
batch_size = 128
epochs = 10
# Round up so a final, smaller batch still gets processed.
num_batches = int(np.ceil(X_train.shape[0] / batch_size))

print("\nStarting training...\n")
for epoch in range(epochs):
    # Shuffle training data at the start of each epoch (inputs and targets with the same permutation).
    indices = np.arange(X_train.shape[0])
    np.random.shuffle(indices)
    X_train = X_train[indices]
    y_train = y_train[indices]

    epoch_loss = 0
    for i in range(num_batches):
        start = i * batch_size
        end = min((i+1) * batch_size, X_train.shape[0])
        X_batch = X_train[start:end]
        y_batch = y_train[start:end]

        # One taped forward pass yields both the logged loss and the gradients;
        # calling model.backward here would repeat the forward pass and loss.
        with tf.GradientTape() as tape:
            predictions = model.forward(X_batch)
            loss_value = model.loss(predictions, y_batch)
        grads = tape.gradient(loss_value, model.variables)
        optimizer.apply_gradients(zip(grads, model.variables))
        # Weight the batch-mean loss by the batch size so the epoch average is per-sample.
        epoch_loss += loss_value.numpy() * (end - start)

    epoch_loss /= X_train.shape[0]

    # Evaluate on validation set.
    val_logits = model.forward(X_val)
    val_loss = model.loss(val_logits, y_val).numpy()
    val_preds = np.argmax(val_logits.numpy(), axis=1)
    true_val = np.argmax(y_val, axis=1)
    accuracy = np.mean(val_preds == true_val)
    precision = precision_score(true_val, val_preds)
    recall = recall_score(true_val, val_preds)

    print(f"Epoch {epoch+1:02d} | Training Loss: {epoch_loss:.4f} | Val Loss: {val_loss:.4f} | "
          f"Accuracy: {accuracy:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f}")

# -------------------------------
# Final Evaluation on Test Set
# -------------------------------
# Score the trained model once on the held-out test split.
print("\nEvaluating on test set...")
test_logits = model.forward(X_test)
test_loss = model.loss(test_logits, y_test).numpy()

# Collapse logits and one-hot targets to class indices before computing metrics.
test_preds = test_logits.numpy().argmax(axis=1)
true_test = y_test.argmax(axis=1)
test_accuracy = (test_preds == true_test).mean()
test_precision = precision_score(true_test, test_preds)
test_recall = recall_score(true_test, test_preds)

print(f"Test Loss: {test_loss:.4f} | Test Accuracy: {test_accuracy:.4f} | "
      f"Test Precision: {test_precision:.4f} | Test Recall: {test_recall:.4f}")


Loading IMDB dataset...
Train samples: 20000, Validation samples: 5000, Test samples: 25000
Tokenizer vocabulary size: 134

Starting training...

Epoch 01 | Training Loss: 0.6843 | Val Loss: 0.6778 | Accuracy: 0.5828 | Precision: 0.5796 | Recall: 0.5078
Epoch 02 | Training Loss: 0.6729 | Val Loss: 0.6721 | Accuracy: 0.5992 | Precision: 0.5700 | Recall: 0.7050
Epoch 03 | Training Loss: 0.6668 | Val Loss: 0.6669 | Accuracy: 0.6086 | Precision: 0.5946 | Recall: 0.6056
Epoch 04 | Training Loss: 0.6633 | Val Loss: 0.6657 | Accuracy: 0.6062 | Precision: 0.5781 | Recall: 0.6947
Epoch 05 | Training Loss: 0.6616 | Val Loss: 0.6642 | Accuracy: 0.6104 | Precision: 0.5900 | Recall: 0.6436
Epoch 06 | Training Loss: 0.6604 | Val Loss: 0.6669 | Accuracy: 0.5996 | Precision: 0.5675 | Recall: 0.7323
Epoch 07 | Training Loss: 0.6598 | Val Loss: 0.6647 | Accuracy: 0.6084 | Precision: 0.5789 | Recall: 0.7050
Epoch 08 | Training Loss: 0.6590 | Val Loss: 0.6631 | Accuracy: 0.6144 | Precision: 0.5918 | Recal

In [5]:
# Seed: 655
# Learning rate 0.0001, ReLU activation, RMSprop optimizer, batch size 512
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score

tf.random.set_seed(655)
np.random.seed(655)
# -------------------------------
# Original MLP Class Definition
# -------------------------------
class MLP(object):
    """Feed-forward network with two ReLU hidden layers and a linear logits layer.

    Parameters are plain ``tf.Variable`` objects collected in ``self.variables``.
    ``size_hidden3`` is stored on the instance but no layer is built from it.
    """

    def __init__(self, size_input, size_hidden1, size_hidden2, size_hidden3, size_output, device=None):
        """
        Record the layer sizes and create the trainable parameters.

        size_input: int, width of the input features
        size_hidden1: int, width of the first hidden layer
        size_hidden2: int, width of the second hidden layer
        size_hidden3: int, kept as an attribute only (no third hidden layer exists)
        size_output: int, number of output logits
        device: None to use default placement; 'gpu' selects 'gpu:0', any other
            non-None value selects 'cpu'
        """
        self.size_input = size_input
        self.size_hidden1 = size_hidden1
        self.size_hidden2 = size_hidden2
        self.size_hidden3 = size_hidden3
        self.size_output = size_output
        self.device = device

        def _weights(n_in, n_out):
            # Weight matrix drawn from a normal distribution with stddev 0.1.
            return tf.Variable(tf.random.normal([n_in, n_out], stddev=0.1))

        def _bias(n_out):
            # Zero-initialized row vector that broadcasts over the batch.
            return tf.Variable(tf.zeros([1, n_out]))

        # Creation order (W1, W2, W3) is kept so seeded initialization is unchanged.
        self.W1 = _weights(self.size_input, self.size_hidden1)
        self.b1 = _bias(self.size_hidden1)
        self.W2 = _weights(self.size_hidden1, self.size_hidden2)
        self.b2 = _bias(self.size_hidden2)
        self.W3 = _weights(self.size_hidden2, self.size_output)
        self.b3 = _bias(self.size_output)

        # Parameters handed to the optimizer; gradients are returned in this order.
        self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]

    def forward(self, X):
        """
        Run the network on X, cache the result in ``self.y`` and return it.
        X: inputs accepted by ``compute_output``.
        """
        if self.device is None:
            self.y = self.compute_output(X)
            return self.y

        placement = 'gpu:0' if self.device == 'gpu' else 'cpu'
        with tf.device(placement):
            self.y = self.compute_output(X)
        return self.y

    def loss(self, y_pred, y_true):
        """
        Categorical cross-entropy between logits and one-hot targets.
        y_pred: logits of shape (batch_size, size_output)
        y_true: targets of shape (batch_size, size_output)
        """
        targets = tf.cast(y_true, dtype=tf.float32)
        logits = tf.cast(y_pred, dtype=tf.float32)
        criterion = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
        return criterion(targets, logits)

    def backward(self, X_train, y_train):
        """
        Return the gradients of the loss on (X_train, y_train) w.r.t. ``self.variables``.
        """
        with tf.GradientTape() as tape:
            batch_loss = self.loss(self.forward(X_train), y_train)
        return tape.gradient(batch_loss, self.variables)

    def compute_output(self, X):
        """
        Compute the logits for X: two affine+ReLU layers followed by an affine layer.
        """
        activations = tf.cast(X, dtype=tf.float32)
        for weight, bias in ((self.W1, self.b1), (self.W2, self.b2)):
            activations = tf.nn.relu(tf.matmul(activations, weight) + bias)
        # Final layer stays linear: the loss consumes raw logits.
        return tf.matmul(activations, self.W3) + self.b3

# -------------------------------
# Character-Level Tokenizer and Preprocessing Functions
# -------------------------------
def char_level_tokenizer(texts, num_words=None):
    """
    Build a lower-casing, character-level Keras Tokenizer and fit it on ``texts``.

    Args:
        texts (list of str): Corpus used to build the character vocabulary.
        num_words (int or None): Passed through to the Tokenizer as its token cap.

    Returns:
        The fitted Tokenizer instance.
    """
    fitted = tf.keras.preprocessing.text.Tokenizer(
        num_words=num_words,
        char_level=True,
        lower=True,
    )
    fitted.fit_on_texts(texts)
    return fitted

def texts_to_bow(tokenizer, texts):
    """
    Encode each text as a binary bag-of-characters vector.

    Args:
        tokenizer: A fitted character-level Tokenizer.
        texts (list of str): Texts to encode.

    Returns:
        Whatever ``tokenizer.texts_to_matrix`` produces in 'binary' mode
        (one fixed-length presence/absence row per text).
    """
    return tokenizer.texts_to_matrix(texts, mode='binary')

def one_hot_encode(labels, num_classes=2):
    """
    Map integer class labels to one-hot rows by indexing an identity matrix.
    """
    identity = np.eye(num_classes)
    return identity[labels]

# -------------------------------
# Load and Prepare the IMDB Dataset
# -------------------------------
print("Loading IMDB dataset...")
# as_supervised=True yields (text, label) pairs; with_info=True also returns dataset metadata.
(ds_train, ds_test), ds_info = tfds.load(
    'imdb_reviews',
    split=['train', 'test'],
    as_supervised=True,
    with_info=True,
)

# Materialize the training split; reviews arrive as bytes and are decoded to str.
train_texts, train_labels = [], []
for text, label in tfds.as_numpy(ds_train):
    train_texts.append(text.decode('utf-8'))
    train_labels.append(label)
train_labels = np.array(train_labels)

# Materialize the test split the same way.
test_texts, test_labels = [], []
for text, label in tfds.as_numpy(ds_test):
    test_texts.append(text.decode('utf-8'))
    test_labels.append(label)
test_labels = np.array(test_labels)

# Hold out 20% of the training reviews for validation (fixed split seed).
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_texts,
    train_labels,
    test_size=0.2,
    random_state=42,
)

print(f"Train samples: {len(train_texts)}, Validation samples: {len(val_texts)}, Test samples: {len(test_texts)}")

# -------------------------------
# Preprocessing: Tokenization and Vectorization
# -------------------------------
# Fit the character vocabulary on the training reviews only.
tokenizer = char_level_tokenizer(train_texts)
print("Tokenizer vocabulary size:", len(tokenizer.word_index) + 1)

# Bag-of-characters features for every split, all encoded with the same tokenizer.
X_train, X_val, X_test = (
    texts_to_bow(tokenizer, split_texts)
    for split_texts in (train_texts, val_texts, test_texts)
)

# One-hot targets for every split.
y_train, y_val, y_test = map(one_hot_encode, (train_labels, val_labels, test_labels))

# -------------------------------
# Model Setup
# -------------------------------
# Layer widths; size_hidden3 is passed to the model but no layer uses it.
size_hidden1, size_hidden2, size_hidden3 = 128, 64, 32
size_output = 2
# Input width equals the length of one bag-of-characters vector.
size_input = X_train.shape[1]

# Build the network with default device placement.
model = MLP(
    size_input,
    size_hidden1,
    size_hidden2,
    size_hidden3,
    size_output,
    device=None,
)

# RMSprop optimizer for this experiment.
optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.0001)

# -------------------------------
# Training Parameters and Loop
# -------------------------------
batch_size = 512
epochs = 10
num_batches = int(np.ceil(X_train.shape[0] / batch_size))

print("\nStarting training...\n")
for epoch in range(epochs):
    # Shuffle training data at the start of each epoch (features and labels together).
    indices = np.arange(X_train.shape[0])
    np.random.shuffle(indices)
    X_train = X_train[indices]
    y_train = y_train[indices]

    epoch_loss = 0
    for i in range(num_batches):
        start = i * batch_size
        end = min((i+1) * batch_size, X_train.shape[0])
        X_batch = X_train[start:end]
        y_batch = y_train[start:end]

        # One taped forward pass provides both the logged loss and the gradients.
        # Calling model.forward/model.loss and then model.backward would run the
        # forward pass twice per batch for the same numbers.
        with tf.GradientTape() as tape:
            predictions = model.forward(X_batch)
            loss_value = model.loss(predictions, y_batch)
        grads = tape.gradient(loss_value, model.variables)
        optimizer.apply_gradients(zip(grads, model.variables))
        # Weight by batch size so the last (possibly smaller) batch is averaged correctly.
        epoch_loss += loss_value.numpy() * (end - start)

    epoch_loss /= X_train.shape[0]

    # Evaluate on validation set.
    val_logits = model.forward(X_val)
    val_loss = model.loss(val_logits, y_val).numpy()
    val_preds = np.argmax(val_logits.numpy(), axis=1)
    true_val = np.argmax(y_val, axis=1)
    accuracy = np.mean(val_preds == true_val)
    precision = precision_score(true_val, val_preds)
    recall = recall_score(true_val, val_preds)

    print(f"Epoch {epoch+1:02d} | Training Loss: {epoch_loss:.4f} | Val Loss: {val_loss:.4f} | "
          f"Accuracy: {accuracy:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f}")

# -------------------------------
# Final Evaluation on Test Set
# -------------------------------
print("\nEvaluating on test set...")
# Ground-truth class indices recovered from the one-hot test labels.
true_test = np.argmax(y_test, axis=1)

# One forward pass over the whole test set; the logits feed both the loss and the predictions.
test_logits = model.forward(X_test)
test_preds = np.argmax(test_logits.numpy(), axis=1)
test_loss = model.loss(test_logits, y_test).numpy()

test_accuracy = (test_preds == true_test).mean()
test_precision = precision_score(true_test, test_preds)
test_recall = recall_score(true_test, test_preds)

print(
    f"Test Loss: {test_loss:.4f} | Test Accuracy: {test_accuracy:.4f} | "
    f"Test Precision: {test_precision:.4f} | Test Recall: {test_recall:.4f}"
)


Loading IMDB dataset...
Train samples: 20000, Validation samples: 5000, Test samples: 25000
Tokenizer vocabulary size: 134

Starting training...

Epoch 01 | Training Loss: 0.6909 | Val Loss: 0.6847 | Accuracy: 0.5538 | Precision: 0.5559 | Recall: 0.3960
Epoch 02 | Training Loss: 0.6821 | Val Loss: 0.6817 | Accuracy: 0.5714 | Precision: 0.5506 | Recall: 0.6304
Epoch 03 | Training Loss: 0.6776 | Val Loss: 0.6768 | Accuracy: 0.5860 | Precision: 0.5824 | Recall: 0.5161
Epoch 04 | Training Loss: 0.6738 | Val Loss: 0.6738 | Accuracy: 0.5910 | Precision: 0.5752 | Recall: 0.5978
Epoch 05 | Training Loss: 0.6705 | Val Loss: 0.6711 | Accuracy: 0.5988 | Precision: 0.5822 | Recall: 0.6106
Epoch 06 | Training Loss: 0.6679 | Val Loss: 0.6733 | Accuracy: 0.5934 | Precision: 0.5590 | Recall: 0.7640
Epoch 07 | Training Loss: 0.6663 | Val Loss: 0.6695 | Accuracy: 0.6012 | Precision: 0.5708 | Recall: 0.7149
Epoch 08 | Training Loss: 0.6646 | Val Loss: 0.6674 | Accuracy: 0.6050 | Precision: 0.5773 | Recal

In [6]:
# Seed: 655
# Learning rate 0.0001, ReLU activation, SGD optimizer, batch size 512
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score

tf.random.set_seed(655)
np.random.seed(655)
# -------------------------------
# Original MLP Class Definition
# -------------------------------
class MLP(object):
    """Feed-forward network with two ReLU hidden layers and a linear logits layer.

    Parameters are plain ``tf.Variable`` objects collected in ``self.variables``.
    ``size_hidden3`` is stored on the instance but no layer is built from it.
    """

    def __init__(self, size_input, size_hidden1, size_hidden2, size_hidden3, size_output, device=None):
        """
        Record the layer sizes and create the trainable parameters.

        size_input: int, width of the input features
        size_hidden1: int, width of the first hidden layer
        size_hidden2: int, width of the second hidden layer
        size_hidden3: int, kept as an attribute only (no third hidden layer exists)
        size_output: int, number of output logits
        device: None to use default placement; 'gpu' selects 'gpu:0', any other
            non-None value selects 'cpu'
        """
        self.size_input = size_input
        self.size_hidden1 = size_hidden1
        self.size_hidden2 = size_hidden2
        self.size_hidden3 = size_hidden3
        self.size_output = size_output
        self.device = device

        def _weights(n_in, n_out):
            # Weight matrix drawn from a normal distribution with stddev 0.1.
            return tf.Variable(tf.random.normal([n_in, n_out], stddev=0.1))

        def _bias(n_out):
            # Zero-initialized row vector that broadcasts over the batch.
            return tf.Variable(tf.zeros([1, n_out]))

        # Creation order (W1, W2, W3) is kept so seeded initialization is unchanged.
        self.W1 = _weights(self.size_input, self.size_hidden1)
        self.b1 = _bias(self.size_hidden1)
        self.W2 = _weights(self.size_hidden1, self.size_hidden2)
        self.b2 = _bias(self.size_hidden2)
        self.W3 = _weights(self.size_hidden2, self.size_output)
        self.b3 = _bias(self.size_output)

        # Parameters handed to the optimizer; gradients are returned in this order.
        self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]

    def forward(self, X):
        """
        Run the network on X, cache the result in ``self.y`` and return it.
        X: inputs accepted by ``compute_output``.
        """
        if self.device is None:
            self.y = self.compute_output(X)
            return self.y

        placement = 'gpu:0' if self.device == 'gpu' else 'cpu'
        with tf.device(placement):
            self.y = self.compute_output(X)
        return self.y

    def loss(self, y_pred, y_true):
        """
        Categorical cross-entropy between logits and one-hot targets.
        y_pred: logits of shape (batch_size, size_output)
        y_true: targets of shape (batch_size, size_output)
        """
        targets = tf.cast(y_true, dtype=tf.float32)
        logits = tf.cast(y_pred, dtype=tf.float32)
        criterion = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
        return criterion(targets, logits)

    def backward(self, X_train, y_train):
        """
        Return the gradients of the loss on (X_train, y_train) w.r.t. ``self.variables``.
        """
        with tf.GradientTape() as tape:
            batch_loss = self.loss(self.forward(X_train), y_train)
        return tape.gradient(batch_loss, self.variables)

    def compute_output(self, X):
        """
        Compute the logits for X: two affine+ReLU layers followed by an affine layer.
        """
        activations = tf.cast(X, dtype=tf.float32)
        for weight, bias in ((self.W1, self.b1), (self.W2, self.b2)):
            activations = tf.nn.relu(tf.matmul(activations, weight) + bias)
        # Final layer stays linear: the loss consumes raw logits.
        return tf.matmul(activations, self.W3) + self.b3

# -------------------------------
# Character-Level Tokenizer and Preprocessing Functions
# -------------------------------
def char_level_tokenizer(texts, num_words=None):
    """
    Build a lower-casing, character-level Keras Tokenizer and fit it on ``texts``.

    Args:
        texts (list of str): Corpus used to build the character vocabulary.
        num_words (int or None): Passed through to the Tokenizer as its token cap.

    Returns:
        The fitted Tokenizer instance.
    """
    fitted = tf.keras.preprocessing.text.Tokenizer(
        num_words=num_words,
        char_level=True,
        lower=True,
    )
    fitted.fit_on_texts(texts)
    return fitted

def texts_to_bow(tokenizer, texts):
    """
    Encode each text as a binary bag-of-characters vector.

    Args:
        tokenizer: A fitted character-level Tokenizer.
        texts (list of str): Texts to encode.

    Returns:
        Whatever ``tokenizer.texts_to_matrix`` produces in 'binary' mode
        (one fixed-length presence/absence row per text).
    """
    return tokenizer.texts_to_matrix(texts, mode='binary')

def one_hot_encode(labels, num_classes=2):
    """
    Map integer class labels to one-hot rows by indexing an identity matrix.
    """
    identity = np.eye(num_classes)
    return identity[labels]

# -------------------------------
# Load and Prepare the IMDB Dataset
# -------------------------------
print("Loading IMDB dataset...")
# as_supervised=True yields (text, label) pairs; with_info=True also returns dataset metadata.
(ds_train, ds_test), ds_info = tfds.load(
    'imdb_reviews',
    split=['train', 'test'],
    as_supervised=True,
    with_info=True,
)

# Materialize the training split; reviews arrive as bytes and are decoded to str.
train_texts, train_labels = [], []
for text, label in tfds.as_numpy(ds_train):
    train_texts.append(text.decode('utf-8'))
    train_labels.append(label)
train_labels = np.array(train_labels)

# Materialize the test split the same way.
test_texts, test_labels = [], []
for text, label in tfds.as_numpy(ds_test):
    test_texts.append(text.decode('utf-8'))
    test_labels.append(label)
test_labels = np.array(test_labels)

# Hold out 20% of the training reviews for validation (fixed split seed).
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_texts,
    train_labels,
    test_size=0.2,
    random_state=42,
)

print(f"Train samples: {len(train_texts)}, Validation samples: {len(val_texts)}, Test samples: {len(test_texts)}")

# -------------------------------
# Preprocessing: Tokenization and Vectorization
# -------------------------------
# Fit the character vocabulary on the training reviews only.
tokenizer = char_level_tokenizer(train_texts)
print("Tokenizer vocabulary size:", len(tokenizer.word_index) + 1)

# Bag-of-characters features for every split, all encoded with the same tokenizer.
X_train, X_val, X_test = (
    texts_to_bow(tokenizer, split_texts)
    for split_texts in (train_texts, val_texts, test_texts)
)

# One-hot targets for every split.
y_train, y_val, y_test = map(one_hot_encode, (train_labels, val_labels, test_labels))

# -------------------------------
# Model Setup
# -------------------------------
# Layer widths; size_hidden3 is passed to the model but no layer uses it.
size_hidden1, size_hidden2, size_hidden3 = 128, 64, 32
size_output = 2
# Input width equals the length of one bag-of-characters vector.
size_input = X_train.shape[1]

# Build the network with default device placement.
model = MLP(
    size_input,
    size_hidden1,
    size_hidden2,
    size_hidden3,
    size_output,
    device=None,
)

# Plain SGD optimizer for this experiment.
optimizer = tf.keras.optimizers.SGD(learning_rate=0.0001)

# -------------------------------
# Training Parameters and Loop
# -------------------------------
batch_size = 512
epochs = 10
num_batches = int(np.ceil(X_train.shape[0] / batch_size))

print("\nStarting training...\n")
for epoch in range(epochs):
    # Shuffle training data at the start of each epoch (features and labels together).
    indices = np.arange(X_train.shape[0])
    np.random.shuffle(indices)
    X_train = X_train[indices]
    y_train = y_train[indices]

    epoch_loss = 0
    for i in range(num_batches):
        start = i * batch_size
        end = min((i+1) * batch_size, X_train.shape[0])
        X_batch = X_train[start:end]
        y_batch = y_train[start:end]

        # One taped forward pass provides both the logged loss and the gradients.
        # Calling model.forward/model.loss and then model.backward would run the
        # forward pass twice per batch for the same numbers.
        with tf.GradientTape() as tape:
            predictions = model.forward(X_batch)
            loss_value = model.loss(predictions, y_batch)
        grads = tape.gradient(loss_value, model.variables)
        optimizer.apply_gradients(zip(grads, model.variables))
        # Weight by batch size so the last (possibly smaller) batch is averaged correctly.
        epoch_loss += loss_value.numpy() * (end - start)

    epoch_loss /= X_train.shape[0]

    # Evaluate on validation set.
    val_logits = model.forward(X_val)
    val_loss = model.loss(val_logits, y_val).numpy()
    val_preds = np.argmax(val_logits.numpy(), axis=1)
    true_val = np.argmax(y_val, axis=1)
    accuracy = np.mean(val_preds == true_val)
    precision = precision_score(true_val, val_preds)
    recall = recall_score(true_val, val_preds)

    print(f"Epoch {epoch+1:02d} | Training Loss: {epoch_loss:.4f} | Val Loss: {val_loss:.4f} | "
          f"Accuracy: {accuracy:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f}")

# -------------------------------
# Final Evaluation on Test Set
# -------------------------------
print("\nEvaluating on test set...")
# Ground-truth class indices recovered from the one-hot test labels.
true_test = np.argmax(y_test, axis=1)

# One forward pass over the whole test set; the logits feed both the loss and the predictions.
test_logits = model.forward(X_test)
test_preds = np.argmax(test_logits.numpy(), axis=1)
test_loss = model.loss(test_logits, y_test).numpy()

test_accuracy = (test_preds == true_test).mean()
test_precision = precision_score(true_test, test_preds)
test_recall = recall_score(true_test, test_preds)

print(
    f"Test Loss: {test_loss:.4f} | Test Accuracy: {test_accuracy:.4f} | "
    f"Test Precision: {test_precision:.4f} | Test Recall: {test_recall:.4f}"
)


Loading IMDB dataset...
Train samples: 20000, Validation samples: 5000, Test samples: 25000
Tokenizer vocabulary size: 134

Starting training...

Epoch 01 | Training Loss: 0.7231 | Val Loss: 0.7332 | Accuracy: 0.4850 | Precision: 0.4849 | Recall: 1.0000
Epoch 02 | Training Loss: 0.7201 | Val Loss: 0.7300 | Accuracy: 0.4850 | Precision: 0.4849 | Recall: 1.0000
Epoch 03 | Training Loss: 0.7175 | Val Loss: 0.7270 | Accuracy: 0.4850 | Precision: 0.4849 | Recall: 1.0000
Epoch 04 | Training Loss: 0.7151 | Val Loss: 0.7243 | Accuracy: 0.4852 | Precision: 0.4850 | Recall: 1.0000
Epoch 05 | Training Loss: 0.7128 | Val Loss: 0.7217 | Accuracy: 0.4854 | Precision: 0.4851 | Recall: 1.0000
Epoch 06 | Training Loss: 0.7108 | Val Loss: 0.7194 | Accuracy: 0.4852 | Precision: 0.4850 | Recall: 0.9996
Epoch 07 | Training Loss: 0.7089 | Val Loss: 0.7173 | Accuracy: 0.4848 | Precision: 0.4848 | Recall: 0.9979
Epoch 08 | Training Loss: 0.7072 | Val Loss: 0.7153 | Accuracy: 0.4848 | Precision: 0.4848 | Recal

In [7]:
# Seed: 655
# Learning rate 0.001, tanh activation, Adam optimizer, batch size 512
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score

tf.random.set_seed(655)
np.random.seed(655)
# -------------------------------
# Original MLP Class Definition
# -------------------------------
class MLP(object):
    """Feed-forward network with two tanh hidden layers and a linear logits layer.

    Parameters are plain ``tf.Variable`` objects collected in ``self.variables``.
    ``size_hidden3`` is stored on the instance but no layer is built from it.
    """

    def __init__(self, size_input, size_hidden1, size_hidden2, size_hidden3, size_output, device=None):
        """
        Record the layer sizes and create the trainable parameters.

        size_input: int, width of the input features
        size_hidden1: int, width of the first hidden layer
        size_hidden2: int, width of the second hidden layer
        size_hidden3: int, kept as an attribute only (no third hidden layer exists)
        size_output: int, number of output logits
        device: None to use default placement; 'gpu' selects 'gpu:0', any other
            non-None value selects 'cpu'
        """
        self.size_input = size_input
        self.size_hidden1 = size_hidden1
        self.size_hidden2 = size_hidden2
        self.size_hidden3 = size_hidden3
        self.size_output = size_output
        self.device = device

        def _weights(n_in, n_out):
            # Weight matrix drawn from a normal distribution with stddev 0.1.
            return tf.Variable(tf.random.normal([n_in, n_out], stddev=0.1))

        def _bias(n_out):
            # Zero-initialized row vector that broadcasts over the batch.
            return tf.Variable(tf.zeros([1, n_out]))

        # Creation order (W1, W2, W3) is kept so seeded initialization is unchanged.
        self.W1 = _weights(self.size_input, self.size_hidden1)
        self.b1 = _bias(self.size_hidden1)
        self.W2 = _weights(self.size_hidden1, self.size_hidden2)
        self.b2 = _bias(self.size_hidden2)
        self.W3 = _weights(self.size_hidden2, self.size_output)
        self.b3 = _bias(self.size_output)

        # Parameters handed to the optimizer; gradients are returned in this order.
        self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]

    def forward(self, X):
        """
        Run the network on X, cache the result in ``self.y`` and return it.
        X: inputs accepted by ``compute_output``.
        """
        if self.device is None:
            self.y = self.compute_output(X)
            return self.y

        placement = 'gpu:0' if self.device == 'gpu' else 'cpu'
        with tf.device(placement):
            self.y = self.compute_output(X)
        return self.y

    def loss(self, y_pred, y_true):
        """
        Categorical cross-entropy between logits and one-hot targets.
        y_pred: logits of shape (batch_size, size_output)
        y_true: targets of shape (batch_size, size_output)
        """
        targets = tf.cast(y_true, dtype=tf.float32)
        logits = tf.cast(y_pred, dtype=tf.float32)
        criterion = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
        return criterion(targets, logits)

    def backward(self, X_train, y_train):
        """
        Return the gradients of the loss on (X_train, y_train) w.r.t. ``self.variables``.
        """
        with tf.GradientTape() as tape:
            batch_loss = self.loss(self.forward(X_train), y_train)
        return tape.gradient(batch_loss, self.variables)

    def compute_output(self, X):
        """
        Compute the logits for X: two affine+tanh layers followed by an affine layer.
        """
        activations = tf.cast(X, dtype=tf.float32)
        for weight, bias in ((self.W1, self.b1), (self.W2, self.b2)):
            activations = tf.nn.tanh(tf.matmul(activations, weight) + bias)
        # Final layer stays linear: the loss consumes raw logits.
        return tf.matmul(activations, self.W3) + self.b3

# -------------------------------
# Character-Level Tokenizer and Preprocessing Functions
# -------------------------------
def char_level_tokenizer(texts, num_words=None):
    """
    Build a lower-casing, character-level Keras Tokenizer and fit it on ``texts``.

    Args:
        texts (list of str): Corpus used to build the character vocabulary.
        num_words (int or None): Passed through to the Tokenizer as its token cap.

    Returns:
        The fitted Tokenizer instance.
    """
    fitted = tf.keras.preprocessing.text.Tokenizer(
        num_words=num_words,
        char_level=True,
        lower=True,
    )
    fitted.fit_on_texts(texts)
    return fitted

def texts_to_bow(tokenizer, texts):
    """
    Encode each text as a binary bag-of-characters vector.

    Args:
        tokenizer: A fitted character-level Tokenizer.
        texts (list of str): Texts to encode.

    Returns:
        Whatever ``tokenizer.texts_to_matrix`` produces in 'binary' mode
        (one fixed-length presence/absence row per text).
    """
    return tokenizer.texts_to_matrix(texts, mode='binary')

def one_hot_encode(labels, num_classes=2):
    """
    Map integer class labels to one-hot rows by indexing an identity matrix.
    """
    identity = np.eye(num_classes)
    return identity[labels]

# -------------------------------
# Load and Prepare the IMDB Dataset
# -------------------------------
print("Loading IMDB dataset...")
# as_supervised=True yields (text, label) pairs; with_info=True also returns dataset metadata.
(ds_train, ds_test), ds_info = tfds.load(
    'imdb_reviews',
    split=['train', 'test'],
    as_supervised=True,
    with_info=True,
)

# Materialize the training split; reviews arrive as bytes and are decoded to str.
train_texts, train_labels = [], []
for text, label in tfds.as_numpy(ds_train):
    train_texts.append(text.decode('utf-8'))
    train_labels.append(label)
train_labels = np.array(train_labels)

# Materialize the test split the same way.
test_texts, test_labels = [], []
for text, label in tfds.as_numpy(ds_test):
    test_texts.append(text.decode('utf-8'))
    test_labels.append(label)
test_labels = np.array(test_labels)

# Hold out 20% of the training reviews for validation (fixed split seed).
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_texts,
    train_labels,
    test_size=0.2,
    random_state=42,
)

print(f"Train samples: {len(train_texts)}, Validation samples: {len(val_texts)}, Test samples: {len(test_texts)}")

# -------------------------------
# Preprocessing: Tokenization and Vectorization
# -------------------------------
# Fit the character vocabulary on the training reviews only.
tokenizer = char_level_tokenizer(train_texts)
print("Tokenizer vocabulary size:", len(tokenizer.word_index) + 1)

# Bag-of-characters features for every split, all encoded with the same tokenizer.
X_train, X_val, X_test = (
    texts_to_bow(tokenizer, split_texts)
    for split_texts in (train_texts, val_texts, test_texts)
)

# One-hot targets for every split.
y_train, y_val, y_test = map(one_hot_encode, (train_labels, val_labels, test_labels))

# -------------------------------
# Model Setup
# -------------------------------
# Layer widths; size_hidden3 is passed to the model but no layer uses it.
size_hidden1, size_hidden2, size_hidden3 = 128, 64, 32
size_output = 2
# Input width equals the length of one bag-of-characters vector.
size_input = X_train.shape[1]

# Build the network with default device placement.
model = MLP(
    size_input,
    size_hidden1,
    size_hidden2,
    size_hidden3,
    size_output,
    device=None,
)

# Adam optimizer for this experiment.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

# -------------------------------
# Training Parameters and Loop
# -------------------------------
batch_size = 512
epochs = 10
num_batches = int(np.ceil(X_train.shape[0] / batch_size))

print("\nStarting training...\n")
for epoch in range(epochs):
    # Shuffle training data at the start of each epoch (features and labels together).
    indices = np.arange(X_train.shape[0])
    np.random.shuffle(indices)
    X_train = X_train[indices]
    y_train = y_train[indices]

    epoch_loss = 0
    for i in range(num_batches):
        start = i * batch_size
        end = min((i+1) * batch_size, X_train.shape[0])
        X_batch = X_train[start:end]
        y_batch = y_train[start:end]

        # One taped forward pass provides both the logged loss and the gradients.
        # Calling model.forward/model.loss and then model.backward would run the
        # forward pass twice per batch for the same numbers.
        with tf.GradientTape() as tape:
            predictions = model.forward(X_batch)
            loss_value = model.loss(predictions, y_batch)
        grads = tape.gradient(loss_value, model.variables)
        optimizer.apply_gradients(zip(grads, model.variables))
        # Weight by batch size so the last (possibly smaller) batch is averaged correctly.
        epoch_loss += loss_value.numpy() * (end - start)

    epoch_loss /= X_train.shape[0]

    # Evaluate on validation set.
    val_logits = model.forward(X_val)
    val_loss = model.loss(val_logits, y_val).numpy()
    val_preds = np.argmax(val_logits.numpy(), axis=1)
    true_val = np.argmax(y_val, axis=1)
    accuracy = np.mean(val_preds == true_val)
    precision = precision_score(true_val, val_preds)
    recall = recall_score(true_val, val_preds)

    print(f"Epoch {epoch+1:02d} | Training Loss: {epoch_loss:.4f} | Val Loss: {val_loss:.4f} | "
          f"Accuracy: {accuracy:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f}")

# -------------------------------
# Final Evaluation on Test Set
# -------------------------------
print("\nEvaluating on test set...")
# Ground-truth class indices recovered from the one-hot test labels.
true_test = np.argmax(y_test, axis=1)

# One forward pass over the whole test set; the logits feed both the loss and the predictions.
test_logits = model.forward(X_test)
test_preds = np.argmax(test_logits.numpy(), axis=1)
test_loss = model.loss(test_logits, y_test).numpy()

test_accuracy = (test_preds == true_test).mean()
test_precision = precision_score(true_test, test_preds)
test_recall = recall_score(true_test, test_preds)

print(
    f"Test Loss: {test_loss:.4f} | Test Accuracy: {test_accuracy:.4f} | "
    f"Test Precision: {test_precision:.4f} | Test Recall: {test_recall:.4f}"
)


Loading IMDB dataset...
Train samples: 20000, Validation samples: 5000, Test samples: 25000
Tokenizer vocabulary size: 134

Starting training...

Epoch 01 | Training Loss: 0.6760 | Val Loss: 0.6713 | Accuracy: 0.5784 | Precision: 0.6129 | Recall: 0.3540
Epoch 02 | Training Loss: 0.6690 | Val Loss: 0.6632 | Accuracy: 0.6096 | Precision: 0.5866 | Recall: 0.6597
Epoch 03 | Training Loss: 0.6620 | Val Loss: 0.6623 | Accuracy: 0.6080 | Precision: 0.5812 | Recall: 0.6848
Epoch 04 | Training Loss: 0.6604 | Val Loss: 0.6613 | Accuracy: 0.6064 | Precision: 0.5913 | Recall: 0.6093
Epoch 05 | Training Loss: 0.6589 | Val Loss: 0.6610 | Accuracy: 0.6082 | Precision: 0.5919 | Recall: 0.6180
Epoch 06 | Training Loss: 0.6626 | Val Loss: 0.6669 | Accuracy: 0.6008 | Precision: 0.5659 | Recall: 0.7583
Epoch 07 | Training Loss: 0.6588 | Val Loss: 0.6619 | Accuracy: 0.6054 | Precision: 0.5798 | Recall: 0.6757
Epoch 08 | Training Loss: 0.6575 | Val Loss: 0.6607 | Accuracy: 0.6076 | Precision: 0.5847 | Recal

In [8]:
# With seed number 655
# Learning rate of 0.001 and activation function LeakyReLU with the Adam optimizer, batch size = 512
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score

tf.random.set_seed(655)
np.random.seed(655)
# -------------------------------
# Original MLP Class Definition
# -------------------------------
class MLP:
    """Fully connected network with two hidden layers and manually managed weights."""

    def __init__(self, size_input, size_hidden1, size_hidden2, size_hidden3, size_output, device=None):
        """
        Record the layer sizes and create the trainable parameters.

        size_input: int, size of input layer
        size_hidden1: int, size of the 1st hidden layer
        size_hidden2: int, size of the 2nd hidden layer
        size_hidden3: int, stored only; no weights are created for it and
            compute_output never reads it
        size_output: int, size of output layer
        device: str or None; 'gpu' selects 'gpu:0', any other non-None value
            selects 'cpu', None applies no explicit device scope.
        """
        self.size_input = size_input
        self.size_hidden1 = size_hidden1
        self.size_hidden2 = size_hidden2
        self.size_hidden3 = size_hidden3
        self.size_output = size_output
        self.device = device

        # (fan_in, fan_out) of each dense layer, in forward-pass order.
        layer_shapes = [
            (self.size_input, self.size_hidden1),
            (self.size_hidden1, self.size_hidden2),
            (self.size_hidden2, self.size_output),
        ]

        # Weights are drawn from a normal distribution (stddev 0.1); biases start at zero.
        # Variables are created layer by layer: weight first, then its bias.
        weights = []
        biases = []
        for fan_in, fan_out in layer_shapes:
            weights.append(tf.Variable(tf.random.normal([fan_in, fan_out], stddev=0.1)))
            biases.append(tf.Variable(tf.zeros([1, fan_out])))

        self.W1, self.W2, self.W3 = weights
        self.b1, self.b2, self.b3 = biases

        # Everything the optimizer updates: all weights first, then all biases.
        self.variables = weights + biases

    def forward(self, X):
        """
        Run the forward pass and cache the result on `self.y`.

        X: Tensor, inputs.
        Returns the output of compute_output(X).
        """
        if self.device is None:
            self.y = self.compute_output(X)
            return self.y

        device_name = 'gpu:0' if self.device == 'gpu' else 'cpu'
        with tf.device(device_name):
            self.y = self.compute_output(X)
        return self.y

    def loss(self, y_pred, y_true):
        """
        Categorical cross-entropy between logits and targets.

        y_pred: Tensor of shape (batch_size, size_output), treated as logits
        y_true: Tensor of shape (batch_size, size_output)
        """
        targets = tf.cast(y_true, dtype=tf.float32)
        logits = tf.cast(y_pred, dtype=tf.float32)
        criterion = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
        return criterion(targets, logits)

    def backward(self, X_train, y_train):
        """
        Return the gradients of the loss with respect to `self.variables`.

        The forward pass and loss are recorded under a GradientTape; the
        gradients come back in the same order as `self.variables`.
        """
        with tf.GradientTape() as tape:
            batch_loss = self.loss(self.forward(X_train), y_train)
        return tape.gradient(batch_loss, self.variables)

    def compute_output(self, X):
        """
        Map inputs to output logits through the two hidden layers.
        """
        inputs = tf.cast(X, dtype=tf.float32)
        # Hidden layers: affine transform followed by leaky ReLU.
        hidden1 = tf.nn.leaky_relu(tf.matmul(inputs, self.W1) + self.b1)
        hidden2 = tf.nn.leaky_relu(tf.matmul(hidden1, self.W2) + self.b2)
        # Output layer stays linear (logits).
        return tf.matmul(hidden2, self.W3) + self.b3

# -------------------------------
# Character-Level Tokenizer and Preprocessing Functions
# -------------------------------
def char_level_tokenizer(texts, num_words=None):
    """
    Build a Keras Tokenizer that works on characters and fit it on `texts`.

    Args:
        texts (list of str): Texts used to fit the tokenizer.
        num_words (int or None): Passed through as the tokenizer's `num_words` limit.

    Returns:
        The fitted Tokenizer instance (created with lower=True).
    """
    tokenizer_options = dict(num_words=num_words, char_level=True, lower=True)
    fitted = tf.keras.preprocessing.text.Tokenizer(**tokenizer_options)
    fitted.fit_on_texts(texts)
    return fitted

def texts_to_bow(tokenizer, texts):
    """
    Vectorize texts into a bag-of-characters matrix.

    Args:
        tokenizer: A fitted character-level Tokenizer.
        texts (list of str): Texts to vectorize.

    Returns:
        Whatever `tokenizer.texts_to_matrix(texts, mode='binary')` returns:
        one fixed-length binary row per text.
    """
    return tokenizer.texts_to_matrix(texts, mode='binary')

def one_hot_encode(labels, num_classes=2):
    """
    Turn integer class labels into one-hot rows.

    Row i of the result is row labels[i] of the num_classes x num_classes identity matrix.
    """
    identity = np.eye(num_classes)
    return identity[labels]

# -------------------------------
# Load and Prepare the IMDB Dataset
# -------------------------------
print("Loading IMDB dataset...")
# Load the IMDB reviews dataset with the 'as_supervised' flag so that we get (text, label) pairs.
# with_info=True additionally returns the dataset metadata (ds_info); it is not used in this cell.
(ds_train, ds_test), ds_info = tfds.load('imdb_reviews',
                                           split=['train', 'test'],
                                           as_supervised=True,
                                           with_info=True)

# Convert training dataset to lists.
train_texts = []
train_labels = []
for text, label in tfds.as_numpy(ds_train):
    # Decode byte strings to utf-8 strings.
    train_texts.append(text.decode('utf-8'))
    train_labels.append(label)
train_labels = np.array(train_labels)

# Create a validation set from the training data (20% for validation).
# random_state is fixed at 42, so the split is independent of the seeds set at the top of the cell.
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_texts, train_labels, test_size=0.2, random_state=42)

# Convert test dataset to lists.
test_texts = []
test_labels = []
for text, label in tfds.as_numpy(ds_test):
    test_texts.append(text.decode('utf-8'))
    test_labels.append(label)
test_labels = np.array(test_labels)

print(f"Train samples: {len(train_texts)}, Validation samples: {len(val_texts)}, Test samples: {len(test_texts)}")

# -------------------------------
# Preprocessing: Tokenization and Vectorization
# -------------------------------
# Build the character-level tokenizer on the training texts only; the validation
# and test texts are vectorized below with this same fitted tokenizer.
tokenizer = char_level_tokenizer(train_texts)
# NOTE(review): the +1 presumably accounts for index 0, which Keras' Tokenizer reserves — confirm.
print("Tokenizer vocabulary size:", len(tokenizer.word_index) + 1)

# Convert texts to bag-of-characters representation.
X_train = texts_to_bow(tokenizer, train_texts)
X_val   = texts_to_bow(tokenizer, val_texts)
X_test  = texts_to_bow(tokenizer, test_texts)

# Convert labels to one-hot encoding.
y_train = one_hot_encode(train_labels)
y_val   = one_hot_encode(val_labels)
y_test  = one_hot_encode(test_labels)

# -------------------------------
# Model Setup
# -------------------------------
# The input size is determined by the dimension of the bag-of-characters vector.
size_input = X_train.shape[1]
# Set hidden layer sizes as desired.
size_hidden1 = 128
size_hidden2 = 64
size_hidden3 = 32  # Placeholder (not used in the forward pass)
size_output  = 2

# Instantiate the MLP model.
# device=None: MLP.forward then runs without an explicit tf.device scope.
model = MLP(size_input, size_hidden1, size_hidden2, size_hidden3, size_output, device=None)

# Define the optimizer (Adam, learning rate 0.001, for this experiment).
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

# -------------------------------
# Training Parameters and Loop
# -------------------------------
# Mini-batch size and number of passes over the training set.
batch_size = 512
epochs = 10
# ceil() so that a final, smaller batch covers the leftover samples.
num_batches = int(np.ceil(X_train.shape[0] / batch_size))

print("\nStarting training...\n")
for epoch in range(epochs):
    # Shuffle training data at the start of each epoch. Features and labels are
    # reordered with the same index array so that pairs stay aligned.
    indices = np.arange(X_train.shape[0])
    np.random.shuffle(indices)
    X_train = X_train[indices]
    y_train = y_train[indices]

    epoch_loss = 0.0
    for i in range(num_batches):
        start = i * batch_size
        end = min((i + 1) * batch_size, X_train.shape[0])
        X_batch = X_train[start:end]
        y_batch = y_train[start:end]

        # One forward pass under the tape serves both the reported loss and the
        # gradients, so each batch is only pushed through the network once.
        with tf.GradientTape() as tape:
            predictions = model.forward(X_batch)
            loss_value = model.loss(predictions, y_batch)
        grads = tape.gradient(loss_value, model.variables)
        optimizer.apply_gradients(zip(grads, model.variables))
        # Weight the batch loss by the batch size because the last batch may be
        # smaller; dividing by the sample count below yields a per-sample average.
        epoch_loss += loss_value.numpy() * (end - start)

    epoch_loss /= X_train.shape[0]

    # Evaluate on validation set (whole set in a single forward call).
    val_logits = model.forward(X_val)
    val_loss = model.loss(val_logits, y_val).numpy()
    # Class ids: index of the largest logit / of the 1 in each one-hot row.
    val_preds = np.argmax(val_logits.numpy(), axis=1)
    true_val = np.argmax(y_val, axis=1)
    accuracy = np.mean(val_preds == true_val)
    precision = precision_score(true_val, val_preds)
    recall = recall_score(true_val, val_preds)

    print(f"Epoch {epoch+1:02d} | Training Loss: {epoch_loss:.4f} | Val Loss: {val_loss:.4f} | "
          f"Accuracy: {accuracy:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f}")

# -------------------------------
# Final Evaluation on Test Set
# -------------------------------
print("\nEvaluating on test set...")
# The whole test set goes through the model in one forward call (no batching).
test_logits = model.forward(X_test)
test_loss = model.loss(test_logits, y_test).numpy()
# Predicted class = index of the largest logit in each row.
test_preds = np.argmax(test_logits.numpy(), axis=1)
# y_test is one-hot, so argmax over axis 1 recovers the integer class id.
true_test = np.argmax(y_test, axis=1)
test_accuracy = np.mean(test_preds == true_test)
# precision_score / recall_score are called with their default arguments.
test_precision = precision_score(true_test, test_preds)
test_recall = recall_score(true_test, test_preds)

print(f"Test Loss: {test_loss:.4f} | Test Accuracy: {test_accuracy:.4f} | "
      f"Test Precision: {test_precision:.4f} | Test Recall: {test_recall:.4f}")


Loading IMDB dataset...
Train samples: 20000, Validation samples: 5000, Test samples: 25000
Tokenizer vocabulary size: 134

Starting training...

Epoch 01 | Training Loss: 0.6781 | Val Loss: 0.6684 | Accuracy: 0.5922 | Precision: 0.6061 | Recall: 0.4538
Epoch 02 | Training Loss: 0.6665 | Val Loss: 0.6639 | Accuracy: 0.6066 | Precision: 0.5944 | Recall: 0.5936
Epoch 03 | Training Loss: 0.6596 | Val Loss: 0.6617 | Accuracy: 0.6126 | Precision: 0.5885 | Recall: 0.6679
Epoch 04 | Training Loss: 0.6582 | Val Loss: 0.6609 | Accuracy: 0.6138 | Precision: 0.5899 | Recall: 0.6671
Epoch 05 | Training Loss: 0.6559 | Val Loss: 0.6603 | Accuracy: 0.6114 | Precision: 0.5916 | Recall: 0.6407
Epoch 06 | Training Loss: 0.6587 | Val Loss: 0.6609 | Accuracy: 0.6066 | Precision: 0.5793 | Recall: 0.6885
Epoch 07 | Training Loss: 0.6538 | Val Loss: 0.6605 | Accuracy: 0.6086 | Precision: 0.5818 | Recall: 0.6848
Epoch 08 | Training Loss: 0.6529 | Val Loss: 0.6596 | Accuracy: 0.6114 | Precision: 0.5894 | Recal

In [9]:
# With seed number 655
# Learning rate of 0.001 and activation function LeakyReLU with the SGD optimizer, batch size = 512
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score

tf.random.set_seed(655)
np.random.seed(655)
# -------------------------------
# Original MLP Class Definition
# -------------------------------
class MLP:
    """Fully connected network with two hidden layers and manually managed weights."""

    def __init__(self, size_input, size_hidden1, size_hidden2, size_hidden3, size_output, device=None):
        """
        Record the layer sizes and create the trainable parameters.

        size_input: int, size of input layer
        size_hidden1: int, size of the 1st hidden layer
        size_hidden2: int, size of the 2nd hidden layer
        size_hidden3: int, stored only; no weights are created for it and
            compute_output never reads it
        size_output: int, size of output layer
        device: str or None; 'gpu' selects 'gpu:0', any other non-None value
            selects 'cpu', None applies no explicit device scope.
        """
        self.size_input = size_input
        self.size_hidden1 = size_hidden1
        self.size_hidden2 = size_hidden2
        self.size_hidden3 = size_hidden3
        self.size_output = size_output
        self.device = device

        # (fan_in, fan_out) of each dense layer, in forward-pass order.
        layer_shapes = [
            (self.size_input, self.size_hidden1),
            (self.size_hidden1, self.size_hidden2),
            (self.size_hidden2, self.size_output),
        ]

        # Weights are drawn from a normal distribution (stddev 0.1); biases start at zero.
        # Variables are created layer by layer: weight first, then its bias.
        weights = []
        biases = []
        for fan_in, fan_out in layer_shapes:
            weights.append(tf.Variable(tf.random.normal([fan_in, fan_out], stddev=0.1)))
            biases.append(tf.Variable(tf.zeros([1, fan_out])))

        self.W1, self.W2, self.W3 = weights
        self.b1, self.b2, self.b3 = biases

        # Everything the optimizer updates: all weights first, then all biases.
        self.variables = weights + biases

    def forward(self, X):
        """
        Run the forward pass and cache the result on `self.y`.

        X: Tensor, inputs.
        Returns the output of compute_output(X).
        """
        if self.device is None:
            self.y = self.compute_output(X)
            return self.y

        device_name = 'gpu:0' if self.device == 'gpu' else 'cpu'
        with tf.device(device_name):
            self.y = self.compute_output(X)
        return self.y

    def loss(self, y_pred, y_true):
        """
        Categorical cross-entropy between logits and targets.

        y_pred: Tensor of shape (batch_size, size_output), treated as logits
        y_true: Tensor of shape (batch_size, size_output)
        """
        targets = tf.cast(y_true, dtype=tf.float32)
        logits = tf.cast(y_pred, dtype=tf.float32)
        criterion = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
        return criterion(targets, logits)

    def backward(self, X_train, y_train):
        """
        Return the gradients of the loss with respect to `self.variables`.

        The forward pass and loss are recorded under a GradientTape; the
        gradients come back in the same order as `self.variables`.
        """
        with tf.GradientTape() as tape:
            batch_loss = self.loss(self.forward(X_train), y_train)
        return tape.gradient(batch_loss, self.variables)

    def compute_output(self, X):
        """
        Map inputs to output logits through the two hidden layers.
        """
        inputs = tf.cast(X, dtype=tf.float32)
        # Hidden layers: affine transform followed by leaky ReLU.
        hidden1 = tf.nn.leaky_relu(tf.matmul(inputs, self.W1) + self.b1)
        hidden2 = tf.nn.leaky_relu(tf.matmul(hidden1, self.W2) + self.b2)
        # Output layer stays linear (logits).
        return tf.matmul(hidden2, self.W3) + self.b3

# -------------------------------
# Character-Level Tokenizer and Preprocessing Functions
# -------------------------------
def char_level_tokenizer(texts, num_words=None):
    """
    Build a Keras Tokenizer that works on characters and fit it on `texts`.

    Args:
        texts (list of str): Texts used to fit the tokenizer.
        num_words (int or None): Passed through as the tokenizer's `num_words` limit.

    Returns:
        The fitted Tokenizer instance (created with lower=True).
    """
    tokenizer_options = dict(num_words=num_words, char_level=True, lower=True)
    fitted = tf.keras.preprocessing.text.Tokenizer(**tokenizer_options)
    fitted.fit_on_texts(texts)
    return fitted

def texts_to_bow(tokenizer, texts):
    """
    Vectorize texts into a bag-of-characters matrix.

    Args:
        tokenizer: A fitted character-level Tokenizer.
        texts (list of str): Texts to vectorize.

    Returns:
        Whatever `tokenizer.texts_to_matrix(texts, mode='binary')` returns:
        one fixed-length binary row per text.
    """
    return tokenizer.texts_to_matrix(texts, mode='binary')

def one_hot_encode(labels, num_classes=2):
    """
    Turn integer class labels into one-hot rows.

    Row i of the result is row labels[i] of the num_classes x num_classes identity matrix.
    """
    identity = np.eye(num_classes)
    return identity[labels]

# -------------------------------
# Load and Prepare the IMDB Dataset
# -------------------------------
print("Loading IMDB dataset...")
# Load the IMDB reviews dataset with the 'as_supervised' flag so that we get (text, label) pairs.
# with_info=True additionally returns the dataset metadata (ds_info); it is not used in this cell.
(ds_train, ds_test), ds_info = tfds.load('imdb_reviews',
                                           split=['train', 'test'],
                                           as_supervised=True,
                                           with_info=True)

# Convert training dataset to lists.
train_texts = []
train_labels = []
for text, label in tfds.as_numpy(ds_train):
    # Decode byte strings to utf-8 strings.
    train_texts.append(text.decode('utf-8'))
    train_labels.append(label)
train_labels = np.array(train_labels)

# Create a validation set from the training data (20% for validation).
# random_state is fixed at 42, so the split is independent of the seeds set at the top of the cell.
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_texts, train_labels, test_size=0.2, random_state=42)

# Convert test dataset to lists.
test_texts = []
test_labels = []
for text, label in tfds.as_numpy(ds_test):
    test_texts.append(text.decode('utf-8'))
    test_labels.append(label)
test_labels = np.array(test_labels)

print(f"Train samples: {len(train_texts)}, Validation samples: {len(val_texts)}, Test samples: {len(test_texts)}")

# -------------------------------
# Preprocessing: Tokenization and Vectorization
# -------------------------------
# Build the character-level tokenizer on the training texts only; the validation
# and test texts are vectorized below with this same fitted tokenizer.
tokenizer = char_level_tokenizer(train_texts)
# NOTE(review): the +1 presumably accounts for index 0, which Keras' Tokenizer reserves — confirm.
print("Tokenizer vocabulary size:", len(tokenizer.word_index) + 1)

# Convert texts to bag-of-characters representation.
X_train = texts_to_bow(tokenizer, train_texts)
X_val   = texts_to_bow(tokenizer, val_texts)
X_test  = texts_to_bow(tokenizer, test_texts)

# Convert labels to one-hot encoding.
y_train = one_hot_encode(train_labels)
y_val   = one_hot_encode(val_labels)
y_test  = one_hot_encode(test_labels)

# -------------------------------
# Model Setup
# -------------------------------
# The input size is determined by the dimension of the bag-of-characters vector.
size_input = X_train.shape[1]
# Set hidden layer sizes as desired.
size_hidden1 = 128
size_hidden2 = 64
size_hidden3 = 32  # Placeholder (not used in the forward pass)
size_output  = 2

# Instantiate the MLP model.
# device=None: MLP.forward then runs without an explicit tf.device scope.
model = MLP(size_input, size_hidden1, size_hidden2, size_hidden3, size_output, device=None)

# Define the optimizer (plain SGD, learning rate 0.001, for this experiment).
optimizer = tf.keras.optimizers.SGD(learning_rate=0.001)

# -------------------------------
# Training Parameters and Loop
# -------------------------------
# Mini-batch size and number of passes over the training set.
batch_size = 512
epochs = 10
# ceil() so that a final, smaller batch covers the leftover samples.
num_batches = int(np.ceil(X_train.shape[0] / batch_size))

print("\nStarting training...\n")
for epoch in range(epochs):
    # Shuffle training data at the start of each epoch. Features and labels are
    # reordered with the same index array so that pairs stay aligned.
    indices = np.arange(X_train.shape[0])
    np.random.shuffle(indices)
    X_train = X_train[indices]
    y_train = y_train[indices]

    epoch_loss = 0.0
    for i in range(num_batches):
        start = i * batch_size
        end = min((i + 1) * batch_size, X_train.shape[0])
        X_batch = X_train[start:end]
        y_batch = y_train[start:end]

        # One forward pass under the tape serves both the reported loss and the
        # gradients, so each batch is only pushed through the network once.
        with tf.GradientTape() as tape:
            predictions = model.forward(X_batch)
            loss_value = model.loss(predictions, y_batch)
        grads = tape.gradient(loss_value, model.variables)
        optimizer.apply_gradients(zip(grads, model.variables))
        # Weight the batch loss by the batch size because the last batch may be
        # smaller; dividing by the sample count below yields a per-sample average.
        epoch_loss += loss_value.numpy() * (end - start)

    epoch_loss /= X_train.shape[0]

    # Evaluate on validation set (whole set in a single forward call).
    val_logits = model.forward(X_val)
    val_loss = model.loss(val_logits, y_val).numpy()
    # Class ids: index of the largest logit / of the 1 in each one-hot row.
    val_preds = np.argmax(val_logits.numpy(), axis=1)
    true_val = np.argmax(y_val, axis=1)
    accuracy = np.mean(val_preds == true_val)
    precision = precision_score(true_val, val_preds)
    recall = recall_score(true_val, val_preds)

    print(f"Epoch {epoch+1:02d} | Training Loss: {epoch_loss:.4f} | Val Loss: {val_loss:.4f} | "
          f"Accuracy: {accuracy:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f}")

# -------------------------------
# Final Evaluation on Test Set
# -------------------------------
print("\nEvaluating on test set...")
# The whole test set goes through the model in one forward call (no batching).
test_logits = model.forward(X_test)
test_loss = model.loss(test_logits, y_test).numpy()
# Predicted class = index of the largest logit in each row.
test_preds = np.argmax(test_logits.numpy(), axis=1)
# y_test is one-hot, so argmax over axis 1 recovers the integer class id.
true_test = np.argmax(y_test, axis=1)
test_accuracy = np.mean(test_preds == true_test)
# precision_score / recall_score are called with their default arguments.
test_precision = precision_score(true_test, test_preds)
test_recall = recall_score(true_test, test_preds)

print(f"Test Loss: {test_loss:.4f} | Test Accuracy: {test_accuracy:.4f} | "
      f"Test Precision: {test_precision:.4f} | Test Recall: {test_recall:.4f}")


Loading IMDB dataset...
Train samples: 20000, Validation samples: 5000, Test samples: 25000
Tokenizer vocabulary size: 134

Starting training...

Epoch 01 | Training Loss: 0.7111 | Val Loss: 0.7081 | Accuracy: 0.4954 | Precision: 0.4895 | Recall: 0.9563
Epoch 02 | Training Loss: 0.6968 | Val Loss: 0.6983 | Accuracy: 0.5114 | Precision: 0.4977 | Recall: 0.8461
Epoch 03 | Training Loss: 0.6921 | Val Loss: 0.6939 | Accuracy: 0.5242 | Precision: 0.5064 | Recall: 0.7318
Epoch 04 | Training Loss: 0.6902 | Val Loss: 0.6920 | Accuracy: 0.5282 | Precision: 0.5108 | Recall: 0.6345
Epoch 05 | Training Loss: 0.6895 | Val Loss: 0.6910 | Accuracy: 0.5332 | Precision: 0.5164 | Recall: 0.5862
Epoch 06 | Training Loss: 0.6891 | Val Loss: 0.6906 | Accuracy: 0.5336 | Precision: 0.5173 | Recall: 0.5668
Epoch 07 | Training Loss: 0.6888 | Val Loss: 0.6902 | Accuracy: 0.5368 | Precision: 0.5206 | Recall: 0.5631
Epoch 08 | Training Loss: 0.6886 | Val Loss: 0.6900 | Accuracy: 0.5364 | Precision: 0.5204 | Recal

In [10]:
# With seed number 655
# Learning rate of 0.001 and activation function LeakyReLU with the RMSprop optimizer, batch size = 512
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score

tf.random.set_seed(655)
np.random.seed(655)
# -------------------------------
# Original MLP Class Definition
# -------------------------------
class MLP:
    """Fully connected network with two hidden layers and manually managed weights."""

    def __init__(self, size_input, size_hidden1, size_hidden2, size_hidden3, size_output, device=None):
        """
        Record the layer sizes and create the trainable parameters.

        size_input: int, size of input layer
        size_hidden1: int, size of the 1st hidden layer
        size_hidden2: int, size of the 2nd hidden layer
        size_hidden3: int, stored only; no weights are created for it and
            compute_output never reads it
        size_output: int, size of output layer
        device: str or None; 'gpu' selects 'gpu:0', any other non-None value
            selects 'cpu', None applies no explicit device scope.
        """
        self.size_input = size_input
        self.size_hidden1 = size_hidden1
        self.size_hidden2 = size_hidden2
        self.size_hidden3 = size_hidden3
        self.size_output = size_output
        self.device = device

        # (fan_in, fan_out) of each dense layer, in forward-pass order.
        layer_shapes = [
            (self.size_input, self.size_hidden1),
            (self.size_hidden1, self.size_hidden2),
            (self.size_hidden2, self.size_output),
        ]

        # Weights are drawn from a normal distribution (stddev 0.1); biases start at zero.
        # Variables are created layer by layer: weight first, then its bias.
        weights = []
        biases = []
        for fan_in, fan_out in layer_shapes:
            weights.append(tf.Variable(tf.random.normal([fan_in, fan_out], stddev=0.1)))
            biases.append(tf.Variable(tf.zeros([1, fan_out])))

        self.W1, self.W2, self.W3 = weights
        self.b1, self.b2, self.b3 = biases

        # Everything the optimizer updates: all weights first, then all biases.
        self.variables = weights + biases

    def forward(self, X):
        """
        Run the forward pass and cache the result on `self.y`.

        X: Tensor, inputs.
        Returns the output of compute_output(X).
        """
        if self.device is None:
            self.y = self.compute_output(X)
            return self.y

        device_name = 'gpu:0' if self.device == 'gpu' else 'cpu'
        with tf.device(device_name):
            self.y = self.compute_output(X)
        return self.y

    def loss(self, y_pred, y_true):
        """
        Categorical cross-entropy between logits and targets.

        y_pred: Tensor of shape (batch_size, size_output), treated as logits
        y_true: Tensor of shape (batch_size, size_output)
        """
        targets = tf.cast(y_true, dtype=tf.float32)
        logits = tf.cast(y_pred, dtype=tf.float32)
        criterion = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
        return criterion(targets, logits)

    def backward(self, X_train, y_train):
        """
        Return the gradients of the loss with respect to `self.variables`.

        The forward pass and loss are recorded under a GradientTape; the
        gradients come back in the same order as `self.variables`.
        """
        with tf.GradientTape() as tape:
            batch_loss = self.loss(self.forward(X_train), y_train)
        return tape.gradient(batch_loss, self.variables)

    def compute_output(self, X):
        """
        Map inputs to output logits through the two hidden layers.
        """
        inputs = tf.cast(X, dtype=tf.float32)
        # Hidden layers: affine transform followed by leaky ReLU.
        hidden1 = tf.nn.leaky_relu(tf.matmul(inputs, self.W1) + self.b1)
        hidden2 = tf.nn.leaky_relu(tf.matmul(hidden1, self.W2) + self.b2)
        # Output layer stays linear (logits).
        return tf.matmul(hidden2, self.W3) + self.b3

# -------------------------------
# Character-Level Tokenizer and Preprocessing Functions
# -------------------------------
def char_level_tokenizer(texts, num_words=None):
    """
    Build a Keras Tokenizer that works on characters and fit it on `texts`.

    Args:
        texts (list of str): Texts used to fit the tokenizer.
        num_words (int or None): Passed through as the tokenizer's `num_words` limit.

    Returns:
        The fitted Tokenizer instance (created with lower=True).
    """
    tokenizer_options = dict(num_words=num_words, char_level=True, lower=True)
    fitted = tf.keras.preprocessing.text.Tokenizer(**tokenizer_options)
    fitted.fit_on_texts(texts)
    return fitted

def texts_to_bow(tokenizer, texts):
    """
    Vectorize texts into a bag-of-characters matrix.

    Args:
        tokenizer: A fitted character-level Tokenizer.
        texts (list of str): Texts to vectorize.

    Returns:
        Whatever `tokenizer.texts_to_matrix(texts, mode='binary')` returns:
        one fixed-length binary row per text.
    """
    return tokenizer.texts_to_matrix(texts, mode='binary')

def one_hot_encode(labels, num_classes=2):
    """
    Turn integer class labels into one-hot rows.

    Row i of the result is row labels[i] of the num_classes x num_classes identity matrix.
    """
    identity = np.eye(num_classes)
    return identity[labels]

# -------------------------------
# Load and Prepare the IMDB Dataset
# -------------------------------
print("Loading IMDB dataset...")
# Load the IMDB reviews dataset with the 'as_supervised' flag so that we get (text, label) pairs.
# with_info=True additionally returns the dataset metadata (ds_info); it is not used in this cell.
(ds_train, ds_test), ds_info = tfds.load('imdb_reviews',
                                           split=['train', 'test'],
                                           as_supervised=True,
                                           with_info=True)

# Convert training dataset to lists.
train_texts = []
train_labels = []
for text, label in tfds.as_numpy(ds_train):
    # Decode byte strings to utf-8 strings.
    train_texts.append(text.decode('utf-8'))
    train_labels.append(label)
train_labels = np.array(train_labels)

# Create a validation set from the training data (20% for validation).
# random_state is fixed at 42, so the split is independent of the seeds set at the top of the cell.
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_texts, train_labels, test_size=0.2, random_state=42)

# Convert test dataset to lists.
test_texts = []
test_labels = []
for text, label in tfds.as_numpy(ds_test):
    test_texts.append(text.decode('utf-8'))
    test_labels.append(label)
test_labels = np.array(test_labels)

print(f"Train samples: {len(train_texts)}, Validation samples: {len(val_texts)}, Test samples: {len(test_texts)}")

# -------------------------------
# Preprocessing: Tokenization and Vectorization
# -------------------------------
# Build the character-level tokenizer on the training texts only; the validation
# and test texts are vectorized below with this same fitted tokenizer.
tokenizer = char_level_tokenizer(train_texts)
# NOTE(review): the +1 presumably accounts for index 0, which Keras' Tokenizer reserves — confirm.
print("Tokenizer vocabulary size:", len(tokenizer.word_index) + 1)

# Convert texts to bag-of-characters representation.
X_train = texts_to_bow(tokenizer, train_texts)
X_val   = texts_to_bow(tokenizer, val_texts)
X_test  = texts_to_bow(tokenizer, test_texts)

# Convert labels to one-hot encoding.
y_train = one_hot_encode(train_labels)
y_val   = one_hot_encode(val_labels)
y_test  = one_hot_encode(test_labels)

# -------------------------------
# Model Setup
# -------------------------------
# The input size is determined by the dimension of the bag-of-characters vector.
size_input = X_train.shape[1]
# Set hidden layer sizes as desired.
size_hidden1 = 128
size_hidden2 = 64
size_hidden3 = 32  # Placeholder (not used in the forward pass)
size_output  = 2

# Instantiate the MLP model.
# device=None: MLP.forward then runs without an explicit tf.device scope.
model = MLP(size_input, size_hidden1, size_hidden2, size_hidden3, size_output, device=None)

# Define the optimizer (RMSprop, learning rate 0.001, for this experiment).
optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.001)

# -------------------------------
# Training Parameters and Loop
# -------------------------------
# Mini-batch size and number of passes over the training set.
batch_size = 512
epochs = 10
# ceil() so that a final, smaller batch covers the leftover samples.
num_batches = int(np.ceil(X_train.shape[0] / batch_size))

print("\nStarting training...\n")
for epoch in range(epochs):
    # Shuffle training data at the start of each epoch. Features and labels are
    # reordered with the same index array so that pairs stay aligned.
    indices = np.arange(X_train.shape[0])
    np.random.shuffle(indices)
    X_train = X_train[indices]
    y_train = y_train[indices]

    epoch_loss = 0.0
    for i in range(num_batches):
        start = i * batch_size
        end = min((i + 1) * batch_size, X_train.shape[0])
        X_batch = X_train[start:end]
        y_batch = y_train[start:end]

        # One forward pass under the tape serves both the reported loss and the
        # gradients, so each batch is only pushed through the network once.
        with tf.GradientTape() as tape:
            predictions = model.forward(X_batch)
            loss_value = model.loss(predictions, y_batch)
        grads = tape.gradient(loss_value, model.variables)
        optimizer.apply_gradients(zip(grads, model.variables))
        # Weight the batch loss by the batch size because the last batch may be
        # smaller; dividing by the sample count below yields a per-sample average.
        epoch_loss += loss_value.numpy() * (end - start)

    epoch_loss /= X_train.shape[0]

    # Evaluate on validation set (whole set in a single forward call).
    val_logits = model.forward(X_val)
    val_loss = model.loss(val_logits, y_val).numpy()
    # Class ids: index of the largest logit / of the 1 in each one-hot row.
    val_preds = np.argmax(val_logits.numpy(), axis=1)
    true_val = np.argmax(y_val, axis=1)
    accuracy = np.mean(val_preds == true_val)
    precision = precision_score(true_val, val_preds)
    recall = recall_score(true_val, val_preds)

    print(f"Epoch {epoch+1:02d} | Training Loss: {epoch_loss:.4f} | Val Loss: {val_loss:.4f} | "
          f"Accuracy: {accuracy:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f}")

# -------------------------------
# Final Evaluation on Test Set
# -------------------------------
print("\nEvaluating on test set...")
# The whole test set goes through the model in one forward call (no batching).
test_logits = model.forward(X_test)
test_loss = model.loss(test_logits, y_test).numpy()
# Predicted class = index of the largest logit in each row.
test_preds = np.argmax(test_logits.numpy(), axis=1)
# y_test is one-hot, so argmax over axis 1 recovers the integer class id.
true_test = np.argmax(y_test, axis=1)
test_accuracy = np.mean(test_preds == true_test)
# precision_score / recall_score are called with their default arguments.
test_precision = precision_score(true_test, test_preds)
test_recall = recall_score(true_test, test_preds)

print(f"Test Loss: {test_loss:.4f} | Test Accuracy: {test_accuracy:.4f} | "
      f"Test Precision: {test_precision:.4f} | Test Recall: {test_recall:.4f}")


Loading IMDB dataset...
Train samples: 20000, Validation samples: 5000, Test samples: 25000
Tokenizer vocabulary size: 134

Starting training...

Epoch 01 | Training Loss: 0.6854 | Val Loss: 0.7022 | Accuracy: 0.5282 | Precision: 0.6585 | Recall: 0.0557
Epoch 02 | Training Loss: 0.6683 | Val Loss: 0.6655 | Accuracy: 0.6054 | Precision: 0.5962 | Recall: 0.5767
Epoch 03 | Training Loss: 0.6643 | Val Loss: 0.6639 | Accuracy: 0.6042 | Precision: 0.5737 | Recall: 0.7145
Epoch 04 | Training Loss: 0.6621 | Val Loss: 0.6651 | Accuracy: 0.6048 | Precision: 0.6197 | Recall: 0.4785
Epoch 05 | Training Loss: 0.6611 | Val Loss: 0.6705 | Accuracy: 0.5832 | Precision: 0.6296 | Recall: 0.3408
Epoch 06 | Training Loss: 0.6612 | Val Loss: 0.6732 | Accuracy: 0.5792 | Precision: 0.5445 | Recall: 0.8082
Epoch 07 | Training Loss: 0.6601 | Val Loss: 0.6764 | Accuracy: 0.5768 | Precision: 0.5419 | Recall: 0.8218
Epoch 08 | Training Loss: 0.6579 | Val Loss: 0.6718 | Accuracy: 0.5876 | Precision: 0.5520 | Recal

In [11]:
# Seed number: 655
# Learning rate 0.001, LeakyReLU activation function, Adam optimizer, batch size = 256
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score

tf.random.set_seed(655)
np.random.seed(655)
# -------------------------------
# Original MLP Class Definition
# -------------------------------
class MLP(object):
    """Feed-forward network: two leaky-ReLU hidden layers and a linear logit output.

    Parameters are plain ``tf.Variable`` objects and gradients are obtained
    with ``tf.GradientTape``.
    """

    def __init__(self, size_input, size_hidden1, size_hidden2, size_hidden3, size_output, device=None):
        """
        Record the layer sizes and allocate the trainable parameters.

        size_input: int, width of the input layer
        size_hidden1: int, width of the 1st hidden layer
        size_hidden2: int, width of the 2nd hidden layer
        size_hidden3: int, stored on the instance only; no parameters are created for it
        size_output: int, width of the output layer
        device: 'gpu' runs the forward pass on 'gpu:0', any other non-None value
            runs it on 'cpu', None applies no explicit device scope.
        """
        self.size_input = size_input
        self.size_hidden1 = size_hidden1
        self.size_hidden2 = size_hidden2
        self.size_hidden3 = size_hidden3  # kept for reference; the forward pass never reads it
        self.size_output = size_output
        self.device = device

        # Layers are created input -> output so the random draws happen in the
        # order W1, W2, W3.
        self.W1, self.b1 = self._make_layer(self.size_input, self.size_hidden1)
        self.W2, self.b2 = self._make_layer(self.size_hidden1, self.size_hidden2)
        self.W3, self.b3 = self._make_layer(self.size_hidden2, self.size_output)

        # Everything the optimizer updates: weights first, then biases.
        self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]

    @staticmethod
    def _make_layer(fan_in, fan_out):
        """Return (weights, bias): normal weights with stddev 0.1 and a zero bias row."""
        weights = tf.Variable(tf.random.normal([fan_in, fan_out], stddev=0.1))
        bias = tf.Variable(tf.zeros([1, fan_out]))
        return weights, bias

    def forward(self, X):
        """
        Run the network on X, cache the result in ``self.y`` and return it.
        X: Tensor or array of inputs.
        """
        if self.device is None:
            self.y = self.compute_output(X)
            return self.y

        target_device = 'gpu:0' if self.device == 'gpu' else 'cpu'
        with tf.device(target_device):
            self.y = self.compute_output(X)
        return self.y

    def loss(self, y_pred, y_true):
        """
        Categorical cross-entropy between logits and targets.
        y_pred: Tensor of shape (batch_size, size_output), treated as logits
        y_true: Tensor of shape (batch_size, size_output)
        """
        targets = tf.cast(y_true, dtype=tf.float32)
        logits = tf.cast(y_pred, dtype=tf.float32)
        criterion = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
        return criterion(targets, logits)

    def backward(self, X_train, y_train):
        """
        Return the gradients of the loss with respect to ``self.variables``.
        """
        with tf.GradientTape() as tape:
            batch_loss = self.loss(self.forward(X_train), y_train)
        return tape.gradient(batch_loss, self.variables)

    def compute_output(self, X):
        """
        Map inputs to output logits through both hidden layers.
        """
        inputs = tf.cast(X, dtype=tf.float32)
        # Hidden layers: affine transform followed by leaky ReLU.
        hidden1 = tf.nn.leaky_relu(tf.matmul(inputs, self.W1) + self.b1)
        hidden2 = tf.nn.leaky_relu(tf.matmul(hidden1, self.W2) + self.b2)
        # Output layer stays linear; the loss applies the softmax.
        return tf.matmul(hidden2, self.W3) + self.b3

# -------------------------------
# Character-Level Tokenizer and Preprocessing Functions
# -------------------------------
def char_level_tokenizer(texts, num_words=None):
    """
    Build a lower-casing, character-level Keras Tokenizer and fit it on ``texts``.

    Args:
        texts (list of str): Texts the vocabulary is fitted on.
        num_words (int or None): Passed through as the Tokenizer's ``num_words``.

    Returns:
        The fitted Tokenizer instance.
    """
    fitted = tf.keras.preprocessing.text.Tokenizer(
        num_words=num_words,
        char_level=True,
        lower=True,
    )
    fitted.fit_on_texts(texts)
    return fitted

def texts_to_bow(tokenizer, texts):
    """
    Encode texts as bag-of-characters vectors.

    Args:
        tokenizer: A fitted character-level Tokenizer.
        texts (list of str): Texts to encode.

    Returns:
        Whatever ``tokenizer.texts_to_matrix`` yields in 'binary' mode
        (one row per text).
    """
    # 'binary' mode is requested, so order and repetition of characters are not encoded.
    return tokenizer.texts_to_matrix(texts, mode='binary')

def one_hot_encode(labels, num_classes=2):
    """
    Turn integer class labels into one-hot rows.

    Row ``k`` of the identity matrix is the one-hot vector for class ``k``,
    so indexing the matrix with the labels selects one row per label.
    """
    identity = np.eye(num_classes)
    return identity[labels]

# -------------------------------
# Load and Prepare the IMDB Dataset
# -------------------------------
print("Loading IMDB dataset...")
# as_supervised=True makes every element a (text, label) pair; with_info=True
# additionally returns the dataset metadata object.
(ds_train, ds_test), ds_info = tfds.load(
    'imdb_reviews',
    split=['train', 'test'],
    as_supervised=True,
    with_info=True,
)

# Materialise the training split once, then separate decoded texts from labels.
train_pairs = list(tfds.as_numpy(ds_train))
train_texts = [raw_text.decode('utf-8') for raw_text, _ in train_pairs]
train_labels = np.array([raw_label for _, raw_label in train_pairs])

# Hold out 20% of the training data for validation (fixed split seed).
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_texts,
    train_labels,
    test_size=0.2,
    random_state=42,
)

# Same conversion for the test split.
test_pairs = list(tfds.as_numpy(ds_test))
test_texts = [raw_text.decode('utf-8') for raw_text, _ in test_pairs]
test_labels = np.array([raw_label for _, raw_label in test_pairs])

print(f"Train samples: {len(train_texts)}, Validation samples: {len(val_texts)}, Test samples: {len(test_texts)}")

# -------------------------------
# Preprocessing: Tokenization and Vectorization
# -------------------------------
# Fit the character vocabulary on the training texts only.
tokenizer = char_level_tokenizer(train_texts)
vocab_size = len(tokenizer.word_index) + 1
print("Tokenizer vocabulary size:", vocab_size)

# Encode every split with the same fitted tokenizer.
X_train, X_val, X_test = (
    texts_to_bow(tokenizer, split_texts)
    for split_texts in (train_texts, val_texts, test_texts)
)

# One-hot encode the labels of every split.
y_train, y_val, y_test = (
    one_hot_encode(split_labels)
    for split_labels in (train_labels, val_labels, test_labels)
)

# -------------------------------
# Model Setup
# -------------------------------
# Input width equals the length of one bag-of-characters vector.
size_input = X_train.shape[1]
# Hidden layer widths; size_hidden3 is a placeholder the forward pass does not use.
size_hidden1, size_hidden2, size_hidden3 = 128, 64, 32
size_output = 2

# Build the network with no explicit device scope.
model = MLP(
    size_input,
    size_hidden1,
    size_hidden2,
    size_hidden3,
    size_output,
    device=None,
)

# Adam optimizer for the weight updates.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

# -------------------------------
# Training Parameters and Loop
# -------------------------------
batch_size = 256
epochs = 10
num_batches = int(np.ceil(X_train.shape[0] / batch_size))

print("\nStarting training...\n")
for epoch in range(epochs):
    # Shuffle training data at the start of each epoch.
    indices = np.arange(X_train.shape[0])
    np.random.shuffle(indices)
    X_train = X_train[indices]
    y_train = y_train[indices]

    epoch_loss = 0
    for i in range(num_batches):
        start = i * batch_size
        end = min((i+1) * batch_size, X_train.shape[0])
        X_batch = X_train[start:end]
        y_batch = y_train[start:end]

        # Record ONE forward pass and reuse it for both the reported loss and
        # the gradients. Calling model.forward/model.loss and then
        # model.backward would evaluate the network twice per batch on the
        # same weights, doubling the cost for identical numbers.
        with tf.GradientTape() as tape:
            predictions = model.forward(X_batch)
            loss_value = model.loss(predictions, y_batch)
        grads = tape.gradient(loss_value, model.variables)
        optimizer.apply_gradients(zip(grads, model.variables))

        # Weight by batch size so the short final batch does not skew the epoch mean.
        epoch_loss += loss_value.numpy() * (end - start)

    epoch_loss /= X_train.shape[0]

    # Evaluate on validation set.
    val_logits = model.forward(X_val)
    val_loss = model.loss(val_logits, y_val).numpy()
    val_preds = np.argmax(val_logits.numpy(), axis=1)
    true_val = np.argmax(y_val, axis=1)
    accuracy = np.mean(val_preds == true_val)
    precision = precision_score(true_val, val_preds)
    recall = recall_score(true_val, val_preds)

    print(f"Epoch {epoch+1:02d} | Training Loss: {epoch_loss:.4f} | Val Loss: {val_loss:.4f} | "
          f"Accuracy: {accuracy:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f}")

# -------------------------------
# Final Evaluation on Test Set
# -------------------------------
print("\nEvaluating on test set...")
# A single forward pass over the full test matrix feeds every metric below.
test_logits = model.forward(X_test)
test_loss = model.loss(test_logits, y_test).numpy()

# Reduce logits and one-hot targets to class indices.
test_preds = test_logits.numpy().argmax(axis=1)
true_test = np.argmax(y_test, axis=1)

test_accuracy = (test_preds == true_test).mean()
test_precision = precision_score(true_test, test_preds)
test_recall = recall_score(true_test, test_preds)

print(f"Test Loss: {test_loss:.4f} | Test Accuracy: {test_accuracy:.4f} | "
      f"Test Precision: {test_precision:.4f} | Test Recall: {test_recall:.4f}")


Loading IMDB dataset...
Train samples: 20000, Validation samples: 5000, Test samples: 25000
Tokenizer vocabulary size: 134

Starting training...

Epoch 01 | Training Loss: 0.6738 | Val Loss: 0.6652 | Accuracy: 0.6022 | Precision: 0.5959 | Recall: 0.5573
Epoch 02 | Training Loss: 0.6623 | Val Loss: 0.6667 | Accuracy: 0.5962 | Precision: 0.5620 | Recall: 0.7574
Epoch 03 | Training Loss: 0.6602 | Val Loss: 0.6612 | Accuracy: 0.6070 | Precision: 0.5845 | Recall: 0.6547
Epoch 04 | Training Loss: 0.6591 | Val Loss: 0.6618 | Accuracy: 0.6074 | Precision: 0.5771 | Recall: 0.7116
Epoch 05 | Training Loss: 0.6560 | Val Loss: 0.6609 | Accuracy: 0.6096 | Precision: 0.6067 | Recall: 0.5536
Epoch 06 | Training Loss: 0.6535 | Val Loss: 0.6627 | Accuracy: 0.6096 | Precision: 0.5818 | Recall: 0.6922
Epoch 07 | Training Loss: 0.6531 | Val Loss: 0.6598 | Accuracy: 0.6090 | Precision: 0.5959 | Recall: 0.6011
Epoch 08 | Training Loss: 0.6505 | Val Loss: 0.6577 | Accuracy: 0.6094 | Precision: 0.5905 | Recal

In [12]:
# Seed number: 655
# Learning rate 0.001, LeakyReLU activation function, Adam optimizer, batch size = 128
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score

tf.random.set_seed(655)
np.random.seed(655)
# -------------------------------
# Original MLP Class Definition
# -------------------------------
class MLP(object):
    """Feed-forward network: two leaky-ReLU hidden layers and a linear logit output.

    Parameters are plain ``tf.Variable`` objects and gradients are obtained
    with ``tf.GradientTape``.
    """

    def __init__(self, size_input, size_hidden1, size_hidden2, size_hidden3, size_output, device=None):
        """
        Record the layer sizes and allocate the trainable parameters.

        size_input: int, width of the input layer
        size_hidden1: int, width of the 1st hidden layer
        size_hidden2: int, width of the 2nd hidden layer
        size_hidden3: int, stored on the instance only; no parameters are created for it
        size_output: int, width of the output layer
        device: 'gpu' runs the forward pass on 'gpu:0', any other non-None value
            runs it on 'cpu', None applies no explicit device scope.
        """
        self.size_input = size_input
        self.size_hidden1 = size_hidden1
        self.size_hidden2 = size_hidden2
        self.size_hidden3 = size_hidden3  # kept for reference; the forward pass never reads it
        self.size_output = size_output
        self.device = device

        # Layers are created input -> output so the random draws happen in the
        # order W1, W2, W3.
        self.W1, self.b1 = self._make_layer(self.size_input, self.size_hidden1)
        self.W2, self.b2 = self._make_layer(self.size_hidden1, self.size_hidden2)
        self.W3, self.b3 = self._make_layer(self.size_hidden2, self.size_output)

        # Everything the optimizer updates: weights first, then biases.
        self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]

    @staticmethod
    def _make_layer(fan_in, fan_out):
        """Return (weights, bias): normal weights with stddev 0.1 and a zero bias row."""
        weights = tf.Variable(tf.random.normal([fan_in, fan_out], stddev=0.1))
        bias = tf.Variable(tf.zeros([1, fan_out]))
        return weights, bias

    def forward(self, X):
        """
        Run the network on X, cache the result in ``self.y`` and return it.
        X: Tensor or array of inputs.
        """
        if self.device is None:
            self.y = self.compute_output(X)
            return self.y

        target_device = 'gpu:0' if self.device == 'gpu' else 'cpu'
        with tf.device(target_device):
            self.y = self.compute_output(X)
        return self.y

    def loss(self, y_pred, y_true):
        """
        Categorical cross-entropy between logits and targets.
        y_pred: Tensor of shape (batch_size, size_output), treated as logits
        y_true: Tensor of shape (batch_size, size_output)
        """
        targets = tf.cast(y_true, dtype=tf.float32)
        logits = tf.cast(y_pred, dtype=tf.float32)
        criterion = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
        return criterion(targets, logits)

    def backward(self, X_train, y_train):
        """
        Return the gradients of the loss with respect to ``self.variables``.
        """
        with tf.GradientTape() as tape:
            batch_loss = self.loss(self.forward(X_train), y_train)
        return tape.gradient(batch_loss, self.variables)

    def compute_output(self, X):
        """
        Map inputs to output logits through both hidden layers.
        """
        inputs = tf.cast(X, dtype=tf.float32)
        # Hidden layers: affine transform followed by leaky ReLU.
        hidden1 = tf.nn.leaky_relu(tf.matmul(inputs, self.W1) + self.b1)
        hidden2 = tf.nn.leaky_relu(tf.matmul(hidden1, self.W2) + self.b2)
        # Output layer stays linear; the loss applies the softmax.
        return tf.matmul(hidden2, self.W3) + self.b3

# -------------------------------
# Character-Level Tokenizer and Preprocessing Functions
# -------------------------------
def char_level_tokenizer(texts, num_words=None):
    """
    Build a lower-casing, character-level Keras Tokenizer and fit it on ``texts``.

    Args:
        texts (list of str): Texts the vocabulary is fitted on.
        num_words (int or None): Passed through as the Tokenizer's ``num_words``.

    Returns:
        The fitted Tokenizer instance.
    """
    fitted = tf.keras.preprocessing.text.Tokenizer(
        num_words=num_words,
        char_level=True,
        lower=True,
    )
    fitted.fit_on_texts(texts)
    return fitted

def texts_to_bow(tokenizer, texts):
    """
    Encode texts as bag-of-characters vectors.

    Args:
        tokenizer: A fitted character-level Tokenizer.
        texts (list of str): Texts to encode.

    Returns:
        Whatever ``tokenizer.texts_to_matrix`` yields in 'binary' mode
        (one row per text).
    """
    # 'binary' mode is requested, so order and repetition of characters are not encoded.
    return tokenizer.texts_to_matrix(texts, mode='binary')

def one_hot_encode(labels, num_classes=2):
    """
    Turn integer class labels into one-hot rows.

    Row ``k`` of the identity matrix is the one-hot vector for class ``k``,
    so indexing the matrix with the labels selects one row per label.
    """
    identity = np.eye(num_classes)
    return identity[labels]

# -------------------------------
# Load and Prepare the IMDB Dataset
# -------------------------------
print("Loading IMDB dataset...")
# as_supervised=True makes every element a (text, label) pair; with_info=True
# additionally returns the dataset metadata object.
(ds_train, ds_test), ds_info = tfds.load(
    'imdb_reviews',
    split=['train', 'test'],
    as_supervised=True,
    with_info=True,
)

# Materialise the training split once, then separate decoded texts from labels.
train_pairs = list(tfds.as_numpy(ds_train))
train_texts = [raw_text.decode('utf-8') for raw_text, _ in train_pairs]
train_labels = np.array([raw_label for _, raw_label in train_pairs])

# Hold out 20% of the training data for validation (fixed split seed).
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_texts,
    train_labels,
    test_size=0.2,
    random_state=42,
)

# Same conversion for the test split.
test_pairs = list(tfds.as_numpy(ds_test))
test_texts = [raw_text.decode('utf-8') for raw_text, _ in test_pairs]
test_labels = np.array([raw_label for _, raw_label in test_pairs])

print(f"Train samples: {len(train_texts)}, Validation samples: {len(val_texts)}, Test samples: {len(test_texts)}")

# -------------------------------
# Preprocessing: Tokenization and Vectorization
# -------------------------------
# Fit the character vocabulary on the training texts only.
tokenizer = char_level_tokenizer(train_texts)
vocab_size = len(tokenizer.word_index) + 1
print("Tokenizer vocabulary size:", vocab_size)

# Encode every split with the same fitted tokenizer.
X_train, X_val, X_test = (
    texts_to_bow(tokenizer, split_texts)
    for split_texts in (train_texts, val_texts, test_texts)
)

# One-hot encode the labels of every split.
y_train, y_val, y_test = (
    one_hot_encode(split_labels)
    for split_labels in (train_labels, val_labels, test_labels)
)

# -------------------------------
# Model Setup
# -------------------------------
# Input width equals the length of one bag-of-characters vector.
size_input = X_train.shape[1]
# Hidden layer widths; size_hidden3 is a placeholder the forward pass does not use.
size_hidden1, size_hidden2, size_hidden3 = 128, 64, 32
size_output = 2

# Build the network with no explicit device scope.
model = MLP(
    size_input,
    size_hidden1,
    size_hidden2,
    size_hidden3,
    size_output,
    device=None,
)

# Adam optimizer for the weight updates.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

# -------------------------------
# Training Parameters and Loop
# -------------------------------
batch_size = 128
epochs = 10
num_batches = int(np.ceil(X_train.shape[0] / batch_size))

print("\nStarting training...\n")
for epoch in range(epochs):
    # Shuffle training data at the start of each epoch.
    indices = np.arange(X_train.shape[0])
    np.random.shuffle(indices)
    X_train = X_train[indices]
    y_train = y_train[indices]

    epoch_loss = 0
    for i in range(num_batches):
        start = i * batch_size
        end = min((i+1) * batch_size, X_train.shape[0])
        X_batch = X_train[start:end]
        y_batch = y_train[start:end]

        # Record ONE forward pass and reuse it for both the reported loss and
        # the gradients. Calling model.forward/model.loss and then
        # model.backward would evaluate the network twice per batch on the
        # same weights, doubling the cost for identical numbers.
        with tf.GradientTape() as tape:
            predictions = model.forward(X_batch)
            loss_value = model.loss(predictions, y_batch)
        grads = tape.gradient(loss_value, model.variables)
        optimizer.apply_gradients(zip(grads, model.variables))

        # Weight by batch size so the short final batch does not skew the epoch mean.
        epoch_loss += loss_value.numpy() * (end - start)

    epoch_loss /= X_train.shape[0]

    # Evaluate on validation set.
    val_logits = model.forward(X_val)
    val_loss = model.loss(val_logits, y_val).numpy()
    val_preds = np.argmax(val_logits.numpy(), axis=1)
    true_val = np.argmax(y_val, axis=1)
    accuracy = np.mean(val_preds == true_val)
    precision = precision_score(true_val, val_preds)
    recall = recall_score(true_val, val_preds)

    print(f"Epoch {epoch+1:02d} | Training Loss: {epoch_loss:.4f} | Val Loss: {val_loss:.4f} | "
          f"Accuracy: {accuracy:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f}")

# -------------------------------
# Final Evaluation on Test Set
# -------------------------------
print("\nEvaluating on test set...")
# A single forward pass over the full test matrix feeds every metric below.
test_logits = model.forward(X_test)
test_loss = model.loss(test_logits, y_test).numpy()

# Reduce logits and one-hot targets to class indices.
test_preds = test_logits.numpy().argmax(axis=1)
true_test = np.argmax(y_test, axis=1)

test_accuracy = (test_preds == true_test).mean()
test_precision = precision_score(true_test, test_preds)
test_recall = recall_score(true_test, test_preds)

print(f"Test Loss: {test_loss:.4f} | Test Accuracy: {test_accuracy:.4f} | "
      f"Test Precision: {test_precision:.4f} | Test Recall: {test_recall:.4f}")


Loading IMDB dataset...
Train samples: 20000, Validation samples: 5000, Test samples: 25000
Tokenizer vocabulary size: 134

Starting training...

Epoch 01 | Training Loss: 0.6717 | Val Loss: 0.6658 | Accuracy: 0.6056 | Precision: 0.5763 | Recall: 0.7042
Epoch 02 | Training Loss: 0.6643 | Val Loss: 0.6622 | Accuracy: 0.6092 | Precision: 0.5826 | Recall: 0.6836
Epoch 03 | Training Loss: 0.6592 | Val Loss: 0.6641 | Accuracy: 0.6030 | Precision: 0.5946 | Recall: 0.5693
Epoch 04 | Training Loss: 0.6580 | Val Loss: 0.6600 | Accuracy: 0.6096 | Precision: 0.5843 | Recall: 0.6745
Epoch 05 | Training Loss: 0.6551 | Val Loss: 0.6652 | Accuracy: 0.5944 | Precision: 0.5601 | Recall: 0.7607
Epoch 06 | Training Loss: 0.6532 | Val Loss: 0.6596 | Accuracy: 0.6088 | Precision: 0.5809 | Recall: 0.6935
Epoch 07 | Training Loss: 0.6503 | Val Loss: 0.6598 | Accuracy: 0.6132 | Precision: 0.6001 | Recall: 0.6060
Epoch 08 | Training Loss: 0.6480 | Val Loss: 0.6604 | Accuracy: 0.6064 | Precision: 0.6265 | Recal

In [None]:
# Seed number: 1567
# Learning rate 0.001, LeakyReLU activation function, Adam optimizer, batch size = 256
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score

tf.random.set_seed(1567)
np.random.seed(1567)
# -------------------------------
# Original MLP Class Definition
# -------------------------------
class MLP(object):
    """Feed-forward network: two leaky-ReLU hidden layers and a linear logit output.

    Parameters are plain ``tf.Variable`` objects and gradients are obtained
    with ``tf.GradientTape``.
    """

    def __init__(self, size_input, size_hidden1, size_hidden2, size_hidden3, size_output, device=None):
        """
        Record the layer sizes and allocate the trainable parameters.

        size_input: int, width of the input layer
        size_hidden1: int, width of the 1st hidden layer
        size_hidden2: int, width of the 2nd hidden layer
        size_hidden3: int, stored on the instance only; no parameters are created for it
        size_output: int, width of the output layer
        device: 'gpu' runs the forward pass on 'gpu:0', any other non-None value
            runs it on 'cpu', None applies no explicit device scope.
        """
        self.size_input = size_input
        self.size_hidden1 = size_hidden1
        self.size_hidden2 = size_hidden2
        self.size_hidden3 = size_hidden3  # kept for reference; the forward pass never reads it
        self.size_output = size_output
        self.device = device

        # Layers are created input -> output so the random draws happen in the
        # order W1, W2, W3.
        self.W1, self.b1 = self._make_layer(self.size_input, self.size_hidden1)
        self.W2, self.b2 = self._make_layer(self.size_hidden1, self.size_hidden2)
        self.W3, self.b3 = self._make_layer(self.size_hidden2, self.size_output)

        # Everything the optimizer updates: weights first, then biases.
        self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]

    @staticmethod
    def _make_layer(fan_in, fan_out):
        """Return (weights, bias): normal weights with stddev 0.1 and a zero bias row."""
        weights = tf.Variable(tf.random.normal([fan_in, fan_out], stddev=0.1))
        bias = tf.Variable(tf.zeros([1, fan_out]))
        return weights, bias

    def forward(self, X):
        """
        Run the network on X, cache the result in ``self.y`` and return it.
        X: Tensor or array of inputs.
        """
        if self.device is None:
            self.y = self.compute_output(X)
            return self.y

        target_device = 'gpu:0' if self.device == 'gpu' else 'cpu'
        with tf.device(target_device):
            self.y = self.compute_output(X)
        return self.y

    def loss(self, y_pred, y_true):
        """
        Categorical cross-entropy between logits and targets.
        y_pred: Tensor of shape (batch_size, size_output), treated as logits
        y_true: Tensor of shape (batch_size, size_output)
        """
        targets = tf.cast(y_true, dtype=tf.float32)
        logits = tf.cast(y_pred, dtype=tf.float32)
        criterion = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
        return criterion(targets, logits)

    def backward(self, X_train, y_train):
        """
        Return the gradients of the loss with respect to ``self.variables``.
        """
        with tf.GradientTape() as tape:
            batch_loss = self.loss(self.forward(X_train), y_train)
        return tape.gradient(batch_loss, self.variables)

    def compute_output(self, X):
        """
        Map inputs to output logits through both hidden layers.
        """
        inputs = tf.cast(X, dtype=tf.float32)
        # Hidden layers: affine transform followed by leaky ReLU.
        hidden1 = tf.nn.leaky_relu(tf.matmul(inputs, self.W1) + self.b1)
        hidden2 = tf.nn.leaky_relu(tf.matmul(hidden1, self.W2) + self.b2)
        # Output layer stays linear; the loss applies the softmax.
        return tf.matmul(hidden2, self.W3) + self.b3

# -------------------------------
# Character-Level Tokenizer and Preprocessing Functions
# -------------------------------
def char_level_tokenizer(texts, num_words=None):
    """
    Build a lower-casing, character-level Keras Tokenizer and fit it on ``texts``.

    Args:
        texts (list of str): Texts the vocabulary is fitted on.
        num_words (int or None): Passed through as the Tokenizer's ``num_words``.

    Returns:
        The fitted Tokenizer instance.
    """
    fitted = tf.keras.preprocessing.text.Tokenizer(
        num_words=num_words,
        char_level=True,
        lower=True,
    )
    fitted.fit_on_texts(texts)
    return fitted

def texts_to_bow(tokenizer, texts):
    """
    Encode texts as bag-of-characters vectors.

    Args:
        tokenizer: A fitted character-level Tokenizer.
        texts (list of str): Texts to encode.

    Returns:
        Whatever ``tokenizer.texts_to_matrix`` yields in 'binary' mode
        (one row per text).
    """
    # 'binary' mode is requested, so order and repetition of characters are not encoded.
    return tokenizer.texts_to_matrix(texts, mode='binary')

def one_hot_encode(labels, num_classes=2):
    """
    Turn integer class labels into one-hot rows.

    Row ``k`` of the identity matrix is the one-hot vector for class ``k``,
    so indexing the matrix with the labels selects one row per label.
    """
    identity = np.eye(num_classes)
    return identity[labels]

# -------------------------------
# Load and Prepare the IMDB Dataset
# -------------------------------
print("Loading IMDB dataset...")
# as_supervised=True makes every element a (text, label) pair; with_info=True
# additionally returns the dataset metadata object.
(ds_train, ds_test), ds_info = tfds.load(
    'imdb_reviews',
    split=['train', 'test'],
    as_supervised=True,
    with_info=True,
)

# Materialise the training split once, then separate decoded texts from labels.
train_pairs = list(tfds.as_numpy(ds_train))
train_texts = [raw_text.decode('utf-8') for raw_text, _ in train_pairs]
train_labels = np.array([raw_label for _, raw_label in train_pairs])

# Hold out 20% of the training data for validation (fixed split seed).
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_texts,
    train_labels,
    test_size=0.2,
    random_state=42,
)

# Same conversion for the test split.
test_pairs = list(tfds.as_numpy(ds_test))
test_texts = [raw_text.decode('utf-8') for raw_text, _ in test_pairs]
test_labels = np.array([raw_label for _, raw_label in test_pairs])

print(f"Train samples: {len(train_texts)}, Validation samples: {len(val_texts)}, Test samples: {len(test_texts)}")

# -------------------------------
# Preprocessing: Tokenization and Vectorization
# -------------------------------
# Fit the character vocabulary on the training texts only.
tokenizer = char_level_tokenizer(train_texts)
vocab_size = len(tokenizer.word_index) + 1
print("Tokenizer vocabulary size:", vocab_size)

# Encode every split with the same fitted tokenizer.
X_train, X_val, X_test = (
    texts_to_bow(tokenizer, split_texts)
    for split_texts in (train_texts, val_texts, test_texts)
)

# One-hot encode the labels of every split.
y_train, y_val, y_test = (
    one_hot_encode(split_labels)
    for split_labels in (train_labels, val_labels, test_labels)
)

# -------------------------------
# Model Setup
# -------------------------------
# Input width equals the length of one bag-of-characters vector.
size_input = X_train.shape[1]
# Hidden layer widths; size_hidden3 is a placeholder the forward pass does not use.
size_hidden1, size_hidden2, size_hidden3 = 128, 64, 32
size_output = 2

# Build the network with no explicit device scope.
model = MLP(
    size_input,
    size_hidden1,
    size_hidden2,
    size_hidden3,
    size_output,
    device=None,
)

# Adam optimizer for the weight updates.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

# -------------------------------
# Training Parameters and Loop
# -------------------------------
batch_size = 256
epochs = 10
num_batches = int(np.ceil(X_train.shape[0] / batch_size))

print("\nStarting training...\n")
for epoch in range(epochs):
    # Shuffle training data at the start of each epoch.
    indices = np.arange(X_train.shape[0])
    np.random.shuffle(indices)
    X_train = X_train[indices]
    y_train = y_train[indices]

    epoch_loss = 0
    for i in range(num_batches):
        start = i * batch_size
        end = min((i+1) * batch_size, X_train.shape[0])
        X_batch = X_train[start:end]
        y_batch = y_train[start:end]

        # Record ONE forward pass and reuse it for both the reported loss and
        # the gradients. Calling model.forward/model.loss and then
        # model.backward would evaluate the network twice per batch on the
        # same weights, doubling the cost for identical numbers.
        with tf.GradientTape() as tape:
            predictions = model.forward(X_batch)
            loss_value = model.loss(predictions, y_batch)
        grads = tape.gradient(loss_value, model.variables)
        optimizer.apply_gradients(zip(grads, model.variables))

        # Weight by batch size so the short final batch does not skew the epoch mean.
        epoch_loss += loss_value.numpy() * (end - start)

    epoch_loss /= X_train.shape[0]

    # Evaluate on validation set.
    val_logits = model.forward(X_val)
    val_loss = model.loss(val_logits, y_val).numpy()
    val_preds = np.argmax(val_logits.numpy(), axis=1)
    true_val = np.argmax(y_val, axis=1)
    accuracy = np.mean(val_preds == true_val)
    precision = precision_score(true_val, val_preds)
    recall = recall_score(true_val, val_preds)

    print(f"Epoch {epoch+1:02d} | Training Loss: {epoch_loss:.4f} | Val Loss: {val_loss:.4f} | "
          f"Accuracy: {accuracy:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f}")

# -------------------------------
# Final Evaluation on Test Set
# -------------------------------
print("\nEvaluating on test set...")
# A single forward pass over the full test matrix feeds every metric below.
test_logits = model.forward(X_test)
test_loss = model.loss(test_logits, y_test).numpy()

# Reduce logits and one-hot targets to class indices.
test_preds = test_logits.numpy().argmax(axis=1)
true_test = np.argmax(y_test, axis=1)

test_accuracy = (test_preds == true_test).mean()
test_precision = precision_score(true_test, test_preds)
test_recall = recall_score(true_test, test_preds)

print(f"Test Loss: {test_loss:.4f} | Test Accuracy: {test_accuracy:.4f} | "
      f"Test Precision: {test_precision:.4f} | Test Recall: {test_recall:.4f}")
