In [None]:
!pip install tensorflow numpy




# USING LSTM

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Hyperparameters
vocab_size = 10000  # Number of unique words to consider (top 10,000 words)
max_length = 100    # Maximum length of review sequences
embedding_dim = 128  # Embedding layer output size

# Load IMDB dataset
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=vocab_size)

# Pad sequences to ensure uniform length
X_train = pad_sequences(X_train, maxlen=max_length, padding='post')
X_test = pad_sequences(X_test, maxlen=max_length, padding='post')

# Building the LSTM Model
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_length))
model.add(LSTM(64, dropout=0.2, recurrent_dropout=0.2))  # LSTM layer with dropout
model.add(Dense(1, activation='sigmoid'))  # Output layer for binary classification

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=5, batch_size=64, validation_split=0.2)

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy}")



Epoch 1/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 202ms/step - accuracy: 0.6707 - loss: 0.5955 - val_accuracy: 0.7426 - val_loss: 0.5184
Epoch 2/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 195ms/step - accuracy: 0.8401 - loss: 0.3920 - val_accuracy: 0.8178 - val_loss: 0.4113
Epoch 3/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 189ms/step - accuracy: 0.8813 - loss: 0.3021 - val_accuracy: 0.8080 - val_loss: 0.4223
Epoch 4/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 197ms/step - accuracy: 0.8939 - loss: 0.2682 - val_accuracy: 0.8408 - val_loss: 0.4023
Epoch 5/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 189ms/step - accuracy: 0.9200 - loss: 0.2151 - val_accuracy: 0.8286 - val_loss: 0.4460
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 45ms/step - accuracy: 0.8285 - loss: 0.4475
Test Accuracy: 0.8296800255775452


# USING DCASAM





In [None]:
!pip install tensorflow transformers




In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Dropout, Bidirectional, LSTM, Layer
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from transformers import TFBertModel
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

# Load the IMDB dataset (train and test data)
max_length = 128  # Max length for padding
vocab_size = 10000  # Most frequent words to keep

# Load dataset (using the top 'vocab_size' words)
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)

# Padding sequences to the same length
x_train = pad_sequences(x_train, maxlen=max_length, padding='post', truncating='post')
x_test = pad_sequences(x_test, maxlen=max_length, padding='post', truncating='post')

# Create attention mask (1 for real tokens, 0 for padding)
train_attention_masks = np.where(x_train != 0, 1, 0)
test_attention_masks = np.where(x_test != 0, 1, 0)

# Load BERT model
bert_model = TFBertModel.from_pretrained("bert-base-uncased")

# Custom layer to integrate BERT with Keras model
class BERTEmbeddingLayer(Layer):
    def __init__(self, bert_model, **kwargs):
        super(BERTEmbeddingLayer, self).__init__(**kwargs)
        self.bert_model = bert_model

    def call(self, inputs):
        input_ids, attention_mask = inputs
        # Convert inputs to tensors to ensure compatibility with TFBertModel
        input_ids = tf.cast(input_ids, dtype=tf.int32)
        attention_mask = tf.cast(attention_mask, dtype=tf.int32)

        bert_output = self.bert_model(input_ids=input_ids, attention_mask=attention_mask)
        return bert_output.last_hidden_state  # Shape: (batch_size, max_length, 768)

# Define DCASAM model
def DCASAM_model():
    # Define inputs
    input_ids = Input(shape=(max_length,), dtype=tf.int32, name="input_ids")
    attention_mask = Input(shape=(max_length,), dtype=tf.int32, name="attention_mask")

    # BERT embeddings
    bert_embeddings = BERTEmbeddingLayer(bert_model)([input_ids, attention_mask])

    # BiLSTM layer for context
    lstm = Bidirectional(LSTM(64, dropout=0.2, return_sequences=False))(bert_embeddings)

    # Fully connected layers for classification
    dense = Dense(64, activation='relu')(lstm)
    dropout = Dropout(0.3)(dense)
    output = Dense(1, activation='sigmoid')(dropout)

    model = Model(inputs=[input_ids, attention_mask], outputs=output)
    return model

# Initialize and compile the model
dcasam_model = DCASAM_model()
dcasam_model.compile(optimizer=Adam(learning_rate=2e-5), loss='binary_crossentropy', metrics=['accuracy'])

# Model summary
dcasam_model.summary()

# Split the data for training and validation manually (80% train, 20% validation)
split_index = int(x_train.shape[0] * 0.8)
X_train_input_ids, X_val_input_ids = x_train[:split_index], x_train[split_index:]
X_train_attention_masks, X_val_attention_masks = train_attention_masks[:split_index], train_attention_masks[split_index:]
y_train, y_val = y_train[:split_index], y_train[split_index:]

# Train the model with validation data
history = dcasam_model.fit(
    [X_train_input_ids, X_train_attention_masks],
    y_train,
    validation_data=([X_val_input_ids, X_val_attention_masks], y_val),
    epochs=5,
    batch_size=32
)

# Evaluate the model on the test set
loss, accuracy = dcasam_model.evaluate([x_test, test_attention_masks], y_test)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")


Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

Epoch 1/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m227s[0m 346ms/step - accuracy: 0.5019 - loss: 0.7045 - val_accuracy: 0.5300 - val_loss: 0.6903
Epoch 2/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m261s[0m 345ms/step - accuracy: 0.5247 - loss: 0.6927 - val_accuracy: 0.5324 - val_loss: 0.6886
Epoch 3/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m262s[0m 345ms/step - accuracy: 0.5282 - loss: 0.6905 - val_accuracy: 0.5438 - val_loss: 0.6881
Epoch 4/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m261s[0m 344ms/step - accuracy: 0.5413 - loss: 0.6886 - val_accuracy: 0.5400 - val_loss: 0.6872
Epoch 5/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m301s[0m 407ms/step - accuracy: 0.5300 - loss: 0.6900 - val_accuracy: 0.5542 - val_loss: 0.6865
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m218s[0m 278ms/step - accuracy: 0.5442 - loss: 0.6884
Test Loss: 0.6893552541732788
Test Accuracy: 0.536599993705749




# USING DCASAM + HAN MODEL

In [None]:
!pip install tensorflow keras



In [None]:
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

# Set parameters
max_features = 20000  # Number of unique words to consider as features
maxlen = 200          # Cut texts after this number of words

# Load data
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)

# Pad sequences
X_train = pad_sequences(X_train, maxlen=maxlen)
X_test = pad_sequences(X_test, maxlen=maxlen)

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Dense, LSTM, Bidirectional, GlobalAveragePooling1D, \
    GlobalMaxPooling1D, Concatenate, Layer
from tensorflow.keras.models import Model

# Custom Attention Layer
class AttentionLayer(Layer):
    def __init__(self, **kwargs):
        super(AttentionLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        self.W = self.add_weight(shape=(input_shape[-1], input_shape[-1]), initializer='glorot_uniform', trainable=True)
        self.b = self.add_weight(shape=(input_shape[-1],), initializer='zeros', trainable=True)
        self.u = self.add_weight(shape=(input_shape[-1],), initializer='glorot_uniform', trainable=True)
        super(AttentionLayer, self).build(input_shape)

    def call(self, inputs):
        v = tf.nn.tanh(tf.tensordot(inputs, self.W, axes=1) + self.b)
        vu = tf.tensordot(v, self.u, axes=1)
        alphas = tf.nn.softmax(vu)
        output = tf.reduce_sum(inputs * tf.expand_dims(alphas, -1), axis=1)
        return output

# DCASAM + HAN Model Definition
def create_model(max_features, maxlen, embedding_dim=128):
    inputs = Input(shape=(maxlen,))
    x = Embedding(max_features, embedding_dim)(inputs)
    x = Bidirectional(LSTM(64, return_sequences=True))(x)

    # Dual Context Self Attention Mechanism (DCASAM)
    avg_pool = GlobalAveragePooling1D()(x)
    max_pool = GlobalMaxPooling1D()(x)
    context_vector = Concatenate()([avg_pool, max_pool])

    # Attention Layer on top of concatenated pools
    x = AttentionLayer()(x)
    x = Dense(64, activation="relu")(x)

    outputs = Dense(1, activation="sigmoid")(x)
    model = Model(inputs=inputs, outputs=outputs)
    return model

model = create_model(max_features, maxlen)
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Summary of the model
model.summary()

In [None]:
# Train the model
history = model.fit(
    X_train, y_train,
    epochs=5,                  # Increase epochs for better accuracy if needed
    batch_size=64,
    validation_split=0.2
)

# Evaluate the model
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

Epoch 1/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 48ms/step - accuracy: 0.7315 - loss: 0.4967 - val_accuracy: 0.8748 - val_loss: 0.2999
Epoch 2/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 24ms/step - accuracy: 0.9320 - loss: 0.1845 - val_accuracy: 0.8826 - val_loss: 0.2977
Epoch 3/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 24ms/step - accuracy: 0.9657 - loss: 0.0999 - val_accuracy: 0.8638 - val_loss: 0.3666
Epoch 4/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 25ms/step - accuracy: 0.9837 - loss: 0.0497 - val_accuracy: 0.8730 - val_loss: 0.4052
Epoch 5/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 26ms/step - accuracy: 0.9898 - loss: 0.0321 - val_accuracy: 0.8628 - val_loss: 0.6721
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 9ms/step - accuracy: 0.8424 - loss: 0.7696
Test Loss: 0.7754287719726562
Test Accuracy: 0.8428400158882141


In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

# Tokenizer for input processing
tokenizer = Tokenizer(num_words=max_features)
word_index = imdb.get_word_index()
tokenizer.fit_on_texts(word_index.keys())  # Fit the tokenizer on the IMDB dataset vocabulary

# Preprocess function for input text
def preprocess_input_text(text, maxlen):
    # Tokenize and convert input text to sequences
    sequence = tokenizer.texts_to_sequences([text])

    # Pad the sequence to match the maxlen used in training
    padded_sequence = pad_sequences(sequence, maxlen=maxlen)
    return padded_sequence

# Function to predict sentiment from input text
def predict_sentiment(model, text):
    processed_input = preprocess_input_text(text, maxlen)
    prediction = model.predict(processed_input)
    sentiment = 'Positive' if prediction >= 0.5 else 'Negative'
    return sentiment, prediction[0][0]

# Example input text for testing
input_text = "The plot was engaging, the characters were relatable, and the direction was top-notch. A must-watch!"
sentiment, score = predict_sentiment(model, input_text)

# Print the result
print(f"Predicted Sentiment: {sentiment} (Score: {score:.2f})")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
Predicted Sentiment: Positive (Score: 0.93)
