In [None]:
!pip install tensorflow numpy




# USING LSTM

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Hyperparameters
vocab_size = 10000   # Keep only the 10,000 most frequent words
max_length = 100     # Every review is padded/truncated to this many tokens
embedding_dim = 128  # Size of each learned word vector

# Load IMDB dataset (reviews arrive as lists of integer word ids, labels are 0/1)
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=vocab_size)

# Pad sequences to ensure uniform length (id 0 is appended after the last word)
X_train = pad_sequences(X_train, maxlen=max_length, padding='post')
X_test = pad_sequences(X_test, maxlen=max_length, padding='post')

# Building the LSTM Model
model = Sequential()
# mask_zero=True marks the trailing 0-padding as "not a word", so the LSTM's final
# state comes from the last real token instead of from a run of padding steps.
# The deprecated `input_length` argument is dropped; the sequence length is
# inferred from the data on the first call.
model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, mask_zero=True))
model.add(LSTM(64, dropout=0.2, recurrent_dropout=0.2))  # LSTM layer with dropout
model.add(Dense(1, activation='sigmoid'))  # Output layer for binary classification

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model (the last 20% of the training reviews are held out for validation)
history = model.fit(X_train, y_train, epochs=5, batch_size=64, validation_split=0.2)

# Evaluate the model on the untouched test split
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy}")



Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step




Epoch 1/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 188ms/step - accuracy: 0.6804 - loss: 0.5862 - val_accuracy: 0.7784 - val_loss: 0.4747
Epoch 2/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 191ms/step - accuracy: 0.8528 - loss: 0.3625 - val_accuracy: 0.8154 - val_loss: 0.4058
Epoch 3/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 182ms/step - accuracy: 0.8830 - loss: 0.3017 - val_accuracy: 0.8374 - val_loss: 0.3860
Epoch 4/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 186ms/step - accuracy: 0.9130 - loss: 0.2410 - val_accuracy: 0.8344 - val_loss: 0.4102
Epoch 5/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 182ms/step - accuracy: 0.9182 - loss: 0.2183 - val_accuracy: 0.8338 - val_loss: 0.4269
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 45ms/step - accuracy: 0.8350 - loss: 0.4360
Test Accuracy: 0.8350399732589722


# USING DCASAM





In [None]:
!pip install tensorflow transformers




In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Dropout, Bidirectional, LSTM, Layer
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from transformers import TFBertModel
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

# The tokenizer that matches the pretrained BERT weights; imported here because
# this data-preparation step is the only place that needs it.
from transformers import BertTokenizerFast

# Load the IMDB dataset (train and test data)
max_length = 128  # Max number of BERT tokens per review (padding/truncation length)
vocab_size = 10000  # Most frequent IMDB words to keep; rarer ones become unknown

# Load dataset (using the top 'vocab_size' words). Keras returns each review as a
# list of IMDB-specific integer word ids.
(train_word_ids, y_train), (test_word_ids, y_test) = imdb.load_data(num_words=vocab_size)

# Those ids are ranks in the IMDB vocabulary, NOT BERT WordPiece ids. Feeding them
# to BERT directly makes it look up unrelated tokens, so the reviews are first
# turned back into text and then re-tokenized with BERT's own tokenizer.
# load_data() shifts every rank by 3 and reserves 0 = padding, 1 = start, 2 = unknown.
IMDB_INDEX_FROM = 3
imdb_id_to_word = {
    rank + IMDB_INDEX_FROM: word for word, rank in imdb.get_word_index().items()
}


def decode_review(word_ids):
    """Rebuild review text from Keras IMDB word ids.

    Padding (0) and start (1) markers are dropped; any id without a known word
    (including the out-of-vocabulary id 2) becomes BERT's "[UNK]" token.
    """
    return " ".join(imdb_id_to_word.get(i, "[UNK]") for i in word_ids if i > 1)


bert_tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased")


def encode_for_bert(reviews):
    """Tokenize reviews into fixed-length BERT input ids and attention masks.

    Returns two integer arrays of shape (len(reviews), max_length): the token ids
    and the mask (1 for real tokens, 0 for padding).
    """
    encoded = bert_tokenizer(
        [decode_review(review) for review in reviews],
        max_length=max_length,
        padding="max_length",  # pad every review to exactly max_length
        truncation=True,       # keep the beginning of longer reviews
        return_tensors="np",
    )
    return encoded["input_ids"], encoded["attention_mask"]


# Same variable names as before, now holding genuine BERT inputs
x_train, train_attention_masks = encode_for_bert(train_word_ids)
x_test, test_attention_masks = encode_for_bert(test_word_ids)

# Load BERT model
bert_model = TFBertModel.from_pretrained("bert-base-uncased")

# Custom layer to integrate BERT with Keras model
class BERTEmbeddingLayer(Layer):
    """Keras layer that forwards its inputs through a wrapped BERT model.

    The layer holds a reference to an already-loaded BERT model and returns that
    model's `last_hidden_state` for the given token ids and attention mask.
    """

    def __init__(self, bert_model, **kwargs):
        """Store the BERT model; remaining keyword arguments go to `Layer`."""
        super().__init__(**kwargs)
        self.bert_model = bert_model

    def call(self, inputs):
        """Run BERT on `[input_ids, attention_mask]` and return per-token states.

        Both tensors are cast to int32 before being handed to the model.
        """
        token_ids, mask = inputs
        token_ids = tf.cast(token_ids, dtype=tf.int32)
        mask = tf.cast(mask, dtype=tf.int32)

        outputs = self.bert_model(input_ids=token_ids, attention_mask=mask)
        # One hidden vector per input position (768 wide for bert-base, per the
        # original author's note)
        return outputs.last_hidden_state

# Define DCASAM model
def DCASAM_model():
    """Build the uncompiled BERT -> BiLSTM -> dense sentiment classifier.

    Reads the module-level `max_length` and `bert_model`. The returned model takes
    two int32 inputs named "input_ids" and "attention_mask" and produces one
    sigmoid probability per example.
    """
    # Two parallel inputs, each max_length integers per example
    ids_in = Input(shape=(max_length,), dtype=tf.int32, name="input_ids")
    mask_in = Input(shape=(max_length,), dtype=tf.int32, name="attention_mask")

    # Per-token representations from BERT
    token_states = BERTEmbeddingLayer(bert_model)([ids_in, mask_in])

    # Bidirectional LSTM condenses the token sequence into a single vector
    sequence_summary = Bidirectional(
        LSTM(64, dropout=0.2, return_sequences=False)
    )(token_states)

    # Classification head: dense -> dropout -> sigmoid unit
    hidden = Dense(64, activation='relu')(sequence_summary)
    hidden = Dropout(0.3)(hidden)
    probability = Dense(1, activation='sigmoid')(hidden)

    return Model(inputs=[ids_in, mask_in], outputs=probability)

# Build the network, then attach optimizer, loss and metric
dcasam_model = DCASAM_model()
dcasam_model.compile(
    optimizer=Adam(learning_rate=2e-5),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Layer-by-layer overview
dcasam_model.summary()

# Manual hold-out: first 80% of the training arrays for fitting, last 20% for validation
split_index = int(x_train.shape[0] * 0.8)
X_train_input_ids = x_train[:split_index]
X_val_input_ids = x_train[split_index:]
X_train_attention_masks = train_attention_masks[:split_index]
X_val_attention_masks = train_attention_masks[split_index:]
# NOTE: y_train is rebound to its own first 80% here, so from this point on it no
# longer holds the full training labels.
y_train, y_val = y_train[:split_index], y_train[split_index:]

# Fit on the training part while monitoring the held-out part
history = dcasam_model.fit(
    [X_train_input_ids, X_train_attention_masks],
    y_train,
    validation_data=([X_val_input_ids, X_val_attention_masks], y_val),
    epochs=5,
    batch_size=32,
)

# Final score on the test split
loss, accuracy = dcasam_model.evaluate([x_test, test_attention_masks], y_test)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

Epoch 1/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m234s[0m 359ms/step - accuracy: 0.4994 - loss: 0.7164 - val_accuracy: 0.5360 - val_loss: 0.6911
Epoch 2/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m258s[0m 356ms/step - accuracy: 0.5155 - loss: 0.6944 - val_accuracy: 0.5330 - val_loss: 0.6895
Epoch 3/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m222s[0m 355ms/step - accuracy: 0.5230 - loss: 0.6912 - val_accuracy: 0.5454 - val_loss: 0.6874
Epoch 4/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m301s[0m 417ms/step - accuracy: 0.5354 - loss: 0.6891 - val_accuracy: 0.5458 - val_loss: 0.6867
Epoch 5/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m224s[0m 356ms/step - accuracy: 0.5250 - loss: 0.6897 - val_accuracy: 0.5524 - val_loss: 0.6867
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m222s[0m 284ms/step - accuracy: 0.5443 - loss: 0.6879
Test Loss: 0.6892878413200378
Test Accuracy: 0.536440014839172




# USING DCASAM + HAN MODEL

In [None]:
!pip install tensorflow keras



In [None]:
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

# Dataset settings
max_features = 20000  # Vocabulary size: only the most frequent words are kept
maxlen = 200          # Fixed number of tokens per review after padding/truncation

# Fetch the train/test splits as lists of integer word ids plus 0/1 labels
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)

# Bring every review to exactly `maxlen` tokens (pad_sequences defaults apply)
X_train, X_test = (
    pad_sequences(reviews, maxlen=maxlen) for reviews in (X_train, X_test)
)

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Dense, LSTM, Bidirectional, GlobalAveragePooling1D, \
    GlobalMaxPooling1D, Concatenate, Layer
from tensorflow.keras.models import Model

# Custom Attention Layer
class AttentionLayer(Layer):
    """Attention pooling that collapses axis 1 of its input into a weighted sum.

    Assumes a rank-3 input such as the (batch, steps, features) sequence produced
    by the bidirectional LSTM in `create_model`.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create the projection matrix, bias and scoring vector."""
        feature_dim = input_shape[-1]
        self.W = self.add_weight(shape=(feature_dim, feature_dim), initializer='glorot_uniform', trainable=True)
        self.b = self.add_weight(shape=(feature_dim,), initializer='zeros', trainable=True)
        self.u = self.add_weight(shape=(feature_dim,), initializer='glorot_uniform', trainable=True)
        super().build(input_shape)

    def call(self, inputs):
        """Return the attention-weighted sum of `inputs` over axis 1."""
        # Project every position, squash with tanh, then score it against `u`
        projected = tf.nn.tanh(tf.tensordot(inputs, self.W, axes=1) + self.b)
        scores = tf.tensordot(projected, self.u, axes=1)
        # Softmax over the last axis of the scores turns them into weights
        weights = tf.expand_dims(tf.nn.softmax(scores), -1)
        return tf.reduce_sum(inputs * weights, axis=1)

# DCASAM + HAN Model Definition
def create_model(max_features, maxlen, embedding_dim=128):
    """Build the uncompiled BiLSTM + attention + pooled-context classifier.

    Args:
        max_features: Vocabulary size of the embedding layer.
        maxlen: Number of token ids per input example.
        embedding_dim: Width of each embedding vector.

    Returns:
        A Keras `Model` mapping a (batch, maxlen) batch of token ids to one
        sigmoid output per example.
    """
    inputs = Input(shape=(maxlen,))
    x = Embedding(max_features, embedding_dim)(inputs)
    x = Bidirectional(LSTM(64, return_sequences=True))(x)

    # Two global views of the BiLSTM sequence: its mean and its max over time
    avg_pool = GlobalAveragePooling1D()(x)
    max_pool = GlobalMaxPooling1D()(x)

    # Attention-weighted summary of the same sequence
    attention = AttentionLayer()(x)

    # Previously the pooled vectors were computed but never connected to the
    # output, so they had no effect on the model. They are now joined with the
    # attention summary so the classifier head actually sees both contexts.
    context_vector = Concatenate()([attention, avg_pool, max_pool])
    x = Dense(64, activation="relu")(context_vector)

    outputs = Dense(1, activation="sigmoid")(x)
    model = Model(inputs=inputs, outputs=outputs)
    return model

# Instantiate the classifier and configure it for binary-label training
model = create_model(max_features=max_features, maxlen=maxlen)
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

# Layer-by-layer overview of the model
model.summary()

In [None]:
# Fit on the training reviews; Keras holds out 20% of them for validation
history = model.fit(X_train, y_train, epochs=5, batch_size=64, validation_split=0.2)

# Score the fitted model on the test split and report both numbers
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

Epoch 1/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 48ms/step - accuracy: 0.7172 - loss: 0.5172 - val_accuracy: 0.8520 - val_loss: 0.3290
Epoch 2/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 28ms/step - accuracy: 0.9338 - loss: 0.1809 - val_accuracy: 0.8822 - val_loss: 0.2820
Epoch 3/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 24ms/step - accuracy: 0.9722 - loss: 0.0860 - val_accuracy: 0.8766 - val_loss: 0.3475
Epoch 4/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 23ms/step - accuracy: 0.9866 - loss: 0.0442 - val_accuracy: 0.8712 - val_loss: 0.4560
Epoch 5/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 25ms/step - accuracy: 0.9921 - loss: 0.0262 - val_accuracy: 0.8736 - val_loss: 0.5706
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 9ms/step - accuracy: 0.8566 - loss: 0.6300
Test Loss: 0.6283608675003052
Test Accuracy: 0.8569599986076355


In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

# Vocabulary behind the ids that imdb.load_data() produced for training.
# get_word_index() maps word -> frequency rank; load_data() then shifts every rank
# by 3 and reserves 0 = padding, 1 = start-of-review, 2 = out-of-vocabulary.
# New text must be encoded with exactly this scheme. Fitting a fresh Tokenizer on
# the vocabulary keys (as was done before) assigns different ids, so the model
# would look up embeddings for the wrong words.
word_index = imdb.get_word_index()

IMDB_INDEX_FROM = 3
IMDB_START_ID = 1
IMDB_OOV_ID = 2
# Punctuation stripped by Keras' default text filters (apostrophes are kept)
_PUNCTUATION_TO_SPACE = str.maketrans(
    {ch: " " for ch in '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n'}
)

# Preprocess function for input text
def preprocess_input_text(text, maxlen):
    """Encode raw text with the IMDB training vocabulary and pad it.

    Args:
        text: Review text to encode.
        maxlen: Sequence length the model was trained with.

    Returns:
        Integer array of shape (1, maxlen) produced by `pad_sequences`.
    """
    words = text.lower().translate(_PUNCTUATION_TO_SPACE).split()

    # Reviews from load_data() begin with the start marker
    encoded = [IMDB_START_ID]
    for word in words:
        rank = word_index.get(word)
        token_id = IMDB_OOV_ID if rank is None else rank + IMDB_INDEX_FROM
        # Ids outside the training vocabulary were replaced by the OOV id
        if token_id >= max_features:
            token_id = IMDB_OOV_ID
        encoded.append(token_id)

    # Pad the sequence to match the maxlen used in training
    padded_sequence = pad_sequences([encoded], maxlen=maxlen)
    return padded_sequence

# Function to predict sentiment from input text
def predict_sentiment(model, text):
    """Classify one piece of text as 'Positive' or 'Negative'.

    Args:
        model: Fitted model whose `predict` returns one probability per example.
        text: Raw review text.

    Returns:
        Tuple `(sentiment, score)` where `score` is the model output for the text.
    """
    processed_input = preprocess_input_text(text, maxlen)
    prediction = model.predict(processed_input)
    score = prediction[0][0]
    # IMDB labels are 1 = positive, 0 = negative, so a high score means positive
    # (the two labels were previously swapped).
    sentiment = 'Positive' if score >= 0.5 else 'Negative'
    return sentiment, score

# Example input text for testing
input_text = "The movie was a masterpiece! The acting, storyline, and direction were all top-notch"
sentiment, score = predict_sentiment(model, input_text)

# Print the result
print(f"Predicted Sentiment: {sentiment} (Score: {score:.2f})")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 198ms/step
Predicted Sentiment: Positive (Score: 0.36)


In [None]:
# Second sample review
input_text = "The plot was overly complicated, and the characters were poorly developed. It was a disappointing experience"
sentiment, score = predict_sentiment(model, text=input_text)

# Show the predicted label together with the raw model score
print(f"Predicted Sentiment: {sentiment} (Score: {score:.2f})")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
Predicted Sentiment: Negative (Score: 0.96)


In [None]:
# Third sample review
input_text = "The cinematography was stunning, but the script lacked originality. Mixed feelings overall."
sentiment, score = predict_sentiment(model, text=input_text)

# Show the predicted label together with the raw model score
print(f"Predicted Sentiment: {sentiment} (Score: {score:.2f})")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 184ms/step
Predicted Sentiment: Negative (Score: 0.74)
