In [70]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from transformers import AutoTokenizer, TFAutoModel


In [71]:
# Load the raw train/test tweet CSVs.
# The dataset directory is defined once (it used to be repeated in every
# read_csv call) and can be overridden through the CORONA_NLP_DATA_DIR
# environment variable; when the variable is unset the original location is used.
DATA_DIR = Path(os.environ.get(
    'CORONA_NLP_DATA_DIR',
    r'D:\1A Shiash\23-09 to 28 -09\Test Classification Using BERT\DataSet',
))
CSV_ENCODING = 'latin-1'  # both files are read with this encoding

test_data = pd.read_csv(DATA_DIR / 'Corona_NLP_test.csv', encoding=CSV_ENCODING)
train_data = pd.read_csv(DATA_DIR / 'Corona_NLP_train.csv', encoding=CSV_ENCODING)



In [72]:
# Map the 'Sentiment' strings to integer class ids.
# The encoder learns its vocabulary from the training split only; the test
# split is then converted with that same mapping.
label_encoder = LabelEncoder().fit(train_data['Sentiment'])
train_labels, test_labels = (
    label_encoder.transform(frame['Sentiment'])
    for frame in (train_data, test_data)
)



In [73]:
# Convert integer class ids to one-hot rows.
# The width is pinned to the number of classes the encoder knows about.
# Without num_classes, to_categorical infers the width from max(label) + 1,
# so a split that happens to lack the highest class id would yield a narrower
# matrix than the model's output layer.
n_label_classes = len(label_encoder.classes_)
train_labels_cat = tf.keras.utils.to_categorical(train_labels, num_classes=n_label_classes)
test_labels_cat = tf.keras.utils.to_categorical(test_labels, num_classes=n_label_classes)



In [79]:
import os
import tensorflow as tf
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy

# Optional: Disable GPU and run on CPU to avoid OOM errors
# os.environ['CUDA_VISIBLE_DEVICES'] = '-1'  # Uncomment to disable GPU

# Enable mixed precision training for memory optimization.
# The policy is global, so every Keras layer created after this point computes
# in float16 unless it is given an explicit dtype.
from tensorflow.keras import mixed_precision
policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_global_policy(policy)

# Load pre-trained DistilBERT model and tokenizer
model_name = 'distilbert-base-uncased'  # Smaller model to prevent OOM
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Number of sentiment classes, taken from the fitted label encoder instead of
# a hard-coded 5 so the classification head always matches the one-hot labels
# (the later "create and compile" cell derives it the same way).
num_classes = len(label_encoder.classes_)

# Load the model with a sequence-classification head of num_classes outputs
bert_model = TFAutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_classes)

# Define optimizer and loss function.
# from_logits=True because the loss is applied to bert_model's raw outputs.
optimizer = Adam(learning_rate=2e-5)
loss_fn = CategoricalCrossentropy(from_logits=True)
train_acc_metric = CategoricalAccuracy()
val_acc_metric = CategoricalAccuracy()  # created here but not passed to compile()

# Compile the model
bert_model.compile(optimizer=optimizer, loss=loss_fn, metrics=[train_acc_metric])


ResourceExhaustedError: OOM when allocating tensor with shape[30522,768] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:TruncatedNormal]

In [None]:
# Build a custom model
def build_model(num_classes):
    """Build a Keras classifier on top of the already-loaded ``bert_model``.

    Args:
        num_classes: Number of output classes (width of the softmax layer).

    Returns:
        An uncompiled ``tf.keras.Model`` taking ``input_ids`` and
        ``attention_mask`` tensors of shape ``(batch, max_length)`` and
        returning class probabilities of shape ``(batch, num_classes)``.

    Notes:
        Reads the module-level ``bert_model`` and ``max_length`` when called,
        so both must be defined before the first call.
    """
    input_ids = Input(shape=(max_length,), dtype=tf.int32, name='input_ids')
    attention_mask = Input(shape=(max_length,), dtype=tf.int32, name='attention_mask')

    # bert_model is loaded with a sequence-classification head, so its first
    # output is a 2-D logits tensor and cannot be sliced with [:, 0, :].
    # Call the bare transformer encoder instead; its first output is the
    # per-token hidden states (batch, seq_len, hidden).
    encoder = getattr(bert_model, bert_model.base_model_prefix)
    encoder_outputs = encoder(input_ids, attention_mask=attention_mask)
    pooled_output = encoder_outputs[0][:, 0, :]  # CLS token output

    x = Dropout(0.3)(pooled_output)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.3)(x)
    # Keep the softmax in float32: the global mixed_float16 policy would
    # otherwise compute the probabilities in half precision.
    outputs = Dense(num_classes, activation='softmax', dtype='float32')(x)

    model = Model(inputs=[input_ids, attention_mask], outputs=outputs)
    return model


In [None]:
# Tokenize and encode the text data
# max_length is the fixed number of tokens per tweet: encode_text pads and
# truncates every text to this length, and build_model uses the same value as
# the shape of its input_ids / attention_mask inputs.
max_length = 64

def encode_text(texts):
    """Tokenize ``texts`` with the module-level ``tokenizer``.

    Every text is padded or truncated to ``max_length`` tokens and the result
    is returned as TensorFlow tensors.

    Args:
        texts: Text input forwarded unchanged to the tokenizer.

    Returns:
        The tokenizer's output for ``texts``.
    """
    tokenizer_options = dict(
        padding='max_length',
        truncation=True,
        max_length=max_length,
        return_tensors='tf',
    )
    return tokenizer(texts, **tokenizer_options)

# Tokenize the 'OriginalTweet' column of both splits (train first, then test).
train_encodings, test_encodings = (
    encode_text(frame['OriginalTweet'].tolist())
    for frame in (train_data, test_data)
)

# Freeze BERT layers (we will only train the classifier).
# NOTE(review): this flag is set on bert_model only; a build_model variant that
# loads its own encoder with TFAutoModel.from_pretrained is not affected by it.
bert_model.trainable = False

In [52]:
# Build the model
def build_model(num_classes):
    """Assemble a transformer encoder plus a small dense classification head.

    A fresh encoder is loaded from ``model_name`` on every call.

    Args:
        num_classes: Number of output classes (width of the softmax layer).

    Returns:
        An uncompiled ``tf.keras.Model`` mapping ``input_ids`` and
        ``attention_mask`` (each ``(batch, max_length)``, int32) to float32
        class probabilities.
    """
    input_ids = Input(shape=(max_length,), dtype=tf.int32, name='input_ids')
    attention_mask = Input(shape=(max_length,), dtype=tf.int32, name='attention_mask')
    model_inputs = [input_ids, attention_mask]

    encoder = TFAutoModel.from_pretrained(model_name)
    encoder_outputs = encoder(input_ids, attention_mask=attention_mask)
    # First output holds the per-token states; position 0 is the [CLS] token.
    cls_representation = encoder_outputs[0][:, 0, :]

    hidden = Dropout(0.1)(cls_representation)
    hidden = Dense(128, activation='relu')(hidden)  # Reduced hidden layer size
    hidden = Dropout(0.1)(hidden)
    # Final layer is pinned to float32 regardless of the global dtype policy.
    probabilities = Dense(num_classes, activation='softmax', dtype='float32')(hidden)

    return Model(inputs=model_inputs, outputs=probabilities)

In [54]:
# Instantiate the classifier with one output per class known to the encoder.
num_classes = len(label_encoder.classes_)
model = build_model(num_classes)

# Training configuration. The model ends in a softmax layer, so the loss
# receives probabilities rather than logits (from_logits=False).
optimizer = Adam(learning_rate=2e-5)
loss_fn = CategoricalCrossentropy(from_logits=False)
train_acc_metric = CategoricalAccuracy()
val_acc_metric = CategoricalAccuracy()  # defined but not handed to compile()

# Wire optimizer, loss and the training-accuracy metric into the model.
model.compile(
    loss=loss_fn,
    optimizer=optimizer,
    metrics=[train_acc_metric],
)

ResourceExhaustedError: failed to allocate memory [Op:AddV2]

In [None]:
# Prepare the data
batch_size = 4  # Further reduce batch size to 4 to save memory

# Training pipeline.
# The shuffle buffer spans the whole training set. With the previous buffer of
# 1000 elements, a loop that only takes the first few hundred batches of each
# pass would keep drawing from the first few thousand rows of the CSV and never
# see the rest. Only token ids, masks and one-hot labels are buffered, so the
# host-memory cost is small.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((
        {'input_ids': train_encodings['input_ids'], 'attention_mask': train_encodings['attention_mask']},
        train_labels_cat
    ))
    .shuffle(len(train_labels_cat))
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)  # overlap input preparation with the training step
)

# Validation pipeline: fixed order, no shuffling.
val_dataset = (
    tf.data.Dataset.from_tensor_slices((
        {'input_ids': test_encodings['input_ids'], 'attention_mask': test_encodings['attention_mask']},
        test_labels_cat
    ))
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)


In [15]:
# Training loop
epochs = 10
steps_per_epoch = 500  # Set a fixed number of steps per epoch
target_accuracy = 0.80

for epoch in range(epochs):
    print(f"\nEpoch {epoch+1}/{epochs}")

    # Training
    # train_on_batch reports the metrics of the batch it just processed, so the
    # per-step values are collected and averaged to get an epoch-level figure.
    # Previously the last mini-batch alone was reported as "Training Accuracy"
    # and used for the stop test.
    train_accuracies = []
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset.take(steps_per_epoch)):
        loss_value, accuracy = model.train_on_batch(x_batch_train, y_batch_train)
        train_accuracies.append(accuracy)

        if step % 100 == 0:
            print(f"Step {step}/{steps_per_epoch}: Loss: {loss_value:.4f}, Accuracy: {accuracy:.4f}")

    train_accuracy = float(np.mean(train_accuracies)) if train_accuracies else float('nan')

    # Validation
    # Batch results are weighted by batch size so a smaller final batch does
    # not count as much as a full one.
    val_losses = []
    val_accuracies = []
    val_batch_sizes = []
    for x_batch_val, y_batch_val in val_dataset:
        val_loss, val_accuracy = model.test_on_batch(x_batch_val, y_batch_val)
        val_losses.append(val_loss)
        val_accuracies.append(val_accuracy)
        val_batch_sizes.append(int(y_batch_val.shape[0]))

    if val_batch_sizes:
        val_loss = np.average(val_losses, weights=val_batch_sizes)
        val_accuracy = np.average(val_accuracies, weights=val_batch_sizes)
    else:
        # Empty validation set: report NaN instead of dividing by zero.
        val_loss = val_accuracy = float('nan')

    print(f"Training Accuracy: {train_accuracy:.4f}")
    print(f"Validation Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.4f}")

    # Stop on held-out accuracy. With a batch size of 4, a single lucky
    # training batch reaches the target almost immediately, so the last-batch
    # training accuracy is not a usable stop signal.
    if val_accuracy >= target_accuracy:
        print(f"Reached target accuracy of {target_accuracy:.2%}. Stopping training.")
        break




Epoch 1/10


ResourceExhaustedError: Graph execution error:

Detected at node 'model/tf_roberta_model/roberta/encoder/layer_._11/attention/self/dropout_33/dropout/random_uniform/RandomUniform' defined at (most recent call last):
    File "c:\Users\srini\anaconda3\envs\Ten\lib\runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "c:\Users\srini\anaconda3\envs\Ten\lib\runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "C:\Users\srini\AppData\Roaming\Python\Python310\site-packages\ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "C:\Users\srini\AppData\Roaming\Python\Python310\site-packages\traitlets\config\application.py", line 1053, in launch_instance
      app.start()
    File "C:\Users\srini\AppData\Roaming\Python\Python310\site-packages\ipykernel\kernelapp.py", line 737, in start
      self.io_loop.start()
    File "C:\Users\srini\AppData\Roaming\Python\Python310\site-packages\tornado\platform\asyncio.py", line 195, in start
      self.asyncio_loop.run_forever()
    File "c:\Users\srini\anaconda3\envs\Ten\lib\asyncio\base_events.py", line 603, in run_forever
      self._run_once()
    File "c:\Users\srini\anaconda3\envs\Ten\lib\asyncio\base_events.py", line 1909, in _run_once
      handle._run()
    File "c:\Users\srini\anaconda3\envs\Ten\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "C:\Users\srini\AppData\Roaming\Python\Python310\site-packages\ipykernel\kernelbase.py", line 524, in dispatch_queue
      await self.process_one()
    File "C:\Users\srini\AppData\Roaming\Python\Python310\site-packages\ipykernel\kernelbase.py", line 513, in process_one
      await dispatch(*args)
    File "C:\Users\srini\AppData\Roaming\Python\Python310\site-packages\ipykernel\kernelbase.py", line 418, in dispatch_shell
      await result
    File "C:\Users\srini\AppData\Roaming\Python\Python310\site-packages\ipykernel\kernelbase.py", line 758, in execute_request
      reply_content = await reply_content
    File "C:\Users\srini\AppData\Roaming\Python\Python310\site-packages\ipykernel\ipkernel.py", line 426, in do_execute
      res = shell.run_cell(
    File "C:\Users\srini\AppData\Roaming\Python\Python310\site-packages\ipykernel\zmqshell.py", line 549, in run_cell
      return super().run_cell(*args, **kwargs)
    File "C:\Users\srini\AppData\Roaming\Python\Python310\site-packages\IPython\core\interactiveshell.py", line 3024, in run_cell
      result = self._run_cell(
    File "C:\Users\srini\AppData\Roaming\Python\Python310\site-packages\IPython\core\interactiveshell.py", line 3079, in _run_cell
      result = runner(coro)
    File "C:\Users\srini\AppData\Roaming\Python\Python310\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "C:\Users\srini\AppData\Roaming\Python\Python310\site-packages\IPython\core\interactiveshell.py", line 3284, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "C:\Users\srini\AppData\Roaming\Python\Python310\site-packages\IPython\core\interactiveshell.py", line 3466, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "C:\Users\srini\AppData\Roaming\Python\Python310\site-packages\IPython\core\interactiveshell.py", line 3526, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\srini\AppData\Local\Temp\ipykernel_20764\4245737737.py", line 11, in <module>
      loss_value, accuracy = model.train_on_batch(x_batch_train, y_batch_train)
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\engine\training.py", line 2144, in train_on_batch
      logs = self.train_function(iterator)
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\engine\training.py", line 1051, in train_function
      return step_function(self, iterator)
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\engine\training.py", line 1040, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\engine\training.py", line 1030, in run_step
      outputs = model.train_step(data)
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\engine\training.py", line 889, in train_step
      y_pred = self(x, training=True)
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\engine\training.py", line 490, in __call__
      return super().__call__(*args, **kwargs)
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\engine\base_layer.py", line 1014, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\utils\traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\engine\functional.py", line 458, in call
      return self._run_internal_graph(
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\engine\functional.py", line 596, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\engine\training.py", line 490, in __call__
      return super().__call__(*args, **kwargs)
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\engine\base_layer.py", line 1014, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\utils\traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\srini\AppData\Roaming\Python\Python310\site-packages\transformers\modeling_tf_utils.py", line 1008, in run_call_with_unpacked_inputs
      else:
    File "C:\Users\srini\AppData\Roaming\Python\Python310\site-packages\transformers\models\roberta\modeling_tf_roberta.py", line 1035, in call
      outputs = self.roberta(
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\engine\base_layer.py", line 1014, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\utils\traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\srini\AppData\Roaming\Python\Python310\site-packages\transformers\modeling_tf_utils.py", line 1008, in run_call_with_unpacked_inputs
      else:
    File "C:\Users\srini\AppData\Roaming\Python\Python310\site-packages\transformers\models\roberta\modeling_tf_roberta.py", line 827, in call
      encoder_outputs = self.encoder(
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\engine\base_layer.py", line 1014, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\utils\traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\srini\AppData\Roaming\Python\Python310\site-packages\transformers\models\roberta\modeling_tf_roberta.py", line 610, in call
      for i, layer_module in enumerate(self.layer):
    File "C:\Users\srini\AppData\Roaming\Python\Python310\site-packages\transformers\models\roberta\modeling_tf_roberta.py", line 616, in call
      layer_outputs = layer_module(
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\engine\base_layer.py", line 1014, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\utils\traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\srini\AppData\Roaming\Python\Python310\site-packages\transformers\models\roberta\modeling_tf_roberta.py", line 508, in call
      self_attention_outputs = self.attention(
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\engine\base_layer.py", line 1014, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\utils\traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\srini\AppData\Roaming\Python\Python310\site-packages\transformers\models\roberta\modeling_tf_roberta.py", line 389, in call
      self_outputs = self.self_attention(
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\engine\base_layer.py", line 1014, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\utils\traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\srini\AppData\Roaming\Python\Python310\site-packages\transformers\models\roberta\modeling_tf_roberta.py", line 304, in call
      attention_probs = self.dropout(inputs=attention_probs, training=training)
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\engine\base_layer.py", line 1014, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\utils\traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\layers\regularization\dropout.py", line 111, in call
      output = control_flow_util.smart_cond(training, dropped_inputs,
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\utils\control_flow_util.py", line 105, in smart_cond
      return tf.__internal__.smart_cond.smart_cond(
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\layers\regularization\dropout.py", line 108, in dropped_inputs
      return self._random_generator.dropout(
    File "c:\Users\srini\anaconda3\envs\Ten\lib\site-packages\keras\backend.py", line 1940, in dropout
      return tf.nn.dropout(inputs, rate=rate, noise_shape=noise_shape,
Node: 'model/tf_roberta_model/roberta/encoder/layer_._11/attention/self/dropout_33/dropout/random_uniform/RandomUniform'
OOM when allocating tensor with shape[16,12,128,128] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node model/tf_roberta_model/roberta/encoder/layer_._11/attention/self/dropout_33/dropout/random_uniform/RandomUniform}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_train_function_17786]

In [None]:
# Evaluate the model on the full test split.
# batch_size is passed explicitly so evaluation uses the same small,
# memory-constrained batches as the training/validation pipelines instead of
# Keras' larger default.
# evaluate() returns the loss followed by the compiled metric (accuracy).
test_loss, test_accuracy = model.evaluate(
    {'input_ids': test_encodings['input_ids'], 'attention_mask': test_encodings['attention_mask']},
    test_labels_cat,
    batch_size=batch_size
)
print(f"Test accuracy: {test_accuracy:.4f}")



In [None]:
# Function to predict sentiment for new tweets
def predict_sentiment(tweets):
    """Predict a sentiment label for each tweet.

    Args:
        tweets: A single tweet string or an iterable of tweet strings.

    Returns:
        A NumPy array with one decoded sentiment label per tweet, in input
        order. An empty input yields an empty array without invoking the
        tokenizer or the model.
    """
    # Treat a bare string as a one-element batch, and materialise any other
    # iterable so it can be length-checked and handed to the tokenizer.
    if isinstance(tweets, str):
        tweets = [tweets]
    tweets = list(tweets)

    if not tweets:
        # Nothing to classify; avoid calling the tokenizer on an empty batch.
        return np.array([], dtype=object)

    encodings = encode_text(tweets)
    predictions = model.predict({
        'input_ids': encodings['input_ids'],
        'attention_mask': encodings['attention_mask']
    })
    # Highest-probability class index per row, mapped back to its label string.
    return label_encoder.inverse_transform(np.argmax(predictions, axis=1))



In [None]:
# Demo: classify a few hand-written tweets and show the results.
new_tweets = [
    "I'm worried about the increasing number of COVID-19 cases",
    "The vaccine rollout gives me hope for the future.",
    "Wearing masks is an effective way to prevent the spread of the virus."
]

predicted_sentiments = predict_sentiment(new_tweets)
for text, predicted_label in zip(new_tweets, predicted_sentiments):
    print(f"Tweet: {text}")
    print(f"Predicted sentiment: {predicted_label}\n")

# Show which integer id the encoder assigned to each sentiment label.
print("Label Encoding:")
for class_id, class_name in enumerate(label_encoder.classes_):
    print(f"{class_id}: {class_name}")