In [1]:
# common imports

import sys
sys.path.append("../datasets/ARID_supporting_scripts")


import os
import random
import mapper
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras

# Single seed value applied to TensorFlow, NumPy and Python's `random` below.
# It is also the default seed of add_balanced_label_noise and part of the model id.
seed = 15
tf.random.set_seed(seed)
np.random.seed(seed)
random.seed(seed)

2025-02-26 22:47:15.035792: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-02-26 22:47:15.035837: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-02-26 22:47:15.037136: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-26 22:47:15.044106: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
import datasets

# Load the dataset stored in the local ./ARID/ directory; later cells read its
# 'train' and 'test' splits.
dataset = datasets.load_from_disk('./ARID/')

In [3]:
# Direct handles to the two splits of the loaded dataset.
dataset_train = dataset['train']
dataset_test = dataset['test']

In [4]:
# Injects balanced label noise: within each class, a fraction of the 'signal_keyword' labels is flipped to a different class and stored in a new 'noisy_signal_keyword' column.

import random

def add_balanced_label_noise(dataset, noise_rate = 0.2, seed = seed):
    """Return `dataset` with an extra 'noisy_signal_keyword' column of partly flipped labels.

    For every class found in the 'signal_keyword' column, a fraction
    `noise_rate` of that class's examples (rounded down, and always leaving at
    least one example untouched) is drawn at random. Each drawn example gets a
    different class, chosen uniformly from the remaining classes.

    Args:
        dataset: object whose ``dataset["signal_keyword"]`` yields the labels
            and which provides ``add_column(name, values)``.
        noise_rate: fraction of each class to relabel.
        seed: value passed to ``random.seed``. Note that this reseeds Python's
            global random number generator.

    Returns:
        The result of ``dataset.add_column("noisy_signal_keyword", new_labels)``.
    """
    random.seed(seed)
    labels = list(dataset["signal_keyword"])
    # Enumerate classes in first-appearance order. Iterating a set of strings
    # follows hash order, which changes between interpreter sessions, so the
    # sequence of random draws (and thus the noise) would not be reproducible
    # even with a fixed seed.
    unique_labels = list(dict.fromkeys(labels))
    new_labels = list(labels)
    indices_by_class = {label: [] for label in unique_labels}
    for idx, label in enumerate(labels):
        indices_by_class[label].append(idx)
    for label, indices in indices_by_class.items():
        # Candidate replacement classes are the same for every example of this class.
        alternatives = [other for other in unique_labels if other != label]
        if not alternatives:
            # Only one class exists, so there is nothing to flip to.
            continue
        num_samples = len(indices)
        # Cap at num_samples - 1 so each class keeps at least one clean example.
        num_noisy = min(int(noise_rate * num_samples), num_samples - 1)
        for idx in random.sample(indices, num_noisy):
            new_labels[idx] = random.choice(alternatives)
    return dataset.add_column("noisy_signal_keyword", new_labels)

In [5]:
# Fraction of each class whose label is flipped; also embedded in the model id built later.
noise_rate = 0.1

# Add the 'noisy_signal_keyword' column to the training split, then drop the
# original 'label' column (a new 'label' is derived from the noisy keywords in a later cell).
noisy_train_dataset = add_balanced_label_noise(dataset['train'], noise_rate = noise_rate)
noisy_train_dataset = noisy_train_dataset.remove_columns('label')

In [6]:
# Class names from the test split's 'label' feature; list position is used as the integer id.
lbl_ = dataset['test'].features['label'].names
# Forward (name -> id) and reverse (id -> name) lookup tables.
label2id = dict(zip(lbl_, range(len(lbl_))))
id2label = {class_id: class_name for class_name, class_id in label2id.items()}

In [7]:
# Re-create the integer 'label' column by looking up each noisy keyword in label2id.
noisy_train_dataset = noisy_train_dataset.map(lambda x: {"label": label2id[x["noisy_signal_keyword"]]})
# Display the resulting dataset summary.
noisy_train_dataset

Map:   0%|          | 0/1916 [00:00<?, ? examples/s]

Dataset({
    features: ['REQID', 'REQID_expanded', 'Requirement Sentences', 'Open/ Closed Source', 'class', 'signal_keyword', 'Source', 'noisy_signal_keyword', 'label'],
    num_rows: 1916
})

In [8]:
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification


# Batch size used for both the training and the validation tf.data pipelines.
batch_size = 16


# Pretrained checkpoint used for both the tokenizer and the classifier.
model_ckpt = 'FacebookAI/roberta-base'
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)

# Sequence classifier with one label per entry of lbl_; the id/name lookup
# tables are passed along to from_pretrained.
model = TFAutoModelForSequenceClassification.from_pretrained(model_ckpt,
                                                        num_labels = len(lbl_),
                                                        id2label = id2label,
                                                        label2id = label2id,)

2025-02-26 22:47:18.944167: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-02-26 22:47:18.946045: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-02-26 22:47:18.949605: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

In [10]:
def preprocess_function(dataset):
    """Tokenize the 'Requirement Sentences' field of `dataset` with truncation enabled.

    Uses the notebook-level `tokenizer` and returns the tokenizer's output unchanged.
    """
    sentences = dataset['Requirement Sentences']
    encoded = tokenizer(sentences, truncation = True)
    return encoded

In [11]:
# Tokenize the noisy training split and the untouched test split, in batches.
X_train_encoded = noisy_train_dataset.map(preprocess_function, batched = True)
X_test_encoded = dataset_test.map(preprocess_function, batched = True)

Map:   0%|          | 0/1916 [00:00<?, ? examples/s]

In [12]:
# Sanity check on the first training example: raw sentence, its token ids,
# and the ids converted back to token strings.
print(X_train_encoded['Requirement Sentences'][0])
print(X_train_encoded['input_ids'][0])
print(tokenizer.convert_ids_to_tokens(X_train_encoded['input_ids'][0]))

The DWA must request DWA acknowledgment flashing when the DWA has assumed the "armed" state and the outer skin is closed.
[0, 133, 211, 8460, 531, 2069, 211, 8460, 38169, 22643, 77, 5, 211, 8460, 34, 9159, 5, 22, 17651, 113, 194, 8, 5, 15705, 3024, 16, 1367, 4, 2]
['<s>', 'The', 'ĠD', 'WA', 'Ġmust', 'Ġrequest', 'ĠD', 'WA', 'Ġacknowledgment', 'Ġflashing', 'Ġwhen', 'Ġthe', 'ĠD', 'WA', 'Ġhas', 'Ġassumed', 'Ġthe', 'Ġ"', 'armed', '"', 'Ġstate', 'Ġand', 'Ġthe', 'Ġouter', 'Ġskin', 'Ġis', 'Ġclosed', '.', '</s>']


In [13]:
X_train_encoded

Dataset({
    features: ['REQID', 'REQID_expanded', 'Requirement Sentences', 'Open/ Closed Source', 'class', 'signal_keyword', 'Source', 'noisy_signal_keyword', 'label', 'input_ids', 'attention_mask'],
    num_rows: 1916
})

In [14]:
# Training pipeline: shuffled batches of the encoded noisy training split.
# The tokenizer is handed over as well — presumably so batches can be padded
# during collation; confirm against the prepare_tf_dataset documentation.
tf_train_dataset = model.prepare_tf_dataset(
    X_train_encoded,
    shuffle = True,
    batch_size = batch_size,
    tokenizer = tokenizer
)

# Validation pipeline: same batch size, original order kept (no shuffling) so
# predictions can later be compared position-by-position with the test labels.
tf_valid_dataset = model.prepare_tf_dataset(
    X_test_encoded,
    shuffle = False,
    batch_size = batch_size,
    tokenizer = tokenizer
)

In [15]:
from transformers import create_optimizer

num_epochs = 10
# Floor division counts only full batches per epoch.
# NOTE(review): assumes the training pipeline drops the final partial batch — confirm.
batches_per_epoch = len(X_train_encoded) // batch_size
# Total number of optimizer steps over the whole run; passed as the schedule length.
total_train_steps = int(batches_per_epoch * num_epochs)

# Optimizer and learning-rate schedule starting at 2e-5 with no warmup steps.
optimizer, schedule = create_optimizer(
    init_lr = 2e-5, num_warmup_steps = 0, num_train_steps = total_train_steps
)

2025-02-26 22:47:23.006085: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory


In [16]:
import evaluate
from transformers.keras_callbacks import KerasMetricCallback


# Metrics already loaded in this kernel session, keyed by metric name.
_METRIC_CACHE = {}


def _load_metric(name):
    """Return the Hugging Face metric `name`, loading it only on first use."""
    # Imported under a private alias: a later cell in this notebook defines a
    # function that is also called `evaluate`, which would otherwise make
    # `evaluate.load` fail once that cell has run.
    import evaluate as _hf_evaluate

    if name not in _METRIC_CACHE:
        _METRIC_CACHE[name] = _hf_evaluate.load(name)
    return _METRIC_CACHE[name]


def compute_metrics(eval_predictions):
    """Compute macro-averaged precision, recall and F1 for a batch of predictions.

    Args:
        eval_predictions: pair ``(predictions, labels)``. ``predictions`` holds
            one row of per-class scores per example (argmax is taken over
            axis 1); ``labels`` holds the reference class ids.

    Returns:
        Dict with the keys "precision", "recall" and "f1".
    """
    predictions, labels = eval_predictions
    predictions = np.argmax(predictions, axis = 1)

    scores = {}
    # Metrics are fetched through the cache so they are not reloaded on every call.
    for name in ("precision", "recall", "f1"):
        metric = _load_metric(name)
        scores[name] = metric.compute(predictions = predictions, references = labels, average = 'macro')[name]
    return scores

# Keras callback that applies compute_metrics to the validation pipeline.
metric_callback = KerasMetricCallback(metric_fn = compute_metrics, eval_dataset = tf_valid_dataset)

In [17]:
from transformers.keras_callbacks import PushToHubCallback
from tensorflow.keras.callbacks import TensorBoard

# Checkpoint name without the organisation prefix (text after the last '/').
model_name = model_ckpt.split("/")[-1]
# Run identifier encoding the noise rate and seed; reused for the Hub repo id,
# the local output directory and the TensorBoard log directory.
push_to_hub_model_id = f'{model_name}_noise_rate{noise_rate}_seed{seed}_percent_noise'
print(push_to_hub_model_id)
# NOTE(review): logs are written under ./models/<id>/logs while the Hub callback
# below uses ./<id> as its output directory — confirm the two locations are meant to differ.
tensorboard_callback = TensorBoard(log_dir = f'./models/{push_to_hub_model_id}/logs')

push_to_hub_callback = PushToHubCallback(
    output_dir = f"./{push_to_hub_model_id}",
    tokenizer = tokenizer,
    hub_model_id = push_to_hub_model_id,
)

# Callbacks passed to model.fit.
callbacks = [push_to_hub_callback, tensorboard_callback, metric_callback]

roberta-base_noise_rate0.1_seed15_percent_noise


For more details, please read https://huggingface.co/docs/huggingface_hub/concepts/git_vs_http.
Cloning https://huggingface.co/kasrahabib/roberta-base_noise_rate0.1_seed15_percent_noise into local empty directory.


In [18]:
# No loss is passed to compile — presumably the model's built-in loss is used;
# confirm against the transformers Keras documentation.
model.compile(optimizer = optimizer)
# Train on the noisy training pipeline, validating on the clean test pipeline.
history = model.fit(tf_train_dataset, validation_data = (tf_valid_dataset), epochs = num_epochs, callbacks = callbacks)

Epoch 1/10
Cause: for/else statement not yet supported
Cause: for/else statement not yet supported


2025-02-26 22:48:05.395343: I external/local_xla/xla/service/service.cc:168] XLA service 0x7a64b4e02a20 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2025-02-26 22:48:05.395377: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 4090, Compute Capability 8.9
2025-02-26 22:48:05.395387: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (1): NVIDIA GeForce RTX 4090, Compute Capability 8.9
2025-02-26 22:48:05.405206: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-02-26 22:48:05.440331: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8902
I0000 00:00:1740606485.496034 3769830 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.




Using the latest cached version of the module from /home/kasra/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--precision/4e7f439a346715f68500ce6f2be82bf3272abd3f20bdafd203a2c4f85b61dd5f (last modified on Mon May  6 14:56:12 2024) since it couldn't be found locally at evaluate-metric--precision, or remotely on the Hugging Face Hub.
Using the latest cached version of the module from /home/kasra/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--recall/e40e6e98d18ff3f210f4d0b26fa721bfaa80704b1fdf890fa551cfabf94fc185 (last modified on Mon May  6 14:56:14 2024) since it couldn't be found locally at evaluate-metric--recall, or remotely on the Hugging Face Hub.
Using the latest cached version of the module from /home/kasra/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--f1/0ca73f6cf92ef5a268320c697f7b940d1030f8471714bffdb6856c641b818974 (last modified on Mon May  6 14:56:15 2024) since it couldn't be found locally at evaluate-metric-

Epoch 2/10
  1/119 [..............................] - ETA: 9s - loss: 1.8493

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))




Several commits (2) will be pushed upstream.
Using the latest cached version of the module from /home/kasra/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--precision/4e7f439a346715f68500ce6f2be82bf3272abd3f20bdafd203a2c4f85b61dd5f (last modified on Mon May  6 14:56:12 2024) since it couldn't be found locally at evaluate-metric--precision, or remotely on the Hugging Face Hub.
Using the latest cached version of the module from /home/kasra/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--recall/e40e6e98d18ff3f210f4d0b26fa721bfaa80704b1fdf890fa551cfabf94fc185 (last modified on Mon May  6 14:56:14 2024) since it couldn't be found locally at evaluate-metric--recall, or remotely on the Hugging Face Hub.
Using the latest cached version of the module from /home/kasra/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--f1/0ca73f6cf92ef5a268320c697f7b940d1030f8471714bffdb6856c641b818974 (last modified on Mon May  6 14:56:15 2024) since it 

Epoch 3/10

Several commits (3) will be pushed upstream.
Using the latest cached version of the module from /home/kasra/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--precision/4e7f439a346715f68500ce6f2be82bf3272abd3f20bdafd203a2c4f85b61dd5f (last modified on Mon May  6 14:56:12 2024) since it couldn't be found locally at evaluate-metric--precision, or remotely on the Hugging Face Hub.
Using the latest cached version of the module from /home/kasra/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--recall/e40e6e98d18ff3f210f4d0b26fa721bfaa80704b1fdf890fa551cfabf94fc185 (last modified on Mon May  6 14:56:14 2024) since it couldn't be found locally at evaluate-metric--recall, or remotely on the Hugging Face Hub.
Using the latest cached version of the module from /home/kasra/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--f1/0ca73f6cf92ef5a268320c697f7b940d1030f8471714bffdb6856c641b818974 (last modified on Mon May  6 14:56:15 2024) since it 

Epoch 4/10

Several commits (4) will be pushed upstream.
Using the latest cached version of the module from /home/kasra/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--precision/4e7f439a346715f68500ce6f2be82bf3272abd3f20bdafd203a2c4f85b61dd5f (last modified on Mon May  6 14:56:12 2024) since it couldn't be found locally at evaluate-metric--precision, or remotely on the Hugging Face Hub.
Using the latest cached version of the module from /home/kasra/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--recall/e40e6e98d18ff3f210f4d0b26fa721bfaa80704b1fdf890fa551cfabf94fc185 (last modified on Mon May  6 14:56:14 2024) since it couldn't be found locally at evaluate-metric--recall, or remotely on the Hugging Face Hub.
Using the latest cached version of the module from /home/kasra/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--f1/0ca73f6cf92ef5a268320c697f7b940d1030f8471714bffdb6856c641b818974 (last modified on Mon May  6 14:56:15 2024) since it 

Epoch 5/10

Several commits (5) will be pushed upstream.
Using the latest cached version of the module from /home/kasra/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--precision/4e7f439a346715f68500ce6f2be82bf3272abd3f20bdafd203a2c4f85b61dd5f (last modified on Mon May  6 14:56:12 2024) since it couldn't be found locally at evaluate-metric--precision, or remotely on the Hugging Face Hub.
Using the latest cached version of the module from /home/kasra/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--recall/e40e6e98d18ff3f210f4d0b26fa721bfaa80704b1fdf890fa551cfabf94fc185 (last modified on Mon May  6 14:56:14 2024) since it couldn't be found locally at evaluate-metric--recall, or remotely on the Hugging Face Hub.
Using the latest cached version of the module from /home/kasra/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--f1/0ca73f6cf92ef5a268320c697f7b940d1030f8471714bffdb6856c641b818974 (last modified on Mon May  6 14:56:15 2024) since it 

Epoch 6/10

Several commits (6) will be pushed upstream.
Using the latest cached version of the module from /home/kasra/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--precision/4e7f439a346715f68500ce6f2be82bf3272abd3f20bdafd203a2c4f85b61dd5f (last modified on Mon May  6 14:56:12 2024) since it couldn't be found locally at evaluate-metric--precision, or remotely on the Hugging Face Hub.
Using the latest cached version of the module from /home/kasra/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--recall/e40e6e98d18ff3f210f4d0b26fa721bfaa80704b1fdf890fa551cfabf94fc185 (last modified on Mon May  6 14:56:14 2024) since it couldn't be found locally at evaluate-metric--recall, or remotely on the Hugging Face Hub.
Using the latest cached version of the module from /home/kasra/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--f1/0ca73f6cf92ef5a268320c697f7b940d1030f8471714bffdb6856c641b818974 (last modified on Mon May  6 14:56:15 2024) since it 

Epoch 7/10

Several commits (7) will be pushed upstream.
Using the latest cached version of the module from /home/kasra/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--precision/4e7f439a346715f68500ce6f2be82bf3272abd3f20bdafd203a2c4f85b61dd5f (last modified on Mon May  6 14:56:12 2024) since it couldn't be found locally at evaluate-metric--precision, or remotely on the Hugging Face Hub.
Using the latest cached version of the module from /home/kasra/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--recall/e40e6e98d18ff3f210f4d0b26fa721bfaa80704b1fdf890fa551cfabf94fc185 (last modified on Mon May  6 14:56:14 2024) since it couldn't be found locally at evaluate-metric--recall, or remotely on the Hugging Face Hub.
Using the latest cached version of the module from /home/kasra/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--f1/0ca73f6cf92ef5a268320c697f7b940d1030f8471714bffdb6856c641b818974 (last modified on Mon May  6 14:56:15 2024) since it 

Epoch 8/10

Several commits (8) will be pushed upstream.
Using the latest cached version of the module from /home/kasra/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--precision/4e7f439a346715f68500ce6f2be82bf3272abd3f20bdafd203a2c4f85b61dd5f (last modified on Mon May  6 14:56:12 2024) since it couldn't be found locally at evaluate-metric--precision, or remotely on the Hugging Face Hub.
Using the latest cached version of the module from /home/kasra/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--recall/e40e6e98d18ff3f210f4d0b26fa721bfaa80704b1fdf890fa551cfabf94fc185 (last modified on Mon May  6 14:56:14 2024) since it couldn't be found locally at evaluate-metric--recall, or remotely on the Hugging Face Hub.
Using the latest cached version of the module from /home/kasra/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--f1/0ca73f6cf92ef5a268320c697f7b940d1030f8471714bffdb6856c641b818974 (last modified on Mon May  6 14:56:15 2024) since it 

Epoch 9/10

Several commits (9) will be pushed upstream.
Using the latest cached version of the module from /home/kasra/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--precision/4e7f439a346715f68500ce6f2be82bf3272abd3f20bdafd203a2c4f85b61dd5f (last modified on Mon May  6 14:56:12 2024) since it couldn't be found locally at evaluate-metric--precision, or remotely on the Hugging Face Hub.
Using the latest cached version of the module from /home/kasra/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--recall/e40e6e98d18ff3f210f4d0b26fa721bfaa80704b1fdf890fa551cfabf94fc185 (last modified on Mon May  6 14:56:14 2024) since it couldn't be found locally at evaluate-metric--recall, or remotely on the Hugging Face Hub.
Using the latest cached version of the module from /home/kasra/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--f1/0ca73f6cf92ef5a268320c697f7b940d1030f8471714bffdb6856c641b818974 (last modified on Mon May  6 14:56:15 2024) since it 

Epoch 10/10

Several commits (10) will be pushed upstream.
Using the latest cached version of the module from /home/kasra/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--precision/4e7f439a346715f68500ce6f2be82bf3272abd3f20bdafd203a2c4f85b61dd5f (last modified on Mon May  6 14:56:12 2024) since it couldn't be found locally at evaluate-metric--precision, or remotely on the Hugging Face Hub.
Using the latest cached version of the module from /home/kasra/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--recall/e40e6e98d18ff3f210f4d0b26fa721bfaa80704b1fdf890fa551cfabf94fc185 (last modified on Mon May  6 14:56:14 2024) since it couldn't be found locally at evaluate-metric--recall, or remotely on the Hugging Face Hub.
Using the latest cached version of the module from /home/kasra/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--f1/0ca73f6cf92ef5a268320c697f7b940d1030f8471714bffdb6856c641b818974 (last modified on Mon May  6 14:56:15 2024) since it



In [19]:
# Logits predicted for the validation pipeline (one row of class scores per example).
y_pred = model.predict(tf_valid_dataset).logits



In [20]:
import numpy as np

# Collapse the logits to predicted class ids.
# NOTE: this overwrites y_pred, so the cell is not idempotent — re-running it
# without re-running the predict cell fails because y_pred is then 1-D.
y_pred = np.argmax(y_pred, axis = 1)
# Reference labels of the test split (label noise was added to the training split only).
y_true = dataset['test']['label']

In [22]:
# Translate integer class ids back to class names.
y_true_label = [id2label[i] for i in y_true]
y_pred_label = [id2label[i] for i in y_pred]

# Map every class name through mapper.map_hf — presumably onto three coarser
# classes, as the variable names suggest; verify against the mapper module.
y_true_three_class = [mapper.map_hf[i] for i in y_true_label]
y_pred_three_class = [mapper.map_hf[i] for i in y_pred_label]

In [23]:
from sklearn.metrics import precision_score, recall_score, f1_score

def evaluate(y_true, y_pred, average = 'binary'):
    """Print precision, recall and F1 of `y_pred` against `y_true`.

    `average` is forwarded unchanged to each scikit-learn scorer. Nothing is
    returned; the three scores are written to stdout.

    NOTE: defining this function rebinds the notebook-level name `evaluate`,
    which an earlier cell imported as the `evaluate` package; after this cell
    runs, the name refers to this function.
    """
    scorers = (
        ('Precision: ', precision_score),
        ('Recall: ', recall_score),
        ('f1_score: ', f1_score),
    )
    for caption, scorer in scorers:
        print(caption, scorer(y_true, y_pred, average = average))

In [24]:
# Macro-averaged three-class scores for the seed-15 run
evaluate(y_true_three_class, y_pred_three_class, average = 'macro')              

Precision:  0.9514244195330713
Recall:  0.9496825396825397
f1_score:  0.9505062444778757


In [24]:
# Macro-averaged three-class scores for the seed-13 run
evaluate(y_true_three_class, y_pred_three_class, average = 'macro')              

Precision:  0.9331519187366665
Recall:  0.9269047619047619
f1_score:  0.9293058305583456


In [24]:
# Macro-averaged three-class scores for the seed-19 run
evaluate(y_true_three_class, y_pred_three_class, average = 'macro')              

Precision:  0.9435648000138016
Recall:  0.938015873015873
f1_score:  0.9405029221706073


In [29]:
# Macro-averaged three-class scores for the seed-100 run
evaluate(y_true_three_class, y_pred_three_class, average = 'macro')              

Precision:  0.9437744511801175
Recall:  0.9326984126984127
f1_score:  0.9372252180939659


In [27]:
# Macro-averaged three-class scores for the seed-42 run
evaluate(y_true_three_class, y_pred_three_class, average = 'macro')              

Precision:  0.9376872169975617
Recall:  0.929047619047619
f1_score:  0.9329815331729855
