In [57]:
import base64
import datasets
import numpy as np
import pandas as pd

# Removing Non-disclosure Requirements from Automated NLP Evaluations

In [24]:
models_prediction_dataset_path = '../evaluations_with_non_disclosure_reqs/automated_nlp_evaluations/evaluation_set_for_nlp_metrics/models_prediction_dataset'
models_prediction_dataset = datasets.load_from_disk(models_prediction_dataset_path)

In [25]:
models_prediction_dataset

Dataset({
    features: ['REQID_ex', 'completion', 'query', 'class', 'task', 'text', 'label', 'mistral_ai_instruct_7b_chat_hf_preds', 'falcon_7b_base_preds', 'falcon_7b_instruct_preds', 'llama2_7b_chat_hf_preds', 'zephyr_7b_beta_preds', 'openai_compe_gpt4o_24_11_20', 'chatgpt4o_frugal_score', 'chatgpt4o_bert_score', 'zephyr_frugal_score', 'zephyr_bert_score', 'mistralai_frugal_score', 'mistralai_bert_score', 'falcon_base_frugal_score', 'falcon_base_bert_score', 'falcon_frugal_score', 'falcon_bert_score', 'llama_frugal_score', 'llama_bert_score'],
    num_rows: 34
})

In [26]:
non_disclosure_req_ids = ['55162',
                          '55626',
                          '55892',
                          '56111',
                          '57308',
                          '58850',
                          '60513',
                          '60585',
                          '64072',
                          '66326',
                          '66918',
                          '66922',
                          '68176',
                          '69228',
                          '72708',
                          '73435']

In [47]:
non_disclosure_req_text = []

for nc_id in non_disclosure_req_ids:
    nc_fltr = models_prediction_dataset.to_pandas()['REQID_ex'] == nc_id
    nc_req_text = models_prediction_dataset.to_pandas()['completion'][nc_fltr].to_numpy()
    if len(nc_req_text) > 0:
        non_disclosure_req_text.append(nc_req_text[0])

In [50]:
non_disclosure_req_text

 'The model shall receive from EWS_DATA_DISTRIBUTE the threat data sets of the highest priority levels to prepare them for output to the MFDs.',
 'When the radar signal is received, the system shall activate Path-Oriented Transport Infrastructure (POTI) protocol.',
 'Key Delivery Message (KDM) shall return the index of the Electrically Erasable Programmable Read-Only Memory Immobilizer Transponder Identifier (EEPROM IMMO TI) that matched the given Transponder Identifier (TI).']

In [51]:
def remove_non_disclosure_requirements(dataset, ids_to_remove):
    return dataset.filter(lambda example: example["REQID_ex"] not in ids_to_remove)

In [52]:
models_prediction_dataset = models_prediction_dataset.filter(lambda example: example["REQID_ex"] not in non_disclosure_req_ids)

In [53]:
models_prediction_dataset

Dataset({
    features: ['REQID_ex', 'completion', 'query', 'class', 'task', 'text', 'label', 'mistral_ai_instruct_7b_chat_hf_preds', 'falcon_7b_base_preds', 'falcon_7b_instruct_preds', 'llama2_7b_chat_hf_preds', 'zephyr_7b_beta_preds', 'openai_compe_gpt4o_24_11_20', 'chatgpt4o_frugal_score', 'chatgpt4o_bert_score', 'zephyr_frugal_score', 'zephyr_bert_score', 'mistralai_frugal_score', 'mistralai_bert_score', 'falcon_base_frugal_score', 'falcon_base_bert_score', 'falcon_frugal_score', 'falcon_bert_score', 'llama_frugal_score', 'llama_bert_score'],
    num_rows: 30
})

In [54]:
# models_prediction_dataset.save_to_disk('./automated_nlp_evaluations/evaluation_set_for_nlp_metrics/models_prediction_dataset')

Saving the dataset (0/1 shards):   0%|          | 0/30 [00:00<?, ? examples/s]

# Removing Non-disclosure Requirements from Human Evaluation

In [62]:
def decode_base64(item):
    return base64.b64decode(item.encode()).decode('utf-8')

In [63]:
def decode_human_ai_hash(df):
    df = df.drop(columns=[col for col in df.columns if 'Unnamed' in col])
    df = df.applymap(lambda x: x.replace('\xa0', '').strip() if isinstance(x, str) else x)
    df['hash'] = df['hash'].apply(decode_base64)
    df['REQID_ex'] = df['hash'].str.split('__________').str[0]
    df['author'] = df['hash'].str.split('__________').str[-1]
    df.drop(columns = ['hash'], inplace = True)

    nan_summary = df.isnull().sum()
    print("Number of NaN values in each column:\n", nan_summary)
    for column in df.columns:
        if df[column].isnull().sum() > 0:
            majority_value = df[column].mode()[0]
            df[column].fillna(majority_value, inplace = True)
    return df

In [65]:
participants = ['p1', 'p2', 'p3', 'p4']

human_assessed_requirements = [pd.read_excel(f'../evaluations_with_non_disclosure_reqs/human_evaluations/{i}_human_evaluation/tasks_b_and_c/task_b.xlsx') for i in participants]
human_assessed_requirements = [decode_human_ai_hash(df) for df in human_assessed_requirements]

Number of NaN values in each column:
 requirement                                                                                                        0
Based on the style and content of the requirement, do you believe it was written by a human or generated by AI?    0
This requirement is well-structured according to the ISO-29248 recommended syntax.                                 0
The use of signaling keywords to indicate the presence of a requirement is appropriate based on ISO-29148.         0
REQID_ex                                                                                                           0
author                                                                                                             0
dtype: int64
Number of NaN values in each column:
 requirement                                                                                                        0
Based on the style and content of the requirement, do you believe it was written by a human 

  df = df.applymap(lambda x: x.replace('\xa0', '').strip() if isinstance(x, str) else x)
  df = df.applymap(lambda x: x.replace('\xa0', '').strip() if isinstance(x, str) else x)
  df = df.applymap(lambda x: x.replace('\xa0', '').strip() if isinstance(x, str) else x)
  df = df.applymap(lambda x: x.replace('\xa0', '').strip() if isinstance(x, str) else x)


In [67]:
human_assessed_requirements[0]
blind_message = '''"Due to non-disclosure agreements, this requirement is blinded after rating."'''

Unnamed: 0,requirement,"Based on the style and content of the requirement, do you believe it was written by a human or generated by AI?",This requirement is well-structured according to the ISO-29248 recommended syntax.,The use of signaling keywords to indicate the presence of a requirement is appropriate based on ISO-29148.,REQID_ex,author
0,The communication interface of the medical dev...,AI,Disagree,Strongly Disagree,56111,UAI
1,The product shall provide the ability to defin...,AI,Strongly Agree,Disagree,2607,AI
2,If ifr Conditions change by 500ft or reset tim...,AI,Strongly Disagree,Disagree,43101,AI
3,The software shall respond in 2 seconds or les...,AI,Agree,Strongly Agree,PROMISE-2,AI
4,The Index shall be determined of the EEPROM IM...,HUMAN,Agree,Agree,66922,AI
...,...,...,...,...,...,...
63,To help your user generate additional software...,AI,Strongly Disagree,Strongly Disagree,43101,UAI
64,"Of the total number of end users, 80% shall be...",AI,Disagree,Strongly Agree,PROMISE-6,AI
65,The following non-functional availability requ...,AI,Agree,Agree,PROMISE-1,UAI
66,The Early Warning System (EWS) shall respond t...,HUMAN,Agree,Agree,55892,AI


In [None]:
# from datasets import Dataset

# def blind_and_replace_non_disclosure_requirements(dataset, target_ids):
#     def process_example(example):
#         blind_message = '''"Due to non-disclosure agreements, this requirement is blinded."'''
#         if example["REQID_ex"] in str(target_ids):  
#             original_text = example["completion"]  
#             example["completion"] = blind_message  
#             example["text"] = example["text"].replace(original_text, blind_message)
#         return example
#     return dataset.map(process_example)