In [None]:
pip install transformers



In [None]:
import pandas as pd
from transformers import pipeline
#import the libraries needed


# Source CSV that holds the rows to validate.
dataset = 'PRE_FRI_TEST_CVE_BERTresults.csv'
mapped_data = pd.read_csv(dataset)

# Work on a reproducible 10% random sample (fixed seed) instead of the full file.
sampled_data = mapped_data.sample(frac=0.1, random_state=42)

# Pretrained classifier used for the entailment check.
the_model = pipeline(task='text-classification', model='facebook/bart-large-mnli')

# Column selection -- adjust these to the wanted column names.
# The first three columns of the sample make up the 'premise' side ...
first_three_columns = sampled_data.columns[:3]
# ... and this column supplies the 'hypothesis' side.
cve_text_col = 'cve_text'

def check_textual_entailment(row, threshold=0.60):
    """Classify one row and report whether its CVE text is entailed.

    The values of the ``first_three_columns`` columns are stringified and
    joined with single spaces to form the premise; ``row[cve_text_col]`` is
    the hypothesis. Both are sent to ``the_model`` as one formatted string
    and only the first (top) prediction is inspected.

    Args:
        row: One row of the sampled frame, as supplied by
            ``DataFrame.apply(..., axis=1)``.
        threshold: Minimum score the entailment prediction must reach for
            the row to count as consistent. Defaults to 0.60.

    Returns:
        A ``(logical_consistency, entailment_score)`` tuple:
        ``logical_consistency`` is ``'Yes'`` or ``'No'`` and
        ``entailment_score`` is the score of the top predicted label
        (whatever that label is).
    """
    premise = ' '.join(str(row[col]) for col in first_three_columns)
    hypothesis = row[cve_text_col]

    # NOTE(review): premise and hypothesis are sent as a single string; NLI
    # checkpoints are usually fed a (premise, hypothesis) pair -- consider
    # switching once the pipeline's return shape for pair input is confirmed.
    prediction = the_model(f"Hypothesis: {hypothesis} Premise: {premise}")[0]

    entailment_label = prediction['label']
    entailment_score = prediction['score']

    # Label casing differs between checkpoints, so compare case-insensitively.
    # Both conditions are required: a high score on its own may belong to a
    # confident 'contradiction' or 'neutral' prediction.
    is_entailed = str(entailment_label).lower() == 'entailment'
    logical_consistency = (
        'Yes' if is_entailed and entailment_score >= threshold else 'No'
    )

    return logical_consistency, entailment_score

# Run the entailment check on every sampled row. Each call returns a
# (verdict, score) pair; zip(*...) splits the pairs into two parallel columns.
sampled_data['Logical Consistency'], sampled_data['Entailment Score'] = zip(
    *sampled_data.apply(check_textual_entailment, axis=1)
)

# Share of each verdict, computed once; a verdict that never occurs counts as 0.
consistency_share = sampled_data['Logical Consistency'].value_counts(normalize=True)
yes_percentage = consistency_share.get('Yes', 0) * 100
no_percentage = consistency_share.get('No', 0) * 100

# Mean of the top-label scores over the sample.
average_entailment_score = sampled_data['Entailment Score'].mean()


print(f"Percentage of 'Yes' for Logical Consistency: {yes_percentage:.2f}%")
print(f"Percentage of 'No' for Logical Consistency: {no_percentage:.2f}%")
print(f"Average Entailment Score: {average_entailment_score:.4f}")


# Export the premise columns, the hypothesis column and the two result columns.
# Uses first_three_columns: that is the name the file actually defines.
columns_to_display = [
    *first_three_columns, cve_text_col, 'Logical Consistency', 'Entailment Score'
]

# Save the output to an Excel file.
output_path = 'textual_entailment_validation_map1_bert_cluster.xlsx'
sampled_data[columns_to_display].to_excel(output_path, index=False)


# Bare expression so the notebook cell displays the path of the written file.
output_path

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.15k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Percentage of 'Yes' for Logical Consistency: 85.43%
Percentage of 'No' for Logical Consistency: 14.57%
Average Entailment Score: 0.8438


'textual_entailment_validation_map1_bert_cluster.xlsx'