In [1]:
import argparse
import json
import os
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

from dataclasses import dataclass
from tqdm import tqdm

# Put the parent directory at the front of sys.path (once only) so that
# modules located there can be imported by the cells below.
module_path = os.path.abspath('../')
already_registered = module_path in sys.path
if not already_registered:
    sys.path.insert(0, module_path)

# Import project configuration and MultiVerS modules
from config import SciFactT5Config
from multivers.data_r import ClaimDataLoaderGenerator, DataLoaderGenerator, get_dataloader
from multivers.model_r import MultiVerSModel
from multivers import util

# Other necessary imports
import definitions

# Expose the directory that contains the project root on sys.path, so that
# the `T5ParEvo.*` imports below resolve against the project root folder.
project_parent_dir = os.path.dirname(definitions.PROJECT_VARS.ROOT_DIR)
sys.path.append(project_parent_dir)

# Import project-specific modules
from T5ParEvo.src.data.data import Claim, ClaimPredictions, GoldDataset, Label
from T5ParEvo.src.linguistic.ner_abbr import Abbreviation, NEREntity
from T5ParEvo.target_system.multivers.multivers_interface import ModelPredictorMultivers, PredictionParams,ModelPredictorMultiversList

# Print module_path and definitions.PROJECT_VARS.ROOT_DIR as a sanity check
# of the two locations used for the sys.path setup in this cell.
print('module_path:', module_path)
print('definitions.PROJECT_VARS.ROOT_DIR:', definitions.PROJECT_VARS.ROOT_DIR)


root dir :  /home/qudratealahyratu/research/nlp/fact_checking/my_work/T5ParEvo


  "The `@auto_move_data` decorator is deprecated in v1.3 and will be removed in v1.5."


/home/qudratealahyratu/research/nlp/fact_checking/my_work/T5ParEvo
module_path: /home/qudratealahyratu/research/nlp/fact_checking/my_work/T5ParEvo
definitions.PROJECT_VARS.ROOT_DIR: /home/qudratealahyratu/research/nlp/fact_checking/my_work/T5ParEvo


In [2]:
import torch
# Prefer CUDA when PyTorch reports it as available; otherwise run on the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

In [3]:
from T5ParEvo.src.util.logger import LoggerConfig, NeptuneConfig, LogConfigurator, NeptuneRunner

# SECURITY: the Neptune API token must never be stored in the notebook source.
# It is taken from the NEPTUNE_API_TOKEN environment variable; when that is
# missing, it is requested interactively so the value is neither saved in the
# notebook nor echoed into a cell output.
# NOTE(review): a token was previously committed in this cell and should be
# treated as compromised - revoke it and issue a new one.
if not os.environ.get('NEPTUNE_API_TOKEN'):
    from getpass import getpass
    os.environ['NEPTUNE_API_TOKEN'] = getpass('Neptune API token: ')

# Configure file/console logging with the project's default logger settings.
log_config = LoggerConfig()
log_configurator = LogConfigurator(log_config)
log_configurator.configure()

# Start a Neptune run for experiment tracking.
# NOTE(review): "definition.py" below does not match the imported module name
# `definitions` - verify the file name, otherwise that pattern may match nothing.
neptune_config = NeptuneConfig(project_name="ratulalahy/scifact-paraphrase-T5-evo",
                                tags=['other model attack', 'tech_term', 'mlnli'],
                                source_files=["t5_par_evo_test.ipynb", "*.yaml", "config.py", "definition.py"])
neptune_runner = NeptuneRunner(neptune_config)
nep_run = neptune_runner.run()

Logging to /home/qudratealahyratu/research/nlp/fact_checking/my_work/T5ParEvo/logs/log_all_.log


https://app.neptune.ai/ratulalahy/scifact-paraphrase-T5-evo/e/SCIF3-165
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#.stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.


## Load Data

In [4]:
# Gold dataset built from the configured corpus file and training-claims file.
# It is meant to be used for training only.
cfg = SciFactT5Config()
target_dataset_cfg = cfg.target_dataset
ds_train = GoldDataset(
    target_dataset_cfg.loc_target_dataset_corpus,
    target_dataset_cfg.loc_target_dataset_train,
)

# 39 is handed straight to get_claim; whether it is a claim id or a positional
# index is not visible from this notebook.
claim_train = ds_train.get_claim(39)

In [5]:

# Corpus of abstracts used by the target model; the location comes from the config.
corpus_file = cfg.target_dataset.loc_target_dataset_corpus

# Settings handed to the MultiVerS predictor.
# NOTE(review): checkpoint_path is an absolute, machine-specific path; consider
# moving it into the project configuration.
params = PredictionParams(
    checkpoint_path="/home/qudratealahyratu/research/nlp/fact_checking/my_work/multivers/checkpoints/scifact.ckpt",
    output_file=None,
    batch_size=5,
    device=0,
    num_workers=4,
    no_nei=False,
    force_rationale=False,
    debug=False,
    corpus_file=corpus_file,
)

# Read the claims file (JSON Lines: one JSON object per line) into Claim objects.
gold_claims = []
claims_path = cfg.target_dataset.loc_target_dataset_test
# JSON text is UTF-8; be explicit instead of relying on the platform default encoding.
with open(claims_path, 'r', encoding='utf-8') as f:
    for line in f:
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing newline at the end of the file).
            continue
        data = json.loads(line)
        claim = Claim(
            id=data['id'],
            claim=data['claim'],
            cited_docs=data['doc_ids'],
            evidence={},
            release=None,
        )
        gold_claims.append(claim)

# Reduce the loaded claims to the unique ones via Claim.get_unique_claims.
unique_gold_claims = Claim.get_unique_claims(gold_claims)

## Predict Original Claims

In [6]:
# Prepare the prediction model using the first unique claim and run one
# prediction with it. `unique_gold_claims` is already computed by the claim
# loading cell, so it is reused here instead of being recomputed.
first_unique_claim = unique_gold_claims[0]

# NOTE(review): `dataloader` is not passed to the predictor constructed below;
# it is kept because it is unclear from this notebook whether anything else
# relies on it - confirm and remove if unused.
dataloader_generator = ClaimDataLoaderGenerator(params, first_unique_claim, corpus_file)
dataloader = dataloader_generator.get_dataloader_by_single_claim()

prediction_model = ModelPredictorMultivers(params, first_unique_claim)
original_claim_predictions_raw = prediction_model.predict(first_unique_claim)

Some weights of the model checkpoint at allenai/longformer-large-4096 were not used when initializing LongformerModel: ['lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing LongformerModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  stream(template_mgs % msg_args)


In [7]:
# Run the target model on every unique claim, collecting one ClaimPredictions
# object per claim (progress is reported through tqdm).
all_original_claim_predictions: List[ClaimPredictions] = [
    prediction_model.predict(unique_claim)
    for unique_claim in tqdm(unique_gold_claims[:], desc="Predicting for unique claims")
]

Predicting for unique claims: 100%|██████████| 12/12 [00:45<00:00,  3.83s/it]


In [8]:
# Print every claim prediction that carries more than one predicted abstract.
for claim_prediction in all_original_claim_predictions:
    has_single_or_no_prediction = len(claim_prediction.predictions) <= 1
    if has_single_or_no_prediction:
        continue
    print(claim_prediction)

Predictions for 84: Adult tissue-resident macrophages are maintained independent of circulating monocytes.


In [9]:
# Look at the predictions stored at position 11: first how many abstracts were
# predicted, then the full mapping itself.
predictions_at_11 = all_original_claim_predictions[11].predictions
print(len(predictions_at_11.keys()))
print(predictions_at_11)

4
{12827098: PredictedAbstract(abstract_id=12827098, label=<Label.SUPPORTS: 2>, rationale=[5]), 7521113: PredictedAbstract(abstract_id=7521113, label=<Label.SUPPORTS: 2>, rationale=[4]), 22406695: PredictedAbstract(abstract_id=22406695, label=<Label.SUPPORTS: 2>, rationale=[1]), 22973574: PredictedAbstract(abstract_id=22973574, label=<Label.SUPPORTS: 2>, rationale=[3])}


## Paraphrase and attack original claims

In [10]:
import torch
from transformers import T5ForConditionalGeneration, PreTrainedTokenizer, PreTrainedModel, T5Tokenizer, T5ForConditionalGeneration
# Paraphraser constants.
# NOTE(review): the checkpoint path is absolute and machine-specific; consider
# sourcing it from the project configuration.
paraphrase_model_checkpoint_path = '/home/qudratealahyratu/research/nlp/fact_checking/my_work/SciMedAttack/results/t5_paws_masked_claim_abstract_paws_3_epoch_2/models/model_3_epochs/'
paraphrase_model_tokenizer = 'Vamsi/T5_Paraphrase_Paws'

# Keyword arguments later unpacked into ModelConfig for paraphrase generation.
PARAPHRASE_CONFIG_PARAMS = dict(
    max_length=512,
    do_sample=True,
    top_k=50,
    top_p=0.99,
    repetition_penalty=3.5,
    early_stopping=True,
    num_return_sequences=10,
)

# Utility function to load T5 model
def load_t5_model(checkpoint_path):
    """Load a T5 conditional-generation model and move it to ``DEVICE``.

    Args:
        checkpoint_path: Location passed to
            ``T5ForConditionalGeneration.from_pretrained``.

    Returns:
        The loaded model after ``.to(DEVICE)`` has been applied.
    """
    return T5ForConditionalGeneration.from_pretrained(checkpoint_path).to(DEVICE)


**Technical terms**

In [11]:
import pickle

# Load the pickled abbreviation and entity collections.
# NOTE: unpickling can execute arbitrary code - only load files you trust.
merged_abbreviations = pickle.loads(
    Path('../data/meta/merged_abbreviations.pkl').read_bytes()
)
merged_entities = pickle.loads(
    Path('../data/meta/merged_entities.pkl').read_bytes()
)


**Paraphrase, entailment, and classification models**

In [12]:
# Preparing paraphraser
from T5ParEvo.src.paraphrase.paraphrase_claim import ParaphrasedAttack
from T5ParEvo.src.paraphrase.paraphraser import T5Paraphraser, ModelConfig

# Generation settings for the paraphraser, built from PARAPHRASE_CONFIG_PARAMS.
paraphrase_config = ModelConfig(**PARAPHRASE_CONFIG_PARAMS)

# T5 weights are loaded from the checkpoint path; the tokenizer is loaded
# separately from the name stored in `paraphrase_model_tokenizer`.
model_t5 = load_t5_model(paraphrase_model_checkpoint_path)
tokenizer_t5 = T5Tokenizer.from_pretrained(paraphrase_model_tokenizer)

# Bundle model, tokenizer and generation settings into the paraphraser.
paraphrase_model = T5Paraphraser(
    model_t5,
    tokenizer_t5,
    paraphrase_config,
)

# Initialize entailment model
from T5ParEvo.src.paraphrase.paraphrase_claim  import TorchEntailmentPredictionModel


# Entailment model: 'roberta.large.mnli' requested from 'pytorch/fairseq',
# placed on DEVICE.
entailment_model = TorchEntailmentPredictionModel(
    model_path='pytorch/fairseq',
    model_name='roberta.large.mnli',
    device=DEVICE,
)

# The attack object combines the paraphraser, the target prediction model and
# the entailment model, plus the merged entity list passed as `list_ners`.
paraphrase_attack_model = ParaphrasedAttack(
    paraphrase_model,
    prediction_model,
    entailment_model,
    list_ners=merged_entities,
)

Using cache found in /home/qudratealahyratu/.cache/torch/hub/pytorch_fairseq_main
The version_base parameter is not specified.
Please specify a compatability version level, or None.
Will assume defaults for version 1.1
  caller_stack_depth=caller_stack_depth + 1,
See https://hydra.cc/docs/next/upgrades/1.0_to_1.1/changes_to_package_header for more information
  See {url} for more information"""
'config' is validated against ConfigStore schema with the same name.
This behavior is deprecated in Hydra 1.1 and will be removed in Hydra 1.2.
See https://hydra.cc/docs/next/upgrades/1.0_to_1.1/automatic_schema_matching for migration instructions.
  state = load_checkpoint_to_cpu(filename, arg_overrides)


ANTLR runtime and generated code versions disagree: 4.9.3!=4.8
ANTLR runtime and generated code versions disagree: 4.9.3!=4.8


The strict flag in the compose API is deprecated.
See https://hydra.cc/docs/upgrades/0.11_to_1.0/strict_mode_flag_deprecated for more info.

  """
The version_base parameter is not specified.
Please specify a compatability version level, or None.
Will assume defaults for version 1.1
  caller_stack_depth=caller_stack_depth + 1,
'config' is validated against ConfigStore schema with the same name.
This behavior is deprecated in Hydra 1.1 and will be removed in Hydra 1.2.
See https://hydra.cc/docs/next/upgrades/1.0_to_1.1/automatic_schema_matching for migration instructions.
  **kwargs,


In [13]:
from T5ParEvo.src.paraphrase.paraphrase_claim import ParaphrasedClaim
# Run the paraphrase attack once per original claim prediction.
# NOTE(review): iteration_counter is never incremented, so every attack is
# tagged with iteration 0 - confirm that this is intended.
iteration_counter = 0

# Each attack() result is itself iterated and indexed by the cells below
# (e.g. all_paraphrased_attacks[0][0] is a ParaphrasedClaim), so the outer list
# holds one collection of ParaphrasedClaim objects per original claim.
all_paraphrased_attacks: List[List[ParaphrasedClaim]] = []
for cur_original_claim_pred in tqdm(all_original_claim_predictions[:12], desc="Paraphrasing claims"):
    paraphrased_attack = paraphrase_attack_model.attack(
        iteration=iteration_counter,
        original_claim=cur_original_claim_pred.gold,
        original_prediction=cur_original_claim_pred,
        predict_if_pass_filter=False,
    )
    all_paraphrased_attacks.append(paraphrased_attack)

Paraphrasing claims: 100%|██████████| 12/12 [07:21<00:00, 36.79s/it]


In [14]:
# For the attacks stored at positions 3 and 4, print the original claim text,
# the NLI label, the technical-term flag and both claim states of every
# paraphrased candidate.
selected_candidates = (
    candidate
    for claim_attacks in all_paraphrased_attacks[3:5]
    for candidate in claim_attacks
)
for candidate in selected_candidates:
    print("original_claim : ", candidate.original_claim.claim)
    print("nli_label : ", candidate.nli_label)
    print('is tech term preserved : ', candidate.is_ners_preserved)
    print('original claim state : ', candidate.original_claim_state)
    print('paraphrased claim state : ', candidate.paraphrased_claim_state)
        

original_claim :  8% of burn patients are admitted for hospitalization and further treatment after appearing at hospital emergency wards or outpatient clinics.
nli_label :  True
is tech term preserved :  True
original claim state :  ClaimState.EMPTY
paraphrased claim state :  ClaimState.EMPTY
original_claim :  8% of burn patients are admitted for hospitalization and further treatment after appearing at hospital emergency wards or outpatient clinics.
nli_label :  True
is tech term preserved :  True
original claim state :  ClaimState.EMPTY
paraphrased claim state :  ClaimState.EMPTY
original_claim :  8% of burn patients are admitted for hospitalization and further treatment after appearing at hospital emergency wards or outpatient clinics.
nli_label :  False
is tech term preserved :  False
original claim state :  ClaimState.EMPTY
paraphrased claim state :  ClaimState.EMPTY
original_claim :  8% of burn patients are admitted for hospitalization and further treatment after appearing at hosp

In [19]:
from T5ParEvo.src.paraphrase.paraphrase_claim import ParaphrasedAttackResult
from T5ParEvo.src.paraphrase.paraphrase_claim import ParaphrasedClaim, ParaphrasedAttackResult
# Step 1: for every paraphrased candidate, let the attack model calculate and
# set its claim states (the call's return value is not used), then wrap the
# candidate in a ParaphrasedAttackResult.
all_attack_results: List[ParaphrasedAttackResult] = []
for candidate in (c for claim_attacks in all_paraphrased_attacks for c in claim_attacks):
    paraphrase_attack_model.calculate_and_set_claim_states(candidate)
    all_attack_results.append(ParaphrasedAttackResult(candidate))

# Step 2: once all results exist, have each one determine its attack status.
for attack_result in all_attack_results:
    attack_result.determine_attack_status()

In [None]:
# Display the first paraphrased candidate produced for the first original claim.
all_paraphrased_attacks[0][0]

ParaphrasedClaim(iteration=0, original_claim=Example 7: 10-20% of people with severe mental disorder receive no treatment in low and middle income countries., paraphrased_claim=Example 7: 10-20 % of people with severe mental disorder receive no treatment in low and middle income countries., original_prediction=Predictions for 7: 10-20% of people with severe mental disorder receive no treatment in low and middle income countries., paraphrased_prediction=Predictions for 7: 10-20 % of people with severe mental disorder receive no treatment in low and middle income countries., is_ners_preserved=True, nli_label=True, original_claim_state=<ClaimState.EMPTY: 'Empty Prediction Result'>, paraphrased_claim_state=<ClaimState.EMPTY: 'Empty Prediction Result'>, attack_result=None)

In [23]:
from T5ParEvo.src.paraphrase.paraphrase_claim import AttackStatus
# Print the status of every attack result marked as successful
# (nothing is printed when there is none).
successful_attacks = [
    attack_result
    for attack_result in all_attack_results
    if attack_result.attack_status == AttackStatus.SUCCESSFUL
]
for attack_result in successful_attacks:
    print(attack_result.attack_status)

In [33]:
# For attack results 50-69, print the support/refute/NEI counts of the original
# prediction next to those of the paraphrased prediction.
for attack_result in all_attack_results[50:70]:
    original_counts = ClaimPredictions.get_count_support_refute_nei(
        attack_result.attack.original_prediction
    )
    paraphrased_counts = ClaimPredictions.get_count_support_refute_nei(
        attack_result.attack.paraphrased_prediction
    )
    print(original_counts, paraphrased_counts)

{'count_support': 1, 'count_refute': 0, 'count_nei': 0} {'count_support': 1, 'count_refute': 0, 'count_nei': 0}
{'count_support': 1, 'count_refute': 0, 'count_nei': 0} {'count_support': 1, 'count_refute': 0, 'count_nei': 0}
{'count_support': 1, 'count_refute': 0, 'count_nei': 0} {'count_support': 1, 'count_refute': 0, 'count_nei': 0}
{'count_support': 1, 'count_refute': 0, 'count_nei': 0} {'count_support': 1, 'count_refute': 0, 'count_nei': 0}
{'count_support': 1, 'count_refute': 0, 'count_nei': 0} {'count_support': 1, 'count_refute': 0, 'count_nei': 0}
{'count_support': 1, 'count_refute': 0, 'count_nei': 0} {'count_support': 1, 'count_refute': 0, 'count_nei': 0}
{'count_support': 1, 'count_refute': 0, 'count_nei': 0} {'count_support': 1, 'count_refute': 0, 'count_nei': 0}
{'count_support': 1, 'count_refute': 0, 'count_nei': 0} {'count_support': 1, 'count_refute': 0, 'count_nei': 0}
{'count_support': 1, 'count_refute': 0, 'count_nei': 0} {'count_support': 1, 'count_refute': 0, 'count_n

In [35]:
# Preview the first ten attack results.
all_attack_results[:10]

[ParaphrasedAttackResult(attack=ParaphrasedClaim(iteration=0, original_claim=Example 7: 10-20% of people with severe mental disorder receive no treatment in low and middle income countries., paraphrased_claim=Example 7: 10-20% of people with severe mental disorder receive no treatment in low and middle income countries., original_prediction=Predictions for 7: 10-20% of people with severe mental disorder receive no treatment in low and middle income countries., paraphrased_prediction=Predictions for 7: 10-20% of people with severe mental disorder receive no treatment in low and middle income countries., is_ners_preserved=True, nli_label=True, original_claim_state=<ClaimState.SUPPORT_MAJORITY: 'Support Majority'>, paraphrased_claim_state=<ClaimState.SUPPORT_MAJORITY: 'Support Majority'>, attack_result=None), attack_status=<AttackStatus.UNSUCCESSFUL: 'Unsuccessful Attack'>),
 ParaphrasedAttackResult(attack=ParaphrasedClaim(iteration=0, original_claim=Example 7: 10-20% of people with sever

In [60]:
from T5ParEvo.src.paraphrase.paraphrase_claim import ClaimState
# Collect the attack results whose paraphrased claim ended up in `target_state`.
# Only the paraphrased claim's state is compared here; the original claim's
# state is not consulted.
target_state = ClaimState.SUPPORT_MAJORITY
same_state_results = list(
    filter(
        lambda attack_result: attack_result.attack.paraphrased_claim_state == target_state,
        all_attack_results,
    )
)

In [61]:
# Number of attack results whose paraphrased claim state equals target_state.
len(same_state_results)

41