In [1]:
import argparse
import json
import os
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

from dataclasses import dataclass
from tqdm import tqdm

# Add module path to sys.path
module_path = os.path.abspath(os.path.join('../'))
if module_path not in sys.path:
    sys.path.insert(0,module_path)

# Import third-party libraries
from config import SciFactT5Config
from multivers.data_r import ClaimDataLoaderGenerator, DataLoaderGenerator, get_dataloader
from multivers.model_r import MultiVerSModel
from multivers import util

# Other necessary imports
import definitions

# Add necessary paths to sys.path
sys.path.append(os.path.dirname(definitions.PROJECT_VARS.ROOT_DIR))

# Import project-specific modules
from T5ParEvo.src.data.data import Claim, ClaimPredictions, GoldDataset, Label
from T5ParEvo.src.linguistic.ner_abbr import Abbreviation, NEREntity
from T5ParEvo.target_system.multivers.multivers_interface import ModelPredictorMultivers, PredictionParams,ModelPredictorMultiversList
from T5ParEvo.src.paraphrase.paraphrase_claim import ClaimState
from T5ParEvo.src.util.logger import LoggerConfig, NeptuneConfig, LogConfigurator, NeptuneRunner

# Print module_path and definitions.PROJECT_VARS.ROOT_DIR
print('module_path:', module_path)
print('definitions.PROJECT_VARS.ROOT_DIR:', definitions.PROJECT_VARS.ROOT_DIR)


root dir :  /home/qudratealahyratu/research/nlp/fact_checking/my_work/T5ParEvo


  "The `@auto_move_data` decorator is deprecated in v1.3 and will be removed in v1.5."


/home/qudratealahyratu/research/nlp/fact_checking/my_work/T5ParEvo
module_path: /home/qudratealahyratu/research/nlp/fact_checking/my_work/T5ParEvo
definitions.PROJECT_VARS.ROOT_DIR: /home/qudratealahyratu/research/nlp/fact_checking/my_work/T5ParEvo


In [2]:
TRAINING_DIRECTION : ClaimState = ClaimState.SUPPORT_MAJORITY
import torch
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:

os.environ['NEPTUNE_API_TOKEN'] = 'eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiI2NWQwMGIyZi1mNzM5LTRiMjEtOTg2MC1mNTc4ODRiMWU2ZGYifQ=='
log_config = LoggerConfig()
log_configurator = LogConfigurator(log_config)
log_configurator.configure()

neptune_config = NeptuneConfig(project_name="ratulalahy/scifact-paraphrase-T5-evo",
                                tags=['other model attack', 'tech_term', 'mlnli'],
                                source_files=["t5_par_evo_test.ipynb", "*.yaml", "config.py", "definition.py"])
                                #source_files=["**/*.ipynb", "*.yaml"])
neptune_runner = NeptuneRunner(neptune_config)
nep_run = neptune_runner.run()

Logging to /home/qudratealahyratu/research/nlp/fact_checking/my_work/T5ParEvo/logs/log_all_.log
https://app.neptune.ai/ratulalahy/scifact-paraphrase-T5-evo/e/SCIF3-182
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#.stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.


In [4]:
from T5ParEvo.src.util.logger import LoggerConfig, LogConfigurator, NeptuneConfig, NeptuneRunner, Logger, LightningLogger

logger = Logger(nep_run, log_configurator)
lightning_logger = LightningLogger(logger)

Logging to /home/qudratealahyratu/research/nlp/fact_checking/my_work/T5ParEvo/logs/log_all_.log


## Load Data

In [5]:
# This dataset to be used only for training
cfg= SciFactT5Config()
ds_train = GoldDataset(cfg.target_dataset.loc_target_dataset_corpus,
                    cfg.target_dataset.loc_target_dataset_train)
claim_train = ds_train.get_claim(39)

In [6]:

params = PredictionParams(
    checkpoint_path= "/home/qudratealahyratu/research/nlp/fact_checking/my_work/multivers/checkpoints/scifact.ckpt",
    output_file= None,#"prediction/pred_opt_scifact.jsonl",
    batch_size=5,
    device=0,
    num_workers=4,
    no_nei=False,
    force_rationale=False,
    debug=False,
    corpus_file = cfg.target_dataset.loc_target_dataset_corpus
)
corpus_file = cfg.target_dataset.loc_target_dataset_corpus#cfg.target_dataset.loc_target_dataset_test#"/home/qudratealahyratu/research/nlp/fact_checking/my_work/multivers/data/scifact/corpus.jsonl"
    

gold_claims = []
claims_path = cfg.target_dataset.loc_target_dataset_test#'/home/qudratealahyratu/research/nlp/fact_checking/my_work/multivers/data/scifact/claims_test_retrived.jsonl'
with open(claims_path, 'r') as f:
    for line in f:
        data = json.loads(line)
        claim = Claim(id = data['id'], claim = data['claim'], cited_docs = data['doc_ids'], evidence = {},release = None)
        gold_claims.append(claim)

#get unique claims
unique_gold_claims = Claim.get_unique_claims(gold_claims)

## Predict Original Claims

In [7]:
# Loading unique claims and preparing prediction model
unique_gold_claims = Claim.get_unique_claims(gold_claims)
dataloader_generator = ClaimDataLoaderGenerator(params, unique_gold_claims[0], corpus_file)
dataloader = dataloader_generator.get_dataloader_by_single_claim()
# prediction_model = ModelPredictorMultivers(params, dataloader, corpus_file)
prediction_model = ModelPredictorMultivers(params, unique_gold_claims[0])
original_claim_predictions_raw = prediction_model.predict(unique_gold_claims[0])

Some weights of the model checkpoint at allenai/longformer-large-4096 were not used when initializing LongformerModel: ['lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.bias']
- This IS expected if you are initializing LongformerModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  stream(template_mgs % msg_args)


In [25]:
# Predicting for unique claims
all_original_claim_predictions : List[ClaimPredictions]= []
for cur_uniq_claim in tqdm(unique_gold_claims[:100], desc="Predicting for unique claims"):
    original_claim_prediction = prediction_model.predict(cur_uniq_claim)
    all_original_claim_predictions.append(original_claim_prediction)

Predicting for unique claims: 100%|██████████| 100/100 [06:17<00:00,  3.77s/it]


In [26]:
# Checking how many claims have more than one prediction
for cur_prediction in all_original_claim_predictions:
    if len(cur_prediction.predictions.keys()) > 1:
        print(cur_prediction)

Predictions for 84: Adult tissue-resident macrophages are maintained independent of circulating monocytes.
Predictions for 107: Amitriptyline ineffective as a treatment for chronic tension-type headaches.
Predictions for 260: Chronic aerobic exercise alters endothelial function, impairing vasodilating mechanisms mediated by NO.
Predictions for 307: DUSP4 downregulation activates the Ras-ERK pathway
Predictions for 465: Genomic instability in leukemia cells results from an increase in reactive oxygen species from oncogene activation.


In [27]:
print(len(all_original_claim_predictions[11].predictions.keys()))
print(all_original_claim_predictions[11].predictions)

4
{12827098: PredictedAbstract(abstract_id=12827098, label=<Label.SUPPORTS: 2>, rationale=[5]), 7521113: PredictedAbstract(abstract_id=7521113, label=<Label.SUPPORTS: 2>, rationale=[4]), 22406695: PredictedAbstract(abstract_id=22406695, label=<Label.SUPPORTS: 2>, rationale=[1]), 22973574: PredictedAbstract(abstract_id=22973574, label=<Label.SUPPORTS: 2>, rationale=[3])}


## Paraphrase and attack original claims

In [8]:
import torch
from transformers import T5ForConditionalGeneration, PreTrainedTokenizer, PreTrainedModel, T5Tokenizer, T5ForConditionalGeneration
# Constants and configurations
PARAPHRASE_MODEL_CHECKPOINT_PATH_URL = '/home/qudratealahyratu/research/nlp/fact_checking/my_work/SciMedAttack/results/t5_paws_masked_claim_abstract_paws_3_epoch_2/models/model_3_epochs/'
PARAPHRASE_MODEL_TOKENIZER = 'Vamsi/T5_Paraphrase_Paws'
PARAPHRASE_CONFIG_PARAMS = {
    'max_length': 512,
    'do_sample': True,
    'top_k': 50,
    'top_p': 0.99,
    'repetition_penalty': 3.5,
    'early_stopping': True,
    'num_return_sequences': 10
}

# Utility function to load T5 model
def load_t5_model(checkpoint_path):
    model_t5 = T5ForConditionalGeneration.from_pretrained(checkpoint_path)
    model_t5 = model_t5.to(DEVICE)
    return model_t5


**Technical terms**

In [9]:
import pickle

with open('../data/meta/merged_abbreviations.pkl', 'rb') as f:
    merged_abbreviations = pickle.load(f)

with open('../data/meta/merged_entities.pkl', 'rb') as f:
    merged_entities = pickle.load(f)


**Paraphrase,Entailment,classification model**

In [10]:
# Preparing paraphraser
from T5ParEvo.src.paraphrase.paraphrase_claim import ParaphrasedAttack
from T5ParEvo.src.paraphrase.paraphraser import T5Paraphraser, ModelConfig

# Load T5 model and tokenizer
model_t5 = load_t5_model(PARAPHRASE_MODEL_CHECKPOINT_PATH_URL)
tokenizer_t5 = T5Tokenizer.from_pretrained(PARAPHRASE_MODEL_TOKENIZER)

# Initialize paraphrase model and paraphrase attack
paraphrase_config = ModelConfig(**PARAPHRASE_CONFIG_PARAMS)
paraphrase_model = T5Paraphraser(model_t5, tokenizer_t5, paraphrase_config)

# Initialize entailment model
from T5ParEvo.src.paraphrase.paraphrase_claim  import TorchEntailmentPredictionModel


entailment_model = TorchEntailmentPredictionModel(model_path=  'pytorch/fairseq', model_name = 'roberta.large.mnli', device= DEVICE)

# Initialize paraphrase attack
paraphrase_attack_model = ParaphrasedAttack(paraphrase_model, prediction_model,entailment_model ,list_ners = merged_entities)

Using cache found in /home/qudratealahyratu/.cache/torch/hub/pytorch_fairseq_main
The version_base parameter is not specified.
Please specify a compatability version level, or None.
Will assume defaults for version 1.1
  caller_stack_depth=caller_stack_depth + 1,
See https://hydra.cc/docs/next/upgrades/1.0_to_1.1/changes_to_package_header for more information
  See {url} for more information"""
'config' is validated against ConfigStore schema with the same name.
This behavior is deprecated in Hydra 1.1 and will be removed in Hydra 1.2.
See https://hydra.cc/docs/next/upgrades/1.0_to_1.1/automatic_schema_matching for migration instructions.
  state = load_checkpoint_to_cpu(filename, arg_overrides)


ANTLR runtime and generated code versions disagree: 4.9.3!=4.8
ANTLR runtime and generated code versions disagree: 4.9.3!=4.8


The strict flag in the compose API is deprecated.
See https://hydra.cc/docs/upgrades/0.11_to_1.0/strict_mode_flag_deprecated for more info.

  """
The version_base parameter is not specified.
Please specify a compatability version level, or None.
Will assume defaults for version 1.1
  caller_stack_depth=caller_stack_depth + 1,
'config' is validated against ConfigStore schema with the same name.
This behavior is deprecated in Hydra 1.1 and will be removed in Hydra 1.2.
See https://hydra.cc/docs/next/upgrades/1.0_to_1.1/automatic_schema_matching for migration instructions.
  **kwargs,


In [37]:
from T5ParEvo.src.paraphrase.paraphrase_claim import ParaphrasedClaim
iteration_counter = 0
all_paraphrased_attacks : List[ParaphrasedClaim] = []
for cur_original_claim_pred in tqdm(all_original_claim_predictions[:], desc="Paraphrasing claims"):
    paraphrased_attack = paraphrase_attack_model.attack(iteration = iteration_counter, 
                                                  original_claim= cur_original_claim_pred.gold, 
                                                  original_prediction = cur_original_claim_pred, 
                                                  predict_if_pass_filter=False)
    all_paraphrased_attacks.append(paraphrased_attack)
# result = paraphrase_attack.attack(iteration = 0, original_claim= all_original_claim_predictions[11].gold, 
#         original_prediction =all_original_claim_predictions[11], 
#         predict_if_pass_filter=False)

Paraphrasing claims:   0%|          | 0/100 [00:00<?, ?it/s]


AttributeError: 'T5FineTuner' object has no attribute 'generate'

In [17]:
for cur_attack in all_paraphrased_attacks[3:5]:
    for cur_paraphrased_claim in cur_attack:
        print("original_claim : ", cur_paraphrased_claim.original_claim.claim)
        # print('#'*10)
        # print("original_predict : ", cur_paraphrased_claim.original_prediction.predictions)
        # print(ClaimPredictions.get_count_support_refute(cur_paraphrased_claim.original_prediction))
        # print('Paraphrased Claim : ', cur_paraphrased_claim.paraphrased_claim.claim)
        # print(ClaimPredictions.get_count_support_refute(cur_paraphrased_claim.paraphrased_prediction))
        print("nli_label : ", cur_paraphrased_claim.nli_label)
        print('is tech term preserved : ', cur_paraphrased_claim.is_ners_preserved)
        print('original claim state : ', cur_paraphrased_claim.original_claim_state)
        print('paraphrased claim state : ', cur_paraphrased_claim.paraphrased_claim_state)
        

original_claim :  8% of burn patients are admitted for hospitalization and further treatment after appearing at hospital emergency wards or outpatient clinics.
nli_label :  True
is tech term preserved :  True
original claim state :  ClaimState.EMPTY
paraphrased claim state :  ClaimState.EMPTY
original_claim :  8% of burn patients are admitted for hospitalization and further treatment after appearing at hospital emergency wards or outpatient clinics.
nli_label :  True
is tech term preserved :  True
original claim state :  ClaimState.EMPTY
paraphrased claim state :  ClaimState.EMPTY
original_claim :  8% of burn patients are admitted for hospitalization and further treatment after appearing at hospital emergency wards or outpatient clinics.
nli_label :  True
is tech term preserved :  True
original claim state :  ClaimState.EMPTY
paraphrased claim state :  ClaimState.EMPTY
original_claim :  8% of burn patients are admitted for hospitalization and further treatment after appearing at hospit

In [11]:
from T5ParEvo.src.paraphrase.paraphrase_claim import ParaphrasedAttackResult
from T5ParEvo.src.paraphrase.paraphrase_claim import ParaphrasedClaim, ParaphrasedAttackResult

# Post processing the attack results. like filtering, majority and so on.
all_attack_results : List[ParaphrasedAttackResult] = []
for cur_claims_attack in all_paraphrased_attacks:
    for cur_attack in cur_claims_attack:
        paraphrase_attack_model.calculate_and_set_claim_states(cur_attack)
        all_attack_results.append(ParaphrasedAttackResult(cur_attack))

for cur_res in all_attack_results:    
    cur_res.determine_attack_status()
    cur_res.training_direction = TRAINING_DIRECTION

NameError: name 'all_paraphrased_attacks' is not defined

In [11]:
import pickle
# with open('all_attacks.pkl', 'wb') as f:
#   pickle.dump(all_attack_results, f)

with open('all_attacks.pkl', 'rb') as f:
    all_attack_results = pickle.load(f)

In [12]:
## Repeat code IGNORE!

for cur_res in all_attack_results:    
    cur_res.determine_attack_status()
    cur_res.training_direction = TRAINING_DIRECTION

In [13]:
all_attack_results

[ParaphrasedAttackResult(attack=ParaphrasedClaim(iteration=0, original_claim=Example 7: 10-20% of people with severe mental disorder receive no treatment in low and middle income countries., paraphrased_claim=Example 7: 10-20% of people with a severe mental disorder receive no treatment in low and middle-income countries., original_prediction=Predictions for 7: 10-20% of people with severe mental disorder receive no treatment in low and middle income countries., paraphrased_prediction=Predictions for 7: 10-20% of people with a severe mental disorder receive no treatment in low and middle-income countries., is_ners_preserved=True, nli_label=True, original_claim_state=<ClaimState.SUPPORT_MAJORITY: 'Support Majority'>, paraphrased_claim_state=<ClaimState.SUPPORT_MAJORITY: 'Support Majority'>, attack_result=None), training_direction=<ClaimState.SUPPORT_MAJORITY: 'Support Majority'>, attack_status=<AttackStatus.UNSUCCESSFUL: 'Unsuccessful Attack'>),
 ParaphrasedAttackResult(attack=Paraphras

In [14]:
from T5ParEvo.src.paraphrase.paraphrase_claim import AttackStatus
count_successful_attacks_training_direction = 0
unique_ids_attacks_training_direction = set()
ataccks_to_be_used_for_training = []
for cur_atk in all_attack_results:
    # print(cur_atk.attack_status == AttackStatus.SUCCESSFUL and cur_atk.training_direction == TRAINING_DIRECTION)
    if cur_atk.attack_status == AttackStatus.SUCCESSFUL and cur_atk.training_direction == TRAINING_DIRECTION:
        count_successful_attacks_training_direction += 1
        # print(cur_atk.attack.original_prediction.predictions)
        # print(cur_atk.attack.paraphrased_prediction.predictions)
        unique_ids_attacks_training_direction.add(cur_atk.attack.original_claim.id)
        ataccks_to_be_used_for_training.append(cur_atk)

In [15]:
ataccks_to_be_used_for_training

[ParaphrasedAttackResult(attack=ParaphrasedClaim(iteration=0, original_claim=Example 29: A breast cancer patient's capacity to metabolize tamoxifen has no effect on treatment outcome., paraphrased_claim=Example 29: The capacity of a breast cancer patient to metabolize Tamoxifen has no effect on the overall outcome of treatment., original_prediction=Predictions for 29: A breast cancer patient's capacity to metabolize tamoxifen has no effect on treatment outcome., paraphrased_prediction=Predictions for 29: The capacity of a breast cancer patient to metabolize Tamoxifen has no effect on the overall outcome of treatment., is_ners_preserved=True, nli_label=True, original_claim_state=<ClaimState.REFUTE_MAJORITY: 'Refute Majority'>, paraphrased_claim_state=<ClaimState.SUPPORT_MAJORITY: 'Support Majority'>, attack_result=None), training_direction=<ClaimState.SUPPORT_MAJORITY: 'Support Majority'>, attack_status=<AttackStatus.SUCCESSFUL: 'Successful Attack'>),
 ParaphrasedAttackResult(attack=Par

In [16]:
import pandas as pd
fine_tuning_data = {
    "org_claim": [cur_atk.attack.original_claim.claim for cur_atk in ataccks_to_be_used_for_training],
    "gen_claim": [cur_atk.attack.paraphrased_claim.claim for cur_atk in ataccks_to_be_used_for_training]
}
df_fine_tuning_dataset = pd.DataFrame(fine_tuning_data)

In [17]:
df_fine_tuning_dataset

Unnamed: 0,org_claim,gen_claim
0,A breast cancer patient's capacity to metaboli...,The capacity of a breast cancer patient to met...
1,An N-formyl group on M3 molecules suppresss st...,An N-formyl group on M3 molecules suppresses s...
2,Bariatric surgery has a deleterious impact on ...,Bariatric surgery has qatriuretic impact on me...
3,Bariatric surgery reduces resolution of diabetes.,Bariatric surgery reduces diabetes.
4,Carriers of the alcohol aldehyde dehydrogenase...,Carriers of the mutation from alcohol aldehyde...
5,Chronic aerobic exercise alters endothelial fu...,Chronic aerobic exercise affects endothelial f...
6,Excess nitric oxide is responsible for vasospasm.,Vasospasm is responsible for excess nitric oxide.
7,Excess nitric oxide is responsible for vasospasm.,Nitric oxide is excitable for vasospasm.
8,Having a main partner lowers survival rates in...,Decreased participation in a main partner redu...
9,Having a main partner lowers survival rates in...,The presence of a main partner lowers in HIV p...


In [18]:
from T5ParEvo.src.data.dataset_preparation import DatasetPreparation
SPLIT_SIZE = 0.2
prep = DatasetPreparation(df_fine_tuning_dataset, SPLIT_SIZE)
df_tune_train, df_tune_val = prep.split_and_reset_index()

In [19]:
df_tune_val

Unnamed: 0,org_claim,gen_claim
0,Transplanted human glial cells are incapable o...,Human glial cells are transformed into cell-li...
1,Bariatric surgery reduces resolution of diabetes.,Bariatric surgery reduces diabetes.
2,Participants who quit smoking reduce lung canc...,Participants who quit smoking reduce risk of l...
3,Metastases have genomic aberrations different ...,Metastases have unique genomic aberrations dis...
4,Having a main partner lowers survival rates in...,The presence of a main partner lowers in HIV p...


In [20]:
from T5ParEvo.src.models.fine_tune import FineTuneHyperParams
NUM_EPOCHS = 10
fineTuneHyperParam = FineTuneHyperParams(model_name_path = PARAPHRASE_MODEL_CHECKPOINT_PATH_URL, 
                                         tokenizer_name_path = PARAPHRASE_MODEL_TOKENIZER,
                                             num_train_epochs = NUM_EPOCHS, 
                                             df_train = df_tune_train, 
                                             df_val = df_tune_val, 
                                             df_train_val = df_fine_tuning_dataset)

In [21]:
from T5ParEvo.src.models.fine_tune import LoggingCallback
from T5ParEvo.src.models.fine_tune import T5FineTuner
from pytorch_lightning import Trainer
import pytorch_lightning as pl
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

early_stop_callback = EarlyStopping(
   monitor='val_loss',
   patience=3,
   verbose=False,
   mode='min'
)

model_t5_fine_tuned = T5FineTuner(fineTuneHyperParam)
trainer_model_t5_fine_tune = pl.Trainer(callbacks=[early_stop_callback, 
                                                    fineTuneHyperParam.get_checkpoint_callback(), 
                                                    LoggingCallback()], 
                                        logger=lightning_logger, 
                                        **fineTuneHyperParam.get_train_params())


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
  f"This sequence already has {self.eos_token}. In future versions this behavior may lead to duplicated eos tokens being added."
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


In [22]:
trainer_model_t5_fine_tune.fit(model_t5_fine_tuned)  

  "When using `Trainer(accumulate_grad_batches != 1)` and overriding"
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                       | Params
-----------------------------------------------------
0 | model | T5ForConditionalGeneration | 222 M 
-----------------------------------------------------
222 M     Trainable params
0         Non-trainable params
222 M     Total params
891.614   Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

  f"The number of training samples ({self.num_training_batches}) is smaller than the logging interval"


Training: -1it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Epoch 0, global step 0: val_loss reached 1.21778 (best 1.21778), saving model to "/home/qudratealahyratu/research/nlp/fact_checking/my_work/T5ParEvo/notebooks/checkpoints/best-checkpoint-v3.ckpt" as top 5


Validating: 0it [00:00, ?it/s]

Epoch 1, global step 1: val_loss reached 1.21778 (best 1.21778), saving model to "/home/qudratealahyratu/research/nlp/fact_checking/my_work/T5ParEvo/notebooks/checkpoints/best-checkpoint-v4.ckpt" as top 5


Validating: 0it [00:00, ?it/s]

Epoch 2, global step 2: val_loss reached 1.21778 (best 1.21778), saving model to "/home/qudratealahyratu/research/nlp/fact_checking/my_work/T5ParEvo/notebooks/checkpoints/best-checkpoint-v5.ckpt" as top 5


Validating: 0it [00:00, ?it/s]

Epoch 3, global step 3: val_loss reached 1.21778 (best 1.21778), saving model to "/home/qudratealahyratu/research/nlp/fact_checking/my_work/T5ParEvo/notebooks/checkpoints/best-checkpoint-v6.ckpt" as top 5


In [28]:

paraphrase_model = T5Paraphraser(model_t5_fine_tuned, tokenizer_t5, paraphrase_config)

# Initialize paraphrase attack
paraphrase_attack_model = ParaphrasedAttack(paraphrase_model, prediction_model,entailment_model ,list_ners = merged_entities)

In [35]:
type(paraphrase_model.model_t5.model)

transformers.models.t5.modeling_t5.T5ForConditionalGeneration

In [None]:
from T5ParEvo.src.paraphrase.paraphrase_claim import ClaimState
target_state = ClaimState.SUPPORT_MAJORITY 
same_state_results = [result for result in all_attack_results if result.attack.paraphrased_claim_state == target_state]

In [None]:
len(same_state_results)

In [101]:
from pytorch_lightning.loggers import LightningLoggerBase
from pytorch_lightning.utilities import rank_zero_only

class LightningLogger(LightningLoggerBase):
    def __init__(self, logger):
        super().__init__()
        self._logger = logger

    @property
    @rank_zero_only
    def experiment(self):
        return self._logger

    def log_hyperparams(self, params):
        self._logger.log('parameters', params)

    def log_metrics(self, metrics, step):
        for k, v in metrics.items():
            self._logger.log(k, v)

    def save(self):
        pass

    @rank_zero_only
    def finalize(self, status):
        pass

    @property
    def name(self):
        return 'LightningLogger'

    @property
    def version(self):
        return '0.0.1'