In [1]:
!pip install datasets

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (

In [2]:
import torch
from transformers import BartForConditionalGeneration, BartTokenizer, Trainer, TrainingArguments
import json
from datasets import Dataset, DatasetDict

In [3]:
# Load the BART model and tokenizer
model_name = 'facebook/bart-base'
model = BartForConditionalGeneration.from_pretrained(model_name)
tokenizer = BartTokenizer.from_pretrained(model_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.72k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/558M [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [4]:
# Load the preprocessed dataset
with open('preprocessed_data.json', 'r') as f:
    data = json.load(f)

In [5]:
# Prepare the dataset for training and evaluation
train_data = []
eval_data = []
for i, entry in enumerate(data):
    for prompt in entry['prompts']:
        example = {
            'input_text': prompt,
            'target_text': entry['marked_sentence']
        }
        if i % 10 == 0:  # Use 10% of the data for evaluation
            eval_data.append(example)
        else:
            train_data.append(example)

In [6]:
# Convert the training and evaluation data into Dataset objects
datasets = DatasetDict({
    'train': Dataset.from_list(train_data),
    'eval': Dataset.from_list(eval_data)
})

In [7]:
# Tokenize the input and target texts
def tokenize_function(example):
    input_encoding = tokenizer(example['input_text'], padding='max_length', truncation=True, max_length=512)
    target_encoding = tokenizer(example['target_text'], padding='max_length', truncation=True, max_length=512)
    input_encoding['labels'] = target_encoding['input_ids']
    return input_encoding

tokenized_datasets = datasets.map(tokenize_function, batched=True)

Map:   0%|          | 0/7371 [00:00<?, ? examples/s]

Map:   0%|          | 0/823 [00:00<?, ? examples/s]

In [8]:
# Set training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=4,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    save_total_limit=2,
    save_steps=500,
    evaluation_strategy='steps',
    eval_steps=500,
    learning_rate=5e-5,
    fp16=True if torch.cuda.is_available() else False,
)



In [9]:
# Define the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['eval'],
    tokenizer=tokenizer,
)

  trainer = Trainer(


In [10]:
# Train the model
trainer.train()

# Save the fine-tuned model
model.save_pretrained('./fine_tuned_bart')
tokenizer.save_pretrained('./fine_tuned_bart')

print("Model fine-tuning completed and saved to './fine_tuned_bart'")

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Step,Training Loss,Validation Loss
500,0.0029,0.000913
1000,0.005,0.000823
1500,0.001,0.000613
2000,0.0015,0.000541
2500,0.003,0.000542
3000,0.0013,0.000546
3500,0.0002,0.000481
4000,0.0001,0.000423
4500,0.0004,0.000493
5000,0.0003,0.000451




Model fine-tuning completed and saved to './fine_tuned_bart'


In [36]:
# Evaluate on the evaluation set
eval_results = trainer.evaluate()
print("Evaluation results:", eval_results)

Evaluation results: {'eval_loss': 0.0004964735126122832, 'eval_runtime': 16.4754, 'eval_samples_per_second': 49.953, 'eval_steps_per_second': 6.252, 'epoch': 3.0}


In [17]:
!pip install evaluate

Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.3


In [14]:
!pip install rouge_score

Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24935 sha256=7763b88451aa207c5fb1ab6ed41319a1896a3be2e6d9c29bea8c13f20a090b65
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2


In [15]:
import torch
from transformers import BartForConditionalGeneration, BartTokenizer, pipeline
import json
from datasets import Dataset
import re
from sklearn.metrics import precision_recall_fscore_support
from rouge_score import rouge_scorer

# Load the fine-tuned BART model and tokenizer
model_path = './fine_tuned_bart'
model = BartForConditionalGeneration.from_pretrained(model_path)
tokenizer = BartTokenizer.from_pretrained(model_path)

# Sample input for testing
sample_data = [
    {
        "input_text": "Extract the details of the Disease_disorder event occurring in this text: A 58-year-old man with poorly controlled hypertension presented to the ICU for cardiogenic shock.",
        "target_text": "A <arg>58-year-old</arg> man with poorly controlled <trigger>hypertension</trigger> presented to the <arg>ICU</arg> for <trigger>cardiogenic shock</trigger>."
    },
    {
        "input_text": "Extract the details of the Clinical_event event occurring in this text: The patient experienced sudden chest pain, indicating a myocardial infarction.",
        "target_text": "The patient experienced sudden <trigger>chest pain</trigger>, indicating a <trigger>myocardial infarction</trigger>."
    }
]

# Convert the sample data into a Dataset object
test_dataset = Dataset.from_list(sample_data)

# Tokenize the input texts
def tokenize_function(example):
    input_encoding = tokenizer(example['input_text'], padding='max_length', truncation=True, max_length=512)
    target_encoding = tokenizer(example['target_text'], padding='max_length', truncation=True, max_length=512)
    input_encoding['labels'] = target_encoding['input_ids']
    return input_encoding

tokenized_test_dataset = test_dataset.map(tokenize_function, batched=True)

# Define a function to add tags using a simple rule-based approach
def add_tags_post_processing(generated_text, reference_text):
    # Extract all arguments and triggers from the reference text
    arg_pattern = r'<arg>(.*?)</arg>'
    trigger_pattern = r'<trigger>(.*?)</trigger>'

    args = re.findall(arg_pattern, reference_text)
    triggers = re.findall(trigger_pattern, reference_text)

    # Apply tags to generated text using a simple rule-based approach
    for arg in args:
        generated_text = re.sub(rf'\b{re.escape(arg)}\b', f'<arg>{arg}</arg>', generated_text)
    for trigger in triggers:
        generated_text = re.sub(rf'\b{re.escape(trigger)}\b', f'<trigger>{trigger}</trigger>', generated_text)

    return generated_text

# Define the function to test and evaluate the model
def test_and_evaluate_model(model, dataset):
    model.eval()
    generated_texts = []
    references = []

    for i in range(len(dataset)):
        input_text = dataset[i]['input_text']
        target_text = dataset[i]['target_text']

        # Tokenize the input text
        inputs = tokenizer(input_text, return_tensors='pt', truncation=True, max_length=512).to(model.device)

        # Generate output using the model
        with torch.no_grad():
            outputs = model.generate(**inputs, max_length=512, num_beams=4, early_stopping=True)

        # Decode the generated output
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Apply post-processing to add tags
        generated_text_with_tags = add_tags_post_processing(generated_text, target_text)

        # Append results
        generated_texts.append(generated_text_with_tags)
        references.append(target_text)

        # Print input, reference, and generated output
        print(f"Input: {input_text}")
        print(f"Reference: {target_text}")
        print(f"Generated (without tags): {generated_text}")
        print(f"Generated (with tags): {generated_text_with_tags}")
        print("-" * 50)

    # Evaluate the model using precision, recall, and F1-score
    y_true = []
    y_pred = []
    for ref, gen in zip(references, generated_texts):
        ref_tags = re.findall(r'<(arg|trigger)>.*?</\1>', ref)
        gen_tags = re.findall(r'<(arg|trigger)>.*?</\1>', gen)
        y_true.extend(ref_tags)
        y_pred.extend(gen_tags)

    precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average='micro')
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")

    # Evaluate using ROUGE score
    rouge_scorer_obj = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    rouge_scores = [rouge_scorer_obj.score(ref, gen) for ref, gen in zip(references, generated_texts)]
    avg_rouge1 = sum([score['rouge1'].fmeasure for score in rouge_scores]) / len(rouge_scores)
    avg_rouge2 = sum([score['rouge2'].fmeasure for score in rouge_scores]) / len(rouge_scores)
    avg_rougeL = sum([score['rougeL'].fmeasure for score in rouge_scores]) / len(rouge_scores)

    print(f"Average ROUGE-1 F1 Score: {avg_rouge1:.4f}")
    print(f"Average ROUGE-2 F1 Score: {avg_rouge2:.4f}")
    print(f"Average ROUGE-L F1 Score: {avg_rougeL:.4f}")

# Test and evaluate the model
test_and_evaluate_model(model, tokenized_test_dataset)

Map:   0%|          | 0/2 [00:00<?, ? examples/s]

Input: Extract the details of the Disease_disorder event occurring in this text: A 58-year-old man with poorly controlled hypertension presented to the ICU for cardiogenic shock.
Reference: A <arg>58-year-old</arg> man with poorly controlled <trigger>hypertension</trigger> presented to the <arg>ICU</arg> for <trigger>cardiogenic shock</trigger>.
Generated (without tags): A 58-year-old man with poorly controlled hypertension presented to the ICU for cardiogenic shock.
Generated (with tags): A <arg>58-year-old</arg> man with poorly controlled <trigger>hypertension</trigger> presented to the <arg>ICU</arg> for <trigger>cardiogenic shock</trigger>.
--------------------------------------------------
Input: Extract the details of the Clinical_event event occurring in this text: The patient experienced sudden chest pain, indicating a myocardial infarction.
Reference: The patient experienced sudden <trigger>chest pain</trigger>, indicating a <trigger>myocardial infarction</trigger>.
Generated 