In [2]:
import sys, os
sys.path.insert(0, "/home/ubuntu/adapters/src")
import pandas as pd
import numpy as np
from tqdm import tqdm

import adapters
from adapters import AutoAdapterModel, AdapterTrainer, SeqBnConfig, Seq2SeqAdapterTrainer
import adapters.composition as ac
from adapters.composition import Fuse
import peft, torch
from transformers import (AutoTokenizer, 
                          AutoModel,
                          AutoModelForSeq2SeqLM,
                          BartForConditionalGeneration,
                          Seq2SeqTrainer, 
                          Seq2SeqTrainingArguments,
                          DataCollatorForSeq2Seq,
                          EarlyStoppingCallback,
                          set_seed,
                          Trainer
                         )
from datasets import Dataset, DatasetDict
device = 'cuda' if torch.cuda.is_available() else 'cpu'
adapters.__file__
import torch
torch.autograd.set_detect_anomaly(True)

  from .autonotebook import tqdm as notebook_tqdm
2024-07-09 14:36:37.177174: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


<torch.autograd.anomaly_mode.set_detect_anomaly at 0x720c3ede1990>

In [3]:
# model = BartForConditionalGeneration.from_pretrained("ireneli1024/bart-large-PLOS-finetuned") 
# tokenizer = AutoTokenizer.from_pretrained("ireneli1024/bart-large-PLOS-finetuned")

In [4]:
model = AutoAdapterModel.from_pretrained("/opt/dlami/nvme/no_fusion_layer/knowledge_consolidation").to(device) #AutoAdapterModel.from_pretrained("/opt/dlami/nvme/knowledge_consolidation/checkpoint-7956")
tokenizer = AutoTokenizer.from_pretrained("/opt/dlami/nvme/no_fusion_layer/knowledge_consolidation")

In [5]:
model.delete_head('knowledge_consolidation')
adapter_setup = Fuse("adapter1", "adapter2", "adapter3")    
model.add_seq2seq_lm_head('fine_tunning')
model.train_adapter_fusion([adapter_setup, 'adapter1', 'adapter2', 'adapter3'], unfreeze_adapters=True, train_embeddings=True)

In [6]:
def make_all_layers_trainable(model):
    # Set all parameters to require gradients (trainable)
    for param in model.parameters():
        param.requires_grad = True

# Example usage assuming `model` is your instantiated model
make_all_layers_trainable(model)

In [7]:
def print_parameters(model):
    trainable_params = {name: param for name, param in model.named_parameters() if param.requires_grad}
    untrainable_params = {name: param for name, param in model.named_parameters() if not param.requires_grad}

    print("Trainable Parameters:")
    total_trainable_params = 0
    for name, param in trainable_params.items():
        print(f"{name}: {param.size()}")
        total_trainable_params += param.numel()
    print(f"Total number of trainable parameters: {total_trainable_params}")

    print("\nUntrainable Parameters:")
    total_untrainable_params = 0
    for name, param in untrainable_params.items():
        print(f"{name}: {param.size()}")
        total_untrainable_params += param.numel()
    print(f"Total number of untrainable parameters: {total_untrainable_params}")

# Assuming your model instance is named `model`
print_parameters(model)

Trainable Parameters:
model.shared.weight: torch.Size([50265, 768])
model.encoder.embed_positions.weight: torch.Size([1026, 768])
model.encoder.layers.0.self_attn.k_proj.weight: torch.Size([768, 768])
model.encoder.layers.0.self_attn.k_proj.bias: torch.Size([768])
model.encoder.layers.0.self_attn.v_proj.weight: torch.Size([768, 768])
model.encoder.layers.0.self_attn.v_proj.bias: torch.Size([768])
model.encoder.layers.0.self_attn.q_proj.weight: torch.Size([768, 768])
model.encoder.layers.0.self_attn.q_proj.bias: torch.Size([768])
model.encoder.layers.0.self_attn.out_proj.weight: torch.Size([768, 768])
model.encoder.layers.0.self_attn.out_proj.bias: torch.Size([768])
model.encoder.layers.0.self_attn_layer_norm.weight: torch.Size([768])
model.encoder.layers.0.self_attn_layer_norm.bias: torch.Size([768])
model.encoder.layers.0.fc1.weight: torch.Size([3072, 768])
model.encoder.layers.0.fc1.bias: torch.Size([3072])
model.encoder.layers.0.fc2.weight: torch.Size([768, 3072])
model.encoder.laye

In [3]:
#data_path = "/opt/dlami/nvme/"

# train_df = pd.read_csv(data_path + 'train.csv', usecols = ['input_text', 'target_text'])
# val_df = pd.read_csv(data_path + 'val.csv', usecols = ['input_text', 'target_text'])
# test_df = pd.read_csv(data_path + 'test.csv', usecols = ['input_text', 'target_text'])


df = pd.read_excel("/opt/dlami/nvme/plos_all.xlsx")

def create_dataframe(df, split):
    selected_df = df[df["Split"] == split][["Abstract", "Summary"]].rename(columns={"Abstract": "input_text", "Summary": "target_text"})
    return selected_df

train_df, test_df, val_df = create_dataframe(df, "train"), create_dataframe(df, "test"), create_dataframe(df, "val")

train_dataset, val_dataset, test_dataset = Dataset.from_dict(train_df), Dataset.from_dict(val_df), Dataset.from_dict(test_df)

dataset = DatasetDict({
    "train": train_dataset,
    "test": test_dataset,
    "val": val_dataset
})

In [6]:
dataset['train']['input_text'][0], dataset['train']['target_text'][0]

('Kidney function depends on the nephron , which comprises a blood filter , a tubule that is subdivided into functionally distinct segments , and a collecting duct . How these regions arise during development is poorly understood . The zebrafish pronephros consists of two linear nephrons that develop from the intermediate mesoderm along the length of the trunk . Here we show that , contrary to current dogma , these nephrons possess multiple proximal and distal tubule domains that resemble the organization of the mammalian nephron . We examined whether pronephric segmentation is mediated by retinoic acid ( RA ) and the caudal ( cdx ) transcription factors , which are known regulators of segmental identity during development . Inhibition of RA signaling resulted in a loss of the proximal segments and an expansion of the distal segments , while exogenous RA treatment induced proximal segment fates at the expense of distal fates . Loss of cdx function caused abrogation of distal segments ,

In [11]:
batch_size = 10
max_length = 1024 

def process_data(batch, tokenizer):
    inputs = tokenizer(batch["input_text"], padding="max_length", max_length=max_length, truncation=True)
    outputs = tokenizer(batch["target_text"], padding="max_length", max_length=max_length, truncation=True)
    batch["input_ids"] = inputs.input_ids
    batch["attention_mask"] = inputs.attention_mask
    labels = np.array([[-100 if token == tokenizer.pad_token_id else token for token in label]
                       for label in outputs.input_ids], dtype=np.int64)
    batch["labels"] = torch.tensor(labels)
    return batch

dataset = dataset.map(lambda batch: process_data(batch, tokenizer), batched=True, batch_size=batch_size, remove_columns=['input_text', 'target_text'])
dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

Map:   0%|          | 0/24773 [00:00<?, ? examples/s]

Map: 100%|██████████| 24773/24773 [00:29<00:00, 843.00 examples/s]
Map: 100%|██████████| 1376/1376 [00:01<00:00, 822.20 examples/s]
Map: 100%|██████████| 1376/1376 [00:01<00:00, 848.19 examples/s]


In [13]:
training_args = Seq2SeqTrainingArguments(
    output_dir="/opt/dlami/nvme/no_fusion_layer/fine_tuned",
    per_device_eval_batch_size=8,
    per_device_train_batch_size=8,
    predict_with_generate=True,
    evaluation_strategy="epoch",
    logging_strategy="epoch",
    do_train=True,
    do_eval=True,
    load_best_model_at_end=True,
    remove_unused_columns=True,
    save_strategy="epoch",
    save_total_limit=1,
    num_train_epochs=10,
    gradient_accumulation_steps=4,
    eval_accumulation_steps=4,
    learning_rate=2e-5,
    bf16=True,
    bf16_full_eval=True,
    optim="adamw_bnb_8bit",
    seed=42,
    report_to="none",
)

# Set Data Collator
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

# Train model
trainer = AdapterTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset['train'],
    eval_dataset=dataset['val'],
    tokenizer=tokenizer,
    data_collator=data_collator
)

trainer.train()
trained_model = trainer.model



Epoch,Training Loss,Validation Loss


KeyboardInterrupt: 

In [44]:
def test_model(model, tokenizer, max_length, test_dataset, num_samples=30):
    model.eval()  # Set the model to evaluation mode
    model.to(device)  # Ensure the model is on the correct device

    decoded_preds = []
    for i, batch in enumerate(tqdm(test_dataset, desc="Testing")):  # Use tqdm to wrap the dataset
        if i >= num_samples:  # Stop after processing num_samples batches
            break
        input_ids = batch["input_ids"].clone().detach().unsqueeze(0).to(device)
        attention_mask = batch["attention_mask"].clone().detach().unsqueeze(0).to(device)

        with torch.no_grad():  
            outputs = model(input_ids=input_ids, attention_mask=attention_mask, max_length=512, 
                            num_beams=2, length_penalty=2, no_repeat_ngram_size=3, early_stopping=True)
            predicted_ids = torch.argmax(outputs.logits, dim=-1)
        
        output = tokenizer.decode(predicted_ids[0], skip_special_tokens=True)
        decoded_preds.append(output)
    return decoded_preds

# Set the device (e.g., "cuda" or "cpu")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_preds = test_model(model, tokenizer, max_length, dataset['test'])

Testing:   0%|          | 0/1376 [00:00<?, ?it/s]There are adapters available but none are activated for the forward pass.
There are adapters available but none are activated for the forward pass.
There are adapters available but none are activated for the forward pass.
Testing:   0%|          | 3/1376 [00:00<01:02, 22.04it/s]There are adapters available but none are activated for the forward pass.
There are adapters available but none are activated for the forward pass.
There are adapters available but none are activated for the forward pass.
Testing:   0%|          | 6/1376 [00:00<01:05, 20.94it/s]There are adapters available but none are activated for the forward pass.
There are adapters available but none are activated for the forward pass.
There are adapters available but none are activated for the forward pass.
Testing:   1%|          | 9/1376 [00:00<01:05, 20.86it/s]There are adapters available but none are activated for the forward pass.
There are adapters available but none ar

In [45]:
# Print the first 30 predictions
for i, pred in enumerate(model_preds):
    print(pred)

Infal epidemics of influenza virus result in approximately36, 000 deaths annually in the United States. Current vaccines against influenza virus elicit an antibody response specific for the envelope glycoproteins. However, high mutation rates result in the emergence of new viral serotypes, which elude neutralization by preexisting antibodies. T lymphocytes have been reported to be capable of mediating heterosubtypic protection through recognition of internal, more conserved, influenza virus proteins. Here, we demonstrate using a recombinant influenza virus expressing the epitMV GP33-41 epitope that influenza virus-specific antibodies8+ cells cells and virus-specific non-neutralizing antibodies each are relatively ineffective at conferring heterosubtypic protection immunity alone. However, when combined virus-specific CD8+ cells and non-neutralizing antibodies cooperatively elicit robust protective immunity, synergistic improvement in protective immunity is dependent, at least in part, 

In [28]:
# with open('plos_ireneli1024_bart-large-finetuned_model_preds.txt', 'w') as file:
#   for string in model_preds:
#     file.write(string + "\n")

In [10]:
import torch

# Before clearing cache
print(torch.cuda.memory_allocated())  # Print current memory allocated

torch.cuda.empty_cache()  # Clear CUDA cache

# After clearing cache
print(torch.cuda.memory_allocated())  # Print current memory allocated again

0
0


Base Model finetuning

In [46]:
model = AutoModelForSeq2SeqLM.from_pretrained("mse30/bart-base-finetuned-pubmed", gradient_checkpointing=True, use_cache=False) 
tokenizer = AutoTokenizer.from_pretrained("mse30/bart-base-finetuned-pubmed")

In [47]:
model.config.num_beams = 2
model.config.max_length = 512
model.config.min_length = 100
model.config.length_penalty = 2.0
model.config.early_stopping = True
model.config.no_repeat_ngram_size = 3

In [52]:
from datasets import load_dataset, load_metric
rouge = load_metric("rouge")

def compute_metrics(pred):
    labels_ids = pred.label_ids
    pred_ids = pred.predictions

    pred_str = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
    labels_ids[labels_ids == -100] = tokenizer.pad_token_id
    label_str = tokenizer.batch_decode(labels_ids, skip_special_tokens=True)

    rouge_output = rouge.compute(
        predictions=pred_str, references=label_str, rouge_types=["rouge2"]
    )["rouge2"].mid

    return {
        "rouge2_precision": round(rouge_output.precision, 4),
        "rouge2_recall": round(rouge_output.recall, 4),
        "rouge2_fmeasure": round(rouge_output.fmeasure, 4),
    }

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.
