In [None]:
# First, install the necessary libraries.
%pip install peft==0.4.0 datasets transformers




In [None]:

# Import necessary modules.
import os
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
import transformers
from peft import LoraConfig, get_peft_model


In [None]:


# Model and Tokenizer loading.
model_name = "gpt2"  # GPT-2 base model
tokenizer = AutoTokenizer.from_pretrained(model_name)
foundation_model = AutoModelForCausalLM.from_pretrained(model_name)

# Load CNN/Daily Mail dataset (which contains both articles and highlights).
data = load_dataset("cnn_dailymail", "3.0.0", cache_dir="./cache")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/15.6k [00:00<?, ?B/s]

train-00000-of-00003.parquet:   0%|          | 0.00/257M [00:00<?, ?B/s]

train-00001-of-00003.parquet:   0%|          | 0.00/257M [00:00<?, ?B/s]

train-00002-of-00003.parquet:   0%|          | 0.00/259M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/34.7M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/30.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/287113 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/13368 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/11490 [00:00<?, ? examples/s]

In [None]:
# Select only a subset of the dataset (e.g., first 500 samples for faster experimentation).
subset_data = data["train"].select(range(1000))

# Set the pad_token to eos_token (GPT-2 doesn’t have a pad_token by default).
tokenizer.pad_token = tokenizer.eos_token  # Use the eos_token as the pad_token

# Tokenize both the article (input) and highlights (target) for the summarization task.
def preprocess_function(examples):
    # Tokenize article and highlight
    model_inputs = tokenizer(examples["article"], max_length=512, truncation=True, padding="max_length")
    with tokenizer.as_target_tokenizer():
        labels = tokenizer(examples["highlights"], max_length=128, truncation=True, padding="max_length")
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

# Apply tokenization to the subset of data (only 500 samples).
tokenized_data = subset_data.map(preprocess_function, batched=True)

# Take a subset of the data for faster training/testing (you can adjust this as needed).
train_sample = tokenized_data # Using 500 samples for faster experimentation

# LoRA configuration.
lora_config = LoraConfig(
    r=4,  # Rank of the low-rank matrices
    lora_alpha=32,  # Scaling factor for the LoRA update
    target_modules=["c_attn"],  # The attention layer to apply LoRA
    lora_dropout=0.1,  # Dropout applied to the LoRA layers
    bias="none",  # Not training the bias terms
    task_type="CAUSAL_LM"  # Language modeling task
)


Map:   0%|          | 0/1000 [00:00<?, ? examples/s]



In [None]:

# Apply LoRA to the model.
peft_model = get_peft_model(foundation_model, lora_config)

# Print the trainable parameters to verify LoRA is applied.
peft_model.print_trainable_parameters()

# Define the output directory where the fine-tuned model will be saved.
output_directory = "./peft_gpt2_cnn_dailymail"

# Define the training arguments.
training_args = TrainingArguments(
    output_dir=output_directory,
    evaluation_strategy="no",  # Evaluation during training at every x steps
    eval_steps=50,
    logging_steps=50,
    save_steps=50,
    learning_rate=3e-4,  # Learning rate suitable for LoRA fine-tuning
    per_device_train_batch_size=4,
    num_train_epochs=3,  # Adjust based on your resources
    weight_decay=0.01,
    save_total_limit=2,
    report_to="none",  # Disables reporting to services like WandB
    no_cuda=False  # Set to True if you're not using a GPU
)


trainable params: 147,456 || all params: 124,587,264 || trainable%: 0.11835559692522023




In [None]:

# Initialize the Trainer.
trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=train_sample,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)

# Start training the LoRA-adapted GPT-2 model.
trainer.train()



Step,Training Loss
50,3.1559
100,3.0738
150,3.0575
200,3.003
250,3.0446
300,3.0378
350,3.0256
400,3.0156
450,2.9892
500,2.9816


KeyboardInterrupt: 

In [19]:
# Save the fine-tuned model.
peft_model.save_pretrained(output_directory)

# Load the fine-tuned model for inference.
from peft import PeftModel

# Load the model for inference.
loaded_model = PeftModel.from_pretrained(foundation_model, output_directory, is_trainable=False)
# Move the model to CUDA (GPU).
loaded_model = loaded_model.to("cuda")

# Input text for summarization
input_text = '''Ensure that the submitted assignment is your original work. Please do
not copy any part from any source, including your friends, seniors, or the
internet. If any such attempt is caught, serious actions, including an F
grade in the course, are possible.'''
inputs = tokenizer(input_text, return_tensors="pt")

# Move the input tensors to the same device (CUDA).
inputs = {key: value.to("cuda") for key, value in inputs.items()}

# Generate a summary using the fine-tuned model.
outputs = loaded_model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_new_tokens=50,  # Limit on new tokens to generate
    eos_token_id=tokenizer.eos_token_id
)

# Decode the output and print the generated summary.
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Ensure that the submitted assignment is your original work. Please do
not copy any part from any source, including your friends, seniors, or the
internet. If any such attempt is caught, serious actions, including an F
grade in the course, are possible. If you are not sure whether you have been approved for a assignment, please contact

the Fgrade Program Office at (202) 522-5200.

If you are not sure whether you have been approved for a assignment, please


In [21]:
!pip install rouge-score

Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24935 sha256=4264dafff6a0866ea4f9d4ff1044af846658fd43644787194f4730d639b065d6
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge-score
Installing collected packages: rouge-score
Successfully installed rouge-score-0.1.2


In [24]:
from peft import PeftModel
from rouge_score import rouge_scorer

# Save the fine-tuned model.
peft_model.save_pretrained(output_directory)

# Load the fine-tuned model for inference.
loaded_model = PeftModel.from_pretrained(foundation_model, output_directory, is_trainable=False)
# Move the model to CUDA (GPU).
loaded_model = loaded_model.to("cuda")

# Input text for summarization
input_text = '''Ensure that the submitted assignment is your original work. Please do
not copy any part from any source, including your friends, seniors, or the
internet. If any such attempt is caught, serious actions, including an F
grade in the course, are possible.'''

# Reference summary (ground truth)
reference_summary = '''Your submitted assignment must be original work. Do not copy from any sources, including peers or online. Serious penalties, including an F grade, will apply for any violations.'''

# Tokenization
inputs = tokenizer(input_text, return_tensors="pt")
# Move the input tensors to the same device (CUDA).
inputs = {key: value.to("cuda") for key, value in inputs.items()}

# Generate a summary using the fine-tuned model.
outputs = loaded_model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_new_tokens=50,  # Limit on new tokens to generate
    eos_token_id=tokenizer.eos_token_id
)

# Decode the output and print the generated summary.
generated_summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("Generated Summary:", generated_summary)

# Calculate ROUGE scores
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
scores = scorer.score(reference_summary, generated_summary)

# Print the ROUGE scores
print("ROUGE Scores:")
print("ROUGE-1:", scores['rouge1'])
print("ROUGE-2:", scores['rouge2'])
print("ROUGE-L:", scores['rougeL'])

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Generated Summary: Ensure that the submitted assignment is your original work. Please do
not copy any part from any source, including your friends, seniors, or the
internet. If any such attempt is caught, serious actions, including an F
grade in the course, are possible. If you are not sure whether you have been approved for a assignment, please contact

the Fgrade Program Office at (202) 522-5200.

If you are not sure whether you have been approved for a assignment, please
ROUGE Scores:
ROUGE-1: Score(precision=0.25316455696202533, recall=0.7142857142857143, fmeasure=0.3738317757009346)
ROUGE-2: Score(precision=0.1282051282051282, recall=0.37037037037037035, fmeasure=0.19047619047619047)
ROUGE-L: Score(precision=0.22784810126582278, recall=0.6428571428571429, fmeasure=0.33644859813084116)


In [None]:
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import LoraConfig, get_peft_model
from datasets import load_dataset

device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = AutoModelForCausalLM.from_pretrained(
    "gpt2",
    device_map='auto',
)
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

# FREEZE WEIGHTS
for param in model.parameters():
    param.requires_grad = False

# LoRA configuration
config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, config)

# LOAD AND STRUCTURE DATA
dataset = load_dataset("cnn_dailymail", '3.0.0')

# Calculate 1% of the dataset for each split
data = {split: ds.shuffle(seed=42).select(range(int(len(ds) * 0.01))) for split, ds in dataset.items()}
# Preprocess function to combine article and highlights
def preprocess_function(examples):
    inputs = tokenizer(examples['article'], truncation=True, padding="max_length", return_tensors="pt")
    labels = tokenizer(examples['highlights'], truncation=True, padding="max_length", return_tensors="pt").input_ids
    return {
        'input_ids': inputs.input_ids.squeeze(), 
        'attention_mask': inputs.attention_mask.squeeze(),
        'labels': labels.squeeze()
    }

# Apply preprocessing to each split
data = {split: dataset.map(preprocess_function, batched=True, remove_columns=["article", "highlights"])
        for split, dataset in data.items()}

# Print trainable parameters
def print_trainable_parameters(model):
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param:.2f}%"
    )

print_trainable_parameters(model)

# TRAINING
trainer = transformers.Trainer(
    model=model,
    train_dataset=data['train'],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=4,
        gradient_accumulation_steps=4,
        warmup_steps=100,
        max_steps=500,
        learning_rate=2e-4,
        logging_steps=1,
        output_dir='outputs',
        auto_find_batch_size=True
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)

model.config.use_cache = False

trainer.train()

torch.save(model.state_dict(), 'lora.pt')
