# Install necessary packages:

In [None]:
%%capture

#!pip install transformers==4.32.1
!pip install git+https://github.com/huggingface/transformers
!pip install datasets==2.14.4
!pip install peft==0.5.0
!pip install bitsandbytes==0.41.1
!pip install trl==0.7.1
!pip install rouge_score
!pip install accelerate

# Our imports:

In [None]:
import json
import re
import torch
import random
import time
import warnings

import pandas as pd

from datasets import load_dataset, load_metric
from huggingface_hub import login
from peft import PeftModel, PeftConfig, LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, BitsAndBytesConfig
from functools import partial
from pprint import pprint
from trl import SFTTrainer


warnings.filterwarnings(action="ignore")

login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

# Some important variables:

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Hugging Face Hub id of the base causal language model loaded below.
MODEL_NAME = "stabilityai/stablelm-3b-4e1t"
# Second argument of load_dataset(): the configuration selected inside DATASET_ADDRESS.
DATASET_NAME = "TweetSumm"
# First argument of load_dataset(): the Hub repository that hosts the data.
DATASET_ADDRESS = "Salesforce/dialogstudio"

# Load the dataset:

In [None]:
# Load the TweetSumm configuration of the DialogStudio repository.
dataset = load_dataset(DATASET_ADDRESS, DATASET_NAME)
# Bare expression: displays the splits with their features and row counts.
dataset

Downloading builder script:   0%|          | 0.00/18.3k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/6.63k [00:00<?, ?B/s]

DatasetDict({
    train: Dataset({
        features: ['original dialog id', 'new dialog id', 'dialog index', 'original dialog info', 'log', 'prompt'],
        num_rows: 879
    })
    validation: Dataset({
        features: ['original dialog id', 'new dialog id', 'dialog index', 'original dialog info', 'log', 'prompt'],
        num_rows: 110
    })
    test: Dataset({
        features: ['original dialog id', 'new dialog id', 'dialog index', 'original dialog info', 'log', 'prompt'],
        num_rows: 110
    })
})

# Helper functions for pre-processing and prompt generation:

In [None]:
def generate_inference_query(conversation):
    """Build the summarization prompt used at inference time.

    The prompt is the instruction sentence, a blank line, the
    whitespace-trimmed conversation, another blank line and a bare
    ``Summary:`` cue that the model is expected to complete.
    """
    instruction = "Below is a conversation between a human and an AI agent. Summarize the following conversation."
    prompt = "\n\n".join([instruction, conversation.strip(), "Summary: "])
    # Stripping the assembled prompt removes the space that follows "Summary:".
    return prompt.strip()

def generate_training_query(conversation, summary):
    """Build the full training text: the inference prompt plus the reference summary.

    The layout is the instruction sentence, a blank line, the
    whitespace-trimmed conversation, a blank line and ``Summary: <summary>``.
    The assembled text is stripped, so trailing whitespace of ``summary`` is
    dropped.
    """
    instruction = "Below is a conversation between a human and an AI agent. Summarize the following conversation."
    answer = f"Summary: {summary}"
    prompt = instruction + "\n\n" + conversation.strip() + "\n\n" + answer
    return prompt.strip()

def preprocess_text(x):
    """Clean one raw utterance.

    Removes URLs (``http...``), ``@`` mentions and caret-prefixed tokens such
    as ``^AB`` (presumably agent initials - verify against the data), then
    collapses every run of whitespace into a single space.

    Whitespace is normalized *after* all removals so that deleting a token in
    the middle of a sentence cannot leave a double space behind. Leading or
    trailing spaces are not stripped here; callers do that themselves.

    Args:
        x: The raw utterance text.

    Returns:
        The cleaned text.
    """
    x = re.sub(r"http\S+", "", x)
    x = re.sub(r"@\S+", "", x)
    # \S+ (rather than "[^ ]+") so the match stops at any whitespace, which
    # matters now that newlines and tabs have not been collapsed yet.
    x = re.sub(r"\^\S+", "", x)
    x = re.sub(r"\s+", " ", x)
    return x

def create_conversation(x):
    """Render the dialogue log of one example as plain text.

    Every entry of ``x["log"]`` contributes two lines, ``user: ...`` followed
    by ``agent: ...``, each cleaned with ``preprocess_text``, stripped and
    terminated by a newline. An empty log yields an empty string.
    """
    lines = []

    for turn in x["log"]:
        user_text = preprocess_text(turn["user utterance"]).strip()
        lines.append(f"user: {user_text}\n")

        agent_text = preprocess_text(turn["system response"]).strip()
        lines.append(f"agent: {agent_text}\n")

    return "".join(lines)

def generate_query(x, is_training):
    """Turn one raw dataset row into the columns used by this notebook.

    The reference summary is the first entry of ``abstractive_summaries``
    inside the JSON-encoded "original dialog info" field, with its items
    joined by single spaces (presumably a list of sentences - verify).

    Returns a dict with the rendered ``conversation``, the ``summary`` and the
    ``query``: the training text (prompt plus summary) when ``is_training`` is
    true, otherwise the inference prompt that ends at ``Summary:``.
    """
    dialog_info = json.loads(x["original dialog info"])
    first_summary = dialog_info["summaries"]["abstractive_summaries"][0]
    summary = " ".join(first_summary)

    conversation = create_conversation(x)

    if is_training:
        query = generate_training_query(conversation, summary)
    else:
        query = generate_inference_query(conversation)

    return {
        "conversation": conversation,
        "summary": summary,
        "query": query,
    }

def process_dataset(data, is_training):
    """Shuffle a split, add the derived columns and drop the raw ones.

    ``data`` is shuffled with a fixed seed (42), mapped through
    ``generate_query`` with the given ``is_training`` flag, and stripped of
    the original columns, leaving only what ``generate_query`` produced.
    """
    raw_columns = [
        "original dialog id",
        "new dialog id",
        "dialog index",
        "original dialog info",
        "log",
        "prompt",
    ]
    build_row = partial(generate_query, is_training=is_training)

    shuffled = data.shuffle(seed=42)
    with_queries = shuffled.map(build_row)
    return with_queries.remove_columns(raw_columns)

# Pre-process the dataset:

In [None]:
# Replace every split with its processed version (columns: conversation, summary, query).
# Train and validation queries contain the reference summary; test queries stop at "Summary:".
# NOTE(review): the splits are overwritten in place, so re-running this cell
# presumably fails because the raw columns are already gone - confirm.
dataset["train"] = process_dataset(dataset["train"], is_training=True)
dataset["validation"] = process_dataset(dataset["validation"], is_training=True)
dataset["test"] = process_dataset(dataset["test"], is_training=False)

# Bare expression: displays the processed splits.
dataset

DatasetDict({
    train: Dataset({
        features: ['conversation', 'summary', 'query'],
        num_rows: 879
    })
    validation: Dataset({
        features: ['conversation', 'summary', 'query'],
        num_rows: 110
    })
    test: Dataset({
        features: ['conversation', 'summary', 'query'],
        num_rows: 110
    })
})

# An example from the dataset:

In [None]:
# Show the cleaned conversation of the first (shuffled) training example.
print(dataset["train"][0]["conversation"])

user: Do you have a plan to notify passengers well in advance of pilot related cancellations or just wait til the day before? Will you protect passengers on other airlines if flights are cancelled b/c of pilot shortages?
agent: We're planning to fly as scheduled, Shaun.
user: HOW ABOUT ANSWERING MY QUESTION. I'm asking if you do not get enough pilots to fly, which is a possibility, do you have a contingency plan in place on how to get customers to their destinations &amp; when will it be relayed to customers. THE DAY BEFORE WILL NOT BE ACCEPTABLE!
agent: Our team is working hard to avoid cancellations and you'll be notified if otherwise.
user: Your reading comprehension is terrible. WHEN WILL WE BE NOTIFIED? 3 hours b4 our flight so all other flights r sold out? Instead of Doug Parker making comments like "I don't think we're ever going to lose money again," he should b assuring customers we're getting home 4 XMAS
agent: As of now, flights are scheduled and we expect to avoid cancellat

In [None]:
# Show the reference summary of the same training example.
print(dataset["train"][0]["summary"])

The customer is complaining that what will you do if there are no enough pilots to fly. The agent answered that as of now flights are scheduled and they have avoiding cancellations.


In [None]:
# Show the full training text (instruction, conversation and summary) for that example.
print(dataset["train"][0]["query"])

Below is a conversation between a human and an AI agent. Summarize the following conversation.

user: Do you have a plan to notify passengers well in advance of pilot related cancellations or just wait til the day before? Will you protect passengers on other airlines if flights are cancelled b/c of pilot shortages?
agent: We're planning to fly as scheduled, Shaun.
user: HOW ABOUT ANSWERING MY QUESTION. I'm asking if you do not get enough pilots to fly, which is a possibility, do you have a contingency plan in place on how to get customers to their destinations &amp; when will it be relayed to customers. THE DAY BEFORE WILL NOT BE ACCEPTABLE!
agent: Our team is working hard to avoid cancellations and you'll be notified if otherwise.
user: Your reading comprehension is terrible. WHEN WILL WE BE NOTIFIED? 3 hours b4 our flight so all other flights r sold out? Instead of Doug Parker making comments like "I don't think we're ever going to lose money again," he should b assuring customers we

In [None]:
# Show the cleaned conversation of the first (shuffled) test example.
print(dataset["test"][0]["conversation"])

user: looking to change my flight Friday, Oct 27. GRMSKV to DL4728 from SLC to ORD. Is that an option and what is the cost? Jess
agent: The difference in fare is $185.30. This would include all airport taxes and fees. The ticket is non-refundable changeable with a fee, *ALS and may result in additional fare collection for changes when making a future changes. *ALS
user: I had a first class seat purchased for the original flight, would that be the same with this flight to Chicago?
agent: Hello, Jess. That is the fare difference. You will have to call us at 1 800 221 1212 to make any changes. It is in First class. *TAY
user: thx
agent: Our pleasure. *ALS
user: Do I have to call or is there a means to do this online?
agent: You can call or you can login to your trip on our website to make changes. *TJE



In [None]:
# Show the reference summary of the same test example.
print(dataset["test"][0]["summary"])

Customer is looking to change the flight on Friday Oct 27 is that an option and asking about cost. Agent replying that there is an difference in fare and this would include all airport taxes and fees and ticket is non refundable changeable with a fee.


In [None]:
# Show the inference prompt for that example; it ends at "Summary:" without the answer.
print(dataset["test"][0]["query"])

Below is a conversation between a human and an AI agent. Summarize the following conversation.

user: looking to change my flight Friday, Oct 27. GRMSKV to DL4728 from SLC to ORD. Is that an option and what is the cost? Jess
agent: The difference in fare is $185.30. This would include all airport taxes and fees. The ticket is non-refundable changeable with a fee, *ALS and may result in additional fare collection for changes when making a future changes. *ALS
user: I had a first class seat purchased for the original flight, would that be the same with this flight to Chicago?
agent: Hello, Jess. That is the fare difference. You will have to call us at 1 800 221 1212 to make any changes. It is in First class. *TAY
user: thx
agent: Our pleasure. *ALS
user: Do I have to call or is there a means to do this online?
agent: You can call or you can login to your trip on our website to make changes. *TJE

Summary:


# Load the tokenizer and the model:

In [None]:
# Quantization settings: 4-bit weights of type NF4, float16 as compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# Load the base model with the quantization config above, from safetensors weights.
# NOTE(review): trust_remote_code=True runs modelling code downloaded from the
# Hub and no revision is pinned (see the warning in the cell output) -
# consider pinning a revision.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    use_safetensors=True,
    quantization_config=bnb_config,
    trust_remote_code=True,
    device_map="auto"
)

# Reuse the end-of-sequence token for padding and pad on the right.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Turn the cache flag off on the model config.
# NOTE(review): presumably meant for training - confirm it is also intended
# for the generation cells that follow.
model.config.use_cache = False

# Bare expression: displays the module tree of the loaded model.
model

config.json:   0%|          | 0.00/787 [00:00<?, ?B/s]

configuration_stablelm_epoch.py:   0%|          | 0.00/5.27k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/stabilityai/stablelm-3b-4e1t:
- configuration_stablelm_epoch.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_stablelm_epoch.py:   0%|          | 0.00/38.3k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/stabilityai/stablelm-3b-4e1t:
- modeling_stablelm_epoch.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors:   0%|          | 0.00/5.59G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/264 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


StableLMEpochForCausalLM(
  (model): StableLMEpochModel(
    (embed_tokens): Embedding(50304, 2560)
    (layers): ModuleList(
      (0-31): 32 x DecoderLayer(
        (self_attn): Attention(
          (q_proj): Linear4bit(in_features=2560, out_features=2560, bias=False)
          (k_proj): Linear4bit(in_features=2560, out_features=2560, bias=False)
          (v_proj): Linear4bit(in_features=2560, out_features=2560, bias=False)
          (o_proj): Linear4bit(in_features=2560, out_features=2560, bias=False)
          (rotary_emb): RotaryEmbedding()
        )
        (mlp): MLP(
          (gate_proj): Linear4bit(in_features=2560, out_features=6912, bias=False)
          (up_proj): Linear4bit(in_features=2560, out_features=6912, bias=False)
          (down_proj): Linear4bit(in_features=6912, out_features=2560, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LayerNorm((2560,), eps=1e-05, elementwise_affine=True)
        (post_attention_layernorm): LayerNorm((2560

# First step: in-context learning with the base model

## First step - In-context learning, zero-shot:

In [None]:
# Zero-shot check on one reproducibly chosen test dialogue.
random.seed(106)

random_sample = random.choice(dataset["test"])

input_ids = tokenizer(random_sample["query"], return_tensors="pt").to(DEVICE)
prompt_length = input_ids["input_ids"].shape[1]

with torch.no_grad():
    output = model.generate(
        **input_ids,
        max_new_tokens=90,
        # Explicit pad token: same value generate() would fall back to, but
        # without emitting a warning.
        pad_token_id=tokenizer.eos_token_id,
    )

# generate() returns the prompt followed by the continuation; decode only the
# continuation so that "Model Summary" does not repeat the whole prompt.
model_summary = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)

print("*" * 66)
print("Original Conversation:\n")
print(random_sample["conversation"])

print("\n")
print("*" * 66)
print("Baseline Summary:\n")
print(random_sample["summary"])

print("\n")
print("*" * 66)
print("Model Summary:\n")
print(model_summary)

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


******************************************************************
Original Conversation:

user: doing the 5K plan on the run app &amp; I’ve consistently done better/longer than the suggestions. This morning it asked to update plan using my new (improved) times &amp; it cut next week’s runs by more than half &amp; slowed my times even more. Why?
agent: Let's figure this out. Did you complete all workouts in your previous week in your Coach plan?
user: Yes. I haven’t missed a workout. For example, today started week 5, I originally had a recovery run of 4.25 miles that was changed to 1.5 miles w/ a 2 min slower time. The mileage total for this week dropped from 12.5 miles to 6.25 after the app updated based on my new data.
agent: Got it. Can you tell us what device, operating system and version of the app you're using?
user: iPhone 7+ Version 11.1.2 NRC App Version: 5.10.0 Thanks!
agent: Thanks for that information. Mind passing us screenshots of what you're seeing?
user: Sorry, screens

### First step - In-context learning, zero-shot - Rouge values:

In [None]:
predictions = []
labels = []

# Generate a zero-shot summary for every test dialogue with the base model.
for i in range(len(dataset["test"])):
    example = dataset["test"][i]  # fetch the row once instead of once per field

    inputs = tokenizer(example["query"], return_tensors="pt").to(DEVICE)
    inputs_length = len(inputs["input_ids"][0])

    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=90,
            # Explicit pad token: same value generate() would fall back to,
            # but without one warning line per example.
            pad_token_id=tokenizer.eos_token_id,
        )

    # Keep only the newly generated tokens (everything after the prompt).
    output = tokenizer.decode(outputs[0][inputs_length:], skip_special_tokens=True)

    predictions.append(output)
    labels.append(example["summary"])

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for o

In [None]:
# Score the generated summaries against the reference summaries with ROUGE.
rouge_metric = load_metric("rouge")
# rouge_types=None keeps the metric's default types (rouge1, rouge2, rougeL, rougeLsum in the output).
rouge_output = rouge_metric.compute(predictions=predictions, references=labels, rouge_types=None)
# Reduce every aggregated score to its mid F-measure, as a percentage rounded to two decimals.
rouge_output = {key: round(value.mid.fmeasure * 100, 2) for key, value in rouge_output.items()}
rouge_output

{'rouge1': 17.13, 'rouge2': 3.43, 'rougeL': 15.35, 'rougeLsum': 12.67}

## First step - In-context learning, one-shot:

In [None]:
# One-shot prompt: one solved training example followed by the test query.
# `random_sample` is the test dialogue picked in the zero-shot cell above.
random.seed(106)

random_idx = random.sample(range(0, len(dataset["train"])), 1)

prompt = ""

prompt += dataset["train"][random_idx[0]]["query"]

prompt += "\n\n\n" + random_sample["query"]

input_ids = tokenizer(prompt, return_tensors="pt").to(DEVICE)
prompt_length = input_ids["input_ids"].shape[1]

with torch.no_grad():
    output = model.generate(
        **input_ids,
        max_new_tokens=50,
        # Explicit pad token: same value generate() would fall back to, but
        # without emitting a warning.
        pad_token_id=tokenizer.eos_token_id,
    )

# generate() returns the prompt followed by the continuation; decode only the
# continuation so that "Model Summary" does not repeat the whole prompt.
model_summary = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)

print("*" * 66)
print("Original Conversation:\n")
print(random_sample["conversation"])

print("\n")
print("*" * 66)
print("Baseline Summary:\n")
print(random_sample["summary"])

print("\n")
print("*" * 66)
print("Model Summary:\n")
print(model_summary)

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


******************************************************************
Original Conversation:

user: doing the 5K plan on the run app &amp; I’ve consistently done better/longer than the suggestions. This morning it asked to update plan using my new (improved) times &amp; it cut next week’s runs by more than half &amp; slowed my times even more. Why?
agent: Let's figure this out. Did you complete all workouts in your previous week in your Coach plan?
user: Yes. I haven’t missed a workout. For example, today started week 5, I originally had a recovery run of 4.25 miles that was changed to 1.5 miles w/ a 2 min slower time. The mileage total for this week dropped from 12.5 miles to 6.25 after the app updated based on my new data.
agent: Got it. Can you tell us what device, operating system and version of the app you're using?
user: iPhone 7+ Version 11.1.2 NRC App Version: 5.10.0 Thanks!
agent: Thanks for that information. Mind passing us screenshots of what you're seeing?
user: Sorry, screens

### First step - In-context learning, one-shot - Rouge values:

In [None]:
predictions = []
labels = []

# Generate a one-shot summary for every test dialogue with the base model.
# NOTE(review): the exemplar draws continue from whatever state the global
# `random` generator is in, so the chosen exemplars depend on which cells ran
# before this one - consider seeding here if exact reproducibility matters.
for i in range(len(dataset["test"])):
    example = dataset["test"][i]  # fetch the row once instead of once per field

    random_idx = random.sample(range(0, len(dataset["train"])), 1)

    prompt = ""

    # One solved training example (conversation plus summary) as demonstration...
    prompt += dataset["train"][random_idx[0]]["query"]

    # ...followed by the unsolved test query.
    prompt += "\n\n\n" + example["query"]

    inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
    inputs_length = len(inputs["input_ids"][0])

    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=50,
            # Explicit pad token: same value generate() would fall back to,
            # but without one warning line per example.
            pad_token_id=tokenizer.eos_token_id,
        )

    # Keep only the newly generated tokens (everything after the prompt).
    output = tokenizer.decode(outputs[0][inputs_length:], skip_special_tokens=True)

    predictions.append(output)
    labels.append(example["summary"])

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for o

In [None]:
# Score the one-shot summaries against the reference summaries with ROUGE.
rouge_metric = load_metric("rouge")
# rouge_types=None keeps the metric's default types (rouge1, rouge2, rougeL, rougeLsum in the output).
rouge_output = rouge_metric.compute(predictions=predictions, references=labels, rouge_types=None)
# Reduce every aggregated score to its mid F-measure, as a percentage rounded to two decimals.
rouge_output = {key: round(value.mid.fmeasure * 100, 2) for key, value in rouge_output.items()}
rouge_output

{'rouge1': 27.39, 'rouge2': 6.17, 'rougeL': 21.03, 'rougeLsum': 21.78}

# Second step: fine-tuning with LoRA

## LoRA == 16:

In [None]:
# The "target_modules" values are inspired by the link below:
# https://stackoverflow.com/questions/76768226/target-modules-for-applying-peft-lora-on-different-models
# They name the four attention projections and the three MLP projections that
# appear in every decoder layer of the printed model.

peft_config = LoraConfig(
    r=16,  # LoRA rank
    lora_alpha=64,
    lora_dropout=0.1,
    target_modules=["q_proj", "up_proj", "o_proj", "k_proj", "down_proj", "gate_proj", "v_proj"],
    bias="none",
    task_type="CAUSAL_LM",
)

In [None]:
# Only used to print the number and percentage of trainable parameters.
# NOTE(review): get_peft_model() presumably injects the adapters into `model`
# itself rather than into a copy, so `model` is no longer the plain base model
# after this cell - confirm.

get_peft_model(model, peft_config).print_trainable_parameters()

trainable params: 25,034,752 || all params: 2,820,477,952 || trainable%: 0.8876067257412122


In [None]:
# Manual count of the parameters LoRA adds when all seven projections of every
# decoder layer are adapted.
target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", "up_proj", "down_proj", "gate_proj"] # lora is applied to these in each layer
lora_rank = 16

peft_model = get_peft_model(model, peft_config)

hidden_size = peft_model.config.hidden_size
# NOTE(review): assumes the config exposes `intermediate_size`
# (the 6912 shown for the MLP projections in the printed model) - confirm.
intermediate_size = peft_model.config.intermediate_size
num_hidden_layers = peft_model.config.num_hidden_layers

# (in_features, out_features) of each projection, as in the printed module tree.
projection_shapes = {
    "q_proj": (hidden_size, hidden_size),
    "k_proj": (hidden_size, hidden_size),
    "v_proj": (hidden_size, hidden_size),
    "o_proj": (hidden_size, hidden_size),
    "gate_proj": (hidden_size, intermediate_size),
    "up_proj": (hidden_size, intermediate_size),
    "down_proj": (intermediate_size, hidden_size),
}

# Every adapted projection gains two low-rank matrices, A (rank x in) and
# B (out x rank), i.e. rank * (in + out) parameters.
added_parameters_per_layer = sum(
    lora_rank * (in_features + out_features)
    for in_features, out_features in (projection_shapes[name] for name in target_modules)
)
# Number of adapted projections in the whole model (informational).
layers_modified = len(target_modules) * num_hidden_layers

# The per-layer amount is added once per decoder layer. For this model that is
# 25,034,752, the figure print_trainable_parameters() reports above.
total_added_parameters = added_parameters_per_layer * num_hidden_layers
print(f"Total added parameters are: {total_added_parameters}")

Total added parameters are: 64225280


In [None]:
# Manual count of the parameters LoRA would add if only the four attention
# projections of every decoder layer were adapted. This is a hypothetical
# comparison: `peft_config` itself still targets all seven projections.
target_modules = ["q_proj", "k_proj", "v_proj", "o_proj"] # lora is applied to these in each layer
lora_rank = 16

peft_model = get_peft_model(model, peft_config)

hidden_size = peft_model.config.hidden_size
num_hidden_layers = peft_model.config.num_hidden_layers

# All four attention projections map hidden_size -> hidden_size (see the
# printed module tree). Each one gains A (rank x in) and B (out x rank),
# i.e. rank * (in + out) parameters.
added_parameters_per_layer = len(target_modules) * lora_rank * (hidden_size + hidden_size)
# Number of adapted projections in the whole model (informational).
layers_modified = len(target_modules) * num_hidden_layers

# The per-layer amount is added once per decoder layer.
total_added_parameters = added_parameters_per_layer * num_hidden_layers
print(f"Total added parameters are: {total_added_parameters}")

Total added parameters are: 20971520


In [None]:
# Hyper-parameters of the rank-16 fine-tuning run.
training_arguments = TrainingArguments(
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,  # 4 x 4 = 16 sequences per optimizer step and device
    group_by_length=True,
    save_safetensors=True,
    # NOTE(review): absolute, Colab-style path that the other training cell of
    # this notebook uses as well, so checkpoints of different runs may
    # overwrite each other - confirm.
    output_dir="/content",
    logging_steps=1,
    learning_rate=1e-4,
    lr_scheduler_type="cosine",
    optim="paged_adamw_32bit",
    max_grad_norm=0.3,
    eval_steps=0.2,  # fraction of the run: the log below shows an evaluation every 33 of 165 steps
    warmup_ratio=0.05,
    num_train_epochs=3,
    save_strategy="epoch",
    evaluation_strategy="steps",
    seed=42,
    fp16=True,
)

# The trainer receives the model together with the LoRA config and reads the
# training text from the "query" column of the train/validation splits.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset["train"],
    eval_dataset=dataset["validation"],
    peft_config=peft_config,
    args=training_arguments,
    tokenizer=tokenizer,
    dataset_text_field="query",
    max_seq_length=4096,
)

In [None]:
# Run the rank-16 fine-tuning; the table below lists training and validation loss per evaluation.
trainer.train()

You're using a GPTNeoXTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss,Validation Loss
33,2.1292,2.175259
66,1.8907,2.145658
99,1.9236,2.131776
132,1.746,2.151092
165,1.6841,2.150991


TrainOutput(global_step=165, training_loss=2.0180282867316044, metrics={'train_runtime': 1871.7244, 'train_samples_per_second': 1.409, 'train_steps_per_second': 0.088, 'total_flos': 7168525457227776.0, 'train_loss': 2.0180282867316044, 'epoch': 3.0})

In [None]:
# Save the trained model next to the notebook.
# NOTE(review): for a PEFT-wrapped model this presumably stores only the adapter weights - confirm.
trainer.model.save_pretrained("./peft-dialogue-summary-lora-rank-16")

### Second step - LoRA == 16 - In-context learning, zero-shot:

In [None]:
# Zero-shot check of the rank-16 fine-tuned model on the same test dialogue
# as before (same seed).
random.seed(106)

random_sample = random.choice(dataset["test"])

input_ids = tokenizer(random_sample["query"], return_tensors="pt").to(DEVICE)
prompt_length = input_ids["input_ids"].shape[1]

with torch.no_grad():
    output = trainer.model.generate(
        **input_ids,
        max_new_tokens=90,
        # Explicit pad token: same value generate() would fall back to, but
        # without emitting a warning.
        pad_token_id=tokenizer.eos_token_id,
    )

# generate() returns the prompt followed by the continuation; decode only the
# continuation so that "Model Summary" does not repeat the whole prompt.
model_summary = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)

print("*" * 66)
print("Original Conversation:\n")
print(random_sample["conversation"])

print("\n")
print("*" * 66)
print("Baseline Summary:\n")
print(random_sample["summary"])

print("\n")
print("*" * 66)
print("Model Summary:\n")
print(model_summary)

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


******************************************************************
Original Conversation:

user: doing the 5K plan on the run app &amp; I’ve consistently done better/longer than the suggestions. This morning it asked to update plan using my new (improved) times &amp; it cut next week’s runs by more than half &amp; slowed my times even more. Why?
agent: Let's figure this out. Did you complete all workouts in your previous week in your Coach plan?
user: Yes. I haven’t missed a workout. For example, today started week 5, I originally had a recovery run of 4.25 miles that was changed to 1.5 miles w/ a 2 min slower time. The mileage total for this week dropped from 12.5 miles to 6.25 after the app updated based on my new data.
agent: Got it. Can you tell us what device, operating system and version of the app you're using?
user: iPhone 7+ Version 11.1.2 NRC App Version: 5.10.0 Thanks!
agent: Thanks for that information. Mind passing us screenshots of what you're seeing?
user: Sorry, screens

### Second step - LoRA == 16 - In-context learning, one-shot:

In [None]:
# One-shot prompt for the rank-16 fine-tuned model: one solved training
# example followed by the test query.
# `random_sample` is the test dialogue picked in the zero-shot cell above.
random.seed(106)

random_idx = random.sample(range(0, len(dataset["train"])), 1)

prompt = ""

prompt += dataset["train"][random_idx[0]]["query"]

prompt += "\n\n\n" + random_sample["query"]

input_ids = tokenizer(prompt, return_tensors="pt").to(DEVICE)
prompt_length = input_ids["input_ids"].shape[1]

with torch.no_grad():
    output = trainer.model.generate(
        **input_ids,
        max_new_tokens=50,
        # Explicit pad token: same value generate() would fall back to, but
        # without emitting a warning.
        pad_token_id=tokenizer.eos_token_id,
    )

# generate() returns the prompt followed by the continuation; decode only the
# continuation so that "Model Summary" does not repeat the whole prompt.
model_summary = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)

print("*" * 66)
print("Original Conversation:\n")
print(random_sample["conversation"])

print("\n")
print("*" * 66)
print("Baseline Summary:\n")
print(random_sample["summary"])

print("\n")
print("*" * 66)
print("Model Summary:\n")
print(model_summary)

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


******************************************************************
Original Conversation:

user: doing the 5K plan on the run app &amp; I’ve consistently done better/longer than the suggestions. This morning it asked to update plan using my new (improved) times &amp; it cut next week’s runs by more than half &amp; slowed my times even more. Why?
agent: Let's figure this out. Did you complete all workouts in your previous week in your Coach plan?
user: Yes. I haven’t missed a workout. For example, today started week 5, I originally had a recovery run of 4.25 miles that was changed to 1.5 miles w/ a 2 min slower time. The mileage total for this week dropped from 12.5 miles to 6.25 after the app updated based on my new data.
agent: Got it. Can you tell us what device, operating system and version of the app you're using?
user: iPhone 7+ Version 11.1.2 NRC App Version: 5.10.0 Thanks!
agent: Thanks for that information. Mind passing us screenshots of what you're seeing?
user: Sorry, screens

### Second step - LoRA == 16 - In-context learning, zero-shot - Rouge values:

In [None]:
predictions = []
labels = []

# Generate a zero-shot summary for every test dialogue with the rank-16
# fine-tuned model. `trainer.model` is used, as in the sample cells of this
# section, so it is explicit that the fine-tuned model is being scored.
for i in range(len(dataset["test"])):
    example = dataset["test"][i]  # fetch the row once instead of once per field

    inputs = tokenizer(example["query"], return_tensors="pt").to(DEVICE)
    inputs_length = len(inputs["input_ids"][0])

    with torch.inference_mode():
        outputs = trainer.model.generate(
            **inputs,
            max_new_tokens=90,
            # Explicit pad token: same value generate() would fall back to,
            # but without one warning line per example.
            pad_token_id=tokenizer.eos_token_id,
        )

    # Keep only the newly generated tokens (everything after the prompt).
    output = tokenizer.decode(outputs[0][inputs_length:], skip_special_tokens=True)

    predictions.append(output)
    labels.append(example["summary"])

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for o

In [None]:
# Score the fine-tuned model's summaries against the reference summaries with ROUGE.
rouge_metric = load_metric("rouge")
# rouge_types=None keeps the metric's default types (rouge1, rouge2, rougeL, rougeLsum in the output).
rouge_output = rouge_metric.compute(predictions=predictions, references=labels, rouge_types=None)
# Reduce every aggregated score to its mid F-measure, as a percentage rounded to two decimals.
rouge_output = {key: round(value.mid.fmeasure * 100, 2) for key, value in rouge_output.items()}
rouge_output

{'rouge1': 29.94, 'rouge2': 11.66, 'rougeL': 24.79, 'rougeLsum': 23.29}

## LoRA == 32:

In [None]:
# The "target_modules" values are inspired by the link below:
# https://stackoverflow.com/questions/76768226/target-modules-for-applying-peft-lora-on-different-models
# They name the four attention projections and the three MLP projections that
# appear in every decoder layer of the printed model.

peft_config = LoraConfig(
    r=32,  # LoRA rank, doubled relative to the previous experiment
    lora_alpha=64,  # unchanged from the rank-16 configuration
    lora_dropout=0.1,
    target_modules=["q_proj", "up_proj", "o_proj", "k_proj", "down_proj", "gate_proj", "v_proj"],
    bias="none",
    task_type="CAUSAL_LM",
)

In [None]:
# Only used to print the number and percentage of trainable parameters.
# NOTE(review): `model` was already wrapped and trained with the rank-16
# adapter earlier in this notebook; calling get_peft_model() on it again
# presumably replaces that adapter in place - confirm.

get_peft_model(model, peft_config).print_trainable_parameters()

trainable params: 50,069,504 || all params: 2,845,512,704 || trainable%: 1.7595951664392921


In [None]:
# Manual count of the parameters LoRA adds when all seven projections of every
# decoder layer are adapted.
target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", "up_proj", "down_proj", "gate_proj"] # lora is applied to these in each layer
lora_rank = 32

peft_model = get_peft_model(model, peft_config)

hidden_size = peft_model.config.hidden_size
# NOTE(review): assumes the config exposes `intermediate_size`
# (the 6912 shown for the MLP projections in the printed model) - confirm.
intermediate_size = peft_model.config.intermediate_size
num_hidden_layers = peft_model.config.num_hidden_layers

# (in_features, out_features) of each projection, as in the printed module tree.
projection_shapes = {
    "q_proj": (hidden_size, hidden_size),
    "k_proj": (hidden_size, hidden_size),
    "v_proj": (hidden_size, hidden_size),
    "o_proj": (hidden_size, hidden_size),
    "gate_proj": (hidden_size, intermediate_size),
    "up_proj": (hidden_size, intermediate_size),
    "down_proj": (intermediate_size, hidden_size),
}

# Every adapted projection gains two low-rank matrices, A (rank x in) and
# B (out x rank), i.e. rank * (in + out) parameters.
added_parameters_per_layer = sum(
    lora_rank * (in_features + out_features)
    for in_features, out_features in (projection_shapes[name] for name in target_modules)
)
# Number of adapted projections in the whole model (informational).
layers_modified = len(target_modules) * num_hidden_layers

# The per-layer amount is added once per decoder layer. For this model that is
# 50,069,504, the figure print_trainable_parameters() reports above.
total_added_parameters = added_parameters_per_layer * num_hidden_layers
print(f"Total added parameters are: {total_added_parameters}")

Total added parameters are: 128450560


In [None]:
# Manual count of the parameters LoRA would add if only the four attention
# projections of every decoder layer were adapted. This is a hypothetical
# comparison: `peft_config` itself still targets all seven projections.
target_modules = ["q_proj", "k_proj", "v_proj", "o_proj"] # lora is applied to these in each layer
lora_rank = 32

peft_model = get_peft_model(model, peft_config)

hidden_size = peft_model.config.hidden_size
num_hidden_layers = peft_model.config.num_hidden_layers

# All four attention projections map hidden_size -> hidden_size (see the
# printed module tree). Each one gains A (rank x in) and B (out x rank),
# i.e. rank * (in + out) parameters.
added_parameters_per_layer = len(target_modules) * lora_rank * (hidden_size + hidden_size)
# Number of adapted projections in the whole model (informational).
layers_modified = len(target_modules) * num_hidden_layers

# The per-layer amount is added once per decoder layer.
total_added_parameters = added_parameters_per_layer * num_hidden_layers
print(f"Total added parameters are: {total_added_parameters}")

Total added parameters are: 41943040


In [None]:
# Hyper-parameters of the rank-32 fine-tuning run (same values as the rank-16 run).
training_arguments = TrainingArguments(
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,  # 4 x 4 = 16 sequences per optimizer step and device
    group_by_length=True,
    save_safetensors=True,
    # NOTE(review): absolute, Colab-style path that the rank-16 training cell
    # uses as well, so this run's checkpoints may overwrite the earlier ones -
    # confirm.
    output_dir="/content",
    logging_steps=1,
    learning_rate=1e-4,
    lr_scheduler_type="cosine",
    optim="paged_adamw_32bit",
    max_grad_norm=0.3,
    eval_steps=0.2,  # fraction of the run: the log below shows an evaluation every 33 of 165 steps
    warmup_ratio=0.05,
    num_train_epochs=3,
    save_strategy="epoch",
    evaluation_strategy="steps",
    seed=42,
    fp16=True,
)

# The trainer receives the model together with the rank-32 LoRA config and
# reads the training text from the "query" column of the train/validation splits.
# NOTE(review): `model` is the object that already went through the rank-16
# run; whether this run starts from clean base weights depends on how PEFT
# handles re-wrapping - confirm.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset["train"],
    eval_dataset=dataset["validation"],
    peft_config=peft_config,
    args=training_arguments,
    tokenizer=tokenizer,
    dataset_text_field="query",
    max_seq_length=4096,
)

In [None]:
# Run the rank-32 fine-tuning; the table below lists training and validation loss per evaluation.
trainer.train()

Step,Training Loss,Validation Loss
33,2.1302,2.175769
66,1.8871,2.145559
99,1.9196,2.13068
132,1.7374,2.150277
165,1.7016,2.15002


TrainOutput(global_step=165, training_loss=2.0175931229735866, metrics={'train_runtime': 1887.8369, 'train_samples_per_second': 1.397, 'train_steps_per_second': 0.087, 'total_flos': 7294647733911552.0, 'train_loss': 2.0175931229735866, 'epoch': 3.0})

In [None]:
# Save the trained model next to the notebook.
# NOTE(review): for a PEFT-wrapped model this presumably stores only the adapter weights - confirm.
trainer.model.save_pretrained("./peft-dialogue-summary-lora-rank-32")

### Second step - LoRA == 32 - In-context learning, zero-shot:

In [None]:
# Zero-shot check of the rank-32 fine-tuned model on the same test dialogue
# as before (same seed).
random.seed(106)

random_sample = random.choice(dataset["test"])

input_ids = tokenizer(random_sample["query"], return_tensors="pt").to(DEVICE)
prompt_length = input_ids["input_ids"].shape[1]

with torch.no_grad():
    # `trainer.model` (as in the rank-16 cells) makes it explicit that the
    # model fine-tuned by the current trainer is the one being sampled.
    output = trainer.model.generate(
        **input_ids,
        max_new_tokens=90,
        # Explicit pad token: same value generate() would fall back to, but
        # without emitting a warning.
        pad_token_id=tokenizer.eos_token_id,
    )

# generate() returns the prompt followed by the continuation; decode only the
# continuation so that "Model Summary" does not repeat the whole prompt.
model_summary = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)

print("*" * 66)
print("Original Conversation:\n")
print(random_sample["conversation"])

print("\n")
print("*" * 66)
print("Baseline Summary:\n")
print(random_sample["summary"])

print("\n")
print("*" * 66)
print("Model Summary:\n")
print(model_summary)

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


******************************************************************
Original Conversation:

user: doing the 5K plan on the run app &amp; I’ve consistently done better/longer than the suggestions. This morning it asked to update plan using my new (improved) times &amp; it cut next week’s runs by more than half &amp; slowed my times even more. Why?
agent: Let's figure this out. Did you complete all workouts in your previous week in your Coach plan?
user: Yes. I haven’t missed a workout. For example, today started week 5, I originally had a recovery run of 4.25 miles that was changed to 1.5 miles w/ a 2 min slower time. The mileage total for this week dropped from 12.5 miles to 6.25 after the app updated based on my new data.
agent: Got it. Can you tell us what device, operating system and version of the app you're using?
user: iPhone 7+ Version 11.1.2 NRC App Version: 5.10.0 Thanks!
agent: Thanks for that information. Mind passing us screenshots of what you're seeing?
user: Sorry, screens

### Second step - LoRA == 32 - In-context learning, one-shot:

In [None]:
# One-shot demo: prepend one randomly chosen training example to the test query
# selected in the zero-shot cell (`random_sample`) and print the model's summary.
random.seed(106)

random_idx = random.sample(range(0, len(dataset["train"])), 1)

# Prompt = one training example, a blank-line separator, then the test query.
# NOTE(review): this assumes the train "query" already contains the example's
# summary; otherwise it is not a worked example. Verify against the prompt helper.
prompt = dataset["train"][random_idx[0]]["query"] + "\n\n\n" + random_sample["query"]

input_ids = tokenizer(prompt, return_tensors="pt").to(DEVICE)
# Number of prompt tokens; used to cut the prompt off the generated sequence.
prompt_length = input_ids["input_ids"].shape[1]

with torch.no_grad():
    output = model.generate(
        **input_ids,
        # NOTE(review): the zero-shot and ROUGE cells use 90 new tokens; confirm
        # that 50 is intentional here.
        max_new_tokens=50,
        # Passed explicitly so generate() does not emit the
        # "Setting `pad_token_id` to `eos_token_id`" warning.
        pad_token_id=tokenizer.eos_token_id,
    )

# Decode only the newly generated tokens (same slicing as the ROUGE cell), so the
# printed summary does not repeat the whole one-shot prompt.
model_summary = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)

print("*" * 66)
print("Original Conversation:\n")
print(random_sample["conversation"])

print("\n")
print("*" * 66)
print("Baseline Summary:\n")
print(random_sample["summary"])

print("\n")
print("*" * 66)
print("Model Summary:\n")
print(model_summary)

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


******************************************************************
Original Conversation:

user: doing the 5K plan on the run app &amp; I’ve consistently done better/longer than the suggestions. This morning it asked to update plan using my new (improved) times &amp; it cut next week’s runs by more than half &amp; slowed my times even more. Why?
agent: Let's figure this out. Did you complete all workouts in your previous week in your Coach plan?
user: Yes. I haven’t missed a workout. For example, today started week 5, I originally had a recovery run of 4.25 miles that was changed to 1.5 miles w/ a 2 min slower time. The mileage total for this week dropped from 12.5 miles to 6.25 after the app updated based on my new data.
agent: Got it. Can you tell us what device, operating system and version of the app you're using?
user: iPhone 7+ Version 11.1.2 NRC App Version: 5.10.0 Thanks!
agent: Thanks for that information. Mind passing us screenshots of what you're seeing?
user: Sorry, screens

### Second step - LoRA == 32 - In-context learning, zero-shot - Rouge values:

In [None]:
# Generate a summary for every test example and collect it with its reference,
# ready for ROUGE scoring in the next cell.
predictions = []
labels = []

# Iterate over the rows directly so each example is fetched once instead of
# being looked up twice via dataset["test"][i].
for example in dataset["test"]:
    inputs = tokenizer(example["query"], return_tensors="pt").to(DEVICE)
    inputs_length = inputs["input_ids"].shape[1]

    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=90,
            # Passed explicitly so generate() does not print one
            # "Setting `pad_token_id` to `eos_token_id`" warning per example.
            pad_token_id=tokenizer.eos_token_id,
        )

    # Keep only the tokens produced after the prompt.
    output = tokenizer.decode(outputs[0][inputs_length:], skip_special_tokens=True)

    predictions.append(output)
    labels.append(example["summary"])

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for o

In [None]:
# Score the generated summaries against the reference summaries with ROUGE.
rouge_metric = load_metric("rouge")
rouge_scores = rouge_metric.compute(predictions=predictions, references=labels, rouge_types=None)

# Reduce each aggregated score to its mid F-measure, as a percentage rounded to
# two decimals.
rouge_output = {}
for rouge_name, aggregate in rouge_scores.items():
    rouge_output[rouge_name] = round(aggregate.mid.fmeasure * 100, 2)
rouge_output

{'rouge1': 30.0, 'rouge2': 12.2, 'rougeL': 24.84, 'rougeLsum': 23.71}