In [1]:
!pip install accelerate datasets evaluate rouge_score peft -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m297.4/297.4 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m21.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.1/199.1 kB[0m [31m14.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m13.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone


In [2]:
import torch
import evaluate

from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, Seq2SeqTrainingArguments, DataCollatorForSeq2Seq, Seq2SeqTrainer
from peft import LoraConfig, TaskType, get_peft_model

import pandas as pd
import numpy as np

# Model/Dataset Initial Setup

In [3]:
device = "cuda:0" if torch.cuda.is_available() else "cpu"
model_name = 'google/flan-t5-base'

dataset = load_dataset('rubertmi00/HealthCoachDataset')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading readme:   0%|          | 0.00/430 [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/744k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/207k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/266 [00:00<?, ? examples/s]

In [4]:
untuned_model = AutoModelForSeq2SeqLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_name)

config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

# Untuned Model Testing

In [5]:
index = 3

input = dataset['test'][index]['input']
output = dataset['test'][index]['output']

print(f'INPUT PROMPT:\n{input}')
print('***********************************')
print(f'EXPECTED RESPONSE:\n{output}')

raw_input_tok_ids = tokenizer(input, return_tensors='pt').input_ids.to(device)
raw_output = untuned_model.generate(raw_input_tok_ids, max_new_tokens=500, do_sample=True)[0]
untuned_model_raw_text = tokenizer.decode(raw_output, skip_special_tokens=True)

print('***********************************')
print(f'MODEL GENERATION - RAW INPUT:\n{untuned_model_raw_text}')

prompt = f"Give advice to the user regarding their following concern about their physical and/or mental health in a couple sentences containing specific and actionable solutions:\n{input}\nAdvice: "
prompt_input_tok_ids = tokenizer(prompt, return_tensors='pt').input_ids.to(device)
prompt_output = untuned_model.generate(prompt_input_tok_ids, max_new_tokens=500, do_sample=True)[0]
untuned_model_prompt_text = tokenizer.decode(prompt_output, skip_special_tokens=True)

print('***********************************')
print(f'MODEL GENERATION - PROMPTED INPUT:\n{untuned_model_prompt_text}\n')

INPUT PROMPT:
I have a bad habit of thinking and worrying about what others may think of me. I really want to stop it and enjoy things.
***********************************
EXPECTED RESPONSE:
Thinking about what others think of us is a natural tendency. Human beings are social creatures and rely on feedback from the outside world of people, places, and things to let us know more about ourselves. We also have an inner voice, dialogue, conscience, etc. that helps us to determine the path for us. Often, when someone is worried about the external feedback, and focuses on this as the sole source of information, it can create a dependency that can become problematic. Balance is key to so many things in life. Your own voice is powerful and has strength to provide you the enjoyment you seek. Don't discount it and rely only on the voices of others.
***********************************
MODEL GENERATION - RAW INPUT:
When I get my act together, nobody is going to think of me.
***********************

In [6]:
expected_outputs = []
raw_outputs = []
prompted_outputs = []

for i in range(0,49):
  input = dataset['test'][i]['input']
  output = dataset['test'][i]['output']

  expected_outputs += [output]

  input_tok_ids = tokenizer(input, return_tensors='pt').input_ids.to(device)
  raw_output = untuned_model.generate(input_tok_ids, max_new_tokens=500, do_sample=True)[0]
  raw_output_text = tokenizer.decode(raw_output, skip_special_tokens=True)
  raw_outputs += [raw_output_text]

  prompt = f"Give advice to the user regarding their following concern about their physical and/or mental health in a couple sentences containing specific and actionable solutions:\n{input}\nAdvice: "
  prompt_tok_ids = tokenizer(prompt, return_tensors='pt').input_ids.to(device)
  prompt_output = untuned_model.generate(prompt_tok_ids, max_new_tokens=500, do_sample=True)[0]
  prompt_output_text = tokenizer.decode(prompt_output, skip_special_tokens=True)
  prompted_outputs += [prompt_output_text]


rouge = evaluate.load('rouge')
raw_results = rouge.compute(
    predictions=raw_outputs,
    references=expected_outputs[0:len(raw_outputs)],
)

print('RAW INPUTS:')
print(raw_results)
print('***********************************')

prompted_results = rouge.compute(
    predictions=prompted_outputs,
    references=expected_outputs[0:len(prompted_outputs)],
)

print('PROMPTED INPUTS:')
print(prompted_results)
print('***********************************')

print("Absolute percentage improvement of the prompted inputs over the raw inputs:")
for key in prompted_results:
    improvement = prompted_results[key] - raw_results[key]
    print(f'{key}: {improvement*100:.2f}%')

Token indices sequence length is longer than the specified maximum sequence length for this model (695 > 512). Running this sequence through the model will result in indexing errors


Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

RAW INPUTS:
{'rouge1': 0.057905811933130436, 'rouge2': 0.0075887110553299955, 'rougeL': 0.04171361544043344, 'rougeLsum': 0.04334077877578115}
***********************************
PROMPTED INPUTS:
{'rouge1': 0.06849481786524503, 'rouge2': 0.006325628872500641, 'rougeL': 0.05050340950036715, 'rougeLsum': 0.05212960701083329}
***********************************
Absolute percentage improvement of the prompted inputs over the raw inputs:
rouge1: 1.06%
rouge2: -0.13%
rougeL: 0.88%
rougeLsum: 0.88%


# Fine Tune the Model

In [7]:
def add_prompt(record):
    record = "Give advice to the user regarding their following concern about their physical and/or mental health in a couple sentences containing specific and actionable solutions:\n" + record + "\nAdvice: "
    return record

def tokenize(examples):
    examples['input'] = [add_prompt(record) for record in examples['input']]

    tokenized_inputs = tokenizer(examples['input'], padding=True, truncation=True, return_tensors="pt").to(device)
    tokenized_outputs = tokenizer(examples['output'], padding=True, truncation=True, return_tensors="pt").to(device)

    model_inputs = {
      'input_ids' : tokenized_inputs['input_ids'].to(device),
      'labels': tokenized_outputs['input_ids'].to(device)
    }

    return model_inputs

tokenized_dataset = dataset.map(tokenize, batched=True)

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/266 [00:00<?, ? examples/s]

In [8]:
lora_config = LoraConfig(
    task_type=TaskType.SEQ_2_SEQ_LM,
    r=32,
    lora_alpha=32,
    lora_dropout=0.1
)

peft_model = get_peft_model(untuned_model, lora_config)

training_args_lora = Seq2SeqTrainingArguments(
    output_dir = "rubertmi00/flan-t5-base-healthcoach",
    num_train_epochs = 2,
    learning_rate = 1e-3
)

peft_trainer = Seq2SeqTrainer(
    peft_model,
    training_args_lora,
    train_dataset = tokenized_dataset['train'],
    data_collator = DataCollatorForSeq2Seq(tokenizer),
    tokenizer = tokenizer
)

peft_trainer.train()

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss


TrainOutput(global_step=250, training_loss=2.73746875, metrics={'train_runtime': 581.1969, 'train_samples_per_second': 3.441, 'train_steps_per_second': 0.43, 'total_flos': 1391257976832000.0, 'train_loss': 2.73746875, 'epoch': 2.0})

In [9]:
model_path = './flan-t5-base-healthcoach'

peft_model.save_pretrained(model_path)
tokenizer.save_pretrained(model_path)

('./flan-t5-base-healthcoach/tokenizer_config.json',
 './flan-t5-base-healthcoach/special_tokens_map.json',
 './flan-t5-base-healthcoach/spiece.model',
 './flan-t5-base-healthcoach/added_tokens.json',
 './flan-t5-base-healthcoach/tokenizer.json')

In [24]:
# peft_model.push_to_hub('flan-t5-healthcoach-base')

# Fine Tuned Model Testing

In [10]:
untuned_model = AutoModelForSeq2SeqLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.bfloat16)
peft_model = AutoModelForSeq2SeqLM.from_pretrained('./flan-t5-base-healthcoach', device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [11]:
index = 3

input = dataset['test'][index]['input']
output = dataset['test'][index]['output']

print(f'INPUT PROMPT:\n{input}')
print('***********************************')
print(f'EXPECTED RESPONSE:\n{output}')

prompt = f"Give advice to the user regarding their following concern about their physical and/or mental health in a couple sentences containing specific and actionable solutions:\n{input}\nAdvice: "
input_tok_ids = tokenizer(prompt, return_tensors='pt').input_ids.to(device)
untuned_output = untuned_model.generate(input_tok_ids, max_new_tokens=500, do_sample=True)[0]
untuned_model_output_text = tokenizer.decode(untuned_output, skip_special_tokens=True)

print('***********************************')
print(f'MODEL GENERATION - UNTUNED:\n{untuned_model_output_text}')

finetuned_output = peft_model.generate(input_tok_ids, max_new_tokens=500, do_sample=True)[0]
finetuned_model_output_text = tokenizer.decode(finetuned_output, skip_special_tokens=True)

print('***********************************')
print(f'MODEL GENERATION - FINETUNED INPUT:\n{finetuned_model_output_text}\n')

INPUT PROMPT:
I have a bad habit of thinking and worrying about what others may think of me. I really want to stop it and enjoy things.
***********************************
EXPECTED RESPONSE:
Thinking about what others think of us is a natural tendency. Human beings are social creatures and rely on feedback from the outside world of people, places, and things to let us know more about ourselves. We also have an inner voice, dialogue, conscience, etc. that helps us to determine the path for us. Often, when someone is worried about the external feedback, and focuses on this as the sole source of information, it can create a dependency that can become problematic. Balance is key to so many things in life. Your own voice is powerful and has strength to provide you the enjoyment you seek. Don't discount it and rely only on the voices of others.
***********************************
MODEL GENERATION - UNTUNED:
Remind yourself in front of others how important people are to you.
*****************

In [12]:
expected_outputs = []
untuned_model_outputs = []
finetuned_model_outputs = []

for i in range(0,49):
  input = dataset['test'][i]['input']
  output = dataset['test'][i]['output']

  expected_outputs += [output]

  prompt = f"Give advice to the user regarding their following concern about their physical and/or mental health in a couple sentences containing specific and actionable solutions:\n{input}\nAdvice: "
  input_tok_ids = tokenizer(prompt, return_tensors='pt').input_ids.to(device)
  untuned_output = untuned_model.generate(input_tok_ids, max_new_tokens=500, do_sample=True)[0]
  untuned_model_output_text = tokenizer.decode(untuned_output, skip_special_tokens=True)
  untuned_model_outputs += [untuned_model_output_text]

  finetuned_output = peft_model.generate(input_tok_ids, max_new_tokens=500, do_sample=True)[0]
  finetuned_model_output_text = tokenizer.decode(finetuned_output, skip_special_tokens=True)
  finetuned_model_outputs += [finetuned_model_output_text]


rouge = evaluate.load('rouge')
untuned_model_results = rouge.compute(
    predictions=untuned_model_outputs,
    references=expected_outputs[0:len(untuned_model_outputs)],
)

print('UNTUNED MODEL:')
print(untuned_model_results)
print('***********************************')

finetuned_model_results = rouge.compute(
    predictions=finetuned_model_outputs,
    references=expected_outputs[0:len(finetuned_model_outputs)],
)

print('FINETUNED MODEL:')
print(finetuned_model_results)
print('***********************************')

print("Absolute percentage improvement of the finetuned model over the untuned model:")
for key in finetuned_model_results:
    improvement = finetuned_model_results[key] - untuned_model_results[key]
    print(f'{key}: {improvement*100:.2f}%')

Token indices sequence length is longer than the specified maximum sequence length for this model (728 > 512). Running this sequence through the model will result in indexing errors


UNTUNED MODEL:
{'rouge1': 0.07300470836179049, 'rouge2': 0.005173271077593561, 'rougeL': 0.0490935265292497, 'rougeLsum': 0.05130304709406662}
***********************************
FINETUNED MODEL:
{'rouge1': 0.22032053766587362, 'rouge2': 0.024845598173945736, 'rougeL': 0.11366520185608159, 'rougeLsum': 0.11749921031158533}
***********************************
Absolute percentage improvement of the finetuned model over the untuned model:
rouge1: 14.73%
rouge2: 1.97%
rougeL: 6.46%
rougeLsum: 6.62%


# Few-Shot Testing

In [13]:
index = 3

input = dataset['test'][index]['input']
output = dataset['test'][index]['output']

print(f'INPUT PROMPT:\n{input}')
print('***********************************')
print(f'EXPECTED RESPONSE:\n{output}')

prompt = f"Give advice to the user regarding their following concern about their physical and/or mental health in a couple sentences containing specific and actionable solutions:\n{input}\nAdvice: "
prompt_input_tok_ids = tokenizer(prompt, return_tensors='pt').input_ids.to(device)
untuned_model_prompt_output = untuned_model.generate(prompt_input_tok_ids, max_new_tokens=500, do_sample=True)[0]
untuned_model_prompt_output_text = tokenizer.decode(untuned_model_prompt_output, skip_special_tokens=True)

print('***********************************')
print(f'MODEL GENERATION - UNTUNED:\n{untuned_model_prompt_output_text}')

few_shot_prompt = f"Give advice to the user regarding their following concern about their physical and/or mental health in a couple sentences containing specific and actionable solutions:\nI'm starting to think that I might have depression. The thing is though, I'm not exactly sad. I can joke with my friends and stuff, but I've stopped caring about my work, and I've been starting to think that life is pointless. I really have no clue if I need help, or if I'm just tired or something.\nAdvice: It is possible that you have depression, but you could also be experiencing some other changes. Your statement makes it sound like you've stopped caring about your work, but you're still going to work and doing your work. Do you like what you do? When you said that life is pointless, what makes it that way? Is there something you can do each day that you can find meaning in? It sounds like you could certainly benefit from talking with someone about more specifics. I'm curious about how long you have felt this way and if you are aware of anything that is contributing to it.\n\nGive advice to the user regarding their following concern about their physical and/or mental health in a couple sentences containing specific and actionable solutions:\nInterested in calisthenics training and bodyweight exercises for strength\nAdivce: Calisthenics is an excellent way to build strength using your body weight. The Calisthenics Strength Builder workout focuses on bodyweight exercises that target various muscle groups to help you get stronger and more toned. This workout plan combines strength training and calisthenics techniques.\n\n**Exercise Recommendations**:\n1. **Warm-up (5 minutes)**: Start with light jogging in place and dynamic stretching to prepare your body.\n\n2. **Upper Body Strength (15 minutes)**:\n a. Push-Ups: 3 sets of 12 reps with a 45-second rest.\n b. Pull-Ups or Inverted Rows: 3 sets of 8 reps with a 45-second rest.\n c. Diamond Push-Ups (for triceps): 3 sets of 10 reps with a 45-second rest.\n\n3. **Lower Body Strength (15 minutes)**:\n a. Squats: 3 sets of 15 reps with a 45-second rest.\n b. Lunges: 3 sets of 10 reps per leg with a 45-second rest.\n c. Glute Bridges: 3 sets of 12 reps with a 45-second rest.\n\n4. **Cool Down and Stretching (5 minutes)**: Finish your workout with stretching for all major muscle groups.\n\n**Total Workout Time**: Approximately 40 minutes\n\nThe Calisthenics Strength Builder workout incorporates a variety of bodyweight exercises to help you develop strength in both your upper and lower body while also engaging your core muscles. Focus on maintaining proper form throughout each exercise. As you progress, you can increase the number of sets or repetitions for each exercise to continue challenging yourself.\n\nGive advice to the user regarding their following concern about their physical and/or mental health in a couple sentences containing specific and actionable solutions:\n{input}\nAdvice: "
few_shot_input_tok_ids = tokenizer(few_shot_prompt, return_tensors='pt').input_ids.to(device)
finetuned_model_few_shot_output = peft_model.generate(few_shot_input_tok_ids, max_new_tokens=500, do_sample=True)[0]
finetuned_model_few_shot_output_text = tokenizer.decode(finetuned_model_few_shot_output, skip_special_tokens=True)

print('***********************************')
print(f'MODEL GENERATION - FINETUNED FEW SHOT:\n{finetuned_model_few_shot_output_text}\n')

INPUT PROMPT:
I have a bad habit of thinking and worrying about what others may think of me. I really want to stop it and enjoy things.
***********************************
EXPECTED RESPONSE:
Thinking about what others think of us is a natural tendency. Human beings are social creatures and rely on feedback from the outside world of people, places, and things to let us know more about ourselves. We also have an inner voice, dialogue, conscience, etc. that helps us to determine the path for us. Often, when someone is worried about the external feedback, and focuses on this as the sole source of information, it can create a dependency that can become problematic. Balance is key to so many things in life. Your own voice is powerful and has strength to provide you the enjoyment you seek. Don't discount it and rely only on the voices of others.
***********************************
MODEL GENERATION - UNTUNED:
Change your "thing" to the opposite of good self-love.
******************************

In [14]:
expected_outputs = []
zs_outputs = []
fs_outputs = []

for i in range(0,49):
  index = i

  input = dataset['test'][i]['input']
  output = dataset['test'][i]['output']

  expected_outputs += [output]

  prompt = f"Give advice to the user regarding their following concern about their physical and/or mental health in a couple sentences containing specific and actionable solutions:\n{input}\nAdvice: "
  input_tok_ids = tokenizer(prompt, return_tensors='pt').input_ids.to(device)
  zs_output = peft_model.generate(input_tok_ids, max_new_tokens=500, do_sample=True)[0]
  zs_output_text = tokenizer.decode(zs_output, skip_special_tokens=True)
  zs_outputs += [zs_output_text]

  few_shot_prompt = f"Give advice to the user regarding their following concern about their physical and/or mental health in a couple sentences containing specific and actionable solutions:\nI'm starting to think that I might have depression. The thing is though, I'm not exactly sad. I can joke with my friends and stuff, but I've stopped caring about my work, and I've been starting to think that life is pointless. I really have no clue if I need help, or if I'm just tired or something.\nAdvice: It is possible that you have depression, but you could also be experiencing some other changes. Your statement makes it sound like you've stopped caring about your work, but you're still going to work and doing your work. Do you like what you do? When you said that life is pointless, what makes it that way? Is there something you can do each day that you can find meaning in? It sounds like you could certainly benefit from talking with someone about more specifics. I'm curious about how long you have felt this way and if you are aware of anything that is contributing to it.\n\nGive advice to the user regarding their following concern about their physical and/or mental health in a couple sentences containing specific and actionable solutions:\nInterested in calisthenics training and bodyweight exercises for strength\nAdivce: Calisthenics is an excellent way to build strength using your body weight. The Calisthenics Strength Builder workout focuses on bodyweight exercises that target various muscle groups to help you get stronger and more toned. This workout plan combines strength training and calisthenics techniques.\n\n**Exercise Recommendations**:\n1. **Warm-up (5 minutes)**: Start with light jogging in place and dynamic stretching to prepare your body.\n\n2. **Upper Body Strength (15 minutes)**:\n a. Push-Ups: 3 sets of 12 reps with a 45-second rest.\n b. Pull-Ups or Inverted Rows: 3 sets of 8 reps with a 45-second rest.\n c. Diamond Push-Ups (for triceps): 3 sets of 10 reps with a 45-second rest.\n\n3. **Lower Body Strength (15 minutes)**:\n a. Squats: 3 sets of 15 reps with a 45-second rest.\n b. Lunges: 3 sets of 10 reps per leg with a 45-second rest.\n c. Glute Bridges: 3 sets of 12 reps with a 45-second rest.\n\n4. **Cool Down and Stretching (5 minutes)**: Finish your workout with stretching for all major muscle groups.\n\n**Total Workout Time**: Approximately 40 minutes\n\nThe Calisthenics Strength Builder workout incorporates a variety of bodyweight exercises to help you develop strength in both your upper and lower body while also engaging your core muscles. Focus on maintaining proper form throughout each exercise. As you progress, you can increase the number of sets or repetitions for each exercise to continue challenging yourself.\n\nGive advice to the user regarding their following concern about their physical and/or mental health in a couple sentences containing specific and actionable solutions:\n{input}\nAdvice: "
  input_tok_ids_fs = tokenizer(few_shot_prompt, return_tensors='pt').input_ids.to(device)
  fs_output = peft_model.generate(input_tok_ids_fs, max_new_tokens=500, do_sample=True)[0]
  fs_output_text = tokenizer.decode(fs_output, skip_special_tokens=True)
  fs_outputs += [fs_output_text]


zs_results = rouge.compute(
    predictions=zs_outputs,
    references=expected_outputs[0:len(zs_outputs)],
)

print('ZERO-SHOT INPUT:')
print(zs_results)
print('***********************************')

fs_results = rouge.compute(
    predictions=fs_outputs,
    references=expected_outputs[0:len(fs_outputs)],
)

print('FEW-SHOT INPUT:')
print(fs_results)
print('***********************************')

print("Absolute percentage improvement of the few-shot input over the zero-shot input:")
for key in fs_results:
    improvement = fs_results[key] - zs_results[key]
    print(f'{key}: {improvement*100:.2f}%')

ZERO-SHOT INPUT:
{'rouge1': 0.1975710279312467, 'rouge2': 0.023088487692947288, 'rougeL': 0.10570732837470623, 'rougeLsum': 0.10948058872298917}
***********************************
FEW-SHOT INPUT:
{'rouge1': 0.2152798357770001, 'rouge2': 0.023675383371496283, 'rougeL': 0.11371928415655058, 'rougeLsum': 0.117077348102889}
***********************************
Absolute percentage improvement of the few-shot input over the zero-shot input:
rouge1: 1.77%
rouge2: 0.06%
rougeL: 0.80%
rougeLsum: 0.76%
