# Install libraries

In [None]:
# Environment setup: install the tokenizer, model, dataset and metric packages
# used by the rest of the notebook.
# NOTE(review): none of these installs pin a version, and transformers is
# installed straight from its GitHub repository, so a re-run may resolve to
# different releases than the ones that produced the recorded outputs.
!pip install -q sentencepiece
!pip -q install git+https://github.com/huggingface/transformers
!pip install -q evaluate
!pip install -q datasets
!pip install -q pandas
!pip install -q rouge_score
!pip install -q nltk
!pip install bert_score
!pip install sentence-transformers
!pip install google-generativeai
!pip install -U bitsandbytes



# Create models and tokenizers

In [None]:
import gc

import pandas as pd
import torch
from google.colab import userdata
from torchsummary import summary
from transformers import AutoTokenizer , AutoModelForCausalLM, BitsAndBytesConfig
from transformers import T5Tokenizer, TFT5ForConditionalGeneration

def del_model(model, tokenizer):
    """Release this function's references to a model/tokenizer and free caches.

    ``del`` only unbinds the two local names; the underlying objects stay
    alive for as long as the caller (or any notebook global) still refers to
    them. To actually reclaim memory the caller must also drop or rebind its
    own references.

    Args:
        model: Model object to release.
        tokenizer: Tokenizer object to release.

    Returns:
        None.
    """
    del model
    del tokenizer
    # Run a collection first so objects that became unreachable (including
    # reference cycles) are destroyed before the CUDA cache is emptied.
    gc.collect()
    torch.cuda.empty_cache()

def create_t5_model(model_size):
    """Load a T5 tokenizer and the matching TensorFlow T5 model.

    Args:
        model_size: Checkpoint identifier handed to ``from_pretrained`` for
            both the tokenizer and the model (the notebook passes values such
            as "t5-small", "t5-base" and "t5-large").

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    t5_tokenizer = T5Tokenizer.from_pretrained(model_size)
    t5_model = TFT5ForConditionalGeneration.from_pretrained(model_size)
    return t5_model, t5_tokenizer

def create_mistral_model(model_size):
    """Load the quantized Mistral-7B-Instruct model and its tokenizer.

    Args:
        model_size: Accepted for signature parity with ``create_t5_model``;
            it is not used, because the checkpoint name is fixed below.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    checkpoint = "mistralai/Mistral-7B-Instruct-v0.1"
    # The access token is read from the Colab secret store under 'MISTRAL'.
    # It must be the value returned by userdata.get(...), not a string
    # containing that expression.
    auth_token = userdata.get('MISTRAL')

    quantization_config = BitsAndBytesConfig(load_in_4bit=True,
                                         llm_int4_enable_fp32_cpu_offload=True)

    model = AutoModelForCausalLM.from_pretrained(checkpoint,
                        torch_dtype=torch.float32,
                        device_map='auto',
                        quantization_config=quantization_config,
                        use_auth_token=auth_token
                        )
    tokenizer = AutoTokenizer.from_pretrained(checkpoint,
                        use_auth_token=auth_token
                        )
    return model, tokenizer


# Import utility library

In [None]:
#!pip install summary_utils
!wget https://raw.githubusercontent.com/sleighton2022/datasci266-final-project/main/summary_utils.py
from summary_utils import SummaryEvaluator, DatasetManager, SummaryModel

# Experiments

## Key parameters

In [29]:
# %% tags=["parameters"]
# Experiment parameters for this notebook run.
seed = 42  # passed to DatasetManager when the dataset is loaded
sample_size = 2  # passed to DatasetManager when the dataset is loaded
model_type = "t5"  # NOTE(review): not read again in the visible notebook
#model_type = "mistral"
model_size = "t5-base"  # reassigned by each experiment cell before it is used
prompt_template = "tldr: {document}"  # NOTE(review): not read at module level in the visible notebook

## Initialization

In [8]:
# Metrics accumulated by evaluate_prompts, keyed by "<model_size>-<prompt name>".
results = {}
# Maps a prompt name to its template string; rebound by the later prompt cells.
prompt_template_set = {}

## Load the dataset

In [30]:
# Load dataset
# The manager receives the seed and sample size from the parameters cell; the
# "test" sample is the one every experiment below generates summaries for.
dataset_manager = DatasetManager(seed=seed,sample_size=sample_size)
sampled_dataset = dataset_manager.load_sampled_dataset(dataset_label="test")

## Helper functions

In [28]:
def evaluate_prompts(model,tokenizer,model_size,prompt_template_set=None,eval_summarizer=None):
  """Generate and score summaries for every prompt template.

  For each ``(name, template)`` pair the summaries for ``sampled_dataset`` are
  generated, evaluated, and the metrics stored in the notebook-level
  ``results`` dict under ``"<model_size>-<name>"``. The model and tokenizer
  are handed to ``del_model`` once all templates have been processed.

  Args:
    model: Model passed to ``SummaryModel``.
    tokenizer: Tokenizer passed to ``SummaryModel``.
    model_size: Label used as the prefix of the result keys.
    prompt_template_set: Mapping of prompt name to template string. When
      omitted, the notebook-level ``prompt_template_set`` is looked up at call
      time, so cells that rebind that global after this function was defined
      are honoured.
    eval_summarizer: Optional summarizer forwarded as ``gen_summarizer`` to
      ``SummaryModel.generate_summaries``.

  Returns:
    A list with one entry of generated summaries per prompt template, in
    iteration order of ``prompt_template_set``.
  """
  if prompt_template_set is None:
    # A default bound in the signature would be frozen to whichever dict the
    # global named when this cell ran; read the global at call time instead.
    prompt_template_set = globals()["prompt_template_set"]
  summary_model = SummaryModel(model, tokenizer)
  summary_evaluator = SummaryEvaluator()
  all_generated_summaries = []
  for prompt_name, prompt_template in prompt_template_set.items():
    print(prompt_name, prompt_template)
    results_name = model_size + "-" + prompt_name
    generated_summaries = summary_model.generate_summaries(sampled_dataset,prompt_template=prompt_template, gen_summarizer=eval_summarizer)
    results[results_name] = summary_evaluator.evaluate(sampled_dataset, generated_summaries)
    all_generated_summaries.append(generated_summaries)
  del_model(model, tokenizer)
  return all_generated_summaries

In [10]:
def mistral_summarizer(self,prompt):
    """Generate a summary for ``prompt`` and return only the newly generated text.

    Args:
        self: Object exposing ``tokenizer`` and ``model`` attributes.
        prompt: Full prompt text to feed to the model.

    Returns:
        The decoded continuation with surrounding whitespace stripped.
    """
    # Tokenize and generate
    input_ids = self.tokenizer(prompt, return_tensors="pt").input_ids
    if torch.cuda.is_available():
        input_ids = input_ids.cuda()
    output_ids = self.model.generate(
        input_ids,
        max_new_tokens=50,
        num_beams=4,
        early_stopping=True,
        pad_token_id=self.tokenizer.eos_token_id  # Set EOS token for stopping
    )

    # The generated sequence starts with the prompt tokens, so drop them by
    # count. Searching the decoded text for the prompt is fragile: it can cut
    # off the first character of the summary when no separator follows the
    # prompt, and it slices at the wrong offset when the decoded text does not
    # contain the prompt verbatim.
    prompt_length = input_ids.shape[-1]
    new_token_ids = output_ids[0][prompt_length:]

    # Decode only the continuation and strip any extra whitespace
    return self.tokenizer.decode(new_token_ids, skip_special_tokens=True).strip()


In [11]:
# Flatten the nested metrics in `results` into a table with one row per prompt
def create_dataframe(results):
  """Print the evaluation metrics as a DataFrame, one row per result key.

  Args:
    results: Mapping of result name to a metrics dict that contains a nested
      'rouge' dict (with 'rouge1', 'rouge2', 'rougeL') plus the top-level
      'bleu', 'bertscore' and 'vector_similarity' entries.

  Returns:
    None; the table is written to standard output.
  """
  rouge_columns = ('rouge1', 'rouge2', 'rougeL')
  scalar_columns = ('bleu', 'bertscore', 'vector_similarity')

  records = []
  for label, scores in results.items():
    # Column order follows insertion order: prompt, ROUGE variants, then the rest
    record = {'prompt': label}
    record.update((column, scores['rouge'][column]) for column in rouge_columns)
    record.update((column, scores[column]) for column in scalar_columns)
    records.append(record)

  # Build the table once from all records and show it
  print(pd.DataFrame(records))

In [12]:
def print_summaries(summaries):
  """Print each element of ``summaries`` on its own line, in order.

  Args:
    summaries: Iterable of printable items (the notebook passes the list
      returned by ``evaluate_prompts``).
  """
  for generated in summaries:
    print(generated)

## 1 Prompts

In [8]:
# Experiment 1: plain instruction prompts. Each template contains the
# "{document}" placeholder for the article text.
# Rebinding the name here replaces the dict created in the initialization cell.
prompt_template_set = {}
prompt_template_set["prompt1"] = "Summarize this article: {document}"
prompt_template_set["prompt2"] = "What are the key points of this article: {document}"
prompt_template_set["prompt3"] = "Summarize this article for a 5th grader: {document}"
prompt_template_set["prompt4"] = "Write a summary of this article in 50 words: {document}"
prompt_template_set["prompt5"] = "Summarize the article in 3 bullet points: {document}"

#### 1.1 T5 base  







In [None]:
model_size = "t5-base"
model, tokenizer = create_t5_model(model_size)
# Pass the active prompt set explicitly so this run does not depend on the
# helper's default value.
summaries = evaluate_prompts(
    model=model,
    tokenizer=tokenizer,
    model_size=model_size,
    prompt_template_set=prompt_template_set,
)

In [22]:
# Use the shared helper, as the other result cells do. The inline loop rebound
# the global name `summary`, shadowing the `summary` imported from torchsummary.
print_summaries(summaries)


['21 women were heading to Liverpool when their minibus was hit by a lorry . their friend, 18-year-old Bethany Jones, was killed in the crash . minibus driver James Johnson was jailed for more than six years for causing her death .']
["21 women were heading to Liverpool when their minibus was hit by a lorry . their friend, 18-year-old Bethany Jones, was killed in the crash . minibus driver James Johnson was jailed for more than six years for causing her friend's death ."]
["21 women were heading to Liverpool when their minibus was hit by a lorry . their friend, 18-year-old Bethany Jones, was killed in the crash . minibus driver James Johnson was jailed for more than six years for causing her friend's death ."]
['21 women were heading to Liverpool when their minibus was hit by a lorry . their friend, 18-year-old Bethany Jones, was killed in the crash . minibus driver james johnson was jailed for more than six years in 2013 .']
["21 women were heading to Liverpool when their minibus was 

In [None]:
create_dataframe(results)

            prompt    rouge1    rouge2    rougeL      bleu  bertscore  \
0  t5-base-prompt1  0.190947  0.024673  0.129306  0.015440   0.501195   
1  t5-base-prompt2  0.200283  0.027235  0.131034  0.016077   0.506631   
2  t5-base-prompt3  0.194695  0.026244  0.132029  0.014832   0.505272   
3  t5-base-prompt4  0.199000  0.024999  0.133052  0.014553   0.503812   
4  t5-base-prompt5  0.185086  0.023984  0.126145  0.015732   0.499326   

   vector_similarity  
0           0.513904  
1           0.487193  
2           0.533619  
3           0.507103  
4           0.503888  


#### 1.2 T5 small

In [None]:
model_size = "t5-small"
model, tokenizer = create_t5_model(model_size)
# Pass the active prompt set explicitly so this run does not depend on the
# helper's default value.
summaries = evaluate_prompts(
    model=model,
    tokenizer=tokenizer,
    model_size=model_size,
    prompt_template_set=prompt_template_set,
)

In [24]:
print_summaries(summaries)

['her friend Bethany Jones, 18, was killed while her minibus was hit by a lorry . the driver was jailed for more than six years for causing the crash . she said the help she received from a charity led her to want to support others .']
['her friend Bethany Jones, 18, was killed while her minibus was hit by a lorry . the driver was jailed for more than six years for causing the crash . she said the help she received from a charity led her to want to support others .']
['her friend Bethany Jones, 18, was killed while her minibus was hit by a lorry on the M62 . she broke her shoulder, back and pelvis and said the help she received from a charity led her to want to support others . the minibus driver was jailed for more than six years for causing the death .']
['her friend Bethany Jones, 18, was killed while her minibus was hit by a lorry on the M62 . she broke her shoulder, back and pelvis and said the help she received from a charity led her to want to support others . the crash made her

In [None]:
create_dataframe(results)

             prompt    rouge1    rouge2    rougeL      bleu  bertscore  \
0   t5-base-prompt1  0.190947  0.024673  0.129306  0.015440   0.501195   
1   t5-base-prompt2  0.200283  0.027235  0.131034  0.016077   0.506631   
2   t5-base-prompt3  0.194695  0.026244  0.132029  0.014832   0.505272   
3   t5-base-prompt4  0.199000  0.024999  0.133052  0.014553   0.503812   
4   t5-base-prompt5  0.185086  0.023984  0.126145  0.015732   0.499326   
5  t5-small-prompt1  0.191549  0.021002  0.128870  0.013668   0.496701   
6  t5-small-prompt2  0.187117  0.020717  0.128254  0.013492   0.493760   
7  t5-small-prompt3  0.188544  0.022267  0.130077  0.013937   0.491193   
8  t5-small-prompt4  0.178968  0.017083  0.121125  0.012290   0.495284   
9  t5-small-prompt5  0.181769  0.025169  0.130003  0.014895   0.493805   

   vector_similarity  
0           0.513904  
1           0.487193  
2           0.533619  
3           0.507103  
4           0.503888  
5           0.426731  
6           0.436847  
7

#### 1.3 T5 large  

In [None]:
model_size = "t5-large"
model, tokenizer = create_t5_model(model_size)
# Pass the active prompt set explicitly so this run does not depend on the
# helper's default value.
summaries = evaluate_prompts(
    model=model,
    tokenizer=tokenizer,
    model_size=model_size,
    prompt_template_set=prompt_template_set,
)

In [26]:
print_summaries(summaries)

['minibus driver jailed for more than six years for causing death of friend . Sarah Johnson broke her shoulder, back and pelvis in the crash . Ms Johnson said crash made her realise how lucky she was .']
['minibus driver jailed for more than six years for causing death of friend . Sarah Johnson broke her shoulder, back and pelvis in minibus crash . Ms Johnson said crash made her realise how lucky she was .']
['minibus driver jailed for more than six years for causing death of friend . Sarah Johnson broke her shoulder, back and pelvis in the crash . Ms Johnson says crash made her realise how lucky she was .']
['minibus driver james johnson jailed for more than six years for causing hen party crash . friend Bethany Jones, 18, was killed in the crash on the M62 in london . minibus passenger Sarah Johnson broke her shoulder, back and pelvis . she said the crash had made her realise how lucky she was .']
['minibus driver jailed for more than six years for causing death of friend . Sarah Joh

In [None]:
create_dataframe(results)

              prompt    rouge1    rouge2    rougeL      bleu  bertscore  \
0    t5-base-prompt1  0.190947  0.024673  0.129306  0.015440   0.501195   
1    t5-base-prompt2  0.200283  0.027235  0.131034  0.016077   0.506631   
2    t5-base-prompt3  0.194695  0.026244  0.132029  0.014832   0.505272   
3    t5-base-prompt4  0.199000  0.024999  0.133052  0.014553   0.503812   
4    t5-base-prompt5  0.185086  0.023984  0.126145  0.015732   0.499326   
5   t5-small-prompt1  0.191549  0.021002  0.128870  0.013668   0.496701   
6   t5-small-prompt2  0.187117  0.020717  0.128254  0.013492   0.493760   
7   t5-small-prompt3  0.188544  0.022267  0.130077  0.013937   0.491193   
8   t5-small-prompt4  0.178968  0.017083  0.121125  0.012290   0.495284   
9   t5-small-prompt5  0.181769  0.025169  0.130003  0.014895   0.493805   
10  t5-large-prompt1  0.204444  0.029873  0.142275  0.016052   0.510986   
11  t5-large-prompt2  0.202771  0.027891  0.140294  0.015622   0.505149   
12  t5-large-prompt3  0.1

#### 1.5 Mistral

In [None]:
# Load the Mistral model and evaluate it with the Mistral-specific summarizer
model_size = "mistral"
model, tokenizer = create_mistral_model(model_size)
# Pass the active prompt set explicitly so this run does not depend on the
# helper's default value.
summaries = evaluate_prompts(
    model=model,
    tokenizer=tokenizer,
    model_size=model_size,
    prompt_template_set=prompt_template_set,
    eval_summarizer=mistral_summarizer,
)

In [13]:
print_summaries(summaries)

['Sarah Johnson was one of 21 women on a minibus that was hit by a lorry on the M62 in 2013. Her friend Bethany Jones was killed in the crash, and Sarah']
['Ms Johnson is now working as a support worker for Day One, helping other victims of major trauma.']
['Sarah Johnson was in a minibus with her friends when it was hit by a lorry on the M62. Her friend Bethany Jones was killed and Sarah was badly hurt. The minibus driver, James Johnson']
['Sarah Johnson was one of 21 women on a minibus that was hit by a lorry on the M62. Her friend Bethany Jones, 18, was killed and several others were badly hurt.']
['Ms Johnson is now working as a support worker for Day One, helping others who have been through similar experiences.']


In [None]:
create_dataframe(results)

             prompt    rouge1    rouge2    rougeL      bleu  bertscore  \
0  t5-large-prompt1  0.079800  0.020845  0.060078  0.005461   0.470786   
1  t5-large-prompt2  0.075691  0.017116  0.054823  0.004563   0.469554   
2  t5-large-prompt3  0.081605  0.022617  0.060730  0.006416   0.475210   
3  t5-large-prompt4  0.101566  0.024055  0.069222  0.006478   0.483945   
4  t5-large-prompt5  0.089638  0.019487  0.061460  0.005540   0.470911   

   vector_similarity  
0           0.599967  
1           0.596550  
2           0.581367  
3           0.555626  
4           0.586925  


## 2 Templates

In [14]:
# Experiment 2: structured Input/Task/Output templates. Each contains the
# "{document}" placeholder for the article text.
prompt_template_set = {}

# NOTE(review): template1 and template2 begin with a newline (the text starts
# on the line after the opening quotes) while template3-5 start directly with
# "Input:" — confirm whether the difference is intended.
prompt_template_set["template1"] = """
Input:
Article: {document}

Task: Summarize the above article.

Output:
Summary:
"""

prompt_template_set["template2"] = """
Input:
Article: {document}

Task: Extract and summarize the key points from the article.

Output:
Key Points Summary:
"""

# NOTE(review): the output label mentions a 50-word limit that the task line
# does not state.
prompt_template_set["template3"] = """Input:
Article: {document}

Task: Summarize this article for a 5th grader.

Output:
Summary (50 words):
"""


# NOTE(review): the task line asks for approximately 100 words while the
# output label says "50 words"; the plain-prompt counterpart ("prompt4") asks
# for 50 words. Confirm which limit is intended before comparing results.
prompt_template_set["template4"] = """Input:
Article: {document}

Task: Summarize the article in approximately 100 words.

Output:
Summary (50 words):
"""

# Bullet-point variant: the output section ends with three placeholder bullet
# lines that are sent to the model as literal text.
prompt_template_set["template5"] = """Input:
Article: {document}

Task: Summarize the article in bullet points.

Output:
Summary:
- bullet_point_1
- bullet_point_2
- bullet_point_3
"""

#### 2.1 T5 base

In [None]:
model_size = "t5-base"
model, tokenizer = create_t5_model(model_size)
# Pass the active prompt set explicitly so this run does not depend on the
# helper's default value.
summaries = evaluate_prompts(
    model=model,
    tokenizer=tokenizer,
    model_size=model_size,
    prompt_template_set=prompt_template_set,
)

In [29]:
print_summaries(summaries)

['21 women were heading to Liverpool when their minibus was hit by a lorry . their friend, 18-year-old Bethany Jones, was killed in the crash . minibus driver James Johnson was jailed for more than six years for causing her death .']
["21 women were heading to Liverpool when their minibus was hit by a lorry . their friend, 18-year-old Bethany Jones, was killed in the crash . minibus driver James Johnson was jailed for more than six years for causing her friend's death ."]
["21 women were heading to Liverpool when their minibus was hit by a lorry . their friend, 18-year-old Bethany Jones, was killed in the crash . minibus driver James Johnson was jailed for more than six years for causing her friend's death ."]
['21 women were heading to Liverpool when their minibus was hit by a lorry . their friend, 18-year-old Bethany Jones, was killed in the crash . minibus driver james johnson was jailed for more than six years in 2013 .']
["21 women were heading to Liverpool when their minibus was 

In [None]:
create_dataframe(results)

              prompt    rouge1    rouge2    rougeL      bleu  bertscore  \
0  t5-base-template1  0.185725  0.019665  0.126126  0.012149   0.499389   
1  t5-base-template2  0.196112  0.026284  0.125525  0.014168   0.496072   
2  t5-base-template3  0.187986  0.027379  0.123386  0.014340   0.497435   
3  t5-base-template4  0.191913  0.024529  0.124188  0.014073   0.497428   
4  t5-base-template5  0.176543  0.025939  0.107458  0.013176   0.486879   

   vector_similarity  
0           0.492220  
1           0.476907  
2           0.483379  
3           0.493260  
4           0.468765  


#### 2.2 T5 small

In [None]:
model_size = "t5-small"
model, tokenizer = create_t5_model(model_size)
# Pass the active prompt set explicitly so this run does not depend on the
# helper's default value.
summaries = evaluate_prompts(
    model=model,
    tokenizer=tokenizer,
    model_size=model_size,
    prompt_template_set=prompt_template_set,
)

In [31]:
print_summaries(summaries)

['her friend Bethany Jones, 18, was killed while her minibus was hit by a lorry . the driver was jailed for more than six years for causing the crash . she said the help she received from a charity led her to want to support others .']
['her friend Bethany Jones, 18, was killed while her minibus was hit by a lorry . the driver was jailed for more than six years for causing the crash . she said the help she received from a charity led her to want to support others .']
['her friend Bethany Jones, 18, was killed while her minibus was hit by a lorry on the M62 . she broke her shoulder, back and pelvis and said the help she received from a charity led her to want to support others . the minibus driver was jailed for more than six years for causing the death .']
['her friend Bethany Jones, 18, was killed while her minibus was hit by a lorry on the M62 . she broke her shoulder, back and pelvis and said the help she received from a charity led her to want to support others . the crash made her

In [None]:
create_dataframe(results)

               prompt    rouge1    rouge2    rougeL      bleu  bertscore  \
0   t5-base-template1  0.185725  0.019665  0.126126  0.012149   0.499389   
1   t5-base-template2  0.196112  0.026284  0.125525  0.014168   0.496072   
2   t5-base-template3  0.187986  0.027379  0.123386  0.014340   0.497435   
3   t5-base-template4  0.191913  0.024529  0.124188  0.014073   0.497428   
4   t5-base-template5  0.176543  0.025939  0.107458  0.013176   0.486879   
5  t5-small-template1  0.178138  0.024033  0.125536  0.014100   0.491026   
6  t5-small-template2  0.174037  0.021258  0.121758  0.013361   0.495665   
7  t5-small-template3  0.167127  0.021007  0.119920  0.013213   0.492251   
8  t5-small-template4  0.173759  0.021311  0.117255  0.013748   0.493222   
9  t5-small-template5  0.175931  0.020062  0.117802  0.013313   0.491720   

   vector_similarity  
0           0.492220  
1           0.476907  
2           0.483379  
3           0.493260  
4           0.468765  
5           0.416400  
6 

#### 2.3 T5 large

In [None]:
model_size = "t5-large"
model, tokenizer = create_t5_model(model_size)
# Pass the active prompt set explicitly so this run does not depend on the
# helper's default value.
summaries = evaluate_prompts(
    model=model,
    tokenizer=tokenizer,
    model_size=model_size,
    prompt_template_set=prompt_template_set,
)

In [33]:
print_summaries(summaries)

['minibus driver jailed for more than six years for causing death of friend . Sarah Johnson broke her shoulder, back and pelvis in the crash . Ms Johnson said crash made her realise how lucky she was .']
['minibus driver jailed for more than six years for causing death of friend . Sarah Johnson broke her shoulder, back and pelvis in minibus crash . Ms Johnson said crash made her realise how lucky she was .']
['minibus driver jailed for more than six years for causing death of friend . Sarah Johnson broke her shoulder, back and pelvis in the crash . Ms Johnson says crash made her realise how lucky she was .']
['minibus driver james johnson jailed for more than six years for causing hen party crash . friend Bethany Jones, 18, was killed in the crash on the M62 in london . minibus passenger Sarah Johnson broke her shoulder, back and pelvis . she said the crash had made her realise how lucky she was .']
['minibus driver jailed for more than six years for causing death of friend . Sarah Joh

In [None]:
create_dataframe(results)

                prompt    rouge1    rouge2    rougeL      bleu  bertscore  \
0    t5-base-template1  0.185725  0.019665  0.126126  0.012149   0.499389   
1    t5-base-template2  0.196112  0.026284  0.125525  0.014168   0.496072   
2    t5-base-template3  0.187986  0.027379  0.123386  0.014340   0.497435   
3    t5-base-template4  0.191913  0.024529  0.124188  0.014073   0.497428   
4    t5-base-template5  0.176543  0.025939  0.107458  0.013176   0.486879   
5   t5-small-template1  0.178138  0.024033  0.125536  0.014100   0.491026   
6   t5-small-template2  0.174037  0.021258  0.121758  0.013361   0.495665   
7   t5-small-template3  0.167127  0.021007  0.119920  0.013213   0.492251   
8   t5-small-template4  0.173759  0.021311  0.117255  0.013748   0.493222   
9   t5-small-template5  0.175931  0.020062  0.117802  0.013313   0.491720   
10  t5-large-template1  0.204912  0.032149  0.140912  0.015487   0.504403   
11  t5-large-template2  0.212757  0.032126  0.142940  0.015773   0.506131   

#### 2.5 Mistral

In [None]:
model_size = "mistral"
model, tokenizer = create_mistral_model(model_size)
# Pass the active prompt set explicitly so this run does not depend on the
# helper's default value.
summaries = evaluate_prompts(
    model=model,
    tokenizer=tokenizer,
    model_size=model_size,
    prompt_template_set=prompt_template_set,
    eval_summarizer=mistral_summarizer,
)

In [16]:
print_summaries(summaries)

['Sarah Johnson was one of 21 women on a minibus that was hit by a lorry on the M62 in 2013. Her friend Bethany Jones was killed in the crash, and Sarah']
['Ms Johnson is now working as a support worker for Day One, helping other victims of major trauma.']
['Sarah Johnson was in a minibus with her friends when it was hit by a lorry on the M62. Her friend Bethany Jones was killed and Sarah was badly hurt. The minibus driver, James Johnson']
['Sarah Johnson was one of 21 women on a minibus that was hit by a lorry on the M62. Her friend Bethany Jones, 18, was killed and several others were badly hurt.']
['Ms Johnson is now working as a support worker for Day One, helping others who have been through similar experiences.']


In [None]:
create_dataframe(results)

              prompt    rouge1    rouge2    rougeL      bleu  bertscore  \
0  mistral-template1  0.077878  0.018334  0.059615  0.004902   0.459197   
1  mistral-template2  0.070451  0.016662  0.052368  0.004580   0.446344   
2  mistral-template3  0.081330  0.019684  0.059085  0.005002   0.466990   
3  mistral-template4  0.077298  0.018825  0.058975  0.005481   0.456700   
4  mistral-template5  0.052658  0.011128  0.035191  0.003573   0.399282   

   vector_similarity  
0           0.526428  
1           0.502251  
2           0.492006  
3           0.515477  
4           0.476567  


## 3 Few-shot learning

In [None]:
# Few-shot prompts: "learningN" is prefixed with the first N document/summary
# pairs from the training sample, followed by the instruction for the new
# document.
learning_dataset = dataset_manager.load_sampled_dataset(dataset_label="train")

# Only two shots are built. A third would index a third training example;
# presumably the sample holds just `sample_size` rows (2 in the parameters
# cell) — confirm before extending this list.
few_shot_labels = ["learning1", "learning2"]
few_shot_suffix = " Summarize the following {document}"

# NOTE(review): the example text is concatenated into the template verbatim.
# If the template is later filled with str.format, braces inside an example
# document would be treated as placeholders — verify against SummaryModel.
prompt_template_set = {}
few_shot_prefix = ""
for shot_index, shot_label in enumerate(few_shot_labels):
    # Each label extends the previous prefix by one more worked example.
    few_shot_prefix += "Document: " + learning_dataset["document"][shot_index] + "Summary:" + \
                       learning_dataset["summary"][shot_index]
    prompt_template_set[shot_label] = few_shot_prefix + few_shot_suffix

### 3.1 T5-large

In [32]:
model_size = "t5-large"
model, tokenizer = create_t5_model(model_size)
# Pass the active prompt set explicitly so this run does not depend on the
# helper's default value.
summaries = evaluate_prompts(
    model=model,
    tokenizer=tokenizer,
    model_size=model_size,
    prompt_template_set=prompt_template_set,
)

All PyTorch model weights were used when initializing TFT5ForConditionalGeneration.

All the weights of TFT5ForConditionalGeneration were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFT5ForConditionalGeneration for predictions without further training.


learning1 Document: In Wales, councils are responsible for funding and overseeing schools.
But in England, Mr Osborne's plan will mean local authorities will cease to have a role in providing education.
Academies are directly funded by central government and head teachers have more freedom over admissions and to change the way the school works.
It is a significant development in the continued divergence of schools systems on either side of Offa's Dyke.
And although the Welsh Government will get extra cash to match the money for English schools to extend the school day, it can spend it on any devolved policy area.
Ministers have no plans to follow suit.
At the moment, governing bodies are responsible for setting school hours and they need ministerial permission to make significant changes.
There are already more than 2,000 secondary academies in England and its extension to all state schools is unlikely to shake the Welsh Government's attachment to what they call a "community, comprehen



Average BERTSCORE score: 0.4012531638145447
Average VECTOR_SIMILARITY score: 0.07358400523662567


In [None]:
create_dataframe(results)

In [None]:
print_summaries(summaries)

### 3.5 Mistral

In [None]:
# Evaluate Mistral on the few-shot prompt set with the Mistral-specific summarizer
model_size = "mistral"
model, tokenizer = create_mistral_model(model_size)
summaries = evaluate_prompts(
    model=model,
    tokenizer=tokenizer,
    model_size=model_size,
    eval_summarizer=mistral_summarizer,
    prompt_template_set=prompt_template_set,
)

In [21]:
print_summaries(summaries)

['Ms Johnson, who is now working as a support worker for Day One, said she wanted to give back to the charity that had helped her.\nShe said: "It\'s something I\'m passionate about.\n"It\'s']


In [None]:
create_dataframe(results)

              prompt    rouge1    rouge2    rougeL      bleu  bertscore  \
0  mistral-learning1  0.024205  0.006380  0.018101  0.001472   0.375454   
1  mistral-learning2  0.025150  0.007356  0.018224  0.001685   0.375454   

   vector_similarity  
0           0.073044  
1           0.073044  
