# Setup

In [None]:
# %load_ext lab_black

In [None]:
# ! pip install torch==1.13.1 --quiet
# ! pip install torchdata==0.5.1 --quiet
# ! pip install transformers==4.27.2 datasets==2.11.0 --quiet
# ! pip install py7zr --quiet
# ! pip3 install evaluate==0.4.0 rouge_score==0.1.2 loralib==0.1.2 --quiet
# ! pip3 install peft --quiet
# ! pip3 install numerize --quiet

In [4]:
import os
import textwrap
import pandas as pd
import numpy as np
from pprint import pprint
import random
from numerize import numerize
from tqdm.auto import tqdm

import torch

from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM
from transformers import AutoTokenizer
from transformers import GenerationConfig
from transformers import TrainingArguments, Trainer, Seq2SeqTrainingArguments
import evaluate

In [None]:
# some util functions
print_dashes = lambda: print("-" * 80)
txtwrap = lambda text: textwrap.fill(text, width=80)
print_dialogue = lambda x: [print(txtwrap(text=i)) for i in x.split("\n")]

# data load

In [10]:
huggingface_dataset_name = "knkarthick/dialogsum"
dataset = load_dataset(huggingface_dataset_name)

Downloading readme:   0%|          | 0.00/4.58k [00:00<?, ?B/s]

Downloading and preparing dataset csv/knkarthick--dialogsum to /root/.cache/huggingface/datasets/knkarthick___csv/knkarthick--dialogsum-931380d0e19583fc/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1...


Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/11.3M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.35M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/442k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Dataset csv downloaded and prepared to /root/.cache/huggingface/datasets/knkarthick___csv/knkarthick--dialogsum-931380d0e19583fc/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

In [None]:
# single data point
pprint(dataset["train"][13])

{'dialogue': '#Person1#: How old is Keith?\n'
             "#Person2#: He's 21. how old is James?\n"
             "#Person1#: He's a year older than Keith, but he looks younger.\n"
             "#Person2#: How's your father?\n"
             "#Person1#: He's fine. He retired last week. It's turning going "
             'in his life. Now he can relax and enjoy his retirement.\n'
             '#Person2#: He can spend more time with his grandchildren.\n'
             "#Person1#: Oh, I don't think he wants to. He wants to travel to "
             'several different countries around the world.\n'
             '#Person2#: So, he wants to have a more active retirement. Good '
             'idea!\n'
             '#Person1#: How do you want to spend your old age?\n'
             '#Person2#: In the same way, probably.',
 'id': 'train_13',
 'summary': "#Person1# and #Person2# are talking about their families' ages. "
            "#Person2#'s father wants to travel around the world after "
        

# load model

- Load model with special dtype that optimizes for memory.
- bfloat16 is a dtype that was developed by google brain
- it is 16 bit representation of 32 bit floating point
- [more about blfoat16](https://cloud.google.com/tpu/docs/bfloat16)

In [None]:
model_id = "google/flan-t5-base"

# Load tokenizer of FLAN-t5-small
tokenizer = AutoTokenizer.from_pretrained(model_id, torch_dtype=torch.bfloat16)

# Load model
model = AutoModelForSeq2SeqLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)

# prepare dataset

Here we convert data points in model consumable format

```python

{
 # raw data   
 'id': 'train_13',
 'dialogue': "#Person1#: How old is Keith?\n#Person2#: He's 21. how old is James?\n#Person1#: He's a year older than Keith, but he looks younger.\n#Person2#: How's your father?\n#Person1#: He's fine. He retired last week. It's turning going in his life. Now he can relax and enjoy his retirement.\n#Person2#: He can spend more time with his grandchildren.\n#Person1#: Oh, I don't think he wants to. He wants to travel to several different countries around the world.\n#Person2#: So, he wants to have a more active retirement. Good idea!\n#Person1#: How do you want to spend your old age?\n#Person2#: In the same way, probably.",
 'summary': "#Person1# and #Person2# are talking about their families' ages. #Person2#'s father wants to travel around the world after retirement.",
 'topic': 'age',

 # transformed data
 # - these are token ids of each token of DIALOGUE as generated by tokenizer
 'input_ids': tensor([[ 8779,   140,   125,  2817,    16,    48,  3582,     5,  1713,   345,
          13515,   536,  4663,    10,   571,   625,    19, 17017,    58,  1713,
            345, 13515,   357,  4663,    10,   216,    31,     7,  1401,     5,
            149,   625,    19,  2549,    58,  1713,   345, 13515,   536,  4663,
              ...
              0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
              0,     0]]),

 # - these are token ids of each token of SUMMARY as generated by tokenizer
 'labels': tensor([[ 1713,   345, 13515,   536,  4663,    11,  1713,   345, 13515,   357,
           4663,    33,  2508,    81,    70,  1791,    31,     3,  2568,     5,
           1713,   345, 13515,   357,  4663,    31,     7,  2353,  2746,    12,
           1111,   300,     8,   296,   227,  6576,     5,     1,     0,     0,
              ...

              0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
              0,     0]])}
```

- Each model has its maximum sequence length
- We need to handle this while tokenizing the text sequences for fine tuning
- We get error of this kind if we pass a sequence longer than that required by the model

```Token indices sequence length is longer than the specified maximum sequence length for this model (522 > 512).
Running this sequence through the model will result in indexing errors```

- Special methods like padding and truncation exist to handle cases of sequence length

In [11]:
from datasets import concatenate_datasets

# The maximum total input sequence length after tokenization.
# Sequences longer than this will be truncated, sequences shorter will be padded.
tokenized_inputs = concatenate_datasets([dataset["train"], dataset["test"]]).map(lambda x: tokenizer(x["dialogue"], truncation=True), batched=True, remove_columns=["dialogue", "summary"])
max_source_length = max([len(x) for x in tokenized_inputs["input_ids"]])
print(f"Max source length: {max_source_length}")

# The maximum total sequence length for target text after tokenization.
# Sequences longer than this will be truncated, sequences shorter will be padded."
tokenized_targets = concatenate_datasets([dataset["train"], dataset["test"]]).map(lambda x: tokenizer(x["summary"], truncation=True), batched=True, remove_columns=["dialogue", "summary"])
max_target_length = max([len(x) for x in tokenized_targets["input_ids"]])
print(f"Max target length: {max_target_length}")


Map:   0%|          | 0/13960 [00:00<?, ? examples/s]

Max source length: 512


Map:   0%|          | 0/13960 [00:00<?, ? examples/s]

Max target length: 277


In [None]:
def summary_task_tokenize_util(sample, tokenizer, padding='max_length'):

    print('batch_size',len(sample['dialogue']))


    # get each dialog in a batch and add instruction
    inputs = [f"Summarize:\n" + item for item in sample["dialogue"]]

    # tokenize each input in a batch
    model_inputs = tokenizer(inputs, max_length=max_source_length, padding=padding, truncation=True)

    # labels
    labels = tokenizer(text_target=sample["summary"], max_length=max_target_length, padding=padding, truncation=True)

    # batch label cleaning
    # remove tokenizer pad_token and default to -100
    # if valid label i.e if not tokenizer pad_token keep token as it is
    if padding == "max_length":
        labels["input_ids"] = [
            [(l if l != tokenizer.pad_token_id else -100) for l in label] for label in labels["input_ids"]
        ]

    model_inputs["labels"] = labels["input_ids"]

    ############################################################
    # ^^^^^ setup inputs and outputs for the model ^^^^^
    # tokenize prompt for model fine tuning
    # for parallelization we need to make sure that model receives same sized vectors
    # due to varying sizes of text sequences the sequence can be shorter or longer
    # - padding if input length is shorter than that required by the model
    # - truncation if input length is greater than that required by model

    # more about padding and truncation here
    # https://huggingface.co/docs/transformers/pad_truncation
    ############################################################

    ############################################################
    # more optimized way for padding
    # transformers.DataCollatorForSeq2Seq
    # it dynamically pads sequences as it receives labels
    ############################################################

    return model_inputs


# sample test
# t = summary_task_tokenize_util(sample=dataset["train"][1000], tokenizer=tokenizer)
# t["input_ids"].shape

# mapping this function across the dataset
# - todo batched = True not working figure this out later
tokenized_datasets = dataset.map(
    summary_task_tokenize_util,
    batched=True,
    fn_kwargs={"tokenizer": tokenizer},
    remove_columns=["id", "topic", "dialogue", "summary"],
)

Map:   0%|          | 0/12460 [00:00<?, ? examples/s]

batch_size 1000
batch_size 1000
batch_size 1000
batch_size 1000
batch_size 1000
batch_size 1000
batch_size 1000
batch_size 1000
batch_size 1000
batch_size 1000
batch_size 1000
batch_size 1000
batch_size 460


Map:   0%|          | 0/1500 [00:00<?, ? examples/s]

batch_size 1000
batch_size 500


Map:   0%|          | 0/500 [00:00<?, ? examples/s]

batch_size 500


In [None]:
tokenized_datasets

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 12460
    })
    test: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 1500
    })
    validation: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 500
    })
})

In [None]:
# reducing size of dataset by filtering for finetuning on small set
tokenized_datasets = tokenized_datasets.filter(
    lambda example, index: index % 100 == 0, with_indices=True
)

Filter:   0%|          | 0/12460 [00:00<?, ? examples/s]

Filter:   0%|          | 0/1500 [00:00<?, ? examples/s]

Filter:   0%|          | 0/500 [00:00<?, ? examples/s]

In [None]:
tokenized_datasets

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 125
    })
    test: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 15
    })
    validation: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 5
    })
})

# total parameters

In [None]:
total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
# numel returns total number of elements in the tensor
# summing number of elements in each set of parameters will give us total parameters

print(f"""total number of parameters : {numerize.numerize(total_params)}""")

total number of parameters : 247.58M


# fine tuning LLM

# rouge_score

In [None]:
rouge = evaluate.load("rouge")


def compute_metrics(eval_pred):
    """
    Reference : https://huggingface.co/docs/transformers/tasks/summarization
    """

    # get predictions and ground truth
    predictions, labels = eval_pred

    # decode prediction token ids
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)

    # decode ground truth token ids
    # -100 is pad token id in NLP tasks
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    # use decoded_preds, decoded_labels to get rouge score
    result = rouge.compute(
        predictions=decoded_preds, references=decoded_labels, use_stemmer=True
    )

    prediction_lens = [
        np.count_nonzero(pred != tokenizer.pad_token_id) for pred in predictions
    ]
    result["gen_len"] = np.mean(prediction_lens)

    return {k: round(v, 4) for k, v in result.items()}

In [None]:
from transformers import DataCollatorForSeq2Seq

# we want to ignore tokenizer pad token in the loss
label_pad_token_id = -100

# Data collator
data_collator = DataCollatorForSeq2Seq(
    tokenizer,
    model=model,
    label_pad_token_id=label_pad_token_id,
    pad_to_multiple_of=8
)


In [None]:
from huggingface_hub import HfFolder
from transformers import Seq2SeqTrainer, Seq2SeqTrainingArguments

# Hugging Face repository id
# repository_id = f"{model_id.split('/')[1]}-{dataset_id}"

# Define training args
training_args = Seq2SeqTrainingArguments(
    output_dir='./models',
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    predict_with_generate=True,
    fp16=False, # Overflows with fp16
    learning_rate=5e-5,
    num_train_epochs=10,
    # logging & evaluation strategies
    logging_dir=f"./logs",
    logging_strategy="steps",
    logging_steps=100,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=2,
    load_best_model_at_end=True,
    # metric_for_best_model="overall_f1",
    # push to hub parameters
    report_to="tensorboard",
    push_to_hub=False,
    # hub_strategy="every_save",
    # hub_model_id=repository_id,
    # hub_token=HfFolder.get_token(),
)

# Create Trainer instance
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    compute_metrics=compute_metrics,
)


In [None]:
trainer.train()

You're using a T5TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len
1,No log,1.409896,0.2674,0.0694,0.2333,0.2325,18.4667
2,No log,1.354687,0.2834,0.0692,0.2427,0.2413,18.8
3,No log,1.330729,0.286,0.0688,0.2434,0.2418,18.4667
4,No log,1.319271,0.3174,0.0951,0.2683,0.2672,18.4667
5,No log,1.315104,0.3221,0.094,0.2645,0.2629,18.8
6,No log,1.315104,0.311,0.0865,0.2706,0.2689,18.4
7,1.481400,1.315104,0.311,0.0865,0.2706,0.2689,18.4
8,1.481400,1.315104,0.311,0.0865,0.2706,0.2689,18.4
9,1.481400,1.307292,0.311,0.0865,0.2706,0.2689,18.4
10,1.481400,1.307292,0.311,0.0865,0.2706,0.2689,18.4


TrainOutput(global_step=160, training_loss=1.44990234375, metrics={'train_runtime': 434.4873, 'train_samples_per_second': 2.877, 'train_steps_per_second': 0.368, 'total_flos': 855946690560000.0, 'train_loss': 1.44990234375, 'epoch': 10.0})

In [2]:
! huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|
    
    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Token: 
Add token as git credential? (Y/n) n
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [None]:
# model.push_to_hub(repo_id='flan-t5-dialogsum')

pytorch_model.bin:   0%|          | 0.00/495M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/sagarshf/flan-t5-dialogsum/commit/f4926beca989269b4112a7b26de69d2ce584b663', commit_message='Upload T5ForConditionalGeneration', commit_description='', oid='f4926beca989269b4112a7b26de69d2ce584b663', pr_url=None, pr_revision=None, pr_num=None)

In [None]:
# tokenizer.push_to_hub(repo_id="https://huggingface.co/sagarshf/flan-t5-dialogsum")

# load trained model

In [30]:
from transformers import AutoModel

# original model (NOT fine tuned)
model_id = "google/flan-t5-base"
original_model = model = AutoModelForSeq2SeqLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)

# Load tokenizer of FLAN-t5-base
tokenizer = AutoTokenizer.from_pretrained(model_id, torch_dtype=torch.bfloat16)

# Load fully fine tuned model
trained_model_id = "sagarshf/flan-t5-dialogsum"
model = AutoModelForSeq2SeqLM.from_pretrained(trained_model_id, torch_dtype=torch.bfloat16, revision='f4926beca989269b4112a7b26de69d2ce584b663')

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

# Qualitative evaluation

In [None]:
original_model.to('cuda:0')
model.to('cuda:0')

In [42]:
def inference_summarize(model, dialogue, ref_summary, tokenizer):
    prompt = f"""Summarize:\n{dialogue}"""

    inputs = tokenizer(prompt, return_tensors='pt').to('cuda:0')
    output = tokenizer.decode(
        model.generate(
            inputs["input_ids"],
            max_new_tokens=100,
        )[0],
        skip_special_tokens=True
    )

    return (prompt, ref_summary, output)

def print_inference_summary(prompt, summary, output, which_model='MODEL', summary_only=False):
    dash_line = '-'.join('' for x in range(100))
    if not summary_only:
        print(dash_line)
        print(f'INPUT PROMPT:\n{prompt}')
        print(dash_line)
        pprint(f'BASELINE HUMAN SUMMARY:\n{summary}\n')
        print(dash_line)
        pprint(f'{which_model} GENERATION - ZERO SHOT:\n{output}')
    else:
        print(dash_line)
        pprint(f'{which_model} GENERATION - ZERO SHOT:\n{output}')



check_index = int(np.random.randint(0,100,1)[0])
print(check_index)

dialogue = dataset['test'][check_index]['dialogue']
summary = dataset['test'][check_index]['summary']

print_inference_summary(*inference_summarize(model, dialogue, ref_summary=summary, tokenizer=tokenizer), which_model='Full fine tuned MODEL')
print_inference_summary(*inference_summarize(original_model, dialogue, ref_summary=summary, tokenizer=tokenizer), which_model='NON fine tuned MODEL', summary_only=True)

4
---------------------------------------------------------------------------------------------------
INPUT PROMPT:
Summarize:
#Person1#: You're finally here! What took so long?
#Person2#: I got stuck in traffic again. There was a terrible traffic jam near the Carrefour intersection.
#Person1#: It's always rather congested down there during rush hour. Maybe you should try to find a different route to get home.
#Person2#: I don't think it can be avoided, to be honest.
#Person1#: perhaps it would be better if you started taking public transport system to work.
#Person2#: I think it's something that I'll have to consider. The public transport system is pretty good.
#Person1#: It would be better for the environment, too.
#Person2#: I know. I feel bad about how much my car is adding to the pollution problem in this city.
#Person1#: Taking the subway would be a lot less stressful than driving as well.
#Person2#: The only problem is that I'm going to really miss having the freedom that you ha

# Quantitative evaluation

In [23]:
len(dataset['test'])

1500

In [47]:
eval_res = dict(prompt=[], reference_summary=[], original_model_summary=[], finetuned_model_summary=[])

for i in tqdm(range(300)):
    dialogue = dataset['test'][i]['dialogue']
    summary = dataset['test'][i]['summary']
    p,r,o = inference_summarize(model, dialogue, ref_summary=summary, tokenizer=tokenizer)
    _,_,o_ = inference_summarize(original_model, dialogue, ref_summary=summary, tokenizer=tokenizer)

    eval_res['prompt'].append(p)
    eval_res['reference_summary'].append(r)
    eval_res['original_model_summary'].append(o_)
    eval_res['finetuned_model_summary'].append(o)

  0%|          | 0/300 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (1022 > 512). Running this sequence through the model will result in indexing errors


In [49]:
df_eval = pd.DataFrame(eval_res)

In [57]:
df_eval

Unnamed: 0,prompt,reference_summary,original_model_summary,finetuned_model_summary
0,"Summarize:\n#Person1#: Ms. Dawson, I need you ...",Ms. Dawson helps #Person1# to write a memo to ...,The memo is to be distributed to all employees...,#Person1# needs #Person2# to take a dictation ...
1,"Summarize:\n#Person1#: Ms. Dawson, I need you ...",In order to prevent employees from wasting tim...,The memo is to be distributed to all employees...,#Person1# needs #Person2# to take a dictation ...
2,"Summarize:\n#Person1#: Ms. Dawson, I need you ...",Ms. Dawson takes a dictation for #Person1# abo...,The memo is to be distributed to all employees...,#Person1# needs #Person2# to take a dictation ...
3,Summarize:\n#Person1#: You're finally here! Wh...,#Person2# arrives late because of traffic jam....,The traffic jam at the Carrefour intersection ...,#Person2# is stuck in traffic. #Person2# think...
4,Summarize:\n#Person1#: You're finally here! Wh...,#Person2# decides to follow #Person1#'s sugges...,The traffic jam at the Carrefour intersection ...,#Person2# is stuck in traffic. #Person2# think...
...,...,...,...,...
295,Summarize:\n#Person1#: Carol telephone.\n#Pers...,"Carol is taking a shower when Carol calls her,...",Carol telephoned Susan.,Carol telephones Carrollite Susan. Carol wants...
296,Summarize:\n#Person1#: Carol telephone.\n#Pers...,Susan calls to ask Carol about the party time....,Carol telephoned Susan.,Carol telephones Carrollite Susan. Carol wants...
297,"Summarize:\n#Person1#: Hey, don't I know you f...",#Person1# thinks that she knows #Person2# some...,The person who is the most familiar to the oth...,#Person2# is not familiar with #Person1#. #Per...
298,"Summarize:\n#Person1#: Hey, don't I know you f...",#Person1# thinks she has met #Person2# somewhe...,The person who is the most familiar to the oth...,#Person2# is not familiar with #Person1#. #Per...


In [80]:
df_eval.to_csv(f"./eval_full_finetuned_model.csv", index=False)

In [64]:
rouge = evaluate.load("rouge")

Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

In [74]:
original_metrics = rouge.compute(predictions=df_eval['original_model_summary'], references=df_eval['reference_summary'])
original_metrics

{'rouge1': 0.22691586493758692,
 'rouge2': 0.07137069084457659,
 'rougeL': 0.19226951805896492,
 'rougeLsum': 0.1926809334543114}

In [75]:
finetuned_metrics = rouge.compute(predictions=df_eval['finetuned_model_summary'], references=df_eval['reference_summary'])
finetuned_metrics

{'rouge1': 0.3539014193991718,
 'rouge2': 0.12378436691006199,
 'rougeL': 0.29223416316814044,
 'rougeLsum': 0.2926295755153629}

In [79]:
perc_increase = {}
for i in ['rouge1', 'rouge2', 'rougeL', 'rougeLsum']:
    orig_score, fine_score = original_metrics[i], finetuned_metrics[i]
    inc = f"{round((fine_score-orig_score)*100/orig_score)}%"
    perc_increase[i] = inc

perc_increase

{'rouge1': '56%', 'rouge2': '73%', 'rougeL': '52%', 'rougeLsum': '52%'}


- We can see that with small set used for fine tuning we can get massive increase in ROUGE scores