In [1]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig, TrainingArguments, Trainer
import torch
import time
import evaluate
import pandas as pd
import numpy as np
from fedn.utils.helpers.helpers import get_helper, save_metadata, save_metrics
from peft import LoraConfig, get_peft_model, TaskType, PeftModel, PeftConfig
import collections

In [2]:
# Name of the FEDn helper implementation requested from get_helper().
HELPER_MODULE = 'numpyhelper'
# Helper instance; load_parameters() calls helper.load() on it to read parameter files.
helper = get_helper(HELPER_MODULE)

In [3]:
def compile_model(base_model_name="google/flan-t5-base",
                  adapter_path='/Users/nash/Project/peft/peft-dialogue-summary-checkpoint-local'):
    """Build the PEFT-wrapped seq2seq model that receives the loaded weights.

    The base checkpoint is loaded in bfloat16 and wrapped with the adapter
    stored at ``adapter_path`` with ``is_trainable=False``.

    :param base_model_name: Model id or path passed to
        ``AutoModelForSeq2SeqLM.from_pretrained``.
    :type base_model_name: str
    :param adapter_path: Location of the saved PEFT adapter passed to
        ``PeftModel.from_pretrained``.
        NOTE(review): the default is a machine-specific absolute path; pass an
        explicit value when running on another machine.
    :type adapter_path: str
    :return: The base model wrapped with the adapter.
    :rtype: peft.PeftModel
    """
    peft_model_base = AutoModelForSeq2SeqLM.from_pretrained(
        base_model_name,
        torch_dtype=torch.bfloat16,
    )

    # Only the model is returned, so no tokenizer is loaded here.
    peft_model = PeftModel.from_pretrained(
        peft_model_base,
        adapter_path,
        torch_dtype=torch.bfloat16,
        is_trainable=False,
    )
    return peft_model

In [4]:
def load_parameters(model_path):
    """Load model parameters from file and populate a freshly compiled model.

    The arrays returned by ``helper.load`` are paired, in order, with the keys
    of the compiled model's ``state_dict()`` and loaded with ``strict=True``.

    :param model_path: The path to load from.
    :type model_path: str
    :return: The loaded model.
    :rtype: torch.nn.Module
    :raises ValueError: If the number of stored arrays differs from the number
        of entries in the model's state dict.
    """
    parameters_np = list(helper.load(model_path))
    model = compile_model()
    keys = list(model.state_dict().keys())

    # zip() stops at the shorter input, so surplus arrays in the file would be
    # dropped silently; check the count explicitly before pairing.
    if len(parameters_np) != len(keys):
        raise ValueError(
            f"Parameter count mismatch: file '{model_path}' holds "
            f"{len(parameters_np)} arrays but the model expects {len(keys)}."
        )

    state_dict = collections.OrderedDict(
        (key, torch.tensor(x)) for key, x in zip(keys, parameters_np)
    )
    model.load_state_dict(state_dict, strict=True)
    return model

In [5]:
# Parameter file that is passed to load_parameters() in the next cell.
# NOTE(review): machine-specific absolute path; adjust before running elsewhere.
in_model_path = '/Users/nash/Downloads/147c5fce-6219-4e17-a8cc-763ad9bd4028'

In [10]:
# Compile the model and fill it with the parameters stored at in_model_path.
fedn_model = load_parameters(in_model_path)

In [11]:
from datasets import load_dataset

# Identifier of the dataset handed to load_dataset(); the 'test' split is used for evaluation below.
huggingface_dataset_name = "knkarthick/dialogsum"

dataset = load_dataset(huggingface_dataset_name)

In [13]:
# Baseline for comparison: the checkpoint as published, loaded in bfloat16,
# together with the tokenizer of the same checkpoint id.
model_name='google/flan-t5-base'

tokenizer = AutoTokenizer.from_pretrained(model_name)
original_model = AutoModelForSeq2SeqLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
)

In [14]:
# Number of leading test-split rows summarized by each model.
N_EVAL_SAMPLES = 10

# Slice the split once and read both columns from the same batch.
test_batch = dataset['test'][0:N_EVAL_SAMPLES]
dialogues = test_batch['dialogue']
human_baseline_summaries = test_batch['summary']

# The generation settings are the same for every call, so build them once
# instead of twice per loop iteration.
generation_config = GenerationConfig(max_new_tokens=200)

original_model_summaries = []
fedn_model_summaries = []

for dialogue in dialogues:
    prompt = f"""
Summarize the following conversation.

{dialogue}

Summary: """

    # Both models share the tokenizer, so the prompt is encoded once per dialogue.
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids

    original_model_outputs = original_model.generate(input_ids=input_ids, generation_config=generation_config)
    original_model_text_output = tokenizer.decode(original_model_outputs[0], skip_special_tokens=True)

    fedn_model_outputs = fedn_model.generate(input_ids=input_ids, generation_config=generation_config)
    fedn_model_text_output = tokenizer.decode(fedn_model_outputs[0], skip_special_tokens=True)

    original_model_summaries.append(original_model_text_output)
    fedn_model_summaries.append(fedn_model_text_output)

# One row per dialogue: reference summary, baseline output, FEDn-loaded model output.
zipped_summaries = list(zip(human_baseline_summaries, original_model_summaries, fedn_model_summaries))

df = pd.DataFrame(zipped_summaries, columns = ['human_baseline_summaries', 'original_model_summaries', 'fedn_model_summaries'])

In [15]:
# Display the reference summaries next to both models' generated summaries.
df

Unnamed: 0,human_baseline_summaries,original_model_summaries,fedn_model_summaries
0,Ms. Dawson helps #Person1# to write a memo to ...,#Person1#: I need to take a dictation for you.,#Person1#: I need to take a dictation for you.
1,In order to prevent employees from wasting tim...,#Person1#: I need to take a dictation for you.,#Person1#: I need to take a dictation for you.
2,Ms. Dawson takes a dictation for #Person1# abo...,#Person1#: I need to take a dictation for you.,#Person1#: I need to take a dictation for you.
3,#Person2# arrives late because of traffic jam....,The traffic jam at the Carrefour intersection ...,The traffic jam at the Carrefour intersection ...
4,#Person2# decides to follow #Person1#'s sugges...,The traffic jam at the Carrefour intersection ...,The traffic jam at the Carrefour intersection ...
5,#Person2# complains to #Person1# about the tra...,The traffic jam at the Carrefour intersection ...,The traffic jam at the Carrefour intersection ...
6,#Person1# tells Kate that Masha and Hero get d...,Masha and Hero are getting divorced.,Masha and Hero are getting divorced.
7,#Person1# tells Kate that Masha and Hero are g...,Masha and Hero are getting divorced.,Masha and Hero are getting divorced.
8,#Person1# and Kate talk about the divorce betw...,Masha and Hero are getting divorced.,Masha and Hero are getting divorced.
9,#Person1# and Brian are at the birthday party ...,"#Person1#: Happy birthday, Brian. #Person2#: I...","#Person1#: Happy birthday, Brian. #Person2#: I..."


In [16]:
rouge = evaluate.load('rouge')


def _rouge_vs_baseline(predictions):
    """Score ``predictions`` against the equally long prefix of the human summaries."""
    return rouge.compute(
        predictions=predictions,
        references=human_baseline_summaries[0:len(predictions)],
        use_aggregator=True,
        use_stemmer=True,
    )


# Same metric settings for both models so the numbers are directly comparable.
original_model_results = _rouge_vs_baseline(original_model_summaries)
fedn_model_results = _rouge_vs_baseline(fedn_model_summaries)

for heading, scores in (
    ('ORIGINAL MODEL:', original_model_results),
    ('FEDN MODEL:', fedn_model_results),
):
    print(heading)
    print(scores)

ORIGINAL MODEL:
{'rouge1': 0.23966940079036853, 'rouge2': 0.11484057971014493, 'rougeL': 0.21586618876941455, 'rougeLsum': 0.21758185139233527}
FEDN MODEL:
{'rouge1': 0.2437953036915803, 'rouge2': 0.11657459505541348, 'rougeL': 0.22006717130387343, 'rougeLsum': 0.2218015017882039}


In [18]:
#data
results = pd.read_csv("/Users/nash/Project/peft/dialogue-summary-training-results.csv")

In [25]:
# Print the column names, dtypes and non-null counts of the loaded CSV.
results.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1500 entries, 0 to 1499
Data columns (total 5 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   Unnamed: 0                1500 non-null   int64 
 1   human_baseline_summaries  1500 non-null   object
 2   original_model_summaries  1500 non-null   object
 3   instruct_model_summaries  1500 non-null   object
 4   peft_model_summaries      1500 non-null   object
dtypes: int64(1), object(4)
memory usage: 58.7+ KB


In [26]:
# NOTE(review): these assignments replace the summaries generated in the earlier
# cell, and the values labelled "FEDN MODEL" below are read from the CSV column
# 'peft_model_summaries' -- confirm that column really holds FEDn model output.
human_baseline_summaries = results['human_baseline_summaries'].values
original_model_summaries = results['original_model_summaries'].values
fedn_model_summaries = results['peft_model_summaries'].values

# Score both prediction sets with identical settings, in the same order as before.
original_model_results, fedn_model_results = (
    rouge.compute(
        predictions=candidate_summaries,
        references=human_baseline_summaries[0:len(candidate_summaries)],
        use_aggregator=True,
        use_stemmer=True,
    )
    for candidate_summaries in (original_model_summaries, fedn_model_summaries)
)

for heading, scores in (
    ('ORIGINAL MODEL:', original_model_results),
    ('FEDN MODEL:', fedn_model_results),
):
    print(heading)
    print(scores)

ORIGINAL MODEL:
{'rouge1': 0.2331453024822976, 'rouge2': 0.07621937454368599, 'rougeL': 0.20148301431607618, 'rougeLsum': 0.2013860606897664}
FEDN MODEL:
{'rouge1': 0.4081267958884861, 'rouge2': 0.16370094839971364, 'rougeL': 0.3251047913173757, 'rougeLsum': 0.32527800803146933}


In [28]:
print("Absolute percentage improvement of FEDN MODEL over ORIGINAL MODEL")

# Pair the two result dicts by metric name rather than by position, so a
# difference in key order (or a missing metric) cannot silently misalign scores.
# The differences are in ROUGE score units; they are printed multiplied by 100.
improvement = np.array([
    fedn_model_results[key] - original_model_results[key]
    for key in fedn_model_results
])
for key, value in zip(fedn_model_results.keys(), improvement):
    print(f'{key}: {value*100:.2f}%')

Absolute percentage improvement of FEDN MODEL over ORIGINAL MODEL
rouge1: 17.50%
rouge2: 8.75%
rougeL: 12.36%
rougeLsum: 12.39%
