In [17]:
from datasets import load_dataset, Dataset
from transformers import (
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    AutoTokenizer,
    set_seed
)
import transformers
import torch
from peft import PeftModel
import pandas as pd
import evaluate
import numpy as np

In [2]:
# Read the test CSV; later cells index its 'prompt' and 'pipeline' columns.
df = pd.read_csv("./cva_gst_dataset/test.csv")

In [3]:
# Wrap the DataFrame as a `datasets.Dataset`; the evaluation cells below index
# and slice it by row and read the 'prompt' / 'pipeline' fields.
dataset = Dataset.from_pandas(df)

##  Evaluate the Model Qualitatively (Human Evaluation)

In [4]:
def get_formatted_prompt(prompt):
    """Wrap a JSON configuration in the instruction template used for generation.

    Args:
        prompt: The JSON configuration text (formatted with ``str.format``
            semantics, exactly as an f-string placeholder would).

    Returns:
        str: Task line, "Input JSON:" header, the configuration, and a
        trailing "Output:" header followed by a newline.
    """
    template_lines = (
        "Task: Convert the JSON configuration below into a valid gst-launch-1.0 pipeline command.",
        "Input JSON:",
        f"{prompt}",
        "Output:",
        "",  # empty last element yields the trailing newline after "Output:"
    )
    return "\n".join(template_lines)

In [5]:
# Quantization settings passed as `quantization_config` when the model is loaded:
# 4-bit weights, 'nf4' quant type, float16 compute dtype, double quantization off.
compute_dtype = torch.float16
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=False,
)

In [6]:
# Checkpoint id reused by the model-loading and tokenizer-loading cells.
model_name = 'microsoft/phi-2'

# Load the model with the 4-bit quantization config defined above.
# `use_auth_token=True` was removed: the argument is deprecated in transformers,
# and forcing a token makes loading fail for anyone not logged in to the Hub.
# NOTE(review): this assumes the checkpoint is public (not gated) — confirm.
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map='auto',
    quantization_config=bnb_config,
    trust_remote_code=True,
)



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [7]:
def generate_text_from_model(model, prompt, tokenizer=None, max_new_tokens=500):
    """Sample one completion for ``prompt`` and return the decoded text.

    Args:
        model: Object exposing ``generate(**inputs, ...)``. Its ``device``
            attribute, when present, decides where the inputs are placed.
        prompt: Text handed to the tokenizer.
        tokenizer: Tokenizer used for encoding and decoding. Defaults to the
            notebook-level ``eval_tokenizer`` so existing two-argument calls
            keep working.
        max_new_tokens: Upper bound on generated tokens (default 500, the
            value previously hard-coded).

    Returns:
        list[str]: Result of ``batch_decode`` with special tokens skipped.
        Callers in this notebook split the first element on ``'Output:\\n'``,
        i.e. they expect the decoded text to still contain the prompt.
    """
    tok = eval_tokenizer if tokenizer is None else tokenizer

    # Follow the model's own device instead of assuming CUDA; fall back to the
    # previous hard-coded "cuda" if the model does not expose `.device`.
    device = getattr(model, "device", "cuda")
    inputs = tok(prompt, return_tensors="pt").to(device)

    generated = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        num_return_sequences=1,
        temperature=0.1,
        num_beams=1,
        top_p=0.95,
        # Passing the pad id explicitly avoids the per-call
        # "Setting `pad_token_id` to `eos_token_id`" warning.
        pad_token_id=getattr(tok, "pad_token_id", None),
    )
    return tok.batch_decode(generated.to("cpu"), skip_special_tokens=True)

In [8]:
# Tokenizer for the same checkpoint as the model; used by
# `generate_text_from_model` for both encoding prompts and decoding outputs.
eval_tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    add_bos_token=True,
    trust_remote_code=True,
    use_fast=False,
)
# Reuse the end-of-sequence token as the padding token.
eval_tokenizer.pad_token = eval_tokenizer.eos_token

In [9]:
# Attach the fine-tuned adapter checkpoint to `base_model`, in float16 and with
# training disabled (inference only).
ft_model = PeftModel.from_pretrained(
    base_model,
    "peft_GST_pipeline_training_QLora/final-checkpoint/checkpoint-100",
    torch_dtype=torch.float16,
    is_trainable=False,
)

In [10]:
%%time
# Generation samples (do_sample=True in generate_text_from_model), so fix the
# seed to keep this demo repeatable.
seed = 42
set_seed(seed)

# Row of the test set to inspect.
index = 10
sample = dataset[index]
sample_prompt = sample['prompt']
sample_pipeline = sample['pipeline']

formatted_prompt = get_formatted_prompt(sample_prompt)

# Keep the text that follows the 'Output:\n' marker, then cut it off at the
# first '#End' marker (the whole text is kept when '#End' is absent).
peft_model_res = generate_text_from_model(ft_model, formatted_prompt)
peft_model_output = peft_model_res[0].split('Output:\n')[1]
output = peft_model_output.partition('#End')[0]

# Separator of 99 dashes printed before each section.
dash_line = '-' * 99
report_sections = (
    f'INPUT PROMPT:\n{formatted_prompt}\n',
    f'GROUND TRUTH:\n{sample_pipeline}\n',
    f'PEFT MODEL:\n{output}',
)
for section in report_sections:
    print(dash_line)
    print(section)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


---------------------------------------------------------------------------------------------------
INPUT PROMPT:
Task: Convert the JSON configuration below into a valid gst-launch-1.0 pipeline command.
Input JSON:
{"source_type": "file", "source_location": "/opt/data/retail/storage_7/cam_10.mp4", "video_scale": {"width": 1044, "height": 958}, "detect": {"model": "/var/lib/models/onnx/recognition/5.0/FP16/model_4960.xml", "device": "HDDL", "model_proc": "/opt/intel/model_proc/classification/model_9754.json", "threshold": 0.74, "inference_interval": 7}, "track": {"tracking_type": "short-term-imageless"}, "inference": [{"model": "/usr/local/models/intel/detection/5.0/INT8/model_3984.xml", "device": "HDDL", "inference_region": "roi-list"}, {"model": "/usr/share/models/openvino/classification/2.1/INT8/model_2305.xml", "device": "CPU", "inference_region": "roi-list"}, {"model": "/home/user/.local/models/pytorch/detection/2.1/INT8/model_4294.xml", "device": "CPU", "inference_region": "roi-li

## Evaluate the Model Quantitatively (with ROUGE Metric)

In [11]:
# Load a second copy of the base checkpoint for the base-vs-PEFT comparison.
# NOTE(review): presumably reloaded because the earlier `base_model` object was
# handed to `PeftModel.from_pretrained` and now carries the adapter — confirm.
# `use_auth_token=True` was removed: the argument is deprecated in transformers,
# and forcing a token makes loading fail for anyone not logged in to the Hub.
# NOTE(review): this assumes the checkpoint is public (not gated) — confirm.
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map='auto',
    quantization_config=bnb_config,
    trust_remote_code=True,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [12]:
# Evaluate on the first 10 rows; slice once and read both columns from the
# same batch so prompts and targets stay aligned.
eval_batch = dataset[0:10]
prompts = eval_batch['prompt']
target_pipelines = eval_batch['pipeline']

base_model_pipelines = []
peft_model_pipelines = []

for prompt in prompts:
    formatted_prompt = get_formatted_prompt(prompt)

    # Keep only the text that follows the 'Output:\n' marker.
    base_model_res = generate_text_from_model(base_model, formatted_prompt)
    base_model_text_output = base_model_res[0].split('Output:\n')[1]

    peft_model_res = generate_text_from_model(ft_model, formatted_prompt)
    peft_model_output = peft_model_res[0].split('Output:\n')[1]
    # Only the PEFT output is cut at the '#End' marker; the base model output
    # is kept in full.
    peft_model_text_output = peft_model_output.partition('#End')[0]

    base_model_pipelines.append(base_model_text_output)
    peft_model_pipelines.append(peft_model_text_output)

# Side-by-side table of targets and generations. Named `results_df` rather
# than `df` so the test-set DataFrame loaded at the top of the notebook is not
# overwritten (re-running `Dataset.from_pandas(df)` would otherwise silently
# build the dataset from this table).
results_df = pd.DataFrame(
    {
        'target_pipelines': target_pipelines,
        'base_model_pipelines': base_model_pipelines,
        'peft_model_pipelines': peft_model_pipelines,
    }
)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

In [15]:
# Score both sets of generations against the same references with ROUGE.
rouge = evaluate.load('rouge')

# Options shared by both scoring calls.
rouge_options = dict(use_aggregator=True, use_stemmer=True)

base_model_results = rouge.compute(
    predictions=base_model_pipelines,
    references=target_pipelines[:len(base_model_pipelines)],
    **rouge_options,
)

peft_model_results = rouge.compute(
    predictions=peft_model_pipelines,
    references=target_pipelines[:len(peft_model_pipelines)],
    **rouge_options,
)

for heading, scores in (
    ('ORIGINAL MODEL:', base_model_results),
    ('PEFT MODEL:', peft_model_results),
):
    print(heading)
    print(scores)


ORIGINAL MODEL:
{'rouge1': 0.7407341611232064, 'rouge2': 0.6067449163123264, 'rougeL': 0.6902516056002185, 'rougeLsum': 0.6899260396914986}
PEFT MODEL:
{'rouge1': 0.9031376045870323, 'rouge2': 0.8813577342009995, 'rougeL': 0.8885402189318237, 'rougeLsum': 0.8889637696507235}


In [18]:
print("Absolute percentage improvement of PEFT MODEL over ORIGINAL MODEL")

# Pair the scores by metric name rather than relying on both result dicts
# listing their values in the same order. The printed figure is the difference
# between the two scores multiplied by 100.
improvement = np.array(
    [peft_model_results[key] - base_model_results[key] for key in peft_model_results]
)
for key, value in zip(peft_model_results.keys(), improvement):
    print(f'{key}: {value*100:.2f}%')

Absolute percentage improvement of PEFT MODEL over ORIGINAL MODEL
rouge1: 16.24%
rouge2: 27.46%
rougeL: 19.83%
rougeLsum: 19.90%
