In [1]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [2]:
!git config --global credential.helper store

In [3]:
from datasets import load_dataset

billsum = load_dataset("billsum", split="ca_test")

Downloading readme:   0%|          | 0.00/7.27k [00:00<?, ?B/s]

Downloading data: 100%|██████████| 91.8M/91.8M [00:05<00:00, 15.9MB/s]
Downloading data: 100%|██████████| 15.8M/15.8M [00:00<00:00, 35.0MB/s]
Downloading data: 100%|██████████| 6.12M/6.12M [00:00<00:00, 27.0MB/s]


Generating train split:   0%|          | 0/18949 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/3269 [00:00<?, ? examples/s]

Generating ca_test split:   0%|          | 0/1237 [00:00<?, ? examples/s]

In [4]:
billsum = billsum.train_test_split(test_size=0.2)

In [5]:
billsum["train"][0]

{'text': 'The people of the State of California do enact as follows:\n\n\nSECTION 1.\nSection 120500 of the Health and Safety Code is amended to read:\n120500.\nAs used in the Communicable Disease Prevention and Control Act, defined in Section 27, “sexually transmitted diseases” means diseases that are primarily transmitted through sexual contact.\nSEC. 2.\nSection 120505 of the Health and Safety Code is amended to read:\n120505.\nThe department shall develop and review plans and provide leadership and consultation for, and participate in, a program for the prevention and control of sexually transmitted diseases.\nSEC. 3.\nSection 120510 of the Health and Safety Code is amended to read:\n120510.\nThe department shall cooperate in the prevention, control, and cure of sexually transmitted diseases with all of the following:\n(a) Physicians and surgeons.\n(b) Medical schools.\n(c) Public and private hospitals, dispensaries, and clinics.\n(d) Administrators of public and private elementary

In [6]:
from transformers import AutoTokenizer

checkpoint = "google-t5/t5-small"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

In [7]:
prefix = "summarize: "


def preprocess_function(examples):
    inputs = [prefix + doc for doc in examples["text"]]
    model_inputs = tokenizer(inputs, max_length=1024, truncation=True)

    labels = tokenizer(text_target=examples["summary"], max_length=128, truncation=True)

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

In [8]:
tokenized_billsum = billsum.map(preprocess_function, batched=True)

Map:   0%|          | 0/989 [00:00<?, ? examples/s]

Map:   0%|          | 0/248 [00:00<?, ? examples/s]

In [9]:
from transformers import DataCollatorForSeq2Seq

data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=checkpoint)

In [10]:
import evaluate

rouge = evaluate.load("rouge")

In [11]:
import numpy as np


def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    result = rouge.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)

    prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in predictions]
    result["gen_len"] = np.mean(prediction_lens)

    return {k: round(v, 4) for k, v in result.items()}

In [12]:
from transformers import AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer

model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

In [13]:
training_args = Seq2SeqTrainingArguments(
    output_dir="finetune_model",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    weight_decay=0.01,
    save_total_limit=3,
    num_train_epochs=10,
    predict_with_generate=True,
    use_ipex=True,
    fp16=True,
    push_to_hub=True,
)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_billsum["train"],
    eval_dataset=tokenized_billsum["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()

  warn(
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len
1,No log,2.776521,0.122,0.0343,0.1017,0.102,19.0
2,No log,2.534224,0.151,0.0626,0.1265,0.1264,19.0
3,No log,2.439753,0.1761,0.0767,0.1469,0.1469,19.0
4,No log,2.392098,0.1977,0.0919,0.1659,0.1659,19.0
5,No log,2.362346,0.1985,0.0932,0.1659,0.166,19.0
6,No log,2.342689,0.1977,0.0937,0.1659,0.1659,19.0
7,No log,2.330341,0.1973,0.0935,0.1658,0.1658,19.0
8,No log,2.322132,0.1959,0.0926,0.1644,0.1644,19.0
9,2.585600,2.317513,0.1967,0.094,0.1649,0.165,19.0
10,2.585600,2.315709,0.1966,0.0943,0.1649,0.165,19.0


2024-03-27 21:46:16,136 - absl - INFO - Using default tokenizer.
2024-03-27 21:46:40,137 - absl - INFO - Using default tokenizer.
2024-03-27 21:47:04,169 - absl - INFO - Using default tokenizer.
2024-03-27 21:47:28,219 - absl - INFO - Using default tokenizer.
2024-03-27 21:47:52,291 - absl - INFO - Using default tokenizer.
2024-03-27 21:48:16,338 - absl - INFO - Using default tokenizer.
2024-03-27 21:48:40,408 - absl - INFO - Using default tokenizer.
2024-03-27 21:49:04,548 - absl - INFO - Using default tokenizer.
'(ProtocolError('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer')), '(Request ID: 92f7356a-c87a-4667-8ec6-6679412f8c86)')' thrown while requesting PUT https://hf-hub-lfs-us-east-1.s3-accelerate.amazonaws.com/repos/64/68/64685be0e4078737240cb0e2b740c01646d8d76d56e887d3d462bfdc292dc273/a4756e18e742042da3a9fedeb51687f1e49f28bb368dc9b645d20aa4db8d24d5?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIA2JU7TKAQFN2

TrainOutput(global_step=620, training_loss=2.5335489580708166, metrics={'train_runtime': 251.8796, 'train_samples_per_second': 39.265, 'train_steps_per_second': 2.461, 'total_flos': 2677060833116160.0, 'train_loss': 2.5335489580708166, 'epoch': 10.0})

In [15]:
text = "summarize: The Inflation Reduction Act lowers prescription drug costs, health care costs, and energy costs. It's the most aggressive action on tackling the climate crisis in American history, which will lift up American workers and create good-paying, union jobs across the country. It'll lower the deficit and ask the ultra-wealthy and corporations to pay their fair share. And no one making under $400,000 per year will pay a penny more in taxes."

In [16]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from torch import no_grad

# Load your fine-tuned model and tokenizer
model_path = "finetune_model"  # Change this to your model's directory
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSeq2SeqLM.from_pretrained(model_path)

def summarize(text):
    # Encode the text input
    inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True, padding="max_length")

    # Generate summary with your model
    with no_grad():
        summary_ids = model.generate(inputs['input_ids'], max_length=150, num_beams=4, early_stopping=True)

    # Decode the generated ids to a text string
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return summary

# Example text
text = "summarize: The Inflation Reduction Act lowers prescription drug costs, health care costs, and energy costs. It's the most aggressive action on tackling the climate crisis in American history, which will lift up American workers and create good-paying, union jobs across the country. It'll lower the deficit and ask the ultra-wealthy and corporations to pay their fair share. And no one making under $400,000 per year will pay a penny more in taxes."
# Get the summary
summary = summarize(text)
print(summary)


The Inflation Reduction Act lowers prescription drug costs, health care costs, and energy costs. It's the most aggressive action on tackling the climate crisis in American history, which will lift up American workers and create good-paying, union jobs across the country. It will ask the ultra-wealthy and corporations to pay their fair share.


In [17]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
import numpy as np
import time

model_checkpoint = "finetune_model"  # Change this to your model's directory
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)

# Prepare your text
text = "summarize: The Inflation Reduction Act lowers prescription drug costs, health care costs, and energy costs. It's the most aggressive action on tackling the climate crisis in American history, which will lift up American workers and create good-paying, union jobs across the country. It'll lower the deficit and ask the ultra-wealthy and corporations to pay their fair share. And no one making under $400,000 per year will pay a penny more in taxes."
inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True, padding="max_length")

# Ensure model is in evaluation mode
model.eval()

# Check if CUDA (GPU support) is available and use it, otherwise fall back to CPU

model = model.to("xpu")
inputs = inputs.to("xpu")

num_times = 100
warmup_rounds = 20

latency_list = []
for i in range(num_times):
    time_start = time.time()
    with torch.no_grad():
        outputs = model.generate(inputs['input_ids'], max_length=150, num_beams=4, early_stopping=True)
    if i >= warmup_rounds:
        latency_list.append(time.time() - time_start)

fp32_inference_time = np.mean(latency_list)
print(f"Inference time for FP32 took {fp32_inference_time:.3f} seconds")


Inference time for FP32 took 0.855 seconds


In [18]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
import numpy as np
import time
import intel_extension_for_pytorch as ipex

model_checkpoint = "finetune_model"  # Change this to your model's directory
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)

# Prepare your text
text = "summarize: The Inflation Reduction Act lowers prescription drug costs, health care costs, and energy costs. It's the most aggressive action on tackling the climate crisis in American history, which will lift up American workers and create good-paying, union jobs across the country. It'll lower the deficit and ask the ultra-wealthy and corporations to pay their fair share. And no one making under $400,000 per year will pay a penny more in taxes."
inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True, padding="max_length")

# Ensure model is in evaluation mode
model.eval()

# Check if CUDA (GPU support) is available and use it, otherwise fall back to CPU
model = model.to("xpu")
inputs = inputs.to("xpu")

num_times = 100
warmup_rounds = 20

model = ipex.optimize(model, dtype=torch.bfloat16)
latency_list = []
with torch.no_grad():
    ########################### code changes ########################
    with torch.xpu.amp.autocast(enabled=True, dtype=torch.bfloat16):
    ########################### code changes ########################
        for i in range(num_times):
            time_start = time.time()
            outputs = model.generate(inputs['input_ids'], max_length=150, num_beams=4, early_stopping=True)
            if i >= warmup_rounds:
                latency_list.append(time.time() - time_start)

fp32_inference_time = np.mean(latency_list)
print(f"Inference time for FP32 took {fp32_inference_time:.3f} seconds")



Inference time for FP32 took 0.971 seconds
