diff --git a/README.md b/README.md
index 6334282..b8ea758 100644
--- a/README.md
+++ b/README.md
@@ -182,7 +182,7 @@ Using the fine-tuned model to generate new text can be done using the `generate_
The arguments for the `generate_text.py` script are as follows:
```
-usage: generate_text.py [-h] --model_config MODEL_CONFIG [--benchmark_mode] [--benchmark_seq_length BENCHMARK_SEQ_LENGTH]
+usage: generate_text.py [-h] --model_config MODEL_CONFIG [--benchmark_mode] [--benchmark_seq_length BENCHMARK_SEQ_LENGTH] [--device DEVICE]
optional arguments:
-h, --help show this help message and exit
@@ -191,6 +191,8 @@ optional arguments:
--benchmark_mode use intel pytorch extension to optimize model.
--benchmark_seq_length BENCHMARK_SEQ_LENGTH
length of generation if benchmark mode is used.
+  --device DEVICE
+ Choose run on cpu or xpu, default is cpu.
```
**Configuration Parameters**
@@ -242,14 +244,17 @@ Within the yaml configuration file, the following optional arguments can be spec
**YAML file** | **Environment Name** | **Configuration** |
| :---: | :---: | :---: |
-`env/intel/text-intel-torch.yml` | `text-intel-torch` | Python=3.9.7, PyTorch v1.13, Intel® Extension for PyTorch v1.13, Intel® Neural Compressor v2.0.0 |
+`env/intel/text-intel-torch-cpu.yml` | `text-intel-torch` | Python=3.9.7, PyTorch v1.13, Intel® Extension for PyTorch v1.13, Intel® Neural Compressor v2.0.0 |
+`env/intel/text-intel-torch-xpu.yml, update_to_torch_xpu.txt` | `text-intel-torch` | Python=3.9.7, PyTorch v1.13, Intel® Extension for PyTorch v1.13+xpu, Intel® Neural Compressor v2.0.0 |
### Optimized Solution Setup
-Follow the below conda installation commands to setup the Intel® oneAPI optimized PyTorch environment for model training and text generation.
+Follow the conda installation commands below to set up the Intel® oneAPI optimized PyTorch environment for model training and text generation. Please choose `env/intel/text-intel-torch-xpu.yml` if you have an Intel GPU available.
```sh
-conda env create -f env/intel/text-intel-torch.yml
+conda env create -f env/intel/text-intel-torch-cpu.yml # For CPU
+# or
+conda env create -f env/intel/text-intel-torch-xpu.yml # For XPU
```
*Activate stock conda environment*
@@ -257,12 +262,17 @@ Use the following command to activate the environment that was created:
```sh
conda activate text-intel-torch
```
+Please perform this additional installation step only if you are using an Intel GPU; CPU users can skip this step:
+```sh
+pip install -r env/intel/update_to_torch_xpu.txt
+```
-This script utilizes the dependencies found in the `env/intel/text-intel-torch.yml` file to create an environment as follows:
+This script utilizes the dependencies found in the `env/intel/text-intel-torch-cpu.yml` or `env/intel/text-intel-torch-xpu.yml` file to create an environment as follows:
| **YAML file** | **Environment Name** | **Configuration** |
| :-------------------: | :------------------: | :-----------------------------: |
-| `env/intel/text-intel-torch.yml` | `text-intel-torch` | Python=3.9.x with PyTorch v1.13, Intel® Extension For PyTorch v 1.13.0, Intel® Neural Compressor v2.0.0 |
+| `env/intel/text-intel-torch-cpu.yml` | `text-intel-torch` | Python=3.9.x with PyTorch v1.13, Intel® Extension For PyTorch v 1.13.0, Intel® Neural Compressor v2.0.0 |
+| `env/intel/text-intel-torch-xpu.yml, update_to_torch_xpu.txt` | `text-intel-torch` | Python=3.9.x with PyTorch v1.13, Intel® Extension For PyTorch v 1.13.0+xpu, Intel® Neural Compressor v2.0.0 |
### Intel® oneAPI Optimized Implementation
@@ -282,6 +292,33 @@ The command to fine-tune the model with Intel® optimizations enabled is:
ipexrun --use_logical_core --enable_tcmalloc src/finetune_model.py --model_config configs/config_base.yml --data_path data/abcnews-date-text.csv --save_path saved_models/gpt2-medium-finetuned-intel --intel
```
+**For Intel GPU training**, the device 'xpu' must be added to 'python/site-packages/transformers/training_args.py' (inside the installed transformers package), as shown in the diff below.
+It can be changed manually as follows:
+```diff
+diff --git a/training_args.py b/training_args.py
+index 1a90710..9421ccd 100644
+--- a/training_args.py
++++ b/training_args.py
+@@ -1466,6 +1466,8 @@ class TrainingArguments:
+ torch.distributed.init_process_group(
+ backend=self.xpu_backend, rank=rank, world_size=size, timeout=self.ddp_timeout_delta
+ )
++ elif torch.xpu.is_available():
++ device = torch.device("xpu")
+ elif is_torch_tpu_available():
+ device = xm.xla_device()
+ self._n_gpu = 0
+```
+Or to apply the patch automatically, please run the following Python script only once.
+```bash
+$ cd src
+$ python ./apply_xpu_patch.py
+```
+and then run the fine tuning step as follows:
+```sh
+ipexrun --use_logical_core --enable_tcmalloc src/finetune_model.py --model_config configs/config_base.yml --data_path data/abcnews-date-text.csv --save_path saved_models/gpt2-medium-finetuned-intel --intel
+```
+
**Expected Output**
The output-trained model will be saved as a `huggingface` pre-trained model saved to the 'saved_models/gpt2-medium-finetuned-intel'. Training time in seconds would be generated at the end of the training module.
@@ -308,6 +345,11 @@ As above, this trained model can be used to generate text using the provided `ge
python src/generate_text.py --model_config configs/config_finetuned_intel.yml
```
+To run inference on an Intel GPU, please run the following:
+```sh
+python src/generate_text.py --model_config configs/config_finetuned_intel.yml --device xpu
+```
+
**Expected Output**
The converted ONNX fine-tuned model will be saved to `saved_models/gpt2-medium-finetuned-intel-onnx`.
@@ -348,6 +390,11 @@ Once the quantized model is created, we can use the `generate_text.py` script on
python src/generate_text.py --model_config configs/config_finetuned_inc.yml
```
+To run inference on an Intel GPU, please run the following:
+```sh
+python src/generate_text.py --model_config configs/config_finetuned_inc.yml --device xpu
+```
+
## Performance Observations
In the following, we report some results comparing Intel® technologies vs the stock alternative on the task of generating new texts of various lengths.
diff --git a/env/intel/text-intel-torch.yml b/env/intel/text-intel-torch-cpu.yml
similarity index 100%
rename from env/intel/text-intel-torch.yml
rename to env/intel/text-intel-torch-cpu.yml
diff --git a/env/intel/text-intel-torch-xpu.yml b/env/intel/text-intel-torch-xpu.yml
new file mode 100644
index 0000000..ce5ad9e
--- /dev/null
+++ b/env/intel/text-intel-torch-xpu.yml
@@ -0,0 +1,15 @@
+# Refer to https://intel.github.io/intel-extension-for-pytorch/xpu/1.13.10+xpu/tutorials/getting_started.html
+# for intel_extension_for_pytorch installation.
+name: text-intel-torch
+channels:
+ - conda-forge
+dependencies:
+ - python=3.9.7
+ - pip=21.2.4
+ - pip:
+ - neural-compressor==2.0
+ - pandas==1.5.3
+ - numpy==1.23.5
+ - transformers==4.26.1
+ - datasets==2.10.1
+ - optimum[onnxruntime]==1.6.4
diff --git a/env/intel/update_to_torch_xpu.txt b/env/intel/update_to_torch_xpu.txt
new file mode 100644
index 0000000..459ee5b
--- /dev/null
+++ b/env/intel/update_to_torch_xpu.txt
@@ -0,0 +1,3 @@
+-i https://developer.intel.com/ipex-whl-stable-xpu
+torch==1.13.0a0
+intel_extension_for_pytorch==1.13.10+xpu
diff --git a/src/apply_xpu_patch.py b/src/apply_xpu_patch.py
new file mode 100644
index 0000000..4cd0478
--- /dev/null
+++ b/src/apply_xpu_patch.py
@@ -0,0 +1,12 @@
+import transformers
+import os
+import subprocess
+
+module_path = transformers.__path__[0]
+# get path for training_args.py
+target_file_path = os.path.join(module_path, "training_args.py")
+
+# apply patch to training_args.py file in the transformers package
+subprocess.run(["patch", target_file_path, "transformers_xpu.patch"])
+
+print("patch applied successfully")
diff --git a/src/generate_text.py b/src/generate_text.py
index c0bd13e..64c3ae3 100644
--- a/src/generate_text.py
+++ b/src/generate_text.py
@@ -26,7 +26,7 @@
AutoModelForCausalLM,
set_seed
)
-
+import intel_extension_for_pytorch as ipex
def generate_text(
tokenizer: PreTrainedTokenizer,
@@ -36,7 +36,8 @@ def generate_text(
min_length: int = 0,
max_length: int = 10,
algorithm: str = 'greedy',
- stop_token: str = '.') -> Tuple[List[int],float]:
+ stop_token: str = '.',
+ device: str = 'cpu') -> Tuple[List[int],float]:
"""Generate text using the provided model and algorithm.
Args:
@@ -60,18 +61,26 @@ def generate_text(
Tuple(List[int], float): generated sequence of token_ids, total time for
of model inference calls
"""
-
all_token_ids = input_ids.clone()
all_attention_masks = attention_mask.clone()
eos_token_id = tokenizer([stop_token], return_tensors='np')[
'input_ids'][0][0]
has_eos = torch.zeros(1, dtype=torch.bool)
+ ones_mask = torch.ones([1, 1])
+
+ if device == 'xpu':
+ print("#### xpu")
+ all_token_ids = input_ids.to("xpu")
+ all_attention_masks = attention_mask.to("xpu")
+ eos_token_id = torch.tensor([eos_token_id], device=torch.device("xpu"))
+ has_eos = has_eos.to("xpu")
+ ones_mask = ones_mask.to("xpu")
+ model = model.to("xpu")
total_time = 0
for step in range(max_length):
-
- if isinstance(model, torch.nn.Module):
+ if isinstance(model, torch.nn.Module) and device == 'cpu':
start = time.time()
next_token_logits = torch.nn.functional.softmax(
model(
@@ -79,6 +88,14 @@ def generate_text(
attention_mask=all_attention_masks)[:, -1, :], dim=1)
end = time.time()
total_time += end - start
+ elif isinstance(model, torch.nn.Module) and device == 'xpu':
+ start = time.time()
+ next_token_logits = torch.nn.functional.softmax(
+ model(
+ input_ids=all_token_ids,
+ attention_mask=all_attention_masks)[0][:, -1, :], dim=1)
+ end = time.time()
+ total_time += end - start
elif isinstance(model, ort.InferenceSession):
ort_input = {
"input_ids": np.array(all_token_ids),
@@ -101,7 +118,7 @@ def generate_text(
all_token_ids = torch.cat(
[all_token_ids, tokens_to_add.unsqueeze(-1)], dim=-1)
all_attention_masks = torch.cat(
- [all_attention_masks, torch.ones([1, 1])], dim=-1).type_as(all_attention_masks)
+ [all_attention_masks, ones_mask], dim=-1).type_as(all_attention_masks)
if step > min_length and next_tokens == eos_token_id:
break
@@ -115,8 +132,8 @@ def generate(
max_length: int = 10,
prompt_file: str = None,
benchmark_mode: bool = False,
- n_runs: int = 100):
-
+ n_runs: int = 100,
+ device: str = 'cpu'):
# read prompts from file into a list for batch processing
tokenized_input = []
if not benchmark_mode and prompt_file is not None:
@@ -143,7 +160,8 @@ def generate(
tokenized_prompt.attention_mask,
min_length=max_length,
max_length=max_length,
- algorithm='greedy')
+ algorithm='greedy',
+ device=device)
if i > 10:
times.append(total_time)
print(f"Average Generation time: {np.mean(times)}s")
@@ -160,7 +178,8 @@ def generate(
tokenized_prompt.attention_mask,
min_length=min_length,
max_length=max_length,
- algorithm='sample')
+ algorithm='sample',
+ device=device)
tokenized_output.append(res)
out_json = []
@@ -216,7 +235,8 @@ def main(flags):
min_length=min_length,
max_length=max_length,
prompt_file=prompt_file,
- benchmark_mode=flags.benchmark_mode
+ benchmark_mode=flags.benchmark_mode,
+ device=flags.device
)
@@ -241,6 +261,11 @@ def main(flags):
type=int,
default=10
)
+ parser.add_argument('--device',
+ type=str,
+ required=False,
+ default='cpu',
+ help='Choose run on cpu or xpu, default is cpu.')
FLAGS = parser.parse_args()
main(FLAGS)
diff --git a/src/transformers_xpu.patch b/src/transformers_xpu.patch
new file mode 100644
index 0000000..57d5a30
--- /dev/null
+++ b/src/transformers_xpu.patch
@@ -0,0 +1,20 @@
+--- a.py 2023-04-04 11:50:27.000000000 +0000
++++ b.py 2023-04-04 11:52:00.000000000 +0000
+@@ -53,6 +53,7 @@
+ requires_backends,
+ )
+
++import intel_extension_for_pytorch
+
+ if is_torch_available():
+ import torch
+@@ -1466,6 +1467,8 @@
+ torch.distributed.init_process_group(
+ backend=self.xpu_backend, rank=rank, world_size=size, timeout=self.ddp_timeout_delta
+ )
++ elif torch.xpu.is_available():
++ device = torch.device("xpu")
+ elif is_torch_tpu_available():
+ device = xm.xla_device()
+ self._n_gpu = 0
+~