Skip to content

Commit

Permalink
optionally convert ov tokenizer if old cached model without them used… (
Browse files Browse the repository at this point in the history
#2136)

… on prev notebook version
  • Loading branch information
eaidova committed Jun 20, 2024
1 parent b4acdf9 commit 07673f4
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 213 deletions.
224 changes: 13 additions & 211 deletions notebooks/llm-chatbot/llm-chatbot-generate-api.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -837,6 +837,8 @@
}
],
"source": [
"from transformers import AutoTokenizer\n",
"from openvino_tokenizers import convert_tokenizer\n",
"from openvino_genai import LLMPipeline\n",
"\n",
"if model_to_run.value == \"INT4\":\n",
Expand All @@ -847,7 +849,15 @@
" model_dir = fp16_model_dir\n",
"print(f\"Loading model from {model_dir}\\n\")\n",
"\n",
"pipe = LLMPipeline(model_dir.as_posix(), device.value)\n",
"# Model folders cached by a previous notebook version may lack the OpenVINO tokenizer files,\n",
"# so convert them from the Hugging Face tokenizer stored alongside the model when either one is missing.\n",
"if not (model_dir / \"openvino_tokenizer.xml\").exists() or not (model_dir / \"openvino_detokenizer.xml\").exists():\n",
"    hf_tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)\n",
"    ov_tokenizer, ov_detokenizer = convert_tokenizer(hf_tokenizer, with_detokenizer=True)\n",
"    # Tokenizer and detokenizer are two separate converted models; write each one to its own file.\n",
"    ov.save_model(ov_tokenizer, model_dir / \"openvino_tokenizer.xml\")\n",
"    ov.save_model(ov_detokenizer, model_dir / \"openvino_detokenizer.xml\")\n",
"\n",
"\n",
"# Pass the model directory as a string path, exactly as the previous revision of this cell did.\n",
"# NOTE(review): some openvino_genai releases appear to accept only string paths here - confirm before passing a Path.\n",
"pipe = LLMPipeline(model_dir.as_posix(), device.value)\n",
"# Quick smoke test: sample a short continuation for a fixed prompt.\n",
"print(pipe.generate(\"The Sun is yellow because\", temperature=1.2, top_k=4, do_sample=True, max_new_tokens=50))"
]
},
Expand Down Expand Up @@ -1288,7 +1298,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.8.10"
},
"openvino_notebooks": {
"imageUrl": "https://user-images.githubusercontent.com/29454499/255799218-611e7189-8979-4ef5-8a80-5a75e0136b50.png",
Expand All @@ -1309,215 +1319,7 @@
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"state": {
"0de749e5d4e64b92943cc93b20c238b2": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "2.0.0",
"model_name": "DropdownModel",
"state": {
"_options_labels": [
"INT4"
],
"description": "Model to run:",
"index": 0,
"layout": "IPY_MODEL_ba5fdb6d60124e4b8f00dc458f7a1d11",
"style": "IPY_MODEL_59113cf6827543a999b56c76edebf0b1"
}
},
"14503a6b4ebe400f83694f3adeac7c48": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "2.0.0",
"model_name": "CheckboxModel",
"state": {
"description": "Prepare INT8 model",
"disabled": false,
"layout": "IPY_MODEL_84cd116b746d409098b56b72dd959d48",
"style": "IPY_MODEL_16d5352d69c849c8b85a3c7178ab6199",
"value": false
}
},
"16d5352d69c849c8b85a3c7178ab6199": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "2.0.0",
"model_name": "CheckboxStyleModel",
"state": {
"description_width": ""
}
},
"1ab4ca29d79b458c98010115414b9ea3": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "2.0.0",
"model_name": "DropdownModel",
"state": {
"_options_labels": [
"English",
"Chinese",
"Japanese"
],
"description": "Model Language:",
"index": 0,
"layout": "IPY_MODEL_7ef98cb657884940af5cd4dea9aa1a62",
"style": "IPY_MODEL_7aa62f9c9b984160b4c9d1193c7f4371"
}
},
"2298fa15b3ba4b31995ea4ac45f81b23": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "2.0.0",
"model_name": "DescriptionStyleModel",
"state": {
"description_width": ""
}
},
"2ccf31d1aba14054b4d2f80d96ff6e5b": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "2.0.0",
"model_name": "CheckboxModel",
"state": {
"description": "Prepare INT4 model",
"disabled": false,
"layout": "IPY_MODEL_b9e80ac2c6bf4ee4a85ce1c7ff5144bb",
"style": "IPY_MODEL_c3ec8182c1cb42edb196c3c1400d5385",
"value": true
}
},
"47319ba150c74d07aa2ae3274e74222f": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "2.0.0",
"model_name": "DescriptionStyleModel",
"state": {
"description_width": ""
}
},
"55263ba3216c4d64925fabeb1b20f10c": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "2.0.0",
"model_name": "CheckboxStyleModel",
"state": {
"description_width": ""
}
},
"59113cf6827543a999b56c76edebf0b1": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "2.0.0",
"model_name": "DescriptionStyleModel",
"state": {
"description_width": ""
}
},
"76522c605b774adb8f0414d32210a16e": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "2.0.0",
"model_name": "LayoutModel",
"state": {}
},
"7aa62f9c9b984160b4c9d1193c7f4371": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "2.0.0",
"model_name": "DescriptionStyleModel",
"state": {
"description_width": ""
}
},
"7ef98cb657884940af5cd4dea9aa1a62": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "2.0.0",
"model_name": "LayoutModel",
"state": {}
},
"84cd116b746d409098b56b72dd959d48": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "2.0.0",
"model_name": "LayoutModel",
"state": {}
},
"8dbc93b040454d9a81b4159e814e3df2": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "2.0.0",
"model_name": "LayoutModel",
"state": {}
},
"b9e80ac2c6bf4ee4a85ce1c7ff5144bb": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "2.0.0",
"model_name": "LayoutModel",
"state": {}
},
"ba5fdb6d60124e4b8f00dc458f7a1d11": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "2.0.0",
"model_name": "LayoutModel",
"state": {}
},
"c3ec8182c1cb42edb196c3c1400d5385": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "2.0.0",
"model_name": "CheckboxStyleModel",
"state": {
"description_width": ""
}
},
"cbbb9403b1c4468b9bbb72f8c0e2edb0": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "2.0.0",
"model_name": "LayoutModel",
"state": {}
},
"df2ccc14f50c4b9fa2237f2ccf9815cf": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "2.0.0",
"model_name": "DropdownModel",
"state": {
"_options_labels": [
"tiny-llama-1b-chat",
"gemma-2b-it",
"phi-3-mini-instruct",
"red-pajama-3b-chat",
"qwen2-7b-instruct",
"gemma-7b-it",
"llama-2-chat-7b",
"llama-3-8b-instruct",
"mpt-7b-chat",
"mistral-7b",
"zephyr-7b-beta",
"notus-7b-v1",
"neural-chat-7b-v3-1"
],
"description": "Model:",
"index": 4,
"layout": "IPY_MODEL_8dbc93b040454d9a81b4159e814e3df2",
"style": "IPY_MODEL_47319ba150c74d07aa2ae3274e74222f"
}
},
"e2b4bd49dc504784ba0eb34fc60909e0": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "2.0.0",
"model_name": "DropdownModel",
"state": {
"_options_labels": [
"CPU",
"GPU.0",
"GPU.1",
"AUTO"
],
"description": "Device:",
"index": 0,
"layout": "IPY_MODEL_cbbb9403b1c4468b9bbb72f8c0e2edb0",
"style": "IPY_MODEL_2298fa15b3ba4b31995ea4ac45f81b23"
}
},
"f83b96a5108440ada8cd7b16b67c6599": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "2.0.0",
"model_name": "CheckboxModel",
"state": {
"description": "Prepare FP16 model",
"disabled": false,
"layout": "IPY_MODEL_76522c605b774adb8f0414d32210a16e",
"style": "IPY_MODEL_55263ba3216c4d64925fabeb1b20f10c",
"value": false
}
}
},
"state": {},
"version_major": 2,
"version_minor": 0
}
Expand Down
13 changes: 11 additions & 2 deletions notebooks/llm-question-answering/llm-question-answering.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -543,13 +543,22 @@
}
],
"source": [
"from transformers import AutoTokenizer\n",
"\n",
"# Pick the model folder matching the precision held by model_to_run;\n",
"# any value other than \"INT4\" or \"INT8\" falls back to the FP16 folder.\n",
"model_dir = (\n",
"    int4_model_dir\n",
"    if model_to_run.value == \"INT4\"\n",
"    else int8_model_dir if model_to_run.value == \"INT8\" else fp16_model_dir\n",
")\n",
"print(f\"Loading model from {model_dir}\")"
"print(f\"Loading model from {model_dir}\")\n",
"\n",
"# Model folders cached by a previous notebook version may lack the OpenVINO tokenizer files,\n",
"# so convert them from the Hugging Face tokenizer stored alongside the model when either one is missing.\n",
"if not (model_dir / \"openvino_tokenizer.xml\").exists() or not (model_dir / \"openvino_detokenizer.xml\").exists():\n",
"    # Imported locally: this cell does not import the converter anywhere else.\n",
"    from openvino_tokenizers import convert_tokenizer\n",
"\n",
"    hf_tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)\n",
"    ov_tokenizer, ov_detokenizer = convert_tokenizer(hf_tokenizer, with_detokenizer=True)\n",
"    # Tokenizer and detokenizer are two separate converted models; write each one to its own file.\n",
"    ov.save_model(ov_tokenizer, model_dir / \"openvino_tokenizer.xml\")\n",
"    ov.save_model(ov_detokenizer, model_dir / \"openvino_detokenizer.xml\")"
]
},
{
Expand Down Expand Up @@ -993,7 +1002,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.8.10"
},
"openvino_notebooks": {
"imageUrl": "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/daafd702-5a42-4f54-ae72-2e4480d73501",
Expand Down

0 comments on commit 07673f4

Please sign in to comment.