From 255e889950a325df9d2173fa3cd1e93d95f1f006 Mon Sep 17 00:00:00 2001 From: Rishin Raj Date: Tue, 25 Nov 2025 08:57:30 +0000 Subject: [PATCH 1/6] Added installation guide for installing release branches Signed-off-by: Rishin Raj --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 8972e5b56..86fd7d2fb 100644 --- a/README.md +++ b/README.md @@ -93,9 +93,13 @@ python3.10 -m venv qeff_env source qeff_env/bin/activate pip install -U pip -# Clone and Install the QEfficient Repo. +# Clone and Install the QEfficient Repo (mainline). pip install git+https://github.com/quic/efficient-transformers +# Install a specific branch, tag or commit by appending @ref +# Release branch (e.g., release/1.20): +pip install "git+https://github.com/quic/efficient-transformers@release/v1.20.0" + # Or build wheel package using the below command. pip install build wheel python -m build --wheel --outdir dist From 003bb459ec28a018a1e0ef5f5197debb1e4e0832 Mon Sep 17 00:00:00 2001 From: Rishin Raj Date: Tue, 25 Nov 2025 09:00:26 +0000 Subject: [PATCH 2/6] Updated comment Signed-off-by: Rishin Raj --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 86fd7d2fb..edb67a6fb 100644 --- a/README.md +++ b/README.md @@ -97,7 +97,7 @@ pip install -U pip pip install git+https://github.com/quic/efficient-transformers # Install a specific branch, tag or commit by appending @ref -# Release branch (e.g., release/1.20): +# Release branch (e.g., release/v1.20.0): pip install "git+https://github.com/quic/efficient-transformers@release/v1.20.0" # Or build wheel package using the below command. 
From f8af5161737edc38417a7fa20bb40e4735267603 Mon Sep 17 00:00:00 2001 From: Abukhoyer Shaik Date: Tue, 25 Nov 2025 09:33:22 +0000 Subject: [PATCH 3/6] Adding an example for local model Signed-off-by: Abukhoyer Shaik --- docs/source/quick_start.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/docs/source/quick_start.md b/docs/source/quick_start.md index 9358f9c4a..3221d2c30 100644 --- a/docs/source/quick_start.md +++ b/docs/source/quick_start.md @@ -220,5 +220,19 @@ Benchmark the model on Cloud AI 100, run the infer API to print tokens and tok/s # We need the compiled prefill and decode qpc to compute the token generated, This is based on Greedy Sampling Approach tokenizer = AutoTokenizer.from_pretrained(model_name) qeff_model.generate(prompts=["My name is"],tokenizer=tokenizer) +``` + +### Complete Example +If the model and tokenizer are already downloaded, we can directly load them from local path. + +```python +from QEfficient import QEFFAutoModelForCausalLM +from transformers import AutoTokenizer + +model = QEFFAutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path="~/.cache/huggingface/hub/models--gpt2/snapshots/607a30d783dfa663caf39e06633721c8d4cfcd7e") +model.compile(num_cores=16) +tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path="~/.cache/huggingface/hub/models--gpt2/snapshots/607a30d783dfa663caf39e06633721c8d4cfcd7e/") +model.generate(prompts=["Hi there!!"], tokenizer=tokenizer) + ``` End to End demo examples for various models are available in [**notebooks**](https://github.com/quic/efficient-transformers/tree/main/notebooks) directory. Please check them out. 
From 9363a6d2773fdaa796fe366b408a2cd4c1fe0164 Mon Sep 17 00:00:00 2001 From: Abukhoyer Shaik Date: Tue, 25 Nov 2025 09:58:03 +0000 Subject: [PATCH 4/6] Adding an example for local model Signed-off-by: Abukhoyer Shaik --- docs/source/quick_start.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/source/quick_start.md b/docs/source/quick_start.md index 3221d2c30..cc75240cf 100644 --- a/docs/source/quick_start.md +++ b/docs/source/quick_start.md @@ -222,16 +222,17 @@ tokenizer = AutoTokenizer.from_pretrained(model_name) qeff_model.generate(prompts=["My name is"],tokenizer=tokenizer) ``` -### Complete Example +### Local Model Execution If the model and tokenizer are already downloaded, we can directly load them from local path. ```python from QEfficient import QEFFAutoModelForCausalLM from transformers import AutoTokenizer -model = QEFFAutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path="~/.cache/huggingface/hub/models--gpt2/snapshots/607a30d783dfa663caf39e06633721c8d4cfcd7e") +local_model_repo = "~/.cache/huggingface/hub/models--gpt2/snapshots/607a30d783dfa663caf39e06633721c8d4cfcd7e" +model = QEFFAutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path=local_model_repo) model.compile(num_cores=16) -tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path="~/.cache/huggingface/hub/models--gpt2/snapshots/607a30d783dfa663caf39e06633721c8d4cfcd7e/") +tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=local_model_repo) model.generate(prompts=["Hi there!!"], tokenizer=tokenizer) ``` From f3c7d945ae60513338cbb5c4abfb946d03247fec Mon Sep 17 00:00:00 2001 From: Abukhoyer Shaik Date: Tue, 25 Nov 2025 10:07:13 +0000 Subject: [PATCH 5/6] Adding an example for local model Signed-off-by: Abukhoyer Shaik --- docs/source/quick_start.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/docs/source/quick_start.md b/docs/source/quick_start.md index cc75240cf..f15d8de2f 
100644 --- a/docs/source/quick_start.md +++ b/docs/source/quick_start.md @@ -229,11 +229,18 @@ If the model and tokenizer are already downloaded, we can directly load them fro from QEfficient import QEFFAutoModelForCausalLM from transformers import AutoTokenizer +# Local path to the downloaded model. You can find downloaded HF models in: +# - Default location: ~/.cache/huggingface/hub/models--{model_name}/snapshots/{snapshot_id}/ local_model_repo = "~/.cache/huggingface/hub/models--gpt2/snapshots/607a30d783dfa663caf39e06633721c8d4cfcd7e" + +# Load model from local path model = QEFFAutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path=local_model_repo) + model.compile(num_cores=16) + +# Load tokenizer from the same local path tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=local_model_repo) -model.generate(prompts=["Hi there!!"], tokenizer=tokenizer) +model.generate(prompts=["Hi there!!"], tokenizer=tokenizer) ``` End to End demo examples for various models are available in [**notebooks**](https://github.com/quic/efficient-transformers/tree/main/notebooks) directory. Please check them out. From 4cb289a6252d0337e6ddb9519f42ed3da2fe8ddd Mon Sep 17 00:00:00 2001 From: Rishin Raj Date: Wed, 26 Nov 2025 08:53:33 +0000 Subject: [PATCH 6/6] Minor README update Signed-off-by: Rishin Raj --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index edb67a6fb..cb6f32382 100644 --- a/README.md +++ b/README.md @@ -93,10 +93,10 @@ python3.10 -m venv qeff_env source qeff_env/bin/activate pip install -U pip -# Clone and Install the QEfficient Repo (mainline).
+# Clone and Install the QEfficient repository from the mainline branch pip install git+https://github.com/quic/efficient-transformers -# Install a specific branch, tag or commit by appending @ref +# Clone and Install the QEfficient repository from a specific branch, tag or commit by appending @ref # Release branch (e.g., release/v1.20.0): pip install "git+https://github.com/quic/efficient-transformers@release/v1.20.0"