Changes from all 22 commits
1453942
Announcement update for Granite Vision (#474)
qcdipankar Jun 22, 2025
740f7c2
Fixes for mllama (#462)
qcdipankar Jun 23, 2025
61b1445
BugFix: Fix reshape error for llama swiftkv models (#432)
quic-shagun Jun 25, 2025
eff9472
Gemma 3 minor fixes (#476)
quic-akuruvil Jun 25, 2025
77cfb29
Bug fix for spdTransform (#467)
qcdipankar Jun 27, 2025
6c64d35
[QEff. Finetune]: Enabled FT CI tests. (#420)
quic-meetkuma Jul 1, 2025
10fb2ac
Gemma 3 minor fixes (#476) - CPR (#484)
quic-akuruvil Jul 1, 2025
71e554f
Revert "Gemma 3 minor fixes (#476) - CPR" (#485)
quic-hemagnih Jul 1, 2025
d823503
[Docs/Readme]: Main Readme updating for latest news and adding the on…
abukhoy Jul 2, 2025
c5a5c17
QUICKFIX: Removed the redundant breakpoint comment in modeling_llava_…
quic-dhirajku Jul 3, 2025
b90c1ac
MDP hash support (#479)
quic-rishinr Jul 3, 2025
db38927
[QEff Finetune] Adding dataset padding changes (#478)
quic-swatia Jul 4, 2025
6254efe
Fixed QNN data format config issue. (#480)
shubhagr-qc Jul 7, 2025
2ba491d
Corrected Total Inference Time unit (#505)
asmigosw Jul 9, 2025
3aaa2d8
[QEff. Finetune]: Added support to sync gradients across devices duri…
quic-meetkuma Jul 9, 2025
30d1579
[QEff Finetune]: Implement logger for finetuning and enable dumping (…
quic-mamta Jul 9, 2025
432dcf5
Gemma 3 minor fixes (#476) - CPR (#484)
quic-akuruvil Jul 1, 2025
ad6fc66
Revert "Gemma 3 minor fixes (#476) - CPR" (#485)
quic-hemagnih Jul 1, 2025
cab6243
Gemma 3 minor fixes (#476) - CPR (#484)
quic-akuruvil Jul 1, 2025
0101967
Revert "Gemma 3 minor fixes (#476) - CPR" (#485)
quic-hemagnih Jul 1, 2025
5a6a7b7
Create a constant value for MIN_MASKED_ATTN_VALUE
quic-amitraj Jun 6, 2025
1673f30
Value update for mask
quic-amitraj Jun 12, 2025
65 changes: 31 additions & 34 deletions QEfficient/base/modeling_qeff.py
@@ -7,7 +7,6 @@

import hashlib
import inspect
import json
import logging
import shutil
import subprocess
@@ -23,7 +22,7 @@
from QEfficient.base.pytorch_transforms import PytorchTransform
from QEfficient.compile.qnn_compiler import compile as qnn_compile
from QEfficient.generation.cloud_infer import QAICInferenceSession
from QEfficient.utils import constants, dump_qconfig
from QEfficient.utils import constants, create_json, dump_qconfig, generate_mdp_partition_config, load_json
from QEfficient.utils.cache import QEFF_HOME, to_hashable

logger = logging.getLogger(__name__)
@@ -269,17 +268,17 @@ def _compile(
specializations=specializations,
custom_io=custom_io,
device_group=list(range(mdp_ts_num_devices)),
num_cores=compiler_options.get("aic_num_cores", 16),
mxfp6=compiler_options.get("mxfp6_matmul", False),
num_cores=compiler_options.get("aic_num_cores", constants.DEFAULT_AIC_NUM_CORES),
mxfp6=compiler_options.get("mxfp6_matmul", constants.DEFAULT_AIC_MXPF6_MATMUL),
mxint8=mxint8_kv_cache,
qnn_config=qnn_config,
)

return self.qpc_path

command = constants.COMPILER + [f"-m={onnx_path}"]
if mdp_ts_json_path := compiler_options.pop("mdp_ts_json_path", None):
mdp_ts_num_devices = None

if mdp_ts_json_path := compiler_options.pop("mdp_load_partition_config", None):
command.append(f"-mdp-load-partition-config={mdp_ts_json_path}")

for key, value in compiler_options.items():
@@ -289,6 +288,17 @@
command.append(option)
continue
command.append(f"{option}={value}")

# Create a dummy mdp_ts_json if mdp-load-partition-config not provided and num_devices > 1
if mdp_ts_json_path is not None:
mdp_ts_json = load_json(str(mdp_ts_json_path))
elif mdp_ts_num_devices > 1:
mdp_ts_json = generate_mdp_partition_config(
mdp_ts_num_devices, compiler_options.get("aic_num_cores", constants.DEFAULT_AIC_NUM_CORES)
)
else:
mdp_ts_json = None

compile_hash = hashlib.sha256(to_hashable(command))

if specializations is not None:
@@ -299,30 +309,37 @@

if num_speculative_tokens:
compile_hash.update(to_hashable({"num_speculative_tokens": num_speculative_tokens}))
# Hash num_devices too, since default value would always be 1.
compile_hash.update(to_hashable(mdp_ts_num_devices))

# Hash the MDP partition config and the number of devices.
compile_hash.update(to_hashable(mdp_ts_json))
compile_hash.update(to_hashable({"mdp_ts_num_devices": mdp_ts_num_devices}))

# Check if already compiled
compile_hash = compile_hash.hexdigest()[:16]
compile_dir = qpc_path.with_name(qpc_path.name + "-" + compile_hash)
qpc_path = compile_dir / "qpc"
qpc_path.mkdir(parents=True, exist_ok=True)

if qpc_path.is_dir():
if (qpc_path / "programqpc.bin").is_file():
self.qpc_path = qpc_path
return qpc_path
# Probably compilation failure last time, delete directory to start over
shutil.rmtree(qpc_path)

# write the MDP partition config file if not provided
if mdp_ts_json is not None:
mdp_ts_json_path = compile_dir / f"mdp_ts_{mdp_ts_num_devices}.json"
create_json(str(mdp_ts_json_path), mdp_ts_json)
command.append(f"-mdp-load-partition-config={mdp_ts_json_path}")

# Write specializations.json file
if specializations is not None:
specializations_json = compile_dir / "specializations.json"
with open(specializations_json, "w") as fp:
json.dump(
{"specializations": [{k: str(v) for k, v in spec.items()} for spec in specializations]},
fp,
indent=4,
)
specializations_data = {
"specializations": [{k: str(v) for k, v in spec.items()} for spec in specializations]
}
create_json(str(specializations_json), specializations_data)
command.append(f"-network-specialization-config={specializations_json}")

# Write custom_io.yaml file
@@ -333,26 +350,6 @@
fp.write(f" - IOName: {io_name}\n Precision: {dtype}\n\n")
command.append(f"-custom-IO-list-file={custom_io_yaml}")

# Write mdp_config.json file
if not mdp_ts_json_path and mdp_ts_num_devices > 1:
num_cores = compiler_options.get("aic_num_cores", 16)
mdp_ts_json = compile_dir / f"mdp_ts_{mdp_ts_num_devices}.json"
with open(mdp_ts_json, "w") as fp:
json.dump(
{
"connections": [{"devices": list(range(mdp_ts_num_devices)), "type": "p2p"}],
"partitions": [
{
"name": "Partition0",
"devices": [{"deviceId": d, "numCores": num_cores} for d in range(mdp_ts_num_devices)],
}
],
},
fp,
indent=4,
)
command.append(f"-mdp-load-partition-config={mdp_ts_json}")

command.append(f"-aic-binary-dir={qpc_path}")
logger.info(f"Running compiler: {' '.join(command)}")
try:
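Note on modeling_qeff.py: the inline json.dump of the MDP partition config is replaced by the generate_mdp_partition_config and create_json helpers, and the resulting dict is now folded into the compile hash together with mdp_ts_num_devices, so auto-generated and user-supplied partition configs map to distinct QPC cache directories. A minimal sketch of what the helper plausibly returns, inferred from the structure the removed code used to write (the real implementation lives in QEfficient.utils and may differ):

def generate_mdp_partition_config(num_devices: int, num_cores: int) -> dict:
    # One peer-to-peer connection spanning all devices, and a single partition
    # assigning num_cores AIC cores to every device in the group.
    return {
        "connections": [{"devices": list(range(num_devices)), "type": "p2p"}],
        "partitions": [
            {
                "name": "Partition0",
                "devices": [{"deviceId": d, "numCores": num_cores} for d in range(num_devices)],
            }
        ],
    }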
48 changes: 29 additions & 19 deletions QEfficient/cloud/finetune.py
@@ -5,6 +5,7 @@
#
# -----------------------------------------------------------------------------

import logging
import random
import warnings
from typing import Any, Dict, Optional, Union
@@ -17,7 +18,7 @@
import torch.utils.data
from peft import PeftModel, get_peft_model
from torch.optim.lr_scheduler import StepLR
from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer
from transformers import AutoModel, AutoModelForCausalLM, AutoModelForSequenceClassification, AutoTokenizer

from QEfficient.finetune.configs.training import TrainConfig
from QEfficient.finetune.utils.config_utils import (
@@ -26,18 +27,22 @@
update_config,
)
from QEfficient.finetune.utils.dataset_utils import get_dataloader
from QEfficient.finetune.utils.logging_utils import logger
from QEfficient.finetune.utils.parser import get_finetune_parser
from QEfficient.finetune.utils.train_utils import get_longest_seq_length, print_model_size, train
from QEfficient.utils._utils import login_and_download_hf_lm
from QEfficient.finetune.utils.train_utils import (
get_longest_seq_length,
print_model_size,
print_trainable_parameters,
train,
)
from QEfficient.utils._utils import hf_download

# Try importing QAIC-specific module, proceed without it if unavailable
try:
import torch_qaic # noqa: F401
except ImportError as e:
print(f"Warning: {e}. Proceeding without QAIC modules.")

logger.log_rank_zero(f"{e}. Moving ahead without these qaic modules.", logging.WARNING)

from transformers import AutoModelForSequenceClassification

# Suppress all warnings
warnings.filterwarnings("ignore")
@@ -106,7 +111,8 @@ def load_model_and_tokenizer(
- Resizes model embeddings if tokenizer vocab size exceeds model embedding size.
- Sets pad_token_id to eos_token_id if not defined in the tokenizer.
"""
pretrained_model_path = login_and_download_hf_lm(train_config.model_name)
logger.log_rank_zero(f"Loading HuggingFace model for {train_config.model_name}")
pretrained_model_path = hf_download(train_config.model_name)
if train_config.task_type == "seq_classification":
model = AutoModelForSequenceClassification.from_pretrained(
pretrained_model_path,
@@ -116,7 +122,7 @@
)

if not hasattr(model, "base_model_prefix"):
raise RuntimeError("Given huggingface model does not have 'base_model_prefix' attribute.")
logger.raise_error("Given huggingface model does not have 'base_model_prefix' attribute.", RuntimeError)

for param in getattr(model, model.base_model_prefix).parameters():
param.requires_grad = False
@@ -141,11 +147,10 @@
# If there is a mismatch between tokenizer vocab size and embedding matrix,
# throw a warning and then expand the embedding matrix
if len(tokenizer) > model.get_input_embeddings().weight.shape[0]:
print("WARNING: Resizing embedding matrix to match tokenizer vocab size.")
logger.log_rank_zero("Resizing the embedding matrix to match the tokenizer vocab size.", logging.WARNING)
model.resize_token_embeddings(len(tokenizer))

# FIXME (Meet): Cover below line inside the logger once it is implemented.
print_model_size(model, train_config)
print_model_size(model)

# Note: Need to call this before calling PeftModel.from_pretrained or get_peft_model.
# Because, both makes model.is_gradient_checkpointing = True which is used in peft library to
@@ -157,7 +162,9 @@
if hasattr(model, "supports_gradient_checkpointing") and model.supports_gradient_checkpointing:
model.gradient_checkpointing_enable(gradient_checkpointing_kwargs={"preserve_rng_state": False})
else:
raise RuntimeError("Given model doesn't support gradient checkpointing. Please disable it and run it.")
logger.raise_error(
"Given model doesn't support gradient checkpointing. Please disable it and run it.", RuntimeError
)

model = apply_peft(model, train_config, peft_config_file, **kwargs)

@@ -192,7 +199,7 @@ def apply_peft(
else:
peft_config = generate_peft_config(train_config, peft_config_file, **kwargs)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()
print_trainable_parameters(model)

return model

@@ -217,25 +224,26 @@ def setup_dataloaders(
- Length of longest sequence in the dataset.

Raises:
ValueError: If validation is enabled but the validation set is too small.
RuntimeError: If validation is enabled but the validation set is too small.

Notes:
- Applies a custom data collator if provided by get_custom_data_collator.
- Configures DataLoader kwargs using get_dataloader_kwargs for train and val splits.
"""

train_dataloader = get_dataloader(tokenizer, dataset_config, train_config, split="train")
print(f"--> Num of Training Set Batches loaded = {len(train_dataloader)}")
logger.log_rank_zero(f"Number of Training Set Batches loaded = {len(train_dataloader)}")

eval_dataloader = None
if train_config.run_validation:
eval_dataloader = get_dataloader(tokenizer, dataset_config, train_config, split="val")
if len(eval_dataloader) == 0:
raise ValueError(
f"The eval set size is too small for dataloader to load even one batch. Please increase the size of eval set. ({len(eval_dataloader)=})"
logger.raise_error(
f"The eval set size is too small for dataloader to load even one batch. Please increase the size of eval set. ({len(eval_dataloader)=})",
ValueError,
)
else:
print(f"--> Num of Validation Set Batches loaded = {len(eval_dataloader)}")
logger.log_rank_zero(f"Number of Validation Set Batches loaded = {len(eval_dataloader)}")

longest_seq_length, _ = get_longest_seq_length(
torch.utils.data.ConcatDataset([train_dataloader.dataset, eval_dataloader.dataset])
@@ -274,13 +282,15 @@ def main(peft_config_file: str = None, **kwargs) -> None:
dataset_config = generate_dataset_config(train_config.dataset)
update_config(dataset_config, **kwargs)

logger.prepare_for_logs(train_config.output_dir, train_config.dump_logs, train_config.log_level)

setup_distributed_training(train_config)
setup_seeds(train_config.seed)
model, tokenizer = load_model_and_tokenizer(train_config, dataset_config, peft_config_file, **kwargs)

# Create DataLoaders for the training and validation dataset
train_dataloader, eval_dataloader, longest_seq_length = setup_dataloaders(train_config, dataset_config, tokenizer)
print(
logger.log_rank_zero(
f"The longest sequence length in the train data is {longest_seq_length}, "
f"passed context length is {train_config.context_length} and overall model's context length is "
f"{model.config.max_position_embeddings}"
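Note on finetune.py: direct print calls are replaced by a shared logger from QEfficient.finetune.utils.logging_utils, with rank-zero emission, optional dumping to the training output directory, and log-and-raise error handling. The implementation itself is not part of this diff; a rough sketch of the interface the call sites above assume, with illustrative names only:

import logging
import os

import torch.distributed as dist


class FinetuneLogger:
    def __init__(self):
        self._logger = logging.getLogger("QEfficient.finetune")

    def prepare_for_logs(self, output_dir, dump_logs=True, log_level=logging.INFO):
        # Optionally mirror log records to a file under the training output directory.
        self._logger.setLevel(log_level)
        if dump_logs:
            os.makedirs(output_dir, exist_ok=True)
            self._logger.addHandler(logging.FileHandler(os.path.join(output_dir, "finetune.log")))

    def log_rank_zero(self, msg, level=logging.INFO):
        # Emit only from rank 0 so multi-device runs do not duplicate messages.
        if dist.is_available() and dist.is_initialized() and dist.get_rank() != 0:
            return
        self._logger.log(level, msg)

    def raise_error(self, msg, exc_type=RuntimeError):
        # Record the failure before raising so it also lands in the dumped log file.
        self._logger.error(msg)
        raise exc_type(msg)


logger = FinetuneLogger()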
1 change: 0 additions & 1 deletion QEfficient/exporter/export_hf_to_cloud_ai_100.py
@@ -129,7 +129,6 @@ def export_bertstyle_model_to_onnx(model_name, model, tokenizer, onnx_dir_path,
)

# Generate inputFiles
# todo(ochougul):rename to bert_style_input_list.txt
input_list_file = os.path.join(onnx_dir_path, "input_list.txt")
generate_input_files(
input_files_path=os.path.join(onnx_dir_path, "inputFiles"),
4 changes: 0 additions & 4 deletions QEfficient/exporter/export_utils.py
@@ -218,8 +218,6 @@ def fix_onnx_fp16(
:str: Updated base name of exported ONNX model.
"""
model = onnx.load(os.path.join(gen_models_path, f"{model_base_name}.onnx"))
# TODO: Remove this `fix_onnx_fp16` function and replace with this transform
# as we're not utilizing the validations done in this function
model, fp16_fix = FP16ClipTransform.apply(model, onnx_base_dir=gen_models_path)

if fp16_fix:
@@ -256,8 +254,6 @@ def fix_onnx_fp16(
if ort_outputs is not None:
for oname, orto, ortof in zip(output_names, ort_outputs, ort_outputs_fixed):
fix_diff = np.abs(orto.astype(np.float32) - ortof.astype(np.float32)).max()
# TODO: need to the debug this
# info(oname, fix_diff)
close_outputs.append(fix_diff < 1e-5)
else:
info("No constants out of FP16 range")
5 changes: 4 additions & 1 deletion QEfficient/finetune/configs/training.py
@@ -5,6 +5,7 @@
#
# -----------------------------------------------------------------------------

import logging
from dataclasses import dataclass


@@ -94,5 +95,7 @@ class TrainConfig:
use_profiler: bool = False # Enable pytorch profiler, can not be used with flop counter at the same time.
# profiler_dir: str = "PATH/to/save/profiler/results" # will be used if using profiler

dump_root_dir: str = "mismatches/step_"
opByOpVerifier: bool = False

dump_logs: bool = True
log_level: str = logging.INFO
16 changes: 10 additions & 6 deletions QEfficient/finetune/data/sampler.py
@@ -4,11 +4,9 @@
# SPDX-License-Identifier: BSD-3-Clause
#
# -----------------------------------------------------------------------------

import random
from itertools import islice

import numpy as np
import torch


@@ -22,14 +20,14 @@ def __init__(self, data_source, batch_size: int, drop_last: bool, shuffle: bool
self.batch_size = batch_size
self.drop_last = drop_last
self.shuffle = shuffle
self.data_source = data_source

def __iter__(self):
ids = np.argsort(self.lengths, kind="mergesort")
ids = list(range(len(self.data_source)))
if self.drop_last:
ids = ids[: len(ids) // self.batch_size * self.batch_size]

batches = [ids[i : i + self.batch_size] for i in range(0, len(ids), self.batch_size)]

if self.shuffle:
random.shuffle(batches)

@@ -45,11 +43,17 @@ def __len__(self):

class DistributedLengthBasedBatchSampler(torch.utils.data.BatchSampler):
def __init__(
self, data_source, batch_size: int, num_replicas: int, rank: int, shuffle: bool = True, seed: int = 0
self,
data_source,
batch_size: int,
num_replicas: int,
rank: int,
shuffle: bool = True,
seed: int = 0,
) -> None:
random.seed(seed)
self.batch_sampler = LengthBasedBatchSampler(
data_source, batch_size=batch_size, drop_last=True, shuffle=shuffle
data_source, batch_size=batch_size, drop_last=False, shuffle=shuffle
)
self.num_replicas = num_replicas
self.rank = rank
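Note on sampler.py: LengthBasedBatchSampler now batches sequential indices rather than length-sorted ones, and the distributed wrapper passes drop_last=False so the trailing partial batch is kept. For reference, a minimal usage sketch of how such a batch sampler is typically wired into a DataLoader (the toy dataset and batch size below are illustrative):

from torch.utils.data import DataLoader

# Hypothetical toy map-style dataset; any dataset with __len__ and __getitem__ works.
data_source = [{"input_ids": list(range(n))} for n in (5, 3, 8, 2, 7, 4, 6)]

batch_sampler = LengthBasedBatchSampler(data_source, batch_size=2, drop_last=False, shuffle=False)
loader = DataLoader(data_source, batch_sampler=batch_sampler, collate_fn=lambda batch: batch)

for batch in loader:
    print(len(batch))  # 2, 2, 2, 1 -- the trailing partial batch is kept when drop_last=False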
10 changes: 9 additions & 1 deletion QEfficient/finetune/dataset/alpaca_dataset.py
@@ -11,6 +11,8 @@
import torch
from torch.utils.data import Dataset

from QEfficient.finetune.utils.logging_utils import logger

PROMPT_DICT = {
"prompt_input": (
"Below is an instruction that describes a task, paired with an input that provides further context. "
@@ -27,7 +29,13 @@

class InstructionDataset(Dataset):
def __init__(self, dataset_config, tokenizer, partition="train", context_length=None):
self.ann = json.load(open(dataset_config.data_path))
try:
self.ann = json.load(open(dataset_config.data_path))
except FileNotFoundError:
logger.raise_error(
"Loading of alpaca dataset failed! Please use (wget -c https://raw.githubusercontent.com/tatsu-lab/stanford_alpaca/refs/heads/main/alpaca_data.json -P dataset/) to download the alpaca dataset.",
FileNotFoundError,
)
# Use 5% of the dataset for evaluation
eval_length = int(len(self.ann) / 20)
if partition == "train":
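Note on alpaca_dataset.py: a missing dataset file now surfaces a clear error with the wget command to fetch it instead of an unhandled FileNotFoundError. An equivalent way to stage the file from Python, using the same URL and destination that the error message assumes:

import os
import urllib.request

os.makedirs("dataset", exist_ok=True)
urllib.request.urlretrieve(
    "https://raw.githubusercontent.com/tatsu-lab/stanford_alpaca/refs/heads/main/alpaca_data.json",
    os.path.join("dataset", "alpaca_data.json"),
)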