Failed to export NNCF QAT model for Ultralytics YOLOv8 #2846

Closed
quinnZE opened this issue Jul 27, 2024 · 5 comments
Labels: bug (Something isn't working)

quinnZE commented Jul 27, 2024

🐛 Describe the bug

I am currently attempting to train and export an Ultralytics YOLOv8 Large model with NNCF QAT. I used the code referenced in issue #2486 to implement training and export.

from copy import deepcopy
from datetime import datetime
from typing import Any, Dict, Tuple

import torch
from znt.ultralytics.models.yolo.detect import DetectionTrainer
from znt.ultralytics.utils import DEFAULT_CFG
from znt.ultralytics.utils import LOGGER
from znt.ultralytics.utils import RANK
from znt.ultralytics.utils import __version__
from znt.ultralytics.utils.torch_utils import de_parallel
from znt.ultralytics.utils.torch_utils import strip_optimizer
from znt.ultralytics.cfg import get_znt_exp_config_overrides

import nncf
from nncf import NNCFConfig
from nncf.torch import create_compressed_model
from nncf.torch import register_default_init_args
from nncf.torch.dynamic_graph.io_handling import nncf_model_input
from nncf.torch.initialization import PTInitializingDataLoader
from nncf.torch.model_creation import is_wrapped_model

import openvino as ov

from ultralytics.utils.torch_utils import get_latest_opset

# Integration issue 1:
# MyInitializingDataLoader must support deep copy because DetectionTrainer deep-copies
# the model and MyInitializingDataLoader during training setup. The input data_loader,
# of type ultralytics.data.build.InfiniteDataLoader, does not support deep copy and
# cannot be used directly in MyInitializingDataLoader. The workaround for this
# limitation is to build a deep-copyable dataset from the data_loader.
class MyInitializingDataLoader(PTInitializingDataLoader):
    def __init__(self, data_loader, preprocess_batch_fn, num_samples=300):
        super().__init__(data_loader)
        self._batch_size = self._data_loader.batch_size
        # Using list of images instead of 'ultralytics.data.build.InfiniteDataLoader' to support deepcopy.
        self._data_loader = []
        num_samples = num_samples / self._batch_size
        for count, data_item in enumerate(data_loader):
            if count > num_samples:
                break
            batch = preprocess_batch_fn(data_item)
            self._data_loader.append((batch["img"], None))

    @property
    def batch_size(self):
        return self._batch_size

    def get_inputs(self, dataloader_output: Any) -> Tuple[Tuple, Dict]:
        # `dataloader_output` is whatever your dataloader returns; turn it into the
        # (args, kwargs) tuple your model expects. For instance, if your dataloader
        # returns dictionaries with the input image under the key "img", and your
        # YOLOv8 model accepts the input images as the 0-th positional `forward`
        # argument, you would do:
        return (dataloader_output[0],), {}

    def get_target(self, dataloader_output: Any) -> Any:
        # Extract the ground-truth value from your dataloader output. For instance,
        # if the output is a dictionary with ground-truth images under a "gt" key,
        # you would return that entry here:
        return dataloader_output[1]


class MyTrainer(DetectionTrainer):
    def __init__(self, nncf_config_dict, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
        super().__init__(cfg, overrides, _callbacks)
        self.nncf_config = NNCFConfig.from_dict(nncf_config_dict)
        self.nncf_dataloader = None

    def setup_model(self):
        ckpt = super().setup_model()

        if not is_wrapped_model(self.model):
            # Make copy of model to support `DetectionTrainer` save/load logic
            self.original_model = deepcopy(self.model)
            if ckpt.get("model_compression_state"):
                self.resume_model_for_qat(ckpt)
            else:
                self.prepare_model_for_qat()
        return ckpt

    def _setup_train(self, world_size):
        super()._setup_train(world_size)
        # Disable EMA for QAT. Using EMA may reduce the accuracy of the model during training.
        if self.ema:
            self.ema.enabled = False

    def get_nncf_dataloader(self):
        if self.nncf_dataloader is None:
            num_samples = self.nncf_config["compression"]["initializer"]["range"]["num_init_samples"]
            train_loader = self.get_dataloader(self.trainset, batch_size=1, rank=RANK, mode="train")
            self.nncf_dataloader = MyInitializingDataLoader(train_loader, self.preprocess_batch, num_samples)
        return self.nncf_dataloader

    def create_wrap_inputs_fn(self):
        # Integration issue 2:
        # NNCF requires the same input structure in the forward function during model
        # training for correct model tracing, but the DetectionModel forward function
        # supports either an image tensor or a dict as input:
        # def forward(self, x, *args, **kwargs):
        #     if isinstance(x, dict):  # for cases of training and validating while training.
        #         return self.loss(x, *args, **kwargs)
        #     return self.predict(x, *args, **kwargs)
        # In this case, wrap_inputs_fn must be implemented to mark the "original" model input.
        def wrap_inputs_fn(args, kwargs):
            if isinstance(args[0], dict):
                return args, kwargs
            args = (nncf_model_input(args[0]),) + args[1:]
            return args, kwargs

        return wrap_inputs_fn

    def prepare_model_for_qat(self):
        nncf_dataloader = self.get_nncf_dataloader()
        self.nncf_config = register_default_init_args(self.nncf_config, nncf_dataloader)

        self.model = self.model.to(self.device)
        _, self.model = create_compressed_model(
            self.model, self.nncf_config, wrap_inputs_fn=self.create_wrap_inputs_fn()
        )

    def resume_model_for_qat(self, ckpt):
        # Integration issue 3:
        # Resume the QAT model from the saved model_compression_state.
        _, self.model = create_compressed_model(
            self.model,
            self.nncf_config,
            compression_state=ckpt["model_compression_state"],
            wrap_inputs_fn=self.create_wrap_inputs_fn(),
        )
        self.model.load_state_dict(ckpt["model_state_dict"])

    def save_qat_model(self):
        # Integration issue 4:
        # The NNCF QAT model is not picklable, so save the state dict instead of pickling the model.
        import pandas as pd  # scope for faster startup

        metrics = {**self.metrics, **{"fitness": self.fitness}}
        results = {k.strip(): v for k, v in pd.read_csv(self.csv).to_dict(orient="list").items()}

        compression_controller = self.model.nncf.compression_controller
        model_compression_state = {}
        if compression_controller is not None:
            model_compression_state = compression_controller.get_compression_state()

        ckpt = {
            "epoch": self.epoch,
            "best_fitness": self.best_fitness,
            "model": deepcopy(de_parallel(self.original_model)).half(),
            "model_state_dict": de_parallel(self.model).state_dict(),
            "model_compression_state": model_compression_state,
            "optimizer": self.optimizer.state_dict(),
            "train_args": vars(self.args),  # save as dict
            "train_metrics": metrics,
            "train_results": results,
            "date": datetime.now().isoformat(),
            "version": __version__,
        }

        # Save last and best
        torch.save(ckpt, self.last)
        if self.best_fitness == self.fitness:
            torch.save(ckpt, self.best)
        if (self.save_period > 0) and (self.epoch > 0) and (self.epoch % self.save_period == 0):
            torch.save(ckpt, self.wdir / f"epoch{self.epoch}.pt")
        del ckpt

    def export_model_to_openvino(self):
        pt_model = self.model
        if is_wrapped_model(pt_model):
            pt_model = nncf.strip(pt_model, do_copy=True)
        nncf_dataloader = self.get_nncf_dataloader()
        example_input = next(iter(nncf_dataloader))[0]
        try:
            ov_model = ov.convert_model(pt_model, example_input=example_input)
        except Exception:
            # Fall back to exporting through ONNX if direct conversion fails.
            onnx_model = "model.onnx"
            torch.onnx.export(pt_model, example_input, onnx_model)
            ov_model = ov.convert_model(onnx_model)
        # self.best is a pathlib.Path, so build the output path from its string form.
        model_path = str(self.best).replace('.pt', '_openvino.xml')

        ov.save_model(ov_model, model_path, compress_to_fp16=False)


    def final_eval(self):
        """Performs final evaluation and validation for object detection YOLO model."""
        for f in self.last, self.best:
            if f.exists():
                strip_optimizer(f)  # strip optimizers
                if f is self.best:
                    LOGGER.info(f"\nValidating {f}...")
                    self.model = f
                    self.setup_model()
                    self.validator.args.plots = self.args.plots
                    self.metrics = self.validator(model=self.model)
                    self.metrics.pop("fitness", None)
                    self.run_callbacks("on_fit_epoch_end")

    def save_model(self):
        if is_wrapped_model(self.model):
            self.save_qat_model()
        else:
            super().save_model()


def main():
    args = dict(model="yolov8l.pt", data="coco8.yaml", epochs=3, mode="train", verbose=False)
    nncf_config_dict = {
        "input_info": {"sample_size": [1, 3, 960, 960]},
        "compression": {
            "algorithm": "quantization",
            "ignored_scopes": ["{re}/Detect"],  # ignore the post-processing
            "initializer": {"range": {"num_init_samples": 300}},
        },
    }

    network_yaml, training_configs = get_znt_exp_config_overrides(znt_id=9999)
    training_configs['data'] = 'coco8.yaml'
    print(training_configs['data'])
    nncf_trainer = MyTrainer(nncf_config_dict, overrides=training_configs)
    nncf_trainer.train()
    nncf_trainer.export_model_to_openvino()

if __name__ == "__main__":
    main()

Training proceeded normally, except that the validation scores remained stagnant regardless of loss changes. However, when attempting to export, I received the following error:

  File "/home/ze-flyer/ZE/PycharmProjects/znt-4/znt/ultralytics/nncf_tester.py", line 227, in <module>
    main()
  File "/home/ze-flyer/ZE/PycharmProjects/znt-4/znt/ultralytics/nncf_tester.py", line 224, in main
    nncf_trainer.export_model_to_openvino()
  File "/home/ze-flyer/ZE/PycharmProjects/znt-4/znt/ultralytics/nncf_tester.py", line 173, in export_model_to_openvino
    pt_model = nncf.strip(pt_model, do_copy=True)
  File "/home/ze-flyer/anaconda3/envs/znt-4/lib/python3.10/site-packages/nncf/common/strip.py", line 38, in strip
    return strip_pt(model, do_copy)
  File "/home/ze-flyer/anaconda3/envs/znt-4/lib/python3.10/site-packages/nncf/torch/strip.py", line 25, in strip
    return model.nncf.strip(do_copy)
  File "/home/ze-flyer/anaconda3/envs/znt-4/lib/python3.10/site-packages/nncf/torch/nncf_network.py", line 968, in strip
    return self.compression_controller.strip(do_copy)
  File "/home/ze-flyer/anaconda3/envs/znt-4/lib/python3.10/site-packages/nncf/api/compression.py", line 266, in strip
    return self.strip_model(self.model, do_copy)
  File "/home/ze-flyer/anaconda3/envs/znt-4/lib/python3.10/site-packages/nncf/torch/quantization/algo.py", line 1474, in strip_model
    model = strip_quantized_model(model)
  File "/home/ze-flyer/anaconda3/envs/znt-4/lib/python3.10/site-packages/nncf/torch/quantization/strip.py", line 174, in strip_quantized_model
    model = replace_quantizer_to_torch_native_module(model)
  File "/home/ze-flyer/anaconda3/envs/znt-4/lib/python3.10/site-packages/nncf/torch/quantization/strip.py", line 45, in replace_quantizer_to_torch_native_module
    nncf_module = model.nncf.get_containing_module(node.node_name)
  File "/home/ze-flyer/anaconda3/envs/znt-4/lib/python3.10/site-packages/nncf/torch/nncf_network.py", line 710, in get_containing_module
    return self.get_module_by_scope(scope)
  File "/home/ze-flyer/anaconda3/envs/znt-4/lib/python3.10/site-packages/nncf/torch/nncf_network.py", line 695, in get_module_by_scope
    return get_module_by_scope(curr_module, scope)
  File "/home/ze-flyer/anaconda3/envs/znt-4/lib/python3.10/site-packages/nncf/torch/dynamic_graph/scope_access.py", line 30, in get_module_by_scope
    raise nncf.InternalError(
nncf.errors.InternalError: Could not find a bn module member in NNCFBatchNorm2d module of scope DetectionModel/Sequential[model]/Conv[0]/NNCFBatchNorm2d[bn] during node search
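
To help isolate this, the traceback points inside nncf.strip() rather than into the Ultralytics training loop, so a minimal self-contained check along the following lines may show whether the NNCFBatchNorm2d lookup fails outside YOLOv8 as well. This is a sketch, not a verified reproducer: the toy Conv/BatchNorm model and the ToyLoader helper are hypothetical stand-ins, chosen only so that NNCF wraps a BatchNorm2d into NNCFBatchNorm2d before strip() runs.

import torch
import torch.nn as nn

import nncf
from nncf import NNCFConfig
from nncf.torch import create_compressed_model, register_default_init_args
from nncf.torch.initialization import PTInitializingDataLoader


class ToyLoader(PTInitializingDataLoader):
    # Hypothetical helper: wraps a plain list of (input, target) pairs.
    @property
    def batch_size(self):
        return 1

    def get_inputs(self, dataloader_output):
        return (dataloader_output[0],), {}

    def get_target(self, dataloader_output):
        return dataloader_output[1]


model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8), nn.SiLU())
data = [(torch.rand(1, 3, 64, 64), None) for _ in range(4)]
config = NNCFConfig.from_dict({
    "input_info": {"sample_size": [1, 3, 64, 64]},
    "compression": {"algorithm": "quantization"},
})
config = register_default_init_args(config, ToyLoader(data))
_, compressed = create_compressed_model(model, config)
stripped = nncf.strip(compressed, do_copy=True)  # raises here if the bug reproduces
print(type(stripped))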

### Environment

requirements.txt:

about-time==4.2.1
absl-py==2.1.0
aiohttp==3.8.5
aiohttp-cors==0.7.0
aiorwlock==1.3.0
aiosignal==1.3.1
alabaster==0.7.16
alembic==1.12.1
alive-progress==3.1.5
anyio==3.7.0
astroid==3.0.2
async-generator==1.10
async-timeout==4.0.2
attrs==23.1.0
autograd==1.6.2
Babel==2.14.0
bcrypt==4.0.1
blessed==1.20.0
cachetools==5.3.1
certifi==2023.5.7
cffi==1.15.1
charset-normalizer==3.2.0
clean==0.1.4
click==8.1.3
cma==3.2.2
colorama==0.4.6
colorful==0.5.5
colorlog==6.7.0
contourpy==1.2.0
cryptography==41.0.2
cycler==0.12.1
Deprecated==1.2.14
dill==0.3.7
distlib==0.3.7
docker==6.1.3
docutils==0.20.1
ecdsa==0.18.0
exceptiongroup==1.1.1
fastapi==0.96.0
fastapi-login==1.9.0
filelock==3.12.2
fonttools==4.46.0
frozenlist==1.4.0
fsspec==2023.6.0
future==1.0.0
google-api-core==2.11.1
google-auth==2.22.0
google-cloud-core==2.3.3
google-cloud-storage==2.10.0
google-crc32c==1.5.0
google-resumable-media==2.5.0
googleapis-common-protos==1.59.1
gpustat==1.1
GPUtil==1.4.0
grapheme==0.6.0
graphviz==0.20.1
greenlet==2.0.2
grpcio==1.51.3
h11==0.14.0
huggingface-hub==0.23.4
idna==3.4
imagesize==1.4.1
install==1.3.5
isort==5.13.2
Jinja2==3.1.3
joblib==1.3.2
jsonschema==4.18.4
jsonschema-specifications==2023.7.1
jstyleson==0.0.2
kaleido==0.2.1
kiwisolver==1.4.5
Mako==1.2.4
markdown-it-py==3.0.0
MarkupSafe==2.1.3
matplotlib==3.8.2
mccabe==0.7.0
mdurl==0.1.2
mpmath==1.3.0
msgpack==1.0.5
multidict==6.0.4
natsort==8.4.0
networkx==3.3
ninja==1.11.1.1
nncf==2.11.0
numpy==1.25.1
nvidia-cublas-cu12==12.1.3.1
nvidia-cuda-cupti-cu12==12.1.105
nvidia-cuda-nvrtc-cu12==12.1.105
nvidia-cuda-runtime-cu12==12.1.105
nvidia-cudnn-cu12==8.9.2.26
nvidia-cufft-cu12==11.0.2.54
nvidia-curand-cu12==10.3.2.106
nvidia-cusolver-cu12==11.4.5.107
nvidia-cusparse-cu12==12.1.0.106
nvidia-ml-py==12.535.77
nvidia-nccl-cu12==2.20.5
nvidia-nvjitlink-cu12==12.5.40
nvidia-nvtx-cu12==12.1.105
opencensus==0.11.2
opencensus-context==0.1.3
opencv-python==4.8.0.76
opencv-python-headless==4.10.0.82
openvino==2024.2.0
openvino-telemetry==2024.1.0
optuna==3.4.0
outcome==1.2.0
packaging==23.1
pandas==2.0.3
paramiko==3.2.0
passlib==1.7.4
Pillow==10.1.0
platformdirs==3.9.1
plotly==5.18.0
prettytable==3.10.0
prometheus-client==0.17.1
protobuf==4.23.4
psutil==5.9.5
psycopg2-binary==2.9.6
py-cpuinfo==9.0.0
py-spy==0.3.14
pyarrow==12.0.1
pyasn1==0.5.0
pyasn1-modules==0.3.0
pycparser==2.21
pydantic==1.10.8
pydot==3.0.1
Pygments==2.17.2
PyJWT==2.7.0
pylint==3.0.3
pymoo==0.6.1.1
PyNaCl==1.5.0
pyparsing==3.1.1
python-dateutil==2.8.2
python-jose==3.3.0
python-multipart==0.0.6
pytorch-quantization==2.1.3
pytz==2023.3
PyYAML==6.0.1
ray==2.31.0
referencing==0.30.0
requests==2.31.0
rich==13.7.1
rpds-py==0.9.2
rsa==4.9
scikit-learn==1.4.0
scipy==1.12.0
seaborn==0.13.2
six==1.16.0
smart-open==6.3.0
sniffio==1.3.0
snowballstemmer==2.2.0
sortedcontainers==2.4.0
Sphinx==7.2.6
sphinx-rtd-theme==2.0.0
sphinx_glpi_theme==0.6
sphinxcontrib-applehelp==1.0.8
sphinxcontrib-devhelp==1.0.6
sphinxcontrib-htmlhelp==2.0.5
sphinxcontrib-jquery==4.1
sphinxcontrib-jsmath==1.0.1
sphinxcontrib-qthelp==1.0.7
sphinxcontrib-serializinghtml==1.1.10
SQLAlchemy==2.0.30
starlette==0.27.0
sympy==1.12.1
tabulate==0.9.0
tenacity==8.2.3
tensorboardX==2.6.1
termcolor==2.3.0
thop==0.1.1.post2209072238
threadpoolctl==3.2.0
tomli==2.0.1
tomlkit==0.12.3
torch==2.3.1
torchvision==0.18.1
tqdm==4.66.0
trio==0.21.0
triton==2.3.1
typing_extensions==4.6.3
tzdata==2023.3
ultralytics @ git+https://github.com/THU-MIG/yolov10.git@aad320dd80b56694e590c950b25060a134966496
ultralytics-thop==0.2.8
urllib3==1.26.16
uvicorn==0.22.0
virtualenv==20.21.0
wcwidth==0.2.6
websocket-client==1.6.1
wrapt==1.16.0
yarl==1.9.2

OS: Ubuntu 22.04
NVIDIA Driver: 550.90.07

### Minimal Reproducible Example

_No response_

### Are you going to submit a PR?

- [ ] Yes I'd like to help by submitting a PR!
alexsu52 (Contributor) commented:

Hello @quinnZE!

I could not run your code because I ran into the following error:

Traceback (most recent call last):
  File "nncf/experiments/yolov8/main.py", line 14, in <module>
    from znt.ultralytics.models.yolo.detect import DetectionTrainer
ModuleNotFoundError: No module named 'znt'

Please provide a working reproducer.

I also checked the code from issue #2486 with the latest NNCF and did not encounter the reported error.
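
As an aside, when sharing a reproducer it helps to print the versions of the packages involved so environments can be compared directly; a small sketch (assuming each package exposes __version__, with a fallback where it does not):

import nncf, torch, torchvision, ultralytics
import openvino as ov

# Print the versions most relevant to this issue.
for mod in (nncf, torch, torchvision, ultralytics, ov):
    print(mod.__name__, getattr(mod, "__version__", "unknown"))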

quinnZE commented Jul 31, 2024

My apologies @alexsu52, I am using a modified repo that forks Ultralytics, which caused the import issue. Below I have provided an updated snippet. I will also try to replicate with vanilla Ultralytics on my end to see whether the issue still appears, since it may be caused by one of my custom modifications, though I don't believe I have changed much around the batch normalization.

from copy import deepcopy
from datetime import datetime
from typing import Any, Dict, Tuple

import torch
from ultralytics.models.yolo.detect import DetectionTrainer
from ultralytics.utils import DEFAULT_CFG
from ultralytics.utils import LOGGER
from ultralytics.utils import RANK
from ultralytics.utils import __version__
from ultralytics.utils.torch_utils import de_parallel
from ultralytics.utils.torch_utils import strip_optimizer
from ultralytics.cfg import get_znt_exp_config_overrides

import nncf
from nncf import NNCFConfig
from nncf.torch import create_compressed_model
from nncf.torch import register_default_init_args
from nncf.torch.dynamic_graph.io_handling import nncf_model_input
from nncf.torch.initialization import PTInitializingDataLoader
from nncf.torch.model_creation import is_wrapped_model

import openvino as ov

from ultralytics.utils.torch_utils import get_latest_opset

# Integration issue 1:
# MyInitializingDataLoader must support deep copy because DetectionTrainer deep-copies
# the model and MyInitializingDataLoader during training setup. The input data_loader,
# of type ultralytics.data.build.InfiniteDataLoader, does not support deep copy and
# cannot be used directly in MyInitializingDataLoader. The workaround for this
# limitation is to build a deep-copyable dataset from the data_loader.
class MyInitializingDataLoader(PTInitializingDataLoader):
    def __init__(self, data_loader, preprocess_batch_fn, num_samples=300):
        super().__init__(data_loader)
        self._batch_size = self._data_loader.batch_size
        # Using list of images instead of 'ultralytics.data.build.InfiniteDataLoader' to support deepcopy.
        self._data_loader = []
        num_samples = num_samples / self._batch_size
        for count, data_item in enumerate(data_loader):
            if count > num_samples:
                break
            batch = preprocess_batch_fn(data_item)
            self._data_loader.append((batch["img"], None))

    @property
    def batch_size(self):
        return self._batch_size

    def get_inputs(self, dataloader_output: Any) -> Tuple[Tuple, Dict]:
        # `dataloader_output` is whatever your dataloader returns; turn it into the
        # (args, kwargs) tuple your model expects. For instance, if your dataloader
        # returns dictionaries with the input image under the key "img", and your
        # YOLOv8 model accepts the input images as the 0-th positional `forward`
        # argument, you would do:
        return (dataloader_output[0],), {}

    def get_target(self, dataloader_output: Any) -> Any:
        # Extract the ground-truth value from your dataloader output. For instance,
        # if the output is a dictionary with ground-truth images under a "gt" key,
        # you would return that entry here:
        return dataloader_output[1]


class MyTrainer(DetectionTrainer):
    def __init__(self, nncf_config_dict, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
        super().__init__(cfg, overrides, _callbacks)
        self.nncf_config = NNCFConfig.from_dict(nncf_config_dict)
        self.nncf_dataloader = None

    def setup_model(self):
        ckpt = super().setup_model()

        if not is_wrapped_model(self.model):
            # Make copy of model to support `DetectionTrainer` save/load logic
            self.original_model = deepcopy(self.model)
            if ckpt.get("model_compression_state"):
                self.resume_model_for_qat(ckpt)
            else:
                self.prepare_model_for_qat()
        return ckpt

    def _setup_train(self, world_size):
        super()._setup_train(world_size)
        # Disable EMA for QAT. Using EMA may reduce the accuracy of the model during training.
        if self.ema:
            self.ema.enabled = False

    def get_nncf_dataloader(self):
        if self.nncf_dataloader is None:
            num_samples = self.nncf_config["compression"]["initializer"]["range"]["num_init_samples"]
            train_loader = self.get_dataloader(self.trainset, batch_size=1, rank=RANK, mode="train")
            self.nncf_dataloader = MyInitializingDataLoader(train_loader, self.preprocess_batch, num_samples)
        return self.nncf_dataloader

    def create_wrap_inputs_fn(self):
        # Integration issue 2:
        # NNCF requires the same input structure in the forward function during model
        # training for correct model tracing, but the DetectionModel forward function
        # supports either an image tensor or a dict as input:
        # def forward(self, x, *args, **kwargs):
        #     if isinstance(x, dict):  # for cases of training and validating while training.
        #         return self.loss(x, *args, **kwargs)
        #     return self.predict(x, *args, **kwargs)
        # In this case, wrap_inputs_fn must be implemented to mark the "original" model input.
        def wrap_inputs_fn(args, kwargs):
            if isinstance(args[0], dict):
                return args, kwargs
            args = (nncf_model_input(args[0]),) + args[1:]
            return args, kwargs

        return wrap_inputs_fn

    def prepare_model_for_qat(self):
        nncf_dataloader = self.get_nncf_dataloader()
        self.nncf_config = register_default_init_args(self.nncf_config, nncf_dataloader)

        self.model = self.model.to(self.device)
        _, self.model = create_compressed_model(
            self.model, self.nncf_config, wrap_inputs_fn=self.create_wrap_inputs_fn()
        )

    def resume_model_for_qat(self, ckpt):
        # Integration issue 3:
        # Resume the QAT model from the saved model_compression_state.
        _, self.model = create_compressed_model(
            self.model,
            self.nncf_config,
            compression_state=ckpt["model_compression_state"],
            wrap_inputs_fn=self.create_wrap_inputs_fn(),
        )
        self.model.load_state_dict(ckpt["model_state_dict"])

    def save_qat_model(self):
        # Integration issue 4:
        # The NNCF QAT model is not picklable, so save the state dict instead of pickling the model.
        import pandas as pd  # scope for faster startup

        metrics = {**self.metrics, **{"fitness": self.fitness}}
        results = {k.strip(): v for k, v in pd.read_csv(self.csv).to_dict(orient="list").items()}

        compression_controller = self.model.nncf.compression_controller
        model_compression_state = {}
        if compression_controller is not None:
            model_compression_state = compression_controller.get_compression_state()

        ckpt = {
            "epoch": self.epoch,
            "best_fitness": self.best_fitness,
            "model": deepcopy(de_parallel(self.original_model)).half(),
            "model_state_dict": de_parallel(self.model).state_dict(),
            "model_compression_state": model_compression_state,
            "optimizer": self.optimizer.state_dict(),
            "train_args": vars(self.args),  # save as dict
            "train_metrics": metrics,
            "train_results": results,
            "date": datetime.now().isoformat(),
            "version": __version__,
        }

        # Save last and best
        torch.save(ckpt, self.last)
        if self.best_fitness == self.fitness:
            torch.save(ckpt, self.best)
        if (self.save_period > 0) and (self.epoch > 0) and (self.epoch % self.save_period == 0):
            torch.save(ckpt, self.wdir / f"epoch{self.epoch}.pt")
        del ckpt

    def export_model_to_openvino(self):
        pt_model = self.model
        if is_wrapped_model(pt_model):
            pt_model = nncf.strip(pt_model, do_copy=True)
        nncf_dataloader = self.get_nncf_dataloader()
        example_input = next(iter(nncf_dataloader))[0]
        try:
            ov_model = ov.convert_model(pt_model, example_input=example_input)
        except Exception:
            # Fall back to exporting through ONNX if direct conversion fails.
            onnx_model = "model.onnx"
            torch.onnx.export(pt_model, example_input, onnx_model)
            ov_model = ov.convert_model(onnx_model)
        # self.best is a pathlib.Path, so build the output path from its string form.
        model_path = str(self.best).replace('.pt', '_openvino.xml')

        ov.save_model(ov_model, model_path, compress_to_fp16=False)


    def final_eval(self):
        """Performs final evaluation and validation for object detection YOLO model."""
        for f in self.last, self.best:
            if f.exists():
                strip_optimizer(f)  # strip optimizers
                if f is self.best:
                    LOGGER.info(f"\nValidating {f}...")
                    self.model = f
                    self.setup_model()
                    self.validator.args.plots = self.args.plots
                    self.metrics = self.validator(model=self.model)
                    self.metrics.pop("fitness", None)
                    self.run_callbacks("on_fit_epoch_end")

    def save_model(self):
        if is_wrapped_model(self.model):
            self.save_qat_model()
        else:
            super().save_model()


def main():
    args = dict(model="yolov8l.pt", data="coco8.yaml", epochs=3, mode="train", verbose=False)
    nncf_config_dict = {
        "input_info": {"sample_size": [1, 3, 960, 960]},
        "compression": {
            "algorithm": "quantization",
            "ignored_scopes": ["{re}/Detect"],  # ignore the post-processing
            "initializer": {"range": {"num_init_samples": 300}},
        },
    }

    network_yaml, training_configs = get_znt_exp_config_overrides(znt_id=9999)
    training_configs['data'] = 'coco8.yaml'
    print(training_configs['data'])
    nncf_trainer = MyTrainer(nncf_config_dict, overrides=training_configs)
    nncf_trainer.train()
    nncf_trainer.export_model_to_openvino()

if __name__ == "__main__":
    main()
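
An alternative export path that sidesteps nncf.strip() may also be worth trying here: the compression controller created by create_compressed_model exposes export_model(), which writes an ONNX file with fake-quantize nodes that OpenVINO can then convert. A sketch, assuming a trained MyTrainer instance named `trainer` and retrieving the controller the same way save_qat_model does:

import openvino as ov

# Export through the compression controller instead of nncf.strip().
ctrl = trainer.model.nncf.compression_controller
ctrl.export_model("qat_model.onnx")  # ONNX with fake-quantize nodes
ov_model = ov.convert_model("qat_model.onnx")
ov.save_model(ov_model, "qat_model_openvino.xml", compress_to_fp16=False)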

quinnZE commented Aug 5, 2024

@alexsu52 I have updated the code to use vanilla Ultralytics and am still encountering the same error during export, with both the previous versions and the updated versions from the original issue:

Versions:
nncf: 2.8.1
torch: 2.1.2
torchvision: 0.16.2
ultralytics: 8.1.11

Here is the complete code and traceback for my most recent run. This should run on any machine with the proper packages installed.

from copy import deepcopy
from datetime import datetime
from typing import Any, Dict, Tuple

import torch
from ultralytics.models.yolo.detect import DetectionTrainer
from ultralytics.utils import DEFAULT_CFG
from ultralytics.utils import LOGGER
from ultralytics.utils import RANK
from ultralytics.utils import __version__
from ultralytics.utils.torch_utils import de_parallel
from ultralytics.utils.torch_utils import strip_optimizer

import nncf
from nncf import NNCFConfig
from nncf.torch import create_compressed_model
from nncf.torch import register_default_init_args
from nncf.torch.dynamic_graph.io_handling import nncf_model_input
from nncf.torch.initialization import PTInitializingDataLoader
from nncf.torch.model_creation import is_wrapped_model

import openvino as ov

# Integration issue 1:
# MyInitializingDataLoader must support deep copy because DetectionTrainer deep-copies
# the model and MyInitializingDataLoader during training setup. The input data_loader,
# of type ultralytics.data.build.InfiniteDataLoader, does not support deep copy and
# cannot be used directly in MyInitializingDataLoader. The workaround for this
# limitation is to build a deep-copyable dataset from the data_loader.
class MyInitializingDataLoader(PTInitializingDataLoader):
    def __init__(self, data_loader, preprocess_batch_fn, num_samples=300):
        super().__init__(data_loader)
        self._batch_size = self._data_loader.batch_size
        # Using list of images instead of 'ultralytics.data.build.InfiniteDataLoader' to support deepcopy.
        self._data_loader = []
        num_samples = num_samples / self._batch_size
        for count, data_item in enumerate(data_loader):
            if count > num_samples:
                break
            batch = preprocess_batch_fn(data_item)
            self._data_loader.append((batch["img"], None))

    @property
    def batch_size(self):
        return self._batch_size

    def get_inputs(self, dataloader_output: Any) -> Tuple[Tuple, Dict]:
        # `dataloader_output` is whatever your dataloader returns; turn it into the
        # (args, kwargs) tuple your model expects. For instance, if your dataloader
        # returns dictionaries with the input image under the key "img", and your
        # YOLOv8 model accepts the input images as the 0-th positional `forward`
        # argument, you would do:
        return (dataloader_output[0],), {}

    def get_target(self, dataloader_output: Any) -> Any:
        # Extract the ground-truth value from your dataloader output. For instance,
        # if the output is a dictionary with ground-truth images under a "gt" key,
        # you would return that entry here:
        return dataloader_output[1]


class MyTrainer(DetectionTrainer):
    def __init__(self, nncf_config_dict, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
        super().__init__(cfg, overrides, _callbacks)
        self.nncf_config = NNCFConfig.from_dict(nncf_config_dict)
        self.nncf_dataloader = None

    def setup_model(self):
        ckpt = super().setup_model()

        if not is_wrapped_model(self.model):
            # Make copy of model to support `DetectionTrainer` save/load logic
            self.original_model = deepcopy(self.model)
            if ckpt.get("model_compression_state"):
                self.resume_model_for_qat(ckpt)
            else:
                self.prepare_model_for_qat()
        return ckpt

    def _setup_train(self, world_size):
        super()._setup_train(world_size)
        # Disable EMA for QAT. Using EMA may reduce the accuracy of the model during training.
        if self.ema:
            self.ema.enabled = False

    def get_nncf_dataloader(self):
        if self.nncf_dataloader is None:
            num_samples = self.nncf_config["compression"]["initializer"]["range"]["num_init_samples"]
            train_loader = self.get_dataloader(self.trainset, batch_size=1, rank=RANK, mode="train")
            self.nncf_dataloader = MyInitializingDataLoader(train_loader, self.preprocess_batch, num_samples)
        return self.nncf_dataloader

    def create_wrap_inputs_fn(self):
        # Integration issue 2:
        # NNCF requires the same input structure in the forward function during model
        # training for correct model tracing, but the DetectionModel forward function
        # supports either an image tensor or a dict as input:
        # def forward(self, x, *args, **kwargs):
        #     if isinstance(x, dict):  # for cases of training and validating while training.
        #         return self.loss(x, *args, **kwargs)
        #     return self.predict(x, *args, **kwargs)
        # In this case, wrap_inputs_fn must be implemented to mark the "original" model input.
        def wrap_inputs_fn(args, kwargs):
            if isinstance(args[0], dict):
                return args, kwargs
            args = (nncf_model_input(args[0]),) + args[1:]
            return args, kwargs

        return wrap_inputs_fn

    def prepare_model_for_qat(self):
        nncf_dataloader = self.get_nncf_dataloader()
        self.nncf_config = register_default_init_args(self.nncf_config, nncf_dataloader)

        self.model = self.model.to(self.device)
        _, self.model = create_compressed_model(
            self.model, self.nncf_config, wrap_inputs_fn=self.create_wrap_inputs_fn()
        )

    def resume_model_for_qat(self, ckpt):
        # Integration issue 3:
        # Resume the QAT model from the saved model_compression_state.
        _, self.model = create_compressed_model(
            self.model,
            self.nncf_config,
            compression_state=ckpt["model_compression_state"],
            wrap_inputs_fn=self.create_wrap_inputs_fn(),
        )
        self.model.load_state_dict(ckpt["model_state_dict"])

    def save_qat_model(self):
        # Integration issue 4:
        # The NNCF QAT model is not picklable, so save the state dict instead of pickling the model.
        import pandas as pd  # scope for faster startup

        metrics = {**self.metrics, **{"fitness": self.fitness}}
        results = {k.strip(): v for k, v in pd.read_csv(self.csv).to_dict(orient="list").items()}

        compression_controller = self.model.nncf.compression_controller
        model_compression_state = {}
        if compression_controller is not None:
            model_compression_state = compression_controller.get_compression_state()

        ckpt = {
            "epoch": self.epoch,
            "best_fitness": self.best_fitness,
            "model": deepcopy(de_parallel(self.original_model)).half(),
            "model_state_dict": de_parallel(self.model).state_dict(),
            "model_compression_state": model_compression_state,
            "optimizer": self.optimizer.state_dict(),
            "train_args": vars(self.args),  # save as dict
            "train_metrics": metrics,
            "train_results": results,
            "date": datetime.now().isoformat(),
            "version": __version__,
        }

        # Save last and best
        torch.save(ckpt, self.last)
        if self.best_fitness == self.fitness:
            torch.save(ckpt, self.best)
        if (self.save_period > 0) and (self.epoch > 0) and (self.epoch % self.save_period == 0):
            torch.save(ckpt, self.wdir / f"epoch{self.epoch}.pt")
        del ckpt

    def export_model_to_openvino(self, model_path):
        pt_model = self.model
        if is_wrapped_model(pt_model):
            pt_model = nncf.strip(pt_model, do_copy=True)
        nncf_dataloader = self.get_nncf_dataloader()
        example_input = next(iter(nncf_dataloader))[0]
        try:
            ov_model = ov.convert_model(pt_model, example_input=example_input)
        except Exception:
            # Fall back to exporting through ONNX if direct conversion fails.
            onnx_model = "model.onnx"
            torch.onnx.export(pt_model, example_input, onnx_model)
            ov_model = ov.convert_model(onnx_model)

        ov.save_model(ov_model, model_path, compress_to_fp16=False)

    def final_eval(self):
        """Performs final evaluation and validation for object detection YOLO model."""
        for f in self.last, self.best:
            if f.exists():
                strip_optimizer(f)  # strip optimizers
                if f is self.best:
                    LOGGER.info(f"\nValidating {f}...")
                    self.model = f
                    self.setup_model()
                    self.validator.args.plots = self.args.plots
                    self.metrics = self.validator(model=self.model)
                    self.metrics.pop("fitness", None)
                    self.run_callbacks("on_fit_epoch_end")

    def save_model(self):
        if is_wrapped_model(self.model):
            self.save_qat_model()
        else:
            super().save_model()


def main():
    args = dict(model="yolov8n.pt", data="coco8.yaml", epochs=3, mode="train", verbose=False)
    nncf_config_dict = {
        "input_info": {"sample_size": [1, 3, 640, 640]},
        "log_dir": "yolov8_output",  # The log directory for NNCF-specific logging outputs.
        "compression": {
            "algorithm": "quantization",
            "ignored_scopes": ["{re}/Detect"],  # ignore the post-processing
            "initializer": {"range": {"num_init_samples": 300}},
        },
    }
    nncf_trainer = MyTrainer(nncf_config_dict, overrides=args)
    nncf_trainer.train()
    nncf_trainer.export_model_to_openvino(model_path='/home/ze-flyer/openvino_yolov8.xml')


if __name__ == "__main__":
    main()

Error trace:

/home/ze-flyer/anaconda3/envs/ultralytics/bin/python /home/ze-flyer/ZE/PycharmProjects/ultralytics/nncf_train.py 
INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, openvino
Ultralytics YOLOv8.2.64 🚀 Python-3.10.14 torch-2.1.2+cu121 CUDA:0 (NVIDIA GeForce RTX 4090, 24208MiB)
engine/trainer: task=detect, mode=train, model=yolov8n.pt, data=coco8.yaml, epochs=3, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train12, exist_ok=False, pretrained=True, optimizer=auto, verbose=False, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, show_boxes=True, line_width=None, format=torchscript, keras=False, optimize=False, int8=False, dynamic=False, simplify=False, opset=None, workspace=4, nms=False, lr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=7.5, cls=0.5, dfl=1.5, pose=12.0, kobj=1.0, label_smoothing=0.0, nbs=64, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, flipud=0.0, fliplr=0.5, bgr=0.0, mosaic=1.0, mixup=0.0, copy_paste=0.0, auto_augment=randaugment, erasing=0.4, crop_fraction=1.0, cfg=None, tracker=botsort.yaml, save_dir=runs/detect/train12

                 from  n    params  module                                       arguments                     
0                  -1  1       464  ultralytics.nn.modules.conv.Conv             [3, 16, 3, 2]                 
1                  -1  1      4672  ultralytics.nn.modules.conv.Conv             [16, 32, 3, 2]                
2                  -1  1      7360  ultralytics.nn.modules.block.C2f             [32, 32, 1, True]             
3                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
4                  -1  2     49664  ultralytics.nn.modules.block.C2f             [64, 64, 2, True]             
5                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
6                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
7                  -1  1    295424  ultralytics.nn.modules.conv.Conv             [128, 256, 3, 2]              
8                  -1  1    460288  ultralytics.nn.modules.block.C2f             [256, 256, 1, True]           
9                  -1  1    164608  ultralytics.nn.modules.block.SPPF            [256, 256, 5]                 
10                  -1  1         0  torch.nn.modules.upsampling.Upsample         [None, 2, 'nearest']          
11             [-1, 6]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
12                  -1  1    148224  ultralytics.nn.modules.block.C2f             [384, 128, 1]                 
13                  -1  1         0  torch.nn.modules.upsampling.Upsample         [None, 2, 'nearest']          
14             [-1, 4]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
15                  -1  1     37248  ultralytics.nn.modules.block.C2f             [192, 64, 1]                  
16                  -1  1     36992  ultralytics.nn.modules.conv.Conv             [64, 64, 3, 2]                
17            [-1, 12]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
18                  -1  1    123648  ultralytics.nn.modules.block.C2f             [192, 128, 1]                 
19                  -1  1    147712  ultralytics.nn.modules.conv.Conv             [128, 128, 3, 2]              
20             [-1, 9]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
21                  -1  1    493056  ultralytics.nn.modules.block.C2f             [384, 256, 1]                 
22        [15, 18, 21]  1    897664  ultralytics.nn.modules.head.Detect           [80, [64, 128, 256]]          
Model summary: 225 layers, 3,157,200 parameters, 3,157,184 gradients, 8.9 GFLOPs

Transferred 355/355 items from pretrained weights
train: Scanning /home/ze-flyer/ZE/PycharmProjects/yolov5/datasets/coco8/labels/train.cache... 4 images, 0 backgrounds, 0 corrupt: 100%|██████████| 4/4 [00:00<?, ?it/s]
INFO:nncf:Not adding activation input quantizer for operation: 168 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv2]/Sequential[0]/Conv[0]/NNCFConv2d[conv]/conv2d_0
169 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv2]/Sequential[0]/Conv[0]/NNCFBatchNorm2d[bn]/batch_norm_0
170 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv2]/Sequential[0]/Conv[0]/SiLU[act]/silu_0

INFO:nncf:Not adding activation input quantizer for operation: 171 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv2]/Sequential[0]/Conv[1]/NNCFConv2d[conv]/conv2d_0
172 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv2]/Sequential[0]/Conv[1]/NNCFBatchNorm2d[bn]/batch_norm_0
173 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv2]/Sequential[0]/Conv[1]/SiLU[act]/silu_0

INFO:nncf:Not adding activation input quantizer for operation: 174 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv2]/Sequential[0]/NNCFConv2d[2]/conv2d_0
INFO:nncf:Not adding activation input quantizer for operation: 175 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv3]/Sequential[0]/Conv[0]/NNCFConv2d[conv]/conv2d_0
176 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv3]/Sequential[0]/Conv[0]/NNCFBatchNorm2d[bn]/batch_norm_0
177 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv3]/Sequential[0]/Conv[0]/SiLU[act]/silu_0

INFO:nncf:Not adding activation input quantizer for operation: 178 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv3]/Sequential[0]/Conv[1]/NNCFConv2d[conv]/conv2d_0
179 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv3]/Sequential[0]/Conv[1]/NNCFBatchNorm2d[bn]/batch_norm_0
180 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv3]/Sequential[0]/Conv[1]/SiLU[act]/silu_0

INFO:nncf:Not adding activation input quantizer for operation: 181 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv3]/Sequential[0]/NNCFConv2d[2]/conv2d_0
INFO:nncf:Not adding activation input quantizer for operation: 182 DetectionModel/Sequential[model]/Detect[22]/cat_0
INFO:nncf:Not adding activation input quantizer for operation: 213 DetectionModel/Sequential[model]/Detect[22]/view_0
INFO:nncf:Not adding activation input quantizer for operation: 183 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv2]/Sequential[1]/Conv[0]/NNCFConv2d[conv]/conv2d_0
184 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv2]/Sequential[1]/Conv[0]/NNCFBatchNorm2d[bn]/batch_norm_0
185 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv2]/Sequential[1]/Conv[0]/SiLU[act]/silu_0

INFO:nncf:Not adding activation input quantizer for operation: 186 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv2]/Sequential[1]/Conv[1]/NNCFConv2d[conv]/conv2d_0
187 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv2]/Sequential[1]/Conv[1]/NNCFBatchNorm2d[bn]/batch_norm_0
188 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv2]/Sequential[1]/Conv[1]/SiLU[act]/silu_0

INFO:nncf:Not adding activation input quantizer for operation: 189 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv2]/Sequential[1]/NNCFConv2d[2]/conv2d_0
INFO:nncf:Not adding activation input quantizer for operation: 190 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv3]/Sequential[1]/Conv[0]/NNCFConv2d[conv]/conv2d_0
191 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv3]/Sequential[1]/Conv[0]/NNCFBatchNorm2d[bn]/batch_norm_0
192 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv3]/Sequential[1]/Conv[0]/SiLU[act]/silu_0

INFO:nncf:Not adding activation input quantizer for operation: 193 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv3]/Sequential[1]/Conv[1]/NNCFConv2d[conv]/conv2d_0
194 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv3]/Sequential[1]/Conv[1]/NNCFBatchNorm2d[bn]/batch_norm_0
195 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv3]/Sequential[1]/Conv[1]/SiLU[act]/silu_0

INFO:nncf:Not adding activation input quantizer for operation: 196 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv3]/Sequential[1]/NNCFConv2d[2]/conv2d_0
INFO:nncf:Not adding activation input quantizer for operation: 197 DetectionModel/Sequential[model]/Detect[22]/cat_1
INFO:nncf:Not adding activation input quantizer for operation: 214 DetectionModel/Sequential[model]/Detect[22]/view_1
INFO:nncf:Not adding activation input quantizer for operation: 198 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv2]/Sequential[2]/Conv[0]/NNCFConv2d[conv]/conv2d_0
199 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv2]/Sequential[2]/Conv[0]/NNCFBatchNorm2d[bn]/batch_norm_0
200 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv2]/Sequential[2]/Conv[0]/SiLU[act]/silu_0

INFO:nncf:Not adding activation input quantizer for operation: 201 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv2]/Sequential[2]/Conv[1]/NNCFConv2d[conv]/conv2d_0
202 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv2]/Sequential[2]/Conv[1]/NNCFBatchNorm2d[bn]/batch_norm_0
203 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv2]/Sequential[2]/Conv[1]/SiLU[act]/silu_0

INFO:nncf:Not adding activation input quantizer for operation: 204 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv2]/Sequential[2]/NNCFConv2d[2]/conv2d_0
INFO:nncf:Not adding activation input quantizer for operation: 205 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv3]/Sequential[2]/Conv[0]/NNCFConv2d[conv]/conv2d_0
206 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv3]/Sequential[2]/Conv[0]/NNCFBatchNorm2d[bn]/batch_norm_0
207 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv3]/Sequential[2]/Conv[0]/SiLU[act]/silu_0

INFO:nncf:Not adding activation input quantizer for operation: 208 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv3]/Sequential[2]/Conv[1]/NNCFConv2d[conv]/conv2d_0
209 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv3]/Sequential[2]/Conv[1]/NNCFBatchNorm2d[bn]/batch_norm_0
210 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv3]/Sequential[2]/Conv[1]/SiLU[act]/silu_0

INFO:nncf:Not adding activation input quantizer for operation: 211 DetectionModel/Sequential[model]/Detect[22]/ModuleList[cv3]/Sequential[2]/NNCFConv2d[2]/conv2d_0
INFO:nncf:Not adding activation input quantizer for operation: 212 DetectionModel/Sequential[model]/Detect[22]/cat_2
INFO:nncf:Not adding activation input quantizer for operation: 215 DetectionModel/Sequential[model]/Detect[22]/view_2
INFO:nncf:Not adding activation input quantizer for operation: 216 DetectionModel/Sequential[model]/Detect[22]/cat_3
INFO:nncf:Not adding activation input quantizer for operation: 217 DetectionModel/Sequential[model]/Detect[22]/split_0
INFO:nncf:Not adding activation input quantizer for operation: 218 DetectionModel/Sequential[model]/Detect[22]/DFL[dfl]/view_0
INFO:nncf:Not adding activation input quantizer for operation: 219 DetectionModel/Sequential[model]/Detect[22]/DFL[dfl]/transpose_0
INFO:nncf:Not adding activation input quantizer for operation: 220 DetectionModel/Sequential[model]/Detect[22]/DFL[dfl]/softmax_0
INFO:nncf:Not adding activation input quantizer for operation: 221 DetectionModel/Sequential[model]/Detect[22]/DFL[dfl]/NNCFConv2d[conv]/conv2d_0
INFO:nncf:Not adding activation input quantizer for operation: 222 DetectionModel/Sequential[model]/Detect[22]/DFL[dfl]/view_1
INFO:nncf:Not adding activation input quantizer for operation: 223 DetectionModel/Sequential[model]/Detect[22]/chunk_0
INFO:nncf:Not adding activation input quantizer for operation: 224 DetectionModel/Sequential[model]/Detect[22]/__rsub___0
INFO:nncf:Not adding activation input quantizer for operation: 225 DetectionModel/Sequential[model]/Detect[22]/__radd___0
INFO:nncf:Not adding activation input quantizer for operation: 231 DetectionModel/Sequential[model]/Detect[22]/sigmoid_0
INFO:nncf:Not adding activation input quantizer for operation: 226 DetectionModel/Sequential[model]/Detect[22]/__add___0
227 DetectionModel/Sequential[model]/Detect[22]/__truediv___0

INFO:nncf:Not adding activation input quantizer for operation: 228 DetectionModel/Sequential[model]/Detect[22]/__sub___0
INFO:nncf:Not adding activation input quantizer for operation: 229 DetectionModel/Sequential[model]/Detect[22]/cat_4
INFO:nncf:Not adding activation input quantizer for operation: 230 DetectionModel/Sequential[model]/Detect[22]/__mul___0
INFO:nncf:Not adding activation input quantizer for operation: 232 DetectionModel/Sequential[model]/Detect[22]/cat_5
INFO:nncf:Scales will be unified for quantizer group:
DetectionModel/Sequential[model]/C2f[2]/Conv[cv1]/SiLU[act]/silu_0|OUTPUT
DetectionModel/Sequential[model]/C2f[2]/ModuleList[m]/Bottleneck[0]/__add___0|OUTPUT

INFO:nncf:Scales will be unified for quantizer group:
DetectionModel/Sequential[model]/C2f[4]/Conv[cv1]/SiLU[act]/silu_0|OUTPUT
DetectionModel/Sequential[model]/C2f[4]/ModuleList[m]/Bottleneck[0]/__add___0|OUTPUT
DetectionModel/Sequential[model]/C2f[4]/ModuleList[m]/Bottleneck[1]/__add___0|OUTPUT

INFO:nncf:Scales will be unified for quantizer group:
DetectionModel/Sequential[model]/C2f[4]/Conv[cv2]/SiLU[act]/silu_0|OUTPUT
DetectionModel/Sequential[model]/Upsample[13]/interpolate_0|OUTPUT

INFO:nncf:Scales will be unified for quantizer group:
DetectionModel/Sequential[model]/C2f[6]/Conv[cv1]/SiLU[act]/silu_0|OUTPUT
DetectionModel/Sequential[model]/C2f[6]/ModuleList[m]/Bottleneck[0]/__add___0|OUTPUT
DetectionModel/Sequential[model]/C2f[6]/ModuleList[m]/Bottleneck[1]/__add___0|OUTPUT

INFO:nncf:Scales will be unified for quantizer group:
DetectionModel/Sequential[model]/C2f[6]/Conv[cv2]/SiLU[act]/silu_0|OUTPUT
DetectionModel/Sequential[model]/Upsample[10]/interpolate_0|OUTPUT

INFO:nncf:Scales will be unified for quantizer group:
DetectionModel/Sequential[model]/C2f[8]/Conv[cv1]/SiLU[act]/silu_0|OUTPUT
DetectionModel/Sequential[model]/C2f[8]/ModuleList[m]/Bottleneck[0]/__add___0|OUTPUT

INFO:nncf:Scales will be unified for quantizer group:
DetectionModel/Sequential[model]/Conv[19]/SiLU[act]/silu_0|OUTPUT
DetectionModel/Sequential[model]/SPPF[9]/Conv[cv2]/SiLU[act]/silu_0|OUTPUT

INFO:nncf:Scales will be unified for quantizer group:
DetectionModel/Sequential[model]/C2f[12]/Conv[cv1]/SiLU[act]/silu_0|OUTPUT
DetectionModel/Sequential[model]/C2f[12]/ModuleList[m]/Bottleneck[0]/Conv[cv2]/SiLU[act]/silu_0|OUTPUT

INFO:nncf:Scales will be unified for quantizer group:
DetectionModel/Sequential[model]/C2f[12]/Conv[cv2]/SiLU[act]/silu_0|OUTPUT
DetectionModel/Sequential[model]/Conv[16]/SiLU[act]/silu_0|OUTPUT

INFO:nncf:Scales will be unified for quantizer group:
DetectionModel/Sequential[model]/C2f[15]/Conv[cv1]/SiLU[act]/silu_0|OUTPUT
DetectionModel/Sequential[model]/C2f[15]/ModuleList[m]/Bottleneck[0]/Conv[cv2]/SiLU[act]/silu_0|OUTPUT

INFO:nncf:Scales will be unified for quantizer group:
DetectionModel/Sequential[model]/C2f[18]/Conv[cv1]/SiLU[act]/silu_0|OUTPUT
DetectionModel/Sequential[model]/C2f[18]/ModuleList[m]/Bottleneck[0]/Conv[cv2]/SiLU[act]/silu_0|OUTPUT

INFO:nncf:Scales will be unified for quantizer group:
DetectionModel/Sequential[model]/C2f[21]/Conv[cv1]/SiLU[act]/silu_0|OUTPUT
DetectionModel/Sequential[model]/C2f[21]/ModuleList[m]/Bottleneck[0]/Conv[cv2]/SiLU[act]/silu_0|OUTPUT

INFO:nncf:Compiling and loading torch extension: quantized_functions_cuda...
INFO:nncf:Finished loading torch extension: quantized_functions_cuda
Freezing layer 'model.22.dfl.conv.weight'
AMP: running Automatic Mixed Precision (AMP) checks with YOLOv8n...
AMP: checks passed ✅
train: Scanning /home/ze-flyer/ZE/PycharmProjects/yolov5/datasets/coco8/labels/train.cache... 4 images, 0 backgrounds, 0 corrupt: 100%|██████████| 4/4 [00:00<?, ?it/s]
val: Scanning /home/ze-flyer/ZE/PycharmProjects/yolov5/datasets/coco8/labels/val.cache... 4 images, 0 backgrounds, 0 corrupt: 100%|██████████| 4/4 [00:00<?, ?it/s]
Plotting labels to runs/detect/train12/labels.jpg... 
optimizer: 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
optimizer: AdamW(lr=0.000119, momentum=0.9) with parameter groups 57 weight(decay=0.0), 316 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to runs/detect/train12
Starting training for 3 epochs...

    Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
      1/3     0.963G     0.8429      3.505      1.324         21        640: 100%|██████████| 1/1 [00:00<00:00,  1.61it/s]
               Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  3.72it/s]
                 all          4         17      0.814      0.571      0.752       0.51

    Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
      2/3     0.998G      1.162      2.869       1.47         36        640: 100%|██████████| 1/1 [00:00<00:00,  8.54it/s]
               Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  6.19it/s]
                 all          4         17      0.814      0.571      0.752       0.51

    Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
      3/3     0.998G      1.026      2.161      1.257         20        640: 100%|██████████| 1/1 [00:00<00:00,  9.49it/s]
               Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  7.07it/s]
                 all          4         17      0.814      0.571      0.752       0.51

3 epochs completed in 0.001 hours.
Optimizer stripped from runs/detect/train12/weights/last.pt, 19.5MB
Optimizer stripped from runs/detect/train12/weights/best.pt, 19.5MB

Validating runs/detect/train12/weights/best.pt...

                 from  n    params  module                                       arguments                     
0                  -1  1       464  ultralytics.nn.modules.conv.Conv             [3, 16, 3, 2]                 
1                  -1  1      4672  ultralytics.nn.modules.conv.Conv             [16, 32, 3, 2]                
2                  -1  1      7360  ultralytics.nn.modules.block.C2f             [32, 32, 1, True]             
3                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
4                  -1  2     49664  ultralytics.nn.modules.block.C2f             [64, 64, 2, True]             
5                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
6                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
7                  -1  1    295424  ultralytics.nn.modules.conv.Conv             [128, 256, 3, 2]              
8                  -1  1    460288  ultralytics.nn.modules.block.C2f             [256, 256, 1, True]           
9                  -1  1    164608  ultralytics.nn.modules.block.SPPF            [256, 256, 5]                 
10                  -1  1         0  torch.nn.modules.upsampling.Upsample         [None, 2, 'nearest']          
11             [-1, 6]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
12                  -1  1    148224  ultralytics.nn.modules.block.C2f             [384, 128, 1]                 
13                  -1  1         0  torch.nn.modules.upsampling.Upsample         [None, 2, 'nearest']          
14             [-1, 4]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
15                  -1  1     37248  ultralytics.nn.modules.block.C2f             [192, 64, 1]                  
16                  -1  1     36992  ultralytics.nn.modules.conv.Conv             [64, 64, 3, 2]                
17            [-1, 12]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
18                  -1  1    123648  ultralytics.nn.modules.block.C2f             [192, 128, 1]                 
19                  -1  1    147712  ultralytics.nn.modules.conv.Conv             [128, 128, 3, 2]              
20             [-1, 9]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
21                  -1  1    493056  ultralytics.nn.modules.block.C2f             [384, 256, 1]                 
22        [15, 18, 21]  1    897664  ultralytics.nn.modules.head.Detect           [80, [64, 128, 256]]          
Model summary: 225 layers, 3,157,200 parameters, 3,157,184 gradients, 8.9 GFLOPs

Transferred 355/355 items from pretrained weights
INFO:nncf:Scales will be unified for quantizer group:
DetectionModel/Sequential[model]/C2f[2]/Conv[cv1]/SiLU[act]/silu_0|OUTPUT
DetectionModel/Sequential[model]/C2f[2]/ModuleList[m]/Bottleneck[0]/__add___0|OUTPUT

INFO:nncf:Scales will be unified for quantizer group:
DetectionModel/Sequential[model]/C2f[4]/Conv[cv1]/SiLU[act]/silu_0|OUTPUT
DetectionModel/Sequential[model]/C2f[4]/ModuleList[m]/Bottleneck[0]/__add___0|OUTPUT
DetectionModel/Sequential[model]/C2f[4]/ModuleList[m]/Bottleneck[1]/__add___0|OUTPUT

INFO:nncf:Scales will be unified for quantizer group:
DetectionModel/Sequential[model]/C2f[4]/Conv[cv2]/SiLU[act]/silu_0|OUTPUT
DetectionModel/Sequential[model]/Upsample[13]/interpolate_0|OUTPUT

INFO:nncf:Scales will be unified for quantizer group:
DetectionModel/Sequential[model]/C2f[6]/Conv[cv1]/SiLU[act]/silu_0|OUTPUT
DetectionModel/Sequential[model]/C2f[6]/ModuleList[m]/Bottleneck[0]/__add___0|OUTPUT
DetectionModel/Sequential[model]/C2f[6]/ModuleList[m]/Bottleneck[1]/__add___0|OUTPUT

INFO:nncf:Scales will be unified for quantizer group:
DetectionModel/Sequential[model]/C2f[6]/Conv[cv2]/SiLU[act]/silu_0|OUTPUT
DetectionModel/Sequential[model]/Upsample[10]/interpolate_0|OUTPUT

INFO:nncf:Scales will be unified for quantizer group:
DetectionModel/Sequential[model]/C2f[8]/Conv[cv1]/SiLU[act]/silu_0|OUTPUT
DetectionModel/Sequential[model]/C2f[8]/ModuleList[m]/Bottleneck[0]/__add___0|OUTPUT

INFO:nncf:Scales will be unified for quantizer group:
DetectionModel/Sequential[model]/Conv[19]/SiLU[act]/silu_0|OUTPUT
DetectionModel/Sequential[model]/SPPF[9]/Conv[cv2]/SiLU[act]/silu_0|OUTPUT

INFO:nncf:Scales will be unified for quantizer group:
DetectionModel/Sequential[model]/C2f[12]/Conv[cv1]/SiLU[act]/silu_0|OUTPUT
DetectionModel/Sequential[model]/C2f[12]/ModuleList[m]/Bottleneck[0]/Conv[cv2]/SiLU[act]/silu_0|OUTPUT

INFO:nncf:Scales will be unified for quantizer group:
DetectionModel/Sequential[model]/C2f[12]/Conv[cv2]/SiLU[act]/silu_0|OUTPUT
DetectionModel/Sequential[model]/Conv[16]/SiLU[act]/silu_0|OUTPUT

INFO:nncf:Scales will be unified for quantizer group:
DetectionModel/Sequential[model]/C2f[15]/Conv[cv1]/SiLU[act]/silu_0|OUTPUT
DetectionModel/Sequential[model]/C2f[15]/ModuleList[m]/Bottleneck[0]/Conv[cv2]/SiLU[act]/silu_0|OUTPUT

INFO:nncf:Scales will be unified for quantizer group:
DetectionModel/Sequential[model]/C2f[18]/Conv[cv1]/SiLU[act]/silu_0|OUTPUT
DetectionModel/Sequential[model]/C2f[18]/ModuleList[m]/Bottleneck[0]/Conv[cv2]/SiLU[act]/silu_0|OUTPUT

INFO:nncf:Scales will be unified for quantizer group:
DetectionModel/Sequential[model]/C2f[21]/Conv[cv1]/SiLU[act]/silu_0|OUTPUT
DetectionModel/Sequential[model]/C2f[21]/ModuleList[m]/Bottleneck[0]/Conv[cv2]/SiLU[act]/silu_0|OUTPUT

INFO:nncf:Compiling and loading torch extension: quantized_functions_cpu...
INFO:nncf:Finished loading torch extension: quantized_functions_cpu
Ultralytics YOLOv8.2.64 🚀 Python-3.10.14 torch-2.1.2+cu121 CUDA:0 (NVIDIA GeForce RTX 4090, 24208MiB)
Model summary (fused): 223 layers, 3,152,021 parameters, 31,959 gradients, 8.6 GFLOPs
               Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00, 19.31it/s]
                 all          4         17       0.86      0.567      0.872        0.6
Speed: 0.3ms preprocess, 8.7ms inference, 0.0ms loss, 0.8ms postprocess per image
Results saved to runs/detect/train12
Traceback (most recent call last):
  File "/home/ze-flyer/ZE/PycharmProjects/ultralytics/nncf_train.py", line 219, in <module>
    main()
  File "/home/ze-flyer/ZE/PycharmProjects/ultralytics/nncf_train.py", line 215, in main
    nncf_trainer.export_model_to_openvino(model_path='/home/ze-flyer/openvino_yolov8.xml')
  File "/home/ze-flyer/ZE/PycharmProjects/ultralytics/nncf_train.py", line 169, in export_model_to_openvino
    pt_model = nncf.strip(pt_model, do_copy=True)
  File "/home/ze-flyer/anaconda3/envs/ultralytics/lib/python3.10/site-packages/nncf/common/strip.py", line 37, in strip
    return strip_pt(model, do_copy)
  File "/home/ze-flyer/anaconda3/envs/ultralytics/lib/python3.10/site-packages/nncf/torch/strip.py", line 25, in strip
    return model.nncf.strip(do_copy)
  File "/home/ze-flyer/anaconda3/envs/ultralytics/lib/python3.10/site-packages/nncf/torch/nncf_network.py", line 786, in strip
    return self.compression_controller.strip(do_copy)
  File "/home/ze-flyer/anaconda3/envs/ultralytics/lib/python3.10/site-packages/nncf/api/compression.py", line 266, in strip
    return self.strip_model(self.model, do_copy)
  File "/home/ze-flyer/anaconda3/envs/ultralytics/lib/python3.10/site-packages/nncf/torch/quantization/algo.py", line 1477, in strip_model
    model = strip_quantized_model(model)
  File "/home/ze-flyer/anaconda3/envs/ultralytics/lib/python3.10/site-packages/nncf/torch/quantization/strip.py", line 170, in strip_quantized_model
    model = replace_quantizer_to_torch_native_module(model)
  File "/home/ze-flyer/anaconda3/envs/ultralytics/lib/python3.10/site-packages/nncf/torch/quantization/strip.py", line 44, in replace_quantizer_to_torch_native_module
    nncf_module = model.nncf.get_containing_module(node.node_name)
  File "/home/ze-flyer/anaconda3/envs/ultralytics/lib/python3.10/site-packages/nncf/torch/nncf_network.py", line 673, in get_containing_module
    return self.get_module_by_scope(scope)
  File "/home/ze-flyer/anaconda3/envs/ultralytics/lib/python3.10/site-packages/nncf/torch/nncf_network.py", line 658, in get_module_by_scope
    return get_module_by_scope(curr_module, scope)
  File "/home/ze-flyer/anaconda3/envs/ultralytics/lib/python3.10/site-packages/nncf/torch/dynamic_graph/scope_access.py", line 29, in get_module_by_scope
    raise RuntimeError(
RuntimeError: Could not find a bn module member in NNCFBatchNorm2d module of scope DetectionModel/Sequential[model]/Conv[0]/NNCFBatchNorm2d[bn] during node search

@alexsu52
Contributor

alexsu52 commented Aug 6, 2024

Hello @quinnZE,

After analyzing your code, I found that you are trying to export the model after the final validation. Note that self.metrics = self.validator(model=self.model) modifies the model in place: the validator fuses Conv and BatchNorm layers for inference (see the "Model summary (fused)" line in your log), which is why nncf.strip can no longer find the bn submodule. The model therefore cannot be exported correctly after this call. I would suggest reloading the model after the final validation, like this:

    def final_eval(self):
        """Performs final evaluation and validation for object detection YOLO model."""
        for f in self.last, self.best:
            if f.exists():
                strip_optimizer(f)  # strip optimizers
                if f is self.best:
                    LOGGER.info(f"\nValidating {f}...")
                    self.model = f
                    self.setup_model()
                    self.validator.args.plots = self.args.plots
                    self.metrics = self.validator(model=self.model)
                    self.metrics.pop("fitness", None)
                    self.run_callbacks("on_fit_epoch_end")
                    # reload model
                    self.model = f
                    self.setup_model()

@quinnZE
Author

quinnZE commented Aug 7, 2024

Ah, I completely overlooked that, thank you! Everything works as expected now. Here is the final code snippet of the working export setup:

from copy import deepcopy
from datetime import datetime
from typing import Any, Dict, Tuple

import torch
from ultralytics.models.yolo.detect import DetectionTrainer
from ultralytics.utils import DEFAULT_CFG
from ultralytics.utils import LOGGER
from ultralytics.utils import RANK
from ultralytics.utils import __version__
from ultralytics.utils.torch_utils import de_parallel
from ultralytics.utils.torch_utils import strip_optimizer

import nncf
from nncf import NNCFConfig
from nncf.torch import create_compressed_model
from nncf.torch import register_default_init_args
from nncf.torch.dynamic_graph.io_handling import nncf_model_input
from nncf.torch.initialization import PTInitializingDataLoader
from nncf.torch.model_creation import is_wrapped_model

import openvino as ov

# 1 integration issue:
# MyInitializingDataLoader must support deep copy because DetectionTrainer does a deep copy
# of the model and MyInitializingDataLoader during training setup. The input data_loader
# of ultralytics.data.build.InfiniteDataLoader type does not support deep copy and
# can not be used directly into MyInitializingDataLoader. The workaround for this limitation is
# to create a deepcopable dataset from the data_loader.
class MyInitializingDataLoader(PTInitializingDataLoader):
    def __init__(self, data_loader, preprocess_batch_fn, num_samples=300):
        super().__init__(data_loader)
        self._batch_size = self._data_loader.batch_size
        # Using list of images instead of 'ultralytics.data.build.InfiniteDataLoader' to support deepcopy.
        self._data_loader = []
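        # Convert the sample budget into an (approximate) number of batches.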
        num_samples = num_samples / self._batch_size
        for count, data_item in enumerate(data_loader):
            if count > num_samples:
                break
            batch = preprocess_batch_fn(data_item)
            self._data_loader.append((batch["img"], None))

    @property
    def batch_size(self):
        return self._batch_size

    def get_inputs(self, dataloader_output: Any) -> Tuple[Tuple, Dict]:
        # your implementation - `dataloader_output` is what is returned by your dataloader,
        # and you have to turn it into a (args, kwargs) tuple that is required by your model
        # in this function, for instance, if your dataloader returns dictionaries where
        # the input image is under key `"img"`, and your YOLOv8 model accepts the input
        # images as 0-th `forward` positional arg, you would do:
        return (dataloader_output[0],), {}

    def get_target(self, dataloader_output: Any) -> Any:
        # and in this function you should extract the "ground truth" value from your
        # dataloader, so, for instance, if your dataloader output is a dictionary where
        # ground truth images are under a "gt" key, then here you would write:
        return dataloader_output[1]


class MyTrainer(DetectionTrainer):
    def __init__(self, nncf_config_dict, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
        super().__init__(cfg, overrides, _callbacks)
        self.nncf_config = NNCFConfig.from_dict(nncf_config_dict)
        self.nncf_dataloader = None

    def setup_model(self):
        ckpt = super().setup_model()

        if not is_wrapped_model(self.model):
            # Make copy of model to support `DetectionTrainer` save/load logic
            self.original_model = deepcopy(self.model)
            if ckpt and ckpt.get("model_compression_state"):
                self.resume_model_for_qat(ckpt)
            else:
                self.prepare_model_for_qat()
        return ckpt

    def _setup_train(self, world_size):
        super()._setup_train(world_size)
        # Disable EMA for QAT. Using EMA may reduce the accuracy of the model during training.
        if self.ema:
            self.ema.enabled = False

    def get_nncf_dataloader(self):
        if self.nncf_dataloader is None:
            num_samples = self.nncf_config["compression"]["initializer"]["range"]["num_init_samples"]
            train_loader = self.get_dataloader(self.trainset, batch_size=1, rank=RANK, mode="train")
            self.nncf_dataloader = MyInitializingDataLoader(train_loader, self.preprocess_batch, num_samples)
        return self.nncf_dataloader

    def create_wrap_inputs_fn(self):
        # 2 integration issue:
        # NNCF requires the same structure of inputs in the forward function during model training
        # for correct model tracing, but the DetectionModel forward function support image tensor
        # or dict as input:
        # def forward(self, x, *args, **kwargs):
        #     if isinstance(x, dict):  # for cases of training and validating while training.
        #         return self.loss(x, *args, **kwargs)
        #     return self.predict(x, *args, **kwargs)
        # In this case, wrap_inputs_fn should be implemented to specify the "original" model input
        def wrap_inputs_fn(args, kwargs):
            if isinstance(args[0], dict):
                return args, kwargs
            args = (nncf_model_input(args[0]),) + args[1:]
            return args, kwargs

        return wrap_inputs_fn

    def prepare_model_for_qat(self):
        nncf_dataloader = self.get_nncf_dataloader()
        self.nncf_config = register_default_init_args(self.nncf_config, nncf_dataloader)

        self.model = self.model.to(self.device)
        _, self.model = create_compressed_model(
            self.model, self.nncf_config, wrap_inputs_fn=self.create_wrap_inputs_fn()
        )

    def resume_model_for_qat(self, ckpt):
        # 3 integration issue:
        # resume QAT model from the model_compression_state
        _, self.model = create_compressed_model(
            self.model,
            self.nncf_config,
            compression_state=ckpt["model_compression_state"],
            wrap_inputs_fn=self.create_wrap_inputs_fn(),
        )
        self.model.load_state_dict(ckpt["model_state_dict"])

    def save_qat_model(self):
        # 4 integration issue:
        # NNCF QAT model is not picklable. Use state dict instead of model pickling.
        import pandas as pd  # scope for faster startup

        metrics = {**self.metrics, **{"fitness": self.fitness}}
        results = {k.strip(): v for k, v in pd.read_csv(self.csv).to_dict(orient="list").items()}

        compression_controller = self.model.nncf.compression_controller
        model_compression_state = {}
        if compression_controller is not None:
            model_compression_state = compression_controller.get_compression_state()

        ckpt = {
            "epoch": self.epoch,
            "best_fitness": self.best_fitness,
            "model": deepcopy(de_parallel(self.original_model)).half(),
            "model_state_dict": de_parallel(self.model).state_dict(),
            "model_compression_state": model_compression_state,
            "optimizer": self.optimizer.state_dict(),
            "train_args": vars(self.args),  # save as dict
            "train_metrics": metrics,
            "train_results": results,
            "date": datetime.now().isoformat(),
            "version": __version__,
        }

        # Save last and best
        torch.save(ckpt, self.last)
        if self.best_fitness == self.fitness:
            torch.save(ckpt, self.best)
        if (self.save_period > 0) and (self.epoch > 0) and (self.epoch % self.save_period == 0):
            torch.save(ckpt, self.wdir / f"epoch{self.epoch}.pt")
        del ckpt

    def export_model_to_openvino(self, model_path):
        pt_model = self.model
        if is_wrapped_model(pt_model):
            # nncf.strip() replaces NNCF quantizers with export-friendly equivalents.
            pt_model = nncf.strip(pt_model, do_copy=True)
        nncf_dataloader = self.get_nncf_dataloader()
        example_input = next(iter(nncf_dataloader))[0]
        try:
            ov_model = ov.convert_model(pt_model, example_input=example_input)
        except Exception:
            # Fall back to an ONNX intermediate if direct PyTorch conversion fails.
            onnx_model = "model.onnx"
            torch.onnx.export(pt_model, example_input, onnx_model)
            ov_model = ov.convert_model(onnx_model)

        ov.save_model(ov_model, model_path, compress_to_fp16=False)

    def final_eval(self):
        """Performs final evaluation and validation for object detection YOLO model."""
        self.export_model_to_openvino(self.best.parent / 'ov_best.xml')
        for f in self.last, self.best:
            if f.exists():
                strip_optimizer(f)  # strip optimizers
                if f is self.best:
                    LOGGER.info(f"\nValidating {f}...")
                    self.model = f
                    self.setup_model()
                    self.validator.args.plots = self.args.plots
                    self.metrics = self.validator(model=self.model)
                    self.metrics.pop("fitness", None)
                    self.run_callbacks("on_fit_epoch_end")

    def save_model(self):
        if is_wrapped_model(self.model):
            self.save_qat_model()
        else:
            super().save_model()


def main():
    args = dict(model="yolov8n.pt", data="coco8.yaml", epochs=3, mode="train", verbose=False)
    nncf_config_dict = {
        "input_info": {"sample_size": [1, 3, 640, 640]},
        "log_dir": "yolov8_output",  # The log directory for NNCF-specific logging outputs.
        "compression": {
            "algorithm": "quantization",
            "ignored_scopes": ["{re}/Detect"],  # ignored the post-processing
            "initializer": {"range": {"num_init_samples": 300}},
        },
    }
    nncf_trainer = MyTrainer(nncf_config_dict, overrides=args)
    nncf_trainer.train()


if __name__ == "__main__":
    main()
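
As a quick sanity check (not part of the original snippet), the exported IR can be loaded and run with the OpenVINO runtime. A minimal sketch, assuming the ov_best.xml written by final_eval above and the [1, 3, 640, 640] input shape from the NNCF config; the run directory in the path is an assumption and should be adjusted to your own output folder:

import numpy as np
import openvino as ov

core = ov.Core()
# Hypothetical path: final_eval() writes ov_best.xml next to best.pt.
ov_model = core.read_model("runs/detect/train12/weights/ov_best.xml")
compiled = core.compile_model(ov_model, "CPU")

# Dummy NCHW input matching the NNCF sample_size [1, 3, 640, 640].
image = np.random.rand(1, 3, 640, 640).astype(np.float32)
outputs = compiled(image)
for out, value in outputs.items():
    print(out.get_any_name(), value.shape)

Since ov.save_model was called with compress_to_fp16=False, the weights stay in FP32; the learned quantization should appear as FakeQuantize operations in the IR.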

quinnZE closed this as completed on Aug 7, 2024