In [1]:
import torch
from nncf import NNCFConfig
from nncf.common.logging import nncf_logger
from nncf.torch import create_compressed_model, register_default_init_args
from nncf.torch.initialization import PTInitializingDataLoader
from nncf.torch.layer_utils import CompressionParameter

from PIL import Image
import open_clip

INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino


In [2]:
model, train_transform, eval_transform = open_clip.create_model_and_transforms("ViT-B-16-plus-240", pretrained="laion400m_e32")

In [3]:
tokenizer = open_clip.get_tokenizer('ViT-B-16-plus-240')

image = eval_transform(Image.open("../../docs/CLIP.png")).unsqueeze(0)
text = tokenizer(["a diagram", "a dog", "a cat"])

### Prepare data for optimization

In [4]:
import os
import random
from io import BytesIO
import requests
import numpy as np

def get_pil_from_url(url):
    response = requests.get(url)
    image = Image.open(BytesIO(response.content))
    return image.convert("RGB")

BACKUP_PAIR = (
    get_pil_from_url(
        "https://thumbs.dreamstime.com/t/altai-mountains-mountain-lake-russia-siberia-chuya-ridge-49130812.jpg"
    ),
    "Altai mountains Stock Photography",
)
AVAILABLE_EXAMPLES = []

def check_text_data(data):
    if isinstance(data, str):
        return True
    if isinstance(data, list):
        return all(isinstance(x, str) for x in data)
    return False    

def laion2B_preprocess_train(examples, train_transforms, tokenize_captions, image_column="url", text_column="caption"):
    url = examples[image_column]
    try:
        image = get_pil_from_url(url)
        if not check_text_data(examples[text_column]):
            raise ValueError("Text data is not valid")
        AVAILABLE_EXAMPLES.append((url, examples[text_column]))
    except Exception:
        print(f"Can't load image from url: {url}, using cache with size: {len(AVAILABLE_EXAMPLES)}")
        if len(AVAILABLE_EXAMPLES) > 0:
            backup_id = random.randint(0, len(AVAILABLE_EXAMPLES) - 1)
            backup_example = AVAILABLE_EXAMPLES[backup_id]
            try:
                image = get_pil_from_url(backup_example[0])
                examples[text_column] = backup_example[1]
            except Exception:
                print(f"Can't load image from cached url: {backup_example[0]}, using backup")
                image = BACKUP_PAIR[0].copy()
                examples[text_column] = BACKUP_PAIR[1]
        else:
            print(f"Can't load image from url: {url}, using backup")
            image = BACKUP_PAIR[0].copy()
            examples[text_column] = BACKUP_PAIR[1]

    examples["pixel_values"] = train_transforms(image)
    examples["input_ids"] = tokenize_captions(examples)
    return examples

def tokenize_captions(examples, is_train=True):
    caption_column = "caption"
    captions = []
    caption = examples[caption_column]
    if isinstance(caption, str):
        captions.append(caption)
    elif isinstance(caption, (list, np.ndarray)):
        # take a random caption if there are multiple
        captions.append(random.choice(caption) if is_train else caption[0])
    else:
        raise ValueError(f"Caption column `{caption_column}` should contain either strings or lists of strings.")
    #inputs = tokenizer(captions[0], max_length=tokenizer.model_max_length, padding="do_not_pad", truncation=True)
    #input_ids = inputs.input_ids
    input_ids = tokenizer(captions[0])[0]
    return input_ids

In [5]:
from datasets import load_dataset

max_train_samples = 10000
dataset = load_dataset("laion/laion400m", streaming=True)
train_dataset = dataset["train"].shuffle(seed=42, buffer_size=max_train_samples)

In [6]:
def collate_fn_image(examples):
    examples = [laion2B_preprocess_train(example, train_transform, tokenize_captions) for example in examples]
    pixel_values = torch.stack([example["pixel_values"] for example in examples])
    pixel_values = pixel_values.to(memory_format=torch.contiguous_format).float()
    
    input_ids = torch.stack([example["input_ids"] for example in examples])
    return {
        "pixel_values": pixel_values,
        "input_ids": input_ids
    }

In [7]:
import itertools
from tqdm.notebook import tqdm

def prepare_nncf_init_data(dataloader, init_steps):
    nncf_init_data = []

    print(f"Fetching {init_steps} for the initialization...")
    for _, batch in tqdm(zip(range(init_steps), itertools.islice(dataloader, 0, init_steps))):
        with torch.no_grad():
            # Convert images to latent space
            
            nncf_init_data.append(
                (
                    batch["pixel_values"].to("cpu"),
                    batch["input_ids"].to("cpu")
                )
            )
    return nncf_init_data

In [8]:
train_batch_size = 1
dataloader_num_workers = 4
train_dataloader = torch.utils.data.DataLoader(
        train_dataset, collate_fn=collate_fn_image, batch_size=train_batch_size, num_workers=dataloader_num_workers
    )

In [9]:
opt_init_steps = 300
init_data = prepare_nncf_init_data(train_dataloader, opt_init_steps)

Fetching 300 for the initialization...


0it [00:00, ?it/s]

Can't load image from url: https://s3.amazonaws.com/whataspace/space_pictures/pictures/000/033/180/fullwidth/_DSC0146.jpg?1591610094, using cache with size: 0
Can't load image from url: https://s3.amazonaws.com/whataspace/space_pictures/pictures/000/033/180/fullwidth/_DSC0146.jpg?1591610094, using backup
Can't load image from url: https://i0.wp.com/des.gearbest.com/uploads/pdm-desc-pic/Electronic/image/2016/11/25/1480056293210706.jpg?w=960, using cache with size: 2
Can't load image from url: http://cdn3.static-homes.com/cgi-bin/readimage/9eb0ae4fa92b5dfd09b03c9e3dc997c5_1_resizeto_193x143x1, using cache with size: 3
Can't load image from url: https://sslh.ulximg.com/image/740x493/cover/1533483761_ac8b9fbc89ca57a7269ec0c7f5947094.jpg/2143af354a3c53d14969369d2c6cbc04/1533483761_cc5f2e1e234c809fa4408488e0b19e4d.jpg, using cache with size: 7
Can't load image from url: https://img.shellporn.com/spcs/thumbs/155/299_hotel_wang_.jpg, using cache with size: 5
Can't load image from url: http://r



Can't load image from url: http://i0.wp.com/www.ewindandsolar.com/i/2015/06/hy-380-led-puck-lights-for-cozy-living-room-design-led-puck-light-kit-under-cabinet-led-puck-lights-12v-led-puck-lights-led-puck-lights-120v-battery-powered-led-puck-lights-battery-led.jpg?w=200&strip=all, using cache with size: 18
Can't load image from url: https://cdn1-www.momtastic.com/assets/uploads/2014/01/old-DIYs-e1389240447729.jpg, using cache with size: 22
Can't load image from url: https://www.belleviemedical.com/wp-content/uploads/2016/03/Belle-Vie-Home-Face.png, using cache with size: 18
Can't load image from url: http://s.en.fishki.net/upload/en/201211/23/2706/9840483e0be437697bcdb6501908d666.jpg, using cache with size: 19
Can't load image from url: https://d2d00szk9na1qq.cloudfront.net/Product/0d387c2a-4687-4900-8861-17a0df831812/Images/Medium_0393423.jpg, using cache with size: 23
Can't load image from url: https://d39eittn6ocpe0.cloudfront.net/media/catalog/product/cache/67df2192fc19fe888eca63b2

In [10]:
class InitDataset(torch.utils.data.Dataset):
    def __init__(self, data):
        super().__init__()
        self.init_data = data

    def __len__(self):
        return len(self.init_data)

    def __getitem__(self, index):
        return self.init_data[index]

Can't load image from url: http://ep.yimg.com/ay/yhst-83532116742892/cherry-blossom-birdies-pink-yellow-framed-art-print-8.jpg, using cache with size: 62


### Apply NNCF optimizations

In [11]:
image_encoder_config = {
    "input_info": [
        {  
            "sample_size": [1, 3, 240, 240]
        },
    ],
    "log_dir": "./",  # The log directory for NNCF-specific logging outputs.
    "compression": [
        {
            "algorithm": "quantization",  # Specify the algorithm here.
            "preset": "mixed",
            "initializer": {
                "range": {"num_init_samples": opt_init_steps},
                "batchnorm_adaptation": {"num_bn_adaptation_samples": opt_init_steps},
            },
            "scope_overrides": {"activations": {"{re}.*__matmul___0": {"mode": "symmetric"}, "{re}.*mean_0": {"mode": "symmetric"}}},
            "ignored_scopes": [
                "{re}.*__add___.*",
                "{re}.*layer_norm_0",
                "{re}.*__truediv__*",
                "{re}.*__mul___.*",
                "{re}.*__matmul___1",
            ],
            "overflow_fix": "disable",
            "export_to_onnx_standard_ops": True,
        },
    ],
}

init_dataloader = torch.utils.data.DataLoader(InitDataset(init_data), batch_size=1, num_workers=1)

class ImageEncoderInitDataLoader(PTInitializingDataLoader):
    def get_inputs(self, dataloader_output):
        image = dataloader_output[0].float().to("cpu", non_blocking=True)
        return (image), {}

    def get_target(self, dataloader_output):
        return dataloader_output[0]

image_encoder_config = NNCFConfig.from_dict(image_encoder_config)
image_encoder_config = register_default_init_args(image_encoder_config, ImageEncoderInitDataLoader(init_dataloader))

In [12]:
import tomeov

tomeov.patch_openclip(model, 4)

  deprecate(


In [13]:
image_controller, image_encoder = create_compressed_model(model.visual, image_encoder_config)

INFO:nncf:Not adding activation input quantizer for operation: 5 ToMeVisionTransformer/__add___0
INFO:nncf:Not adding activation input quantizer for operation: 6 ToMeVisionTransformer/LayerNorm[ln_pre]/layer_norm_0
INFO:nncf:Not adding activation input quantizer for operation: 7 ToMeVisionTransformer/Transformer[transformer]/ModuleList[resblocks]/ToMeResidualAttentionBlock[0]/LayerNorm[ln_1]/layer_norm_0
INFO:nncf:Not adding activation input quantizer for operation: 16 ToMeVisionTransformer/Transformer[transformer]/ModuleList[resblocks]/ToMeResidualAttentionBlock[0]/ToMeAttention[attn]/__mul___0
INFO:nncf:Not adding activation input quantizer for operation: 19 ToMeVisionTransformer/Transformer[transformer]/ModuleList[resblocks]/ToMeResidualAttentionBlock[0]/ToMeAttention[attn]/__matmul___1
INFO:nncf:Not adding activation input quantizer for operation: 25 ToMeVisionTransformer/Transformer[transformer]/ModuleList[resblocks]/ToMeResidualAttentionBlock[0]/__add___0
INFO:nncf:Not adding act

In [14]:
from nncf.torch.dynamic_graph.io_handling import wrap_nncf_model_inputs_with_objwalk
from nncf.config.structures import BNAdaptationInitArgs
from nncf.config.structures import QuantizationRangeInitArgs

text_encoder_config_dict = {
    "input_info": [
        {  
            "sample_size": [640, 1, 77],
        },
        {
            "sample_size": [77, 77],
        }
    ],
    "log_dir": "./",  # The log directory for NNCF-specific logging outputs.
    "compression": [
        {
            "algorithm": "quantization",  # Specify the algorithm here.
            "preset": "mixed",
            "initializer": {
                "range": {"num_init_samples": opt_init_steps},
                "batchnorm_adaptation": {"num_bn_adaptation_samples": opt_init_steps},
            },
            "scope_overrides": {"activations": {"{re}.*baddbmm_0": {"mode": "symmetric"}}},
            "ignored_scopes": [
                "{re}.*__add___.*",
                "{re}.*layer_norm_.*",
                "{re}.*__truediv__*",
                "{re}.*/bmm_0",
            ],
            "overflow_fix": "disable",
            "export_to_onnx_standard_ops": True,
        },
    ],
}

class TextEncoderInitDataLoader(PTInitializingDataLoader):
    """
    This class wraps the nncf.Dataset.

    This is required for proper initialization of certain compression algorithms.
    """

    def __init__(self, data_loader):
        super().__init__(data_loader)
        self._length = None

    @property
    def batch_size(self):
        return 1

    def __iter__(self):
        return iter(self._data_loader)

    def __len__(self):
        if self._length is None:
            data = self._data_loader
            self._length = TextEncoderInitDataLoader._get_length(data)
        return self._length

    def get_inputs(self, dataloader_output):
        with torch.no_grad():
            text_embeddings = dataloader_output[1].to("cpu", non_blocking=True)
            #text_embeddings = torch.squeeze(text_embeddings, 0)
            print(f"text_embeddings.shape: {text_embeddings.shape}")
            x = model.token_embedding(text_embeddings)
            x = x + model.positional_embedding
            print(f"x.hape: {x.shape}")
            x = x.permute(1, 0, 2)  # NLD -> LND
            print(f"x.hape: {x.shape}")
        return (x, model.attn_mask), {}#{"x": x, "attn_mask": model.attn_mask}

    @staticmethod
    def _get_length(iterable) -> int:
        length = 0
        for _ in iterable:
            length = length + 1

        return length

# class TextEncoderInitDataLoader(PTInitializingDataLoader):
#     def get_inputs(self, dataloader_output):
#         with torch.no_grad():
#             text_embeddings = dataloader_output[1].to("cpu", non_blocking=True)
#             text_embeddings = torch.squeeze(text_embeddings, 0)
#             print(f"text_embeddings.shape: {text_embeddings.shape}")
#             x = model.token_embedding(text_embeddings)
#             x = x + model.positional_embedding
#             print(f"x.hape: {x.shape}")
#             x = x.permute(1, 0, 2)  # NLD -> LND
#             print(f"x.hape: {x.shape}")
#         return (x, model.attn_mask), {}#{"x": x, "attn_mask": model.attn_mask}

text_encoder_config = NNCFConfig.from_dict(text_encoder_config_dict)
text_encoder_dataloader = TextEncoderInitDataLoader(init_data)
# text_encoder_config = register_default_init_args(text_encoder_config, text_encoder_dataloader)
text_encoder_config.register_extra_structs(
        [
            QuantizationRangeInitArgs(data_loader=text_encoder_dataloader),
            BNAdaptationInitArgs(data_loader=text_encoder_dataloader),
        ]
    )

In [15]:
# for i, resblock in enumerate(model.transformer.resblocks):
#     attn = tomeov.openclip.ToMeAttention(resblock.attn.embed_dim, resblock.attn.num_heads, qkv_bias=True)
#     _, device = tomeov.openclip.convert_attention_block(resblock.attn, attn)

In [16]:
from nncf.torch.dynamic_graph.io_handling import wrap_nncf_model_inputs_with_objwalk
from nncf.torch.dynamic_graph.io_handling import wrap_nncf_model_outputs_with_objwalk
from nncf.torch.dynamic_graph.context import no_nncf_trace
from nncf.torch.nested_objects_traversal import objwalk
from nncf.torch.utils import is_tensor
from nncf.torch.dynamic_graph.io_handling import replicate_same_tensors
from nncf.torch.utils import get_model_device

def wrap_inputs(args, kwargs):
        return wrap_nncf_model_inputs_with_objwalk(args, kwargs)

def wrap_outputs(retval):
    return wrap_nncf_model_outputs_with_objwalk(retval)

def create_dummy_forward_fn(data_loader, device):
    def dummy_forward(model):
        with no_nncf_trace():
            data_item = next(iter(data_loader))
            args, kwargs = data_loader.get_inputs(data_item)

            def send_to_device(tensor):
                return tensor.to(device)

            args = objwalk(args, is_tensor, send_to_device)
            kwargs = objwalk(kwargs, is_tensor, send_to_device)

        args, kwargs = wrap_inputs(args, kwargs)
        retval = model(*args, **kwargs)
        retval = replicate_same_tensors(retval)
        return wrap_outputs(retval)

    return dummy_forward

dummy_forward_fn = create_dummy_forward_fn(text_encoder_dataloader, get_model_device(model))

text_controller, text_encoder = create_compressed_model(
    model.transformer, 
    text_encoder_config, 
    dummy_forward_fn=dummy_forward_fn,
    wrap_inputs_fn=wrap_inputs,
    wrap_outputs_fn=wrap_outputs,)


text_embeddings.shape: torch.Size([1, 77])
x.hape: torch.Size([1, 77, 640])
x.hape: torch.Size([77, 1, 640])
text_embeddings.shape: torch.Size([1, 77])
x.hape: torch.Size([1, 77, 640])
x.hape: torch.Size([77, 1, 640])
text_embeddings.shape: torch.Size([1, 77])
x.hape: torch.Size([1, 77, 640])
x.hape: torch.Size([77, 1, 640])
INFO:nncf:Not adding activation input quantizer for operation: 2 Transformer/ModuleList[resblocks]/ResidualAttentionBlock[0]/LayerNorm[ln_1]/layer_norm_0
INFO:nncf:Not adding activation input quantizer for operation: 16 Transformer/ModuleList[resblocks]/ResidualAttentionBlock[0]/MultiheadAttention[attn]/__truediv___0
INFO:nncf:Not adding activation input quantizer for operation: 26 Transformer/ModuleList[resblocks]/ResidualAttentionBlock[0]/__add___0
INFO:nncf:Not adding activation input quantizer for operation: 27 Transformer/ModuleList[resblocks]/ResidualAttentionBlock[0]/LayerNorm[ln_2]/layer_norm_0
INFO:nncf:Not adding activation input quantizer for operation: 

KeyError: 1