In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

# Per-channel normalization constants stored with shape (3, 1, 1).
# In the visible code they are referenced only by the commented-out
# normalization lines in TransformerNet.forward.
# NOTE(review): the values look like the usual ImageNet RGB statistics — confirm.
mean = torch.tensor([[[0.485]], [[0.456]], [[0.406]]])
std = torch.tensor([[[0.229]], [[0.224]], [[0.225]]])

class TransformerNet(torch.nn.Module):
    """Module that owns a convolutional stack but currently bypasses it.

    ``self.model`` is built from three ConvBlocks (the last two with stride
    2), five ResidualBlocks, two upsampling ConvBlocks and a final ConvBlock
    without normalization or ReLU. ``forward`` does not call ``self.model``
    at the moment: it only pools, transposes and flips its input, and the
    normalize / stylize / de-normalize steps are left commented out.
    """

    def __init__(self):
        super().__init__()
        self.pool = nn.AvgPool2d(2)
        # The three groups are created and stored in sequence, so parameter
        # names stay model.0 ... model.10.
        head = [
            ConvBlock(3, 32, kernel_size=9, stride=1),
            ConvBlock(32, 64, kernel_size=3, stride=2),
            ConvBlock(64, 128, kernel_size=3, stride=2),
        ]
        body = [ResidualBlock(128) for _ in range(5)]
        tail = [
            ConvBlock(128, 64, kernel_size=3, upsample=True),
            ConvBlock(64, 32, kernel_size=3, upsample=True),
            ConvBlock(32, 3, kernel_size=9, stride=1, normalize=False, relu=False),
        ]
        self.model = nn.Sequential(*head, *body, *tail)

    def forward(self, x):
        """Average-pool ``x`` by 2, swap dims 2 and 3, then reverse dim 3."""
        # Disabled stylization path, kept for reference:
        # x = (x - mean) / std
        # y = self.model(x)
        # y = y * std + mean
        # y = torch.clamp(y * 255, 0, 255)
        return self.pool(x).transpose(2, 3).flip(3)


class ResidualBlock(torch.nn.Module):
    """Two 3x3, stride-1 ConvBlocks whose output is added back to the input.

    Both ConvBlocks keep the channel count and use normalization; only the
    first one applies ReLU.
    """

    def __init__(self, channels):
        super().__init__()
        common = dict(kernel_size=3, stride=1, normalize=True)
        self.block = nn.Sequential(
            ConvBlock(channels, channels, relu=True, **common),
            ConvBlock(channels, channels, relu=False, **common),
        )

    def forward(self, x):
        """Return ``block(x) + x``."""
        transformed = self.block(x)
        return transformed + x


class ConvBlock(torch.nn.Module):
    """Reflection-padded convolution with optional upsample, InstanceNorm and ReLU.

    Args:
        in_channels: number of input channels for the convolution.
        out_channels: number of output channels for the convolution.
        kernel_size: convolution kernel size; the reflection padding is
            ``kernel_size // 2`` on every side.
        stride: convolution stride.
        upsample: if true, the input is first passed through
            ``F.interpolate(..., scale_factor=2)``.
        normalize: if true, an affine ``InstanceNorm2d`` follows the convolution.
        relu: if true, ``F.relu`` is applied last.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, upsample=False, normalize=True, relu=True):
        super().__init__()
        self.upsample = upsample
        self.relu = relu
        pad = nn.ReflectionPad2d(kernel_size // 2)
        conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride)
        # Kept as a two-element Sequential so the conv parameters stay under "block.1".
        self.block = nn.Sequential(pad, conv)
        self.norm = nn.InstanceNorm2d(out_channels, affine=True) if normalize else None

    def forward(self, x):
        """Apply (upsample) -> pad + conv -> (norm) -> (relu) and return the result."""
        out = F.interpolate(x, scale_factor=2) if self.upsample else x
        out = self.block(out)
        if self.norm is not None:
            out = self.norm(out)
        return F.relu(out) if self.relu else out

In [5]:
import coremltools as ct

def load_model(checkpoint = None):
    """Create a TransformerNet and optionally restore its weights.

    Args:
        checkpoint: path (or file-like object) accepted by ``torch.load`` that
            holds a state dict for ``TransformerNet``. When ``None`` the
            freshly initialized model is returned unchanged.

    Returns:
        The ``TransformerNet`` instance, with weights loaded onto the CPU when
        a checkpoint was given.
    """
    model = TransformerNet()
    if checkpoint is not None:
        # torch.load unpickles the file. weights_only=True limits it to
        # tensors and plain containers, which is all load_state_dict needs,
        # so a tampered checkpoint cannot run arbitrary code on load.
        state_dict = torch.load(
            checkpoint,
            map_location=torch.device('cpu'),
            weights_only=True,
        )
        model.load_state_dict(state_dict)
    return model

def trace(model, input_shape=(1, 3, 720, 1280)):
    """Switch ``model`` to eval mode and trace its ``forward`` with TorchScript.

    Args:
        model: the ``torch.nn.Module`` to trace. ``model.eval()`` is called on
            it, so the module is left in eval mode.
        input_shape: shape of the all-ones float32 example tensor fed to the
            tracer. Defaults to ``(1, 3, 720, 1280)``, the shape that used to
            be hard-coded here.

    Returns:
        The object returned by ``torch.jit.trace_module``.
    """
    with torch.no_grad(), torch.inference_mode(), torch.jit.optimized_execution(True):
        model.eval()
        # The example tensor is created inside the inference-mode context,
        # as in the original hard-coded version.
        example_input = torch.ones(tuple(input_shape), dtype=torch.float32)
        return torch.jit.trace_module(
            model,
            {'forward': (example_input,)},
            strict=True,
            check_trace=True,
            check_tolerance=1e-9,
        )
    
def convert(model, scale=1.0, compute_units=None, minimum_deployment_target=None):
    """Convert a traced model to Core ML with image input "source" and image output "styled".

    Args:
        model: the traced PyTorch model handed to ``ct.convert``.
        scale: ``scale`` passed to the input ``ct.ImageType``. The default of
            1.0 matches the previous behavior, where no scale was given.
        compute_units: ``ct.ComputeUnit`` value; ``None`` selects
            ``ct.ComputeUnit.CPU_AND_NE`` (the previous fixed value).
        minimum_deployment_target: ``ct.target`` value; ``None`` selects
            ``ct.target.iOS17`` (the previous fixed value). The
            joint-compression cell in this notebook reports that it needs iOS18.

    Returns:
        The converted model returned by ``ct.convert``.
    """
    # Defaults are resolved here rather than in the signature so the function
    # can be defined without evaluating ``ct`` attributes at definition time.
    if compute_units is None:
        compute_units = ct.ComputeUnit.CPU_AND_NE
    if minimum_deployment_target is None:
        minimum_deployment_target = ct.target.iOS17

    source = ct.ImageType(name="source", shape=(1, 3, 720, 1280), scale=scale)
    styled = ct.ImageType(name="styled")
    return ct.convert(
        model,
        inputs=[source],
        outputs=[styled],
        compute_units=compute_units,
        compute_precision=ct.precision.FLOAT16,
        minimum_deployment_target=minimum_deployment_target,
    )

def convert_pipeline(checkpoint, name):
    """Load, trace, convert and save a model in one step.

    Args:
        checkpoint: checkpoint forwarded to ``load_model`` (``None`` keeps the
            freshly initialized weights).
        name: base name of the output package. A trailing ".mlpackage" is
            accepted and not duplicated, so both "Foo" and "Foo.mlpackage"
            save to "Foo.mlpackage".

    Returns:
        The converted model, after it has been saved.
    """
    suffix = ".mlpackage"
    # Without this, a name that already carries the extension would be saved
    # as "<name>.mlpackage.mlpackage".
    stem = name[:-len(suffix)] if name.endswith(suffix) else name

    model = load_model(checkpoint)
    traced = trace(model)
    converted = convert(traced)
    converted.save(f"{stem}{suffix}")
    return converted

# Conversions of the trained checkpoints are currently disabled. The prediction
# cell further down reads `cuphead`, `starry_night` and `mosaic`, so re-enable
# these lines before running it. convert_pipeline appends ".mlpackage" itself,
# so the name argument is given without the extension.
# cuphead = convert_pipeline("/Users/zeruichen/Downloads/cuphead_10000.pth", "CupheadStylizer")
# starry_night = convert_pipeline("/Users/zeruichen/Downloads/starry_night_10000.pth", "StarryNightStylizer")
# mosaic = convert_pipeline("/Users/zeruichen/Downloads/mosaic_10000.pth", "MosaicStylizer")
# No checkpoint: converts the freshly initialized model and saves it as IdentityStylizer.mlpackage.
identity = convert_pipeline(None, "IdentityStylizer")

Converting PyTorch Frontend ==> MIL Ops:  93%|█████████▎| 14/15 [00:00<00:00, 7905.26 ops/s]
Running MIL frontend_pytorch pipeline: 100%|██████████| 5/5 [00:00<00:00, 9438.13 passes/s]
Running MIL default pipeline: 100%|██████████| 88/88 [00:00<00:00, 5344.69 passes/s]
Running MIL backend_mlprogram pipeline: 100%|██████████| 12/12 [00:00<00:00, 7518.92 passes/s]


In [20]:
# Run one sample image through each converted stylizer and save the output.
# NOTE(review): `Image` (presumably PIL.Image) is not imported in the visible
# cells, and cuphead / starry_night / mosaic are only assigned by
# convert_pipeline calls that are commented out — confirm both are available
# on a fresh kernel.
# The input image is identical for every model, so it is opened once instead
# of once per iteration.
img = Image.open("/Users/zeruichen/Downloads/mountain.png")
for name, model in [
    ("CupheadStylizer", cuphead),
    ("StarryNightStylizer", starry_night),
    ("MosaicStylizer", mosaic),
]:
    styled = model.predict({"source": img})["styled"]
    styled.save(f"{name}.png")

In [8]:
import coremltools.optimize as cto

# Quantize activations with the experimental coremltools API, using the
# "linear_symmetric" mode for every op (global_config) and the first ten
# entries of sample_images as calibration data.
# NOTE(review): ct_model and sample_images are not defined in the visible
# cells — confirm where they come from before a Restart & Run All.
activation_config = cto.coreml.OptimizationConfig(
    global_config = cto.coreml.experimental.OpActivationLinearQuantizerConfig(
        mode="linear_symmetric"
    )
)
compressed_model_a8 = cto.coreml.experimental.linear_quantize_activations(
    ct_model, activation_config, sample_images[:10]
)

Running activation compression pass insert_prefix_quantize_dequantize_pair: 100%|██████████| 281/281 [00:00<00:00, 15706.07 ops/s]
Running compression pass linear_quantize_activations: start calibrating 10 samples
Running compression pass linear_quantize_activations: calibration may take a while ...
Running compression pass linear_quantize_activations: calibrating sample 1/10 succeeds.
Running compression pass linear_quantize_activations: calibrating sample 2/10 succeeds.
Running compression pass linear_quantize_activations: calibrating sample 3/10 succeeds.
Running compression pass linear_quantize_activations: calibrating sample 4/10 succeeds.
Running compression pass linear_quantize_activations: calibrating sample 5/10 succeeds.
Running compression pass linear_quantize_activations: calibrating sample 6/10 succeeds.
Running compression pass linear_quantize_activations: calibrating sample 7/10 succeeds.
Running compression pass linear_quantize_activations: calibrating sample 8/10 succe

In [9]:
# Apply linear weight quantization on top of the activation-quantized model,
# then save the result.
# NOTE(review): weight_threshold=2048 presumably skips weight tensors with
# fewer elements than that — confirm against the coremltools documentation.
# NOTE(review): the name `config` is reassigned by the palettization cell below.
config = ct.optimize.coreml.OptimizationConfig(
    global_config=ct.optimize.coreml.OpLinearQuantizerConfig(mode="linear", weight_threshold=2048)
)
compressed_model_a8w8 = ct.optimize.coreml.linear_quantize_weights(compressed_model_a8, config=config)

compressed_model_a8w8.save('../chao24/MosaicStylizerQ.mlpackage')

Running compression pass linear_quantize_weights: 100%|██████████| 61/61 [00:00<00:00, 1509.27 ops/s]
Running MIL frontend_milinternal pipeline: 0 passes [00:00, ? passes/s]
Running MIL default pipeline: 100%|██████████| 86/86 [00:00<00:00, 200.89 passes/s]
Running MIL backend_mlprogram pipeline: 100%|██████████| 12/12 [00:00<00:00, 245.36 passes/s]


In [10]:
from coremltools.optimize.coreml import (
   OptimizationConfig,
   OpPalettizerConfig,
   OpLinearQuantizerConfig,
   palettize_weights,
   linear_quantize_weights,
 ) 
     
# ct_model: an uncompressed mlpackage, loaded into memory.
# NOTE(review): ct_model is not defined in the visible cells. The recorded run
# of this cell ended with "ValueError: Joint compression is only supported
# since iOS18", so the model passed in was presumably converted with a lower
# minimum deployment target — confirm and reconvert with iOS18 if so.

# First palettize the model with 4-bit palettes.
# This will produce an LUT with Float values.
op_config = OpPalettizerConfig(nbits=4)
config = OptimizationConfig(global_config=op_config)
mlmodel_palettized = palettize_weights(ct_model, config)

# Now apply weight quantization on the palettized model
# with "joint_compression" set to True.
# This will result in quantizing the LUT to 8 bits.
# (granularity must be set to "per_tensor" for this scenario)
op_config = OpLinearQuantizerConfig(mode="linear_symmetric",
                                    granularity="per_tensor")
linear_weight_quantize_config = OptimizationConfig(global_config=op_config)

mlmodel_palettized_with_8bit_lut = linear_quantize_weights(mlmodel_palettized,
                                                           linear_weight_quantize_config,
                                                           joint_compression=True)

Running compression pass palettize_weights: 100%|██████████| 61/61 [00:01<00:00, 57.89 ops/s]
Running MIL frontend_milinternal pipeline: 0 passes [00:00, ? passes/s]
Running MIL default pipeline: 100%|██████████| 86/86 [00:00<00:00, 341.13 passes/s]
Running MIL backend_mlprogram pipeline: 100%|██████████| 12/12 [00:00<00:00, 404.49 passes/s]


ValueError: Joint compression is only supported since iOS18. Please set the minimum deployment target to iOS18 if you want to use it.

In [8]:
# Save the palettized model with the quantized LUT.
# NOTE(review): the recorded run of the joint-compression cell raised a
# ValueError, so this variable only exists if that cell succeeds. The same
# output path is also written by the ct_palettized_model.save cell later on.
mlmodel_palettized_with_8bit_lut.save('../chao24/MosaicStylizer.mlpackage')

In [None]:
from coremltools.optimize.torch.palettization import PostTrainingPalettizer, PostTrainingPalettizerConfig

# Post-training palettization of the PyTorch model: 4-bit palettes with
# "per_grouped_channel" granularity and a group size of 4.
# NOTE(review): no model is actually loaded in this cell. `model` must already
# exist in the kernel, and an earlier cell uses `model` as the loop variable
# over converted Core ML models — confirm that a torch module is bound here.
palettization_config_dict = {
  "global_config": {"n_bits": 4, "granularity": "per_grouped_channel", "group_size": 4},
}
palettization_config = PostTrainingPalettizerConfig.from_dict(palettization_config_dict)
palettizer = PostTrainingPalettizer(model, palettization_config)

palettized_torch_model = palettizer.compress()

In [None]:
# Trace the palettized model with the notebook's trace() helper. The helper
# performs the steps this cell used to spell out by hand: eval mode,
# no_grad / inference_mode / optimized_execution, and trace_module on a
# (1, 3, 720, 1280) tensor of ones with strict checking and tolerance 1e-9.
palettized_model_traced = trace(palettized_torch_model)

In [None]:
# Convert the traced palettized model to Core ML with image input "source"
# and image output "styled". Compared with the convert() helper defined
# earlier, this call adds an input scale of 1/255, runs on CPU only and
# targets iOS18.
# NOTE(review): scale=1/255.0 presumably maps 0-255 pixel values to 0-1 —
# confirm this matches what the model's forward pass expects.
ct_palettized_model = ct.convert(
    palettized_model_traced,
    inputs=[
        ct.ImageType(name="source", shape=(1, 3, 720, 1280), scale=1/255.0),
    ],
    outputs=[
        ct.ImageType(name="styled"),
    ],
    compute_units=ct.ComputeUnit.CPU_ONLY,
    compute_precision=ct.precision.FLOAT16,
    minimum_deployment_target=ct.target.iOS18,
)

In [7]:
# Save the converted palettized model.
# NOTE(review): this path is also written by the
# mlmodel_palettized_with_8bit_lut.save cell, so whichever runs last wins.
ct_palettized_model.save('../chao24/MosaicStylizer.mlpackage')

In [2]:
import coremltools as ct
# Load an existing Core ML package from disk.
# NOTE(review): `mm` is not used by any visible cell, and the path is an
# absolute, machine-specific location.
mm = ct.models.MLModel('/Users/zeruichen/Downloads/MobileNetV2Alpha1SymmetricPerChannel.mlpackage')