In [None]:
# Requirements: CoreML 7.0+, NumPy < 2.0, Python = 3.10??

In [14]:
import torch
import timm
import coremltools as ct
import json
import numpy as np
import torchvision

In [3]:
print(np.__version__)

1.26.4


In [4]:
# Load model
torch_model = timm.create_model('fastvit_s12.apple_dist_in1k', pretrained=False, num_classes=19)

# Load state dictionary
state_dict = torch.load("cjm_image_classifier/2024-06-28_01-04-01/fastvit_s12.apple_dist_in1k.pth")
torch_model.load_state_dict(state_dict)
torch_model.eval()

FastVit(
  (stem): Sequential(
    (0): MobileOneBlock(
      (se): Identity()
      (conv_kxk): ModuleList(
        (0): ConvNormAct(
          (conv): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
          (bn): BatchNormAct2d(
            64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): Identity()
          )
        )
      )
      (conv_scale): ConvNormAct(
        (conv): Conv2d(3, 64, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (bn): BatchNormAct2d(
          64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): Identity()
        )
      )
      (act): GELU(approximate='none')
    )
    (1): MobileOneBlock(
      (se): Identity()
      (conv_kxk): ModuleList(
        (0): ConvNormAct(
          (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=64, bias=False)
          (bn): Batch

In [5]:
# Create a dummy input with the same size as the model's input
dummy_input = torch.randn(1, 3, 256, 256)

# Trace the model with random data.
traced_model = torch.jit.trace(torch_model, dummy_input)
out = traced_model(dummy_input)

In [6]:
# Download class labels in ImageNetLabel.txt.
with open('cjm_image_classifier/2024-06-28_01-04-01/hagrid-classification-512p-no-gesture-150k-zip-classes.json') as data_file:
	config = json.load(data_file)

class_labels=config['classes']
class_labels

['dislike',
 'like',
 'stop_inverted',
 'ok',
 'mute',
 'two_up',
 'no_gesture',
 'stop',
 'peace',
 'two_up_inverted',
 'three',
 'three2',
 'call',
 'one',
 'rock',
 'peace_inverted',
 'fist',
 'palm',
 'four']

In [7]:
scale = 1/(0.226*255.0)
bias = [- 0.485/(0.229) , - 0.456/(0.224), - 0.406/(0.225)]

image_input = ct.ImageType(name="input_1",
                           shape=dummy_input.shape,
                           scale=scale, bias=bias)

In [8]:
# Using image_input in the inputs parameter:
# Convert to Core ML program using the Unified Conversion API.
model = ct.convert(
    traced_model,
    inputs=[image_input],
    classifier_config = ct.ClassifierConfig(class_labels),
    # compute_units=ct.ComputeUnit.CPU_ONLY,
)


When both 'convert_to' and 'minimum_deployment_target' not specified, 'convert_to' is set to "mlprogram" and 'minimum_deployment_target' is set to ct.target.iOS15 (which is same as ct.target.macOS12). Note: the model will not run on systems older than iOS15/macOS12/watchOS8/tvOS15. In order to make your model run on older system, please set the 'minimum_deployment_target' to iOS14/iOS13. Details please see the link: https://apple.github.io/coremltools/docs-guides/source/target-conversion-formats.html
Converting PyTorch Frontend ==> MIL Ops: 100%|█████████▉| 729/730 [00:00<00:00, 8740.71 ops/s]
Running MIL frontend_pytorch pipeline: 100%|██████████| 5/5 [00:00<00:00, 136.06 passes/s]
Running MIL default pipeline: 100%|██████████| 78/78 [00:01<00:00, 60.26 passes/s] 
Running MIL backend_mlprogram pipeline: 100%|██████████| 12/12 [00:00<00:00, 107.90 passes/s]


In [9]:
# Save the converted model.
model.save("FastViTv1_062824.mlpackage")

In [19]:
from PIL import Image

# Load the test image and resize to 224, 224.
img_path = "pexels-elina-volkova-16191659.jpg"
img = Image.open(img_path)
img = img.resize([256, 256], Image.LANCZOS)

In [20]:
# Get the protobuf spec of the model.
spec = model.get_spec()
for out in spec.description.output:
    if out.type.WhichOneof('Type') == "dictionaryType":
        coreml_dict_name = out.name
        break

In [21]:
import numpy as np

# Make a prediction with the Core ML version of the model.
coreml_out_dict = model.predict({"input_1" : img})
print("coreml predictions: ")
print("top class label: ", coreml_out_dict["classLabel"])

coreml_prob_dict = coreml_out_dict[coreml_dict_name]

values_vector = np.array(list(coreml_prob_dict.values()))
keys_vector = list(coreml_prob_dict.keys())
top_3_indices_coreml = np.argsort(-values_vector)[:3]
for i in range(3):
    idx = top_3_indices_coreml[i]
    score_value = values_vector[idx]
    class_id = keys_vector[idx]
    print("class name: {}, raw score value: {}".format(class_id, score_value))

coreml predictions: 
top class label:  mute
class name: mute, raw score value: 13.0234375
class name: like, raw score value: -1.4931640625
class name: one, raw score value: -1.8330078125


In [22]:
# Make a prediction with the Torch version of the model:
# prepare the input numpy array.
img_np = np.asarray(img).astype(np.float32) # (224, 224, 3)
img_np = img_np[np.newaxis, :, :, :] # (1, 224, 224, 3)
img_np = np.transpose(img_np, [0, 3, 1, 2]) # (1, 3, 224, 224)
img_np = img_np / 255.0
torch_tensor_input = torch.from_numpy(img_np)

In [23]:
# Preprocess model for Torch.
transform_model = torch.nn.Sequential(
    torchvision.transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                     std=(0.229, 0.224, 0.225)),
)

In [24]:
# Invoke prediction and print outputs.
torch_out = traced_model(transform_model(torch_tensor_input))

torch_out_np = torch_out.detach().numpy().squeeze()
top_3_indices = np.argsort(-torch_out_np)[:3]
print('torch top 3 predictions: ')
for i in range(3):
    idx = top_3_indices[i]
    score_value = torch_out_np[idx]
    class_id = class_labels[idx]
    print("class name: {}, raw score value: {}".format(class_id, score_value))

torch top 3 predictions: 
class name: mute, raw score value: 13.086015701293945
class name: like, raw score value: -1.557065725326538
class name: one, raw score value: -1.9775924682617188
