In [4]:
#code for transforming from pth to onnx
import torch
import os
#used timm to construct model
import timm

#original file pth
pth_input = "predictor.pth"
#new onnx
onnx_output = "predictor.onnx"
#number of labels in dataset
class_count = 41
#each image is 224x224
dimension_size = 224

#constructs a basic model
def build_model(num_classes: int):
    #constructs the model that I used (vit tiny), sets its parameters by new one with number of classes
    model = timm.create_model(
        "vit_tiny_patch16_224",
        pretrained=False,
        num_classes=num_classes,   # <-- FIXED to use the function arg
    )
    return model
    
#loads the model from the checkpoint
def load_checkpoint(path: str, model: torch.nn.Module):
    #takes the checkpoint and loads it through torch to be able to adjust accordingly
    ckpt = torch.load(path, map_location="cpu")
    #assistance with difference checkpoint types used for saving model
    if isinstance(ckpt, torch.nn.Module):
        print("[INFO] Loaded full model from checkpoint (torch.save(model, ...)).")
        return ckpt
    #if the checkpoint is a dictionary instead of a model
    if isinstance(ckpt, dict):
        #check for the models state in model_state_dict and state_dict as precautionary measures
        if "model_state_dict" in ckpt:
            state_dict = ckpt["model_state_dict"]
            print("[INFO] Found 'model_state_dict' in checkpoint.")
        elif "state_dict" in ckpt:   # <-- FIXED syntax
            state_dict = ckpt["state_dict"]
            print("[INFO] Found 'state_dict' in checkpoint.")
        else:
            raise RuntimeError("Checkpoint missing model weights")
        #loads the state from dictionary to model and returns
        model.load_state_dict(state_dict)
        #returns loaded model
        return model
    #raise error if any flags
    raise RuntimeError("Checkpoint Issue with Model")

def main():
    #checks to see if checkpoint exists, if not raises error
    if not os.path.isfile(pth_input):   # <-- FIXED
        raise FileNotFoundError(f"Checkpoint not found: {pth_input}")

    #builds model
    model = build_model(class_count)   # <-- FIXED

    #loads weights onto the model
    model = load_checkpoint(pth_input, model)  # <-- FIXED
    model.eval()

    #onnx needs dummy input to adapt and store information for one forward pass
    dummy_input = torch.randn(1, 3, dimension_size, dimension_size, device="cpu")

    #exports to onnx, uses the model as base, saves name, gives example with a dummy input, etc.
    torch.onnx.export(
        model,
        dummy_input,
        onnx_output,
        input_names=["input"],
        output_names=["logits"],
        dynamic_axes={
            "input": {0: "batch"},
            "logits": {0: "batch"},
        },
        opset_version=17,
    )
    #logs completion of export
    print("Export Complete")

if __name__ == "__main__":
    main()


[INFO] Found 'model_state_dict' in checkpoint.


  torch.onnx.export(
W1127 14:54:07.794000 66614 site-packages/torch/onnx/_internal/exporter/_compat.py:114] Setting ONNX exporter to use operator set version 18 because the requested opset_version 17 is a lower version than we have implementations for. Automatic version conversion will be performed, which may not be successful at converting to the requested version. If version conversion is unsuccessful, the opset version of the exported model will be kept at 18. Please consider setting opset_version >=18 to leverage latest ONNX features


[torch.onnx] Obtain model graph for `VisionTransformer([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `VisionTransformer([...]` with `torch.export.export(..., strict=False)`... ✅
[torch.onnx] Run decomposition...


The model version conversion is not supported by the onnxscript version converter and fallback is enabled. The model will be converted using the onnx C API (target version: 17).
Failed to convert the model to the target version 17 using the ONNX C API. The model was not modified
Traceback (most recent call last):
  File "/Users/nizswan/miniforge3/envs/cv/lib/python3.10/site-packages/onnxscript/version_converter/__init__.py", line 127, in call
    converted_proto = _c_api_utils.call_onnx_api(
  File "/Users/nizswan/miniforge3/envs/cv/lib/python3.10/site-packages/onnxscript/version_converter/_c_api_utils.py", line 65, in call_onnx_api
    result = func(proto)
  File "/Users/nizswan/miniforge3/envs/cv/lib/python3.10/site-packages/onnxscript/version_converter/__init__.py", line 122, in _partial_convert_version
    return onnx.version_converter.convert_version(
  File "/Users/nizswan/miniforge3/envs/cv/lib/python3.10/site-packages/onnx/version_converter.py", line 39, in convert_version
    

[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
Applied 27 of general pattern rewrite rules.
Export Complete


In [6]:
#necessary imports
import os
import random
import numpy as np
from PIL import Image
import torch
import torchvision.transforms as T
import onnxruntime as ort

#dummy script to pass
model_dir = "predictor.onnx"
image_dir = "../data/k1/k1_test"

#all label ids for classification in my program
indices = [
    101, 102, 103, 104, 105, 106, 107, 108, 109, 110,
    111, 112, 113,
    201, 202, 203, 204,
    301, 302, 303,
    401, 402, 403, 404, 405,
    501,
    601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612,
    701,
    801, 802,
]

LABEL2IDX = {lab: i for i, lab in enumerate(indices)}
IDX2LABEL = {i: lab for lab, i in LABEL2IDX.items()}

#ids <-> names for easy human legibilitiy
buildings = {
    101: "Classroom Building 1",
    102: "Classroom Building 2",
    103: "College of Arts and Humanities",
    104: "Education Complex",
    105: "Howard Phillips Hall",
    106: "Math and Sciences Building",
    107: "Nicholson School of Communication and Media",
    108: "Teaching Academy",
    109: "Trevor Colbourn Hall",
    110: "Business Administration Buildings",
    111: "Counseling and Psychology Services",
    112: "College of Sciences Building",
    113: "Burnett Honors College",
    201: "Biological Sciences",
    202: "Chemistry Building",
    203: "Physical Sciences",
    204: "Psychology Building",
    301: "Engineering Buildings",
    302: "L3Harris Engineering Center",
    303: "CREOL – College of Optics & Photonics",
    401: "Performing Arts – Music",
    402: "Performing Arts – Theatre",
    403: "Theatre",
    404: "Rehearsal Hall",
    405: "Visual Arts Building",
    501: "John C. Hitt Library",
    601: "Student Union",
    602: "John T. Washington Center",
    603: "63 South",
    604: "Tech Commons Buildings",
    605: "Health Center",
    606: "General Ferrell Commons",
    607: "Live Oak Event Center (Live Oak Ballroom)",
    608: "Knights Pantry",
    609: "Research 1",
    610: "Career Services and Experiential Learning",
    611: "FAIRWINDS Alumni Center",
    612: "UCF Global",
    701: "Millican Hall",
    801: "Health Sciences I",
    802: "Health Sciences II",
}

#sets preprocessing based on image net, what the vit was pretrained on
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

#forces to resize to 224, tensorize, and normalize for efficient processing
transform = T.Compose([
    T.Resize((224, 224)),
    T.ToTensor(),
    T.Normalize(mean=imagenet_mean, std=imagenet_std),
])

#takes the first part of the title of the image and classify as the true label
def parse_label_from_filename(filename: str) -> int:
    #takes the filename
    base = os.path.basename(filename)
    #splits the filename
    stem, _ = os.path.splitext(base)
    #takes the first chunk as in we have image input forms labelid_videoid_frameid
    first_chunk = stem.split("_")[0]
    #returns true label
    label_int = int(first_chunk)
    return label_int
    
#picks a random image, 
def pick_random_image(img_dir: str):
    #we only care for jpg in my images, for generlization it would be best to transfer from other images to jpg to match my model
    exts = {".jpg"}
    #collects all cadindates in the directory to see if they are jpg.
    candidates = [os.path.join(img_dir, f)
                  for f in os.listdir(img_dir)
                  if os.path.splitext(f)[1].lower() in exts]
    
    #if none found raise error
    if not candidates:
        raise RuntimeError(f"No images found in {img_dir}")

    #chooses a random one and returns it
    path = random.choice(candidates)
    return path

#main
def main():
    #sets up the model/session
    print("Loading ONNX model")
    sess = ort.InferenceSession(
        model_dir,  # FIXED
        providers=["CPUExecutionProvider"],
    )
    #obtains inputs and outputs from model
    input_name = sess.get_inputs()[0].name
    output_name = sess.get_outputs()[0].name
    
    #picks a random image
    img_path = pick_random_image(image_dir)  # FIXED
    #gets its out id
    true_label_id = parse_label_from_filename(img_path)

    #gets true label idx as it swaps from my hundreds calculator to #1-41
    true_label_idx = LABEL2IDX[true_label_id]
    true_name = buildings.get(true_label_id, f"Building {true_label_id}")  # FIXED

    print(f"Random test image: {img_path}")
    print(f"True label ID: {true_label_id} (idx {true_label_idx})")
    print(f"True building: {true_name}")

    #preprocesses the image, opens image in RGB, transforms image as necessary, and turns into vector for usage.
    img = Image.open(img_path).convert("RGB")
    x = transform(img)
    x = x.unsqueeze(0)
    x_np = x.numpy().astype(np.float32)

    #takes outputs by passing through model
    outputs = sess.run([output_name], {input_name: x_np})
    logits = outputs[0]
    logits = logits[0]

    #makes a prediction
    pred_idx = int(np.argmax(logits))
    pred_label_id = IDX2LABEL[pred_idx]
    pred_name = buildings.get(pred_label_id, f"Building {pred_label_id}")  # FIXED

    #prints results
    print("\nRESULTS")
    print(f"Predicted index: {pred_idx}")
    print(f"Predicted label ID: {pred_label_id}")
    print(f"Predicted building: {pred_name}")

    #shows the five highest logits picked and their probability distribution
    top5_idx = np.argsort(logits)[-5:][::-1]
    print("\nTop-5 classes (idx, label_id, building, logit):")
    for i in top5_idx:
        lab_id = IDX2LABEL[int(i)]
        name = buildings.get(lab_id, f"Building {lab_id}")  # FIXED
        print(f"  {int(i):2d} | {lab_id:3d} | {name} | {logits[i]:.3f}")


if __name__ == "__main__":
    main()

Loading ONNX model
Random test image: ../data/k1/k1_test/301_6183_001764.jpg
True label ID: 301 (idx 17)
True building: Engineering Buildings

RESULTS
Predicted index: 17
Predicted label ID: 301
Predicted building: Engineering Buildings

Top-5 classes (idx, label_id, building, logit):
  17 | 301 | Engineering Buildings | 16.067
  19 | 303 | CREOL – College of Optics & Photonics | 4.002
  22 | 403 | Theatre | 3.562
   1 | 102 | Classroom Building 2 | 3.303
  25 | 501 | John C. Hitt Library | 3.037


In [2]:
!sw_vers

ProductName:		macOS
ProductVersion:		13.2.1
BuildVersion:		22D68


In [None]:

pip uninstall onnxruntime onnxruntime-silicon -y
rm -rf ~/miniforge3/envs/cv/lib/python3.10/site-packages/onnxruntime*

conda install -c conda-forge onnxruntime


In [1]:
import onnxruntime as ort
print(f"ONNX Runtime version: {ort.__version__}")
print(f"Available providers: {ort.get_available_providers()}")

ONNX Runtime version: 1.22.0
Available providers: ['CPUExecutionProvider']


In [None]:
#necessary imports
import os
import sys
import numpy as np
from PIL import Image
import torch
import torchvision.transforms as T
import onnxruntime as ort

#path to the onnx model
model_dir = "predictor.onnx"

#all label ids for classification in my program
indices = [
    101, 102, 103, 104, 105, 106, 107, 108, 109, 110,
    111, 112, 113,
    201, 202, 203, 204,
    301, 302, 303,
    401, 402, 403, 404, 405,
    501,
    601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612,
    701,
    801, 802,
]

#mapping for idx <-> label
LABEL2IDX = {lab: i for i, lab in enumerate(indices)}
IDX2LABEL = {i: lab for lab, i in LABEL2IDX.items()}

#names for buildings based on id
buildings = {
    101: "Classroom Building 1",
    102: "Classroom Building 2",
    103: "College of Arts and Humanities",
    104: "Education Complex",
    105: "Howard Phillips Hall",
    106: "Math and Sciences Building",
    107: "Nicholson School of Communication and Media",
    108: "Teaching Academy",
    109: "Trevor Colbourn Hall",
    110: "Business Administration Buildings",
    111: "Counseling and Psychology Services",
    112: "College of Sciences Building",
    113: "Burnett Honors College",
    201: "Biological Sciences",
    202: "Chemistry Building",
    203: "Physical Sciences",
    204: "Psychology Building",
    301: "Engineering Buildings",
    302: "L3Harris Engineering Center",
    303: "CREOL – College of Optics & Photonics",
    401: "Performing Arts – Music",
    402: "Performing Arts – Theatre",
    403: "Theatre",
    404: "Rehearsal Hall",
    405: "Visual Arts Building",
    501: "John C. Hitt Library",
    601: "Student Union",
    602: "John T. Washington Center",
    603: "63 South",
    604: "Tech Commons Buildings",
    605: "Health Center",
    606: "General Ferrell Commons",
    607: "Live Oak Event Center (Live Oak Ballroom)",
    608: "Knights Pantry",
    609: "Research 1",
    610: "Career Services and Experiential Learning",
    611: "FAIRWINDS Alumni Center",
    612: "UCF Global",
    701: "Millican Hall",
    801: "Health Sciences I",
    802: "Health Sciences II",
}

#preprocessing based on imagenet stats used by vit tiny
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

#forces to resize to 224x224, convert to tensor, normalize
transform = T.Compose([
    T.Resize((224, 224)),
    T.ToTensor(),
    T.Normalize(mean=imagenet_mean, std=imagenet_std),
])

#convert png/webp/heic/etc to jpg for consistent model input
def convert_to_jpg(path: str) -> str:
    _, ext = os.path.splitext(path)
    ext = ext.lower()

    #jpg already correct input
    if ext in [".jpg", ".jpeg"]:
        return path

    #convert image to rgb then export as jpg
    img = Image.open(path).convert("RGB")
    new_path = path + ".jpg"
    img.save(new_path, "JPEG")
    return new_path

#main classifier that takes a direct file path
def main():
    #ensures user provided an argument
    if len(sys.argv) < 2:
        raise RuntimeError("Please call: python generalclassifier.py <image_path>")

    #user provided image path
    img_path = sys.argv[1]

    #checks file exists
    if not os.path.isfile(img_path):
        raise FileNotFoundError(f"File not found: {img_path}")

    #load ONNX session
    print(f"[INFO] Loading ONNX model from: {model_dir}")
    sess = ort.InferenceSession(
        model_dir,
        providers=["CPUExecutionProvider"],
    )

    #get model input & output names
    input_name = sess.get_inputs()[0].name
    output_name = sess.get_outputs()[0].name

    #convert strange extension → jpg
    img_path_jpg = convert_to_jpg(img_path)
    print(f"[INFO] Using processed image: {img_path_jpg}")

    #opens image for preprocessing
    img = Image.open(img_path_jpg).convert("RGB")

    #apply 224x224 + normalize
    x = transform(img)
    x = x.unsqueeze(0)
    x_np = x.numpy().astype(np.float32)

    #run through model
    outputs = sess.run([output_name], {input_name: x_np})
    logits = outputs[0][0]

    #get predicted class index
    pred_idx = int(np.argmax(logits))
    pred_label_id = IDX2LABEL[pred_idx]
    pred_name = buildings.get(pred_label_id, f"Building {pred_label_id}")

    #print results just like your UCF-specific classifier
    print("\nRESULTS")
    print(f"Predicted index: {pred_idx}")
    print(f"Predicted label ID: {pred_label_id}")
    print(f"Predicted building: {pred_name}")

    #top 5 logits by descending order
    top5_idx = np.argsort(logits)[-5:][::-1]

    print("\nTop-5 classes (idx, label_id, building, logit):")
    for i in top5_idx:
        lab_id = IDX2LABEL[int(i)]
        name = buildings.get(lab_id, f"Building {lab_id}")
        print(f"  {int(i):2d} | {lab_id:3d} | {name} | {logits[i]:.3f}")


if __name__ == "__main__":
    main()