In [1]:
%load_ext autoreload
%autoreload 2

In [None]:
import os
import sys

# --- Fix path setup ---
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), ".."))
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)  # <-- add the ROOT, not the src folder directly!

from src.model_definitions import make_resnet18
from src.video_model_definition import create_mvit_model

import onnx
from onnxruntime.quantization import quantize_dynamic, QuantType


In [12]:
# === Step 2: Load FP32 models ===
import torch
import os

# Base paths
MODEL_DIR = os.path.join(PROJECT_ROOT, "models", "model_final")
QUANT_DIR = os.path.join(PROJECT_ROOT, "models", "model_quantized")
os.makedirs(QUANT_DIR, exist_ok=True)

# ----- Load ResNet18 dynamically -----
resnet_path = os.path.join(MODEL_DIR, "resnet_asl_final.pth")
state = torch.load(resnet_path, map_location="cpu")

# Determine class count automatically from fc layer in checkpoint
if "fc.weight" in state:
    num_classes = state["fc.weight"].shape[0]
else:
    # fallback if checkpoint is wrapped (e.g. from DataParallel or a dict)
    for k, v in state.items():
        if "fc.weight" in k:
            num_classes = v.shape[0]
            break

print(f"Detected {num_classes} output classes for ResNet18")

# Now build model with correct classifier size
resnet = make_resnet18(num_classes=num_classes, pretrained=False)
resnet.load_state_dict(state, strict=True)
resnet.eval()


Detected 30 output classes for ResNet18


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [13]:
# ----- Load MViT dynamically -----
mvit_path = os.path.join(MODEL_DIR, "mvit_wlasl_final.pth")
mvit_state = torch.load(mvit_path, map_location="cpu")

if "head.1.weight" in mvit_state:
    mvit_classes = mvit_state["head.1.weight"].shape[0]
else:
    for k, v in mvit_state.items():
        if "head.1.weight" in k:
            mvit_classes = v.shape[0]
            break

print(f"Detected {mvit_classes} output classes for MViT")

mvit = create_mvit_model(num_classes=mvit_classes, pretrained=False)
mvit.load_state_dict(mvit_state, strict=True)
mvit.eval()

Detected 1990 output classes for MViT


MViT(
  (conv_proj): Conv3d(3, 96, kernel_size=(3, 7, 7), stride=(2, 4, 4), padding=(1, 3, 3))
  (pos_encoding): PositionalEncoding()
  (blocks): ModuleList(
    (0): MultiscaleBlock(
      (norm1): LayerNorm((96,), eps=1e-06, elementwise_affine=True)
      (norm2): LayerNorm((96,), eps=1e-06, elementwise_affine=True)
      (attn): MultiscaleAttention(
        (qkv): Linear(in_features=96, out_features=288, bias=True)
        (project): Sequential(
          (0): Linear(in_features=96, out_features=96, bias=True)
        )
        (pool_q): Pool(
          (pool): Conv3d(96, 96, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1), groups=96, bias=False)
          (norm_act): Sequential(
            (0): LayerNorm((96,), eps=1e-06, elementwise_affine=True)
          )
        )
        (pool_k): Pool(
          (pool): Conv3d(96, 96, kernel_size=(3, 3, 3), stride=(1, 8, 8), padding=(1, 1, 1), groups=96, bias=False)
          (norm_act): Sequential(
            (0): LayerNorm((96,

In [18]:
# === Step 3.1: Setup paths and dummy inputs ===
import torch
import os
from torch.ao.quantization import quantize_dynamic  # modern API
from onnxruntime.quantization import QuantType

# üìÇ Folder paths
MODEL_FINAL = os.path.join(PROJECT_ROOT, "models", "model_final")
MODEL_INTER = os.path.join(PROJECT_ROOT, "models", "model_inter")
MODEL_QUANT = os.path.join(PROJECT_ROOT, "models", "model_quantized")

os.makedirs(MODEL_INTER, exist_ok=True)
os.makedirs(MODEL_QUANT, exist_ok=True)

# üß© Dummy inputs for tracing
dummy_resnet = torch.randn(1, 3, 224, 224)
dummy_mvit   = torch.randn(1, 3, 16, 224, 224)

# === Step 3.2: Quantize models inside PyTorch (weights-only) ===
from torch.ao.quantization import quantize_dynamic

# Quantize both models (only Linear layers to avoid conv/bn issues)
resnet_quant = quantize_dynamic(resnet, {torch.nn.Linear}, dtype=torch.qint8)
mvit_quant   = quantize_dynamic(mvit, {torch.nn.Linear}, dtype=torch.qint8)

print("‚úÖ Models quantized in PyTorch (dynamic INT8 for Linear layers).")


For migrations of users: 
1. Eager mode quantization (torch.ao.quantization.quantize, torch.ao.quantization.quantize_dynamic), please migrate to use torchao eager mode quantize_ API instead 
2. FX graph mode quantization (torch.ao.quantization.quantize_fx.prepare_fx,torch.ao.quantization.quantize_fx.convert_fx, please migrate to use torchao pt2e quantization API instead (prepare_pt2e, convert_pt2e) 
3. pt2e quantization has been migrated to torchao (https://github.com/pytorch/ao/tree/main/torchao/quantization/pt2e) 
see https://github.com/pytorch/ao/issues/2259 for more details
  resnet_quant = quantize_dynamic(resnet, {torch.nn.Linear}, dtype=torch.qint8)
For migrations of users: 
1. Eager mode quantization (torch.ao.quantization.quantize, torch.ao.quantization.quantize_dynamic), please migrate to use torchao eager mode quantize_ API instead 
2. FX graph mode quantization (torch.ao.quantization.quantize_fx.prepare_fx,torch.ao.quantization.quantize_fx.convert_fx, please migrate to use 

‚úÖ Models quantized in PyTorch (dynamic INT8 for Linear layers).


In [19]:
# === Step 3.3: Export FP32 ONNX models (intermediate) ===
resnet_inter_path = os.path.join(MODEL_INTER, "resnet_asl_fp32.onnx")
mvit_inter_path   = os.path.join(MODEL_INTER, "mvit_wlasl_fp32.onnx")

torch.onnx.export(
    resnet, dummy_resnet, resnet_inter_path,
    input_names=["input"], output_names=["output"],
    opset_version=17, do_constant_folding=True
)
torch.onnx.export(
    mvit, dummy_mvit, mvit_inter_path,
    input_names=["input"], output_names=["output"],
    opset_version=17, do_constant_folding=True
)

print("‚úÖ Exported FP32 reference ONNX models.")


W1028 16:15:51.945000 7529 torch/onnx/_internal/exporter/_compat.py:114] Setting ONNX exporter to use operator set version 18 because the requested opset_version 17 is a lower version than we have implementations for. Automatic version conversion will be performed, which may not be successful at converting to the requested version. If version conversion is unsuccessful, the opset version of the exported model will be kept at 18. Please consider setting opset_version >=18 to leverage latest ONNX features


[torch.onnx] Obtain model graph for `ResNet([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `ResNet([...]` with `torch.export.export(..., strict=False)`... ‚úÖ
[torch.onnx] Run decomposition...


The model version conversion is not supported by the onnxscript version converter and fallback is enabled. The model will be converted using the onnx C API (target version: 17).
Failed to convert the model to the target version 17 using the ONNX C API. The model was not modified
Traceback (most recent call last):
  File "/home/genesis/sem1_project/sign-to-speech-with-temporal-correction/env/lib/python3.12/site-packages/onnxscript/version_converter/__init__.py", line 127, in call
    converted_proto = _c_api_utils.call_onnx_api(
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/genesis/sem1_project/sign-to-speech-with-temporal-correction/env/lib/python3.12/site-packages/onnxscript/version_converter/_c_api_utils.py", line 65, in call_onnx_api
    result = func(proto)
             ^^^^^^^^^^^
  File "/home/genesis/sem1_project/sign-to-speech-with-temporal-correction/env/lib/python3.12/site-packages/onnxscript/version_converter/__init__.py", line 122, in _partial_convert_vers

[torch.onnx] Run decomposition... ‚úÖ
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ‚úÖ
[torch.onnx] Obtain model graph for `MViT([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `MViT([...]` with `torch.export.export(..., strict=False)`... ‚úÖ
[torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ‚úÖ
[torch.onnx] Translate the graph into ONNX...


The model version conversion is not supported by the onnxscript version converter and fallback is enabled. The model will be converted using the onnx C API (target version: 17).


[torch.onnx] Translate the graph into ONNX... ‚úÖ


Failed to convert the model to the target version 17 using the ONNX C API. The model was not modified
Traceback (most recent call last):
  File "/home/genesis/sem1_project/sign-to-speech-with-temporal-correction/env/lib/python3.12/site-packages/onnxscript/version_converter/__init__.py", line 127, in call
    converted_proto = _c_api_utils.call_onnx_api(
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/genesis/sem1_project/sign-to-speech-with-temporal-correction/env/lib/python3.12/site-packages/onnxscript/version_converter/_c_api_utils.py", line 65, in call_onnx_api
    result = func(proto)
             ^^^^^^^^^^^
  File "/home/genesis/sem1_project/sign-to-speech-with-temporal-correction/env/lib/python3.12/site-packages/onnxscript/version_converter/__init__.py", line 122, in _partial_convert_version
    return onnx.version_converter.convert_version(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/genesis/sem1_project/sign-to-speech-with-temporal-correcti

Applied 98 of general pattern rewrite rules.
‚úÖ Exported FP32 reference ONNX models.


In [2]:
import sys, os

# Add project root to Python path
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), ".."))
SRC_PATH = os.path.join(PROJECT_ROOT, "src")

if SRC_PATH not in sys.path:
    sys.path.append(SRC_PATH)

print("‚úÖ Added to sys.path:", SRC_PATH)


‚úÖ Added to sys.path: /home/genesis/sem1_project/sign-to-speech-with-temporal-correction/src


In [3]:
import onnxruntime as ort
print(ort.get_device())

sess = ort.InferenceSession("../models/model_inter/resnet_asl_fp32.onnx")
print(sess.get_providers())


GPU
['CPUExecutionProvider']


In [9]:
import onnxruntime as ort
print("Device:", ort.get_device())
print("Available providers:", ort.get_available_providers())

Device: GPU
Available providers: ['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider']


In [10]:
import onnxruntime as ort
import numpy as np
import time

# Try to load CUDA provider if available
available = ort.get_available_providers()
preferred_providers = []

if 'CUDAExecutionProvider' in available:
    preferred_providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
    print("‚úÖ Using GPU acceleration")
else:
    preferred_providers = ['CPUExecutionProvider']
    print("‚ö†Ô∏è GPU not available, running on CPU")

# Create inference session
sess = ort.InferenceSession(
    "../models/model_inter/resnet_asl_fp32.onnx",
    providers=preferred_providers
)

# Test inference
inp = np.random.randn(1, 3, 224, 224).astype(np.float32)
start = time.time()
out = sess.run(None, {sess.get_inputs()[0].name: inp})
print(f"‚úÖ Ran ONNX model on {sess.get_providers()[0]} in {time.time() - start:.3f}s")


‚úÖ Using GPU acceleration
*************** EP Error ***************
EP Error /onnxruntime_src/onnxruntime/core/providers/cuda/cuda_call.cc:123 std::conditional_t<THRW, void, onnxruntime::common::Status> onnxruntime::CudaCall(ERRTYPE, const char*, const char*, ERRTYPE, const char*, const char*, int) [with ERRTYPE = cudaError; bool THRW = true; std::conditional_t<THRW, void, common::Status> = void] /onnxruntime_src/onnxruntime/core/providers/cuda/cuda_call.cc:116 std::conditional_t<THRW, void, onnxruntime::common::Status> onnxruntime::CudaCall(ERRTYPE, const char*, const char*, ERRTYPE, const char*, const char*, int) [with ERRTYPE = cudaError; bool THRW = true; std::conditional_t<THRW, void, common::Status> = void] CUDA failure 100: no CUDA-capable device is detected ; GPU=-1 ; hostname=genesis-mkiii ; file=/onnxruntime_src/onnxruntime/core/providers/cuda/cuda_execution_provider.cc ; line=280 ; expr=cudaSetDevice(info_.device_id); 

 when using ['CUDAExecutionProvider', 'CPUExecutionProv

In [11]:
import onnxruntime as ort
import numpy as np, time

sess = ort.InferenceSession(
    "../models/model_inter/resnet_asl_fp32.onnx",
    providers=['CUDAExecutionProvider','CPUExecutionProvider']
)

inp = np.random.randn(1,3,224,224).astype(np.float32)
start = time.time()
out = sess.run(None, {sess.get_inputs()[0].name: inp})
print(f"‚úÖ Ran ONNX model on {sess.get_providers()[0]} in {time.time()-start:.3f}s")


*************** EP Error ***************
EP Error /onnxruntime_src/onnxruntime/core/providers/cuda/cuda_call.cc:123 std::conditional_t<THRW, void, onnxruntime::common::Status> onnxruntime::CudaCall(ERRTYPE, const char*, const char*, ERRTYPE, const char*, const char*, int) [with ERRTYPE = cudaError; bool THRW = true; std::conditional_t<THRW, void, common::Status> = void] /onnxruntime_src/onnxruntime/core/providers/cuda/cuda_call.cc:116 std::conditional_t<THRW, void, onnxruntime::common::Status> onnxruntime::CudaCall(ERRTYPE, const char*, const char*, ERRTYPE, const char*, const char*, int) [with ERRTYPE = cudaError; bool THRW = true; std::conditional_t<THRW, void, common::Status> = void] CUDA failure 100: no CUDA-capable device is detected ; GPU=-1 ; hostname=genesis-mkiii ; file=/onnxruntime_src/onnxruntime/core/providers/cuda/cuda_execution_provider.cc ; line=280 ; expr=cudaSetDevice(info_.device_id); 

 when using ['CUDAExecutionProvider', 'CPUExecutionProvider']
Falling back to ['CP

In [3]:
from ensemble_runtime import run_dual_inference
import numpy as np

# Dummy example until you connect webcam
image = np.random.rand(1, 3, 224, 224).astype(np.float32)
clip  = np.random.rand(1, 3, 16, 224, 224).astype(np.float32)

# Dummy label lists
resnet_labels = [f"ASL_{i}" for i in range(30)]
mvit_labels   = [f"WLASL_{i}" for i in range(1990)]

result = run_dual_inference(image, clip, resnet_labels, mvit_labels, threshold=0.7)
print(result)


{'source': 'ResNet', 'label': 'ASL_27', 'confidence': 0.9984676241874695}
