In [3]:
import streamlit as st
import cv2
import numpy as np
from PIL import Image
import torch
from torchvision import transforms

from inference_sdk import InferenceHTTPClient
import supervision as sv

import torch.nn as nn
from torch import Tensor

class PianoModelBlock2D(nn.Module):
    def __init__(self, in_dim, out_dim, ksize=(3,3), stride=(1,1), drop=0.0, pad=True):
        super().__init__()
        padding = (ksize[0]//2, ksize[1]//2) if pad else (0,0)
        self.main = nn.Sequential(
            nn.Conv2d(in_dim, out_dim, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(out_dim),
            nn.LeakyReLU(inplace=True),
            nn.Conv2d(out_dim, out_dim, kernel_size=ksize, stride=stride, padding=padding, bias=False),
            nn.BatchNorm2d(out_dim),
            nn.LeakyReLU(inplace=True),
            nn.Dropout(p=drop),
            nn.Conv2d(out_dim, out_dim, kernel_size=1, bias=False),
            nn.BatchNorm2d(out_dim)
        )
        self.relu = nn.LeakyReLU(inplace=True)
        self.downsample = nn.Sequential(
            nn.Conv2d(in_dim, out_dim, kernel_size=ksize, stride=stride, padding=padding, bias=False),
            nn.BatchNorm2d(out_dim)
        )

    def forward(self, x: Tensor):
        return self.relu(self.main(x) + self.downsample(x))

class PianoModelSmall2D(nn.Module):
    def __init__(self, input_size=(480,640)):
        super().__init__()
        downscale_dim_sizes = [32,32,64,128,128,256]
        self.preprocess = nn.Sequential(
            nn.Conv2d(1, downscale_dim_sizes[0], kernel_size=3, padding=1),
            nn.BatchNorm2d(downscale_dim_sizes[0]),
            nn.LeakyReLU(inplace=True)
        )
        self.blocks = nn.ModuleList([
            PianoModelBlock2D(downscale_dim_sizes[0], downscale_dim_sizes[1], stride=2, drop=0.2),
            PianoModelBlock2D(downscale_dim_sizes[1], downscale_dim_sizes[2], stride=2, drop=0.2),
            PianoModelBlock2D(downscale_dim_sizes[2], downscale_dim_sizes[3], stride=(2,1), drop=0.2),
            PianoModelBlock2D(downscale_dim_sizes[3], downscale_dim_sizes[4], stride=(2,1), drop=0.2),
            PianoModelBlock2D(downscale_dim_sizes[4], downscale_dim_sizes[5], stride=(2,1), drop=0.0)
        ])
        final_conv_dim = 256
        self.final_conv = nn.Conv1d(downscale_dim_sizes[-1], final_conv_dim, kernel_size=3, padding=1)
        self.fc = nn.Linear((input_size[1]//4)*final_conv_dim, 88)

    def forward(self, x: Tensor):
        x = self.preprocess(x)
        for block in self.blocks:
            x = block(x)
        x = x.mean(dim=2)
        x = self.final_conv(x)
        x = x.flatten(1)
        x = self.fc(x)
        return x



device = "cpu"  # use CPU for export
model = torch.load("model/final_model.pt", map_location=device, weights_only=False)
model.eval()


dummy_input = torch.randn(1, 1, 480, 640, device=device)

onnx_file_path = "model/final_model.onnx"

torch.onnx.export(
    model,
    dummy_input,
    onnx_file_path,
    export_params=True,           
    opset_version=17,             
    do_constant_folding=True,     
    input_names=['input'],       
    output_names=['output'],      
    dynamic_axes={
        'input': {0: 'batch_size'}, 
        'output': {0: 'batch_size'}
    }
)


W1210 11:18:50.378000 19328 site-packages\torch\onnx\_internal\exporter\_compat.py:114] Setting ONNX exporter to use operator set version 18 because the requested opset_version 17 is a lower version than we have implementations for. Automatic version conversion will be performed, which may not be successful at converting to the requested version. If version conversion is unsuccessful, the opset version of the exported model will be kept at 18. Please consider setting opset_version >=18 to leverage latest ONNX features


[torch.onnx] Obtain model graph for `PianoModelSmall2D([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `PianoModelSmall2D([...]` with `torch.export.export(..., strict=False)`... ✅
[torch.onnx] Run decomposition...


The model version conversion is not supported by the onnxscript version converter and fallback is enabled. The model will be converted using the onnx C API (target version: 17).
Failed to convert the model to the target version 17 using the ONNX C API. The model was not modified
Traceback (most recent call last):
  File "c:\Users\unova\anaconda3\envs\piano_app\lib\site-packages\onnxscript\version_converter\__init__.py", line 127, in call
    converted_proto = _c_api_utils.call_onnx_api(
  File "c:\Users\unova\anaconda3\envs\piano_app\lib\site-packages\onnxscript\version_converter\_c_api_utils.py", line 65, in call_onnx_api
    result = func(proto)
  File "c:\Users\unova\anaconda3\envs\piano_app\lib\site-packages\onnxscript\version_converter\__init__.py", line 122, in _partial_convert_version
    return onnx.version_converter.convert_version(
  File "c:\Users\unova\anaconda3\envs\piano_app\lib\site-packages\onnx\version_converter.py", line 39, in convert_version
    converted_model_str 

[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
Applied 41 of general pattern rewrite rules.


ONNXProgram(
    model=
        <
            ir_version=10,
            opset_imports={'': 18},
            producer_name='pytorch',
            producer_version='2.9.1',
            domain=None,
            model_version=None,
        >
        graph(
            name=main_graph,
            inputs=(
                %"input"<FLOAT,[s77,1,480,640]>
            ),
            outputs=(
                %"output"<FLOAT,[1,88]>
            ),
            initializers=(
                %"preprocess.0.weight"<FLOAT,[32,1,3,3]>{Tensor(...)},
                %"preprocess.0.bias"<FLOAT,[32]>{Tensor(...)},
                %"final_conv.bias"<FLOAT,[256]>{TorchTensor(...)},
                %"fc.bias"<FLOAT,[88]>{TorchTensor(...)},
                %"blocks.0.main.0.weight"<FLOAT,[32,32,1,1]>{Tensor(...)},
                %"blocks.0.main.3.weight"<FLOAT,[32,32,3,3]>{Tensor(...)},
                %"blocks.0.main.7.weight"<FLOAT,[32,32,1,1]>{Tensor(...)},
                %"blocks.0.downsample.0.weigh

In [None]:

import onnx
import onnxruntime as ort
  
onnx_model = onnx.load(onnx_file_path)
onnx.checker.check_model(onnx_model)

sess = ort.InferenceSession(onnx_file_path)

test_input = np.random.randn(1,1,480,640).astype(np.float32)
outputs = sess.run(None, {"input": test_input})
print("Output shape:", outputs[0].shape)


Output shape: (1, 88)


: 