# Models conversion in ONNX format

Useful links:
    
    https://onnx.ai/sklearn-onnx/introduction.html
        
    https://pypi.org/project/onnxmltools/

In [26]:
#!pip install onnxmltools

In [27]:
import site
import sys
# Make packages installed into the per-user site-packages directory importable.
# Guarded so that re-running this cell does not keep appending duplicate
# entries to sys.path.
_user_site = site.getusersitepackages()
if _user_site not in sys.path:
    sys.path.append(_user_site)

In [28]:
import joblib
import onnxmltools
from onnxmltools.convert import convert_xgboost
from onnxmltools.convert.common.data_types import FloatTensorType

# Convert one tuned XGBoost model to ONNX.
# SECURITY NOTE: joblib.load unpickles the file, which can execute arbitrary
# code; only load model files that come from a trusted location.
grid_search = joblib.load('/eos/user/m/moameen/HjTagger/models/model_xgboost_for_HjTagger_27May2025_a_r3')
model = grid_search.best_estimator_

# Width of the float input tensor declared to the converter; the batch
# dimension is left dynamic (None).
N_FEATURES = 4

# If the estimator reports how many features it was fitted on, fail early on a
# mismatch instead of silently writing an ONNX model with the wrong input width.
_n_model_features = getattr(model, "n_features_in_", None)
if _n_model_features is not None and _n_model_features != N_FEATURES:
    raise ValueError(
        f"Model expects {_n_model_features} input features, "
        f"but the ONNX input is declared with {N_FEATURES}"
    )

initial_type = [('float_input', FloatTensorType([None, N_FEATURES]))]
onnx_model = convert_xgboost(model, initial_types=initial_type)

# Save the ONNX model
output_path = "/eos/user/m/moameen/multilepton-analysis/data/models_hjtagger/model_xgboost_for_HjTagger_27May2025_a_r3.onnx"
with open(output_path, "wb") as f:
    f.write(onnx_model.SerializeToString())

# Convert all model files in one go

In [29]:
import os
import joblib
import onnxmltools
from onnxmltools.convert import convert_xgboost
from onnxmltools.convert.common.data_types import FloatTensorType

# Batch-convert every file in input_dir whose name starts with "model_" to
# ONNX, writing "<name>.onnx" into output_dir.
input_dir = "/eos/user/m/moameen/HjTagger/models/"
output_dir = "/eos/user/m/moameen/multilepton-analysis/data/models_hjtagger/"
os.makedirs(output_dir, exist_ok=True)

# Width of the float input tensor declared to the converter; the batch
# dimension is left dynamic (None). Built once because it does not depend on
# the individual model.
N_FEATURES = 4
initial_type = [('float_input', FloatTensorType([None, N_FEATURES]))]

# Names of the files that could not be converted, reported after the loop.
failed = []

# sorted() makes the processing order (and therefore the log) deterministic;
# os.listdir returns entries in arbitrary order.
for filename in sorted(os.listdir(input_dir)):
    if filename.startswith("model_"):
        input_path = os.path.join(input_dir, filename)
        print(f"Converting model: {filename}")

        try:
            # SECURITY NOTE: joblib.load unpickles the file, which can execute
            # arbitrary code; input_dir must only contain trusted files.
            grid_search = joblib.load(input_path)
            model = grid_search.best_estimator_

            # If the estimator reports how many features it was fitted on,
            # refuse to write an ONNX model with a mismatching input width.
            n_model_features = getattr(model, "n_features_in_", None)
            if n_model_features is not None and n_model_features != N_FEATURES:
                raise ValueError(
                    f"model expects {n_model_features} input features, "
                    f"but the ONNX input is declared with {N_FEATURES}"
                )

            onnx_model = convert_xgboost(model, initial_types=initial_type)
            base_name = os.path.splitext(filename)[0]
            output_path = os.path.join(output_dir, base_name + ".onnx")
            with open(output_path, "wb") as f:
                f.write(onnx_model.SerializeToString())

            print(f"Saved ONNX model to: {output_path}")

        except Exception as e:
            # Best effort: one bad file must not stop the remaining
            # conversions, but it is recorded for the summary below.
            print(f"Failed to convert {filename}: {e}")
            failed.append(filename)

if failed:
    print(f"{len(failed)} model(s) failed to convert: {failed}")

Converting model: model_xgboost_for_HjTagger_27May2025_a_all5wp_r3
Saved ONNX model to: /eos/user/m/moameen/multilepton-analysis/data/models_hjtagger/model_xgboost_for_HjTagger_27May2025_a_all5wp_r3.onnx
Converting model: model_xgboost_for_HjTagger_27May2025_a_r3
Saved ONNX model to: /eos/user/m/moameen/multilepton-analysis/data/models_hjtagger/model_xgboost_for_HjTagger_27May2025_a_r3.onnx
Converting model: model_xgboost_for_HjTagger_27May2025_a_weights_r5
Saved ONNX model to: /eos/user/m/moameen/multilepton-analysis/data/models_hjtagger/model_xgboost_for_HjTagger_27May2025_a_weights_r5.onnx
Converting model: model_xgboost_for_HjTagger_27May2025_b_all5wp_r3
Saved ONNX model to: /eos/user/m/moameen/multilepton-analysis/data/models_hjtagger/model_xgboost_for_HjTagger_27May2025_b_all5wp_r3.onnx
Converting model: model_xgboost_for_HjTagger_27May2025_b_r3
Saved ONNX model to: /eos/user/m/moameen/multilepton-analysis/data/models_hjtagger/model_xgboost_for_HjTagger_27May2025_b_r3.onnx
Conver

In [30]:
import onnx
# Re-open one converted file and list the names of its graph outputs.
model = onnx.load(
    "/eos/user/m/moameen/multilepton-analysis/data/models_hjtagger/model_xgboost_for_HjTagger_27May2025_a_r3.onnx"
)
output_names = []
for graph_output in model.graph.output:
    output_names.append(graph_output.name)
print("Final outputs:", output_names)

Final outputs: ['label', 'probabilities']


We see two outputs, named 'label' and 'probabilities'. An ONNX model with two outputs will not work with our CMGRDF framework, so we need to keep the 'probabilities' output and remove the 'label' output.

In [34]:
import os
import onnx

# Rewrite every .onnx file in input_dir so that its graph exposes only the
# "probabilities" output, saving the result under the same name in output_dir.
input_dir = "/eos/user/m/moameen/multilepton-analysis/data/models_hjtagger"
output_dir = "/eos/user/m/moameen/multilepton-analysis/data/models_hjtagger_probonly"

os.makedirs(output_dir, exist_ok=True)

# Name of the single graph output to keep.
KEPT_OUTPUT = "probabilities"

# sorted() makes the processing order (and therefore the log) deterministic;
# os.listdir returns entries in arbitrary order.
for filename in sorted(os.listdir(input_dir)):
    if filename.endswith(".onnx"):
        input_path = os.path.join(input_dir, filename)
        output_path = os.path.join(output_dir, filename)

        print(f"Processing model: {filename}")
        model = onnx.load(input_path)

        outputs = model.graph.output
        new_outputs = [output for output in outputs if output.name == KEPT_OUTPUT]

        # Without this guard a model lacking the expected output would be
        # saved with no graph outputs at all.
        if not new_outputs:
            print(f"Skipping {filename}: no '{KEPT_OUTPUT}' output found (has {[o.name for o in outputs]})")
            continue

        # Replace the graph's output list with only the retained entry.
        model.graph.ClearField("output")
        model.graph.output.extend(new_outputs)
        onnx.save(model, output_path)

        print(f"Saved to: {output_path}")

Processing model: model_xgboost_for_HjTagger_27May2025_FIXED.onnx
Saved to: /eos/user/m/moameen/multilepton-analysis/data/models_hjtagger_probonly/model_xgboost_for_HjTagger_27May2025_FIXED.onnx
Processing model: model_xgboost_for_HjTagger_27May2025_a_all5wp_r3.onnx
Saved to: /eos/user/m/moameen/multilepton-analysis/data/models_hjtagger_probonly/model_xgboost_for_HjTagger_27May2025_a_all5wp_r3.onnx
Processing model: model_xgboost_for_HjTagger_27May2025_a_r3.onnx
Saved to: /eos/user/m/moameen/multilepton-analysis/data/models_hjtagger_probonly/model_xgboost_for_HjTagger_27May2025_a_r3.onnx
Processing model: model_xgboost_for_HjTagger_27May2025_a_weights_r5.onnx
Saved to: /eos/user/m/moameen/multilepton-analysis/data/models_hjtagger_probonly/model_xgboost_for_HjTagger_27May2025_a_weights_r5.onnx
Processing model: model_xgboost_for_HjTagger_27May2025_b_all5wp_r3.onnx
Saved to: /eos/user/m/moameen/multilepton-analysis/data/models_hjtagger_probonly/model_xgboost_for_HjTagger_27May2025_b_all5

In [35]:
# Just to check
import onnx
# Load one rewritten file and print the output names that remain in its graph.
model = onnx.load(
    "/eos/user/m/moameen/multilepton-analysis/data/models_hjtagger_probonly/model_xgboost_for_HjTagger_27May2025_a_r3.onnx"
)
final_output_names = list(map(lambda value_info: value_info.name, model.graph.output))
print("NOW Final outputs:", final_output_names)

NOW Final outputs: ['probabilities']
