In [1]:
# Install pinned versions of the listed packages into the kernel's environment
# (-q reduces pip's output). Restart the kernel afterwards if pip asks for it.
# NOTE(review): boto3 and numpy are imported further down but are not installed
# here — presumably the base environment provides them; confirm.
%pip install -q xgboost==1.5.0 skl2onnx==1.16.0 onnxruntime==1.16.3 transformers==4.24.0 datasets==2.10.0


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.2.2[0m[39;49m -> [0m[32;49m23.3.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
# Add the parent directory to sys.path so that modules located there can be
# imported. The membership check keeps the cell idempotent: re-running it does
# not pile up duplicate "../" entries.
import sys

if "../" not in sys.path:
    sys.path.append("../")

In [5]:
import functools
import os
from pathlib import Path

import boto3
import numpy as np
import onnxruntime as rt
from skl2onnx import to_onnx

import dataloading
from training.models import bayes

In [6]:
# Base names of the models to export. export_and_upload() reads
# ../_models/<name>.json and writes ../_models/<name>.onnx for each entry.
MODEL_NAMES = [
    "danceability-bayes-1000_probs-by_track",
    "acousticness-bayes-1000_probs-by_track",
    "energy-bayes-1000_probs-by_track",
    "instrumentalness-bayes-1000_probs-by_track",
    "valence-bayes-1000_probs-by_track",
]

# Directory handed to dataloading.read_tag_probs().
DATA_DIR = Path("../../../data")

# S3 settings are taken from the environment so that no credentials are stored
# in the notebook. A variable that is not set is read as None.
key_id = os.environ.get("AWS_ACCESS_KEY_ID")
secret_key = os.environ.get("AWS_SECRET_ACCESS_KEY")
endpoint = os.environ.get("AWS_S3_ENDPOINT")
bucket_name = os.environ.get("AWS_S3_BUCKET")

# Client used by export_and_upload() to push the exported files.
s3_client = boto3.client(
    "s3",
    endpoint_url=endpoint,
    aws_access_key_id=key_id,
    aws_secret_access_key=secret_key,
    use_ssl=True,
)

In [7]:
@functools.lru_cache(maxsize=1)
def _load_sample_input(data_dir):
    """Return the first row of the tag-probability table as a float32 vector.

    The row only serves as a sample input for the ONNX conversion and for the
    smoke test, and it does not depend on the model being exported. The result
    is therefore cached so the table is read once instead of once per model.
    Re-run this cell (or call ``_load_sample_input.cache_clear()``) if the data
    on disk changes.

    Args:
        data_dir: Directory passed to ``dataloading.read_tag_probs``.

    Returns:
        1-D ``numpy.float32`` array holding the first row of the table without
        its last 12 columns.
    """
    tag_probs = dataloading.read_tag_probs(
        data_dir, num_tags=1000, dimension="track", index_col="track"
    )
    # Keep a single row and drop the trailing 12 columns.
    # NOTE(review): presumably those 12 columns are not model inputs — confirm
    # against the training code.
    sample_row = tag_probs.iloc[:1, :-12]
    return sample_row.to_numpy().astype(np.float32)[0]


def export_and_upload(model_name):
    """Convert a saved model to ONNX, smoke-test it and upload it to S3.

    The model is loaded from ``../_models/{model_name}.json``, converted with
    ``skl2onnx.to_onnx`` and written to ``../_models/{model_name}.onnx``. The
    exported file is then run once through onnxruntime on the sample input
    (the input name, output name and prediction are printed) and finally
    uploaded to the configured bucket under the key ``{model_name}.onnx``.

    Args:
        model_name: Base name (no extension) of the model file; also used for
            the ONNX file name and the S3 object key.

    Raises:
        ValueError: If ``bucket_name`` is empty when the upload is about to
            start. The local ``.onnx`` file has already been written by then.
    """
    model_path = Path(f"../_models/{model_name}.json")
    model = bayes.BayesianRegressor()
    model.load(model_path)

    # Already float32, so no further casting is needed below.
    sample_input = _load_sample_input(DATA_DIR)

    onnx_model_path = f"../_models/{model_name}.onnx"

    # The sample input is only used by skl2onnx to infer the input type.
    onx = to_onnx(model._model, sample_input)

    with open(onnx_model_path, "wb") as f:
        f.write(onx.SerializeToString())

    # Smoke test: load the exported file and compute one prediction with onnxruntime.
    sess = rt.InferenceSession(onnx_model_path, providers=["CPUExecutionProvider"])
    input_name = sess.get_inputs()[0].name
    label_name = sess.get_outputs()[0].name

    print(input_name)
    print(label_name)

    pred_onx = sess.run([label_name], {input_name: sample_input})[0]
    print(model_name, pred_onx)

    # Fail with a readable message instead of an opaque boto3 parameter error.
    if not bucket_name:
        raise ValueError(
            "No S3 bucket configured: set the AWS_S3_BUCKET environment variable "
            f"and re-run the configuration cell (local file kept at {onnx_model_path})."
        )

    s3_client.upload_file(onnx_model_path, bucket_name, Key=f"{model_name}.onnx")

    print(f"File {onnx_model_path} uploaded to S3!")

In [8]:
# Export, smoke-test and upload every configured model, one after another.
for model_name in MODEL_NAMES:
    export_and_upload(model_name)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


X
variable
danceability-bayes-1000_probs-by_track [[0.63926417]]
File ../_models/danceability-bayes-1000_probs-by_track.onnx uploaded to S3!


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


X
variable
acousticness-bayes-1000_probs-by_track [[0.13018572]]
File ../_models/acousticness-bayes-1000_probs-by_track.onnx uploaded to S3!


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


X
variable
energy-bayes-1000_probs-by_track [[0.6762643]]
File ../_models/energy-bayes-1000_probs-by_track.onnx uploaded to S3!


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


X
variable
instrumentalness-bayes-1000_probs-by_track [[0.28317472]]
File ../_models/instrumentalness-bayes-1000_probs-by_track.onnx uploaded to S3!


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


X
variable
valence-bayes-1000_probs-by_track [[0.46226376]]
File ../_models/valence-bayes-1000_probs-by_track.onnx uploaded to S3!
