In [None]:
import os
import mlflow
import numpy as np
import pandas as pd
from mlflow.tracking import MlflowClient
from app.preprocessing.text import prepare_text_data
from app.utilities.utils import load_dataset, load_artifact_locally

In [None]:
# Tracking server used by both the fluent API (mlflow.*) and the explicit client.
TRACKING_URI = "http://localhost:2020/"

# Configure the tracking URI *before* creating the client: MlflowClient
# resolves its tracking URI when it is constructed, so a client built earlier
# would keep pointing at the default store rather than this server.
mlflow.set_tracking_uri(TRACKING_URI)

# Pass the URI explicitly so the client does not depend on global state.
client = MlflowClient(tracking_uri=TRACKING_URI)

In [None]:
# Load the inference dataset and keep only its "item" entry
# (presumably the raw text column — confirm against load_dataset).
inference_data = load_dataset(filepath="data/inference.csv")
text_inference = inference_data["item"]

In [None]:
# Fetch the runs of the "Domain Classification" experiment, ordered by the
# "f1" metric in descending order so the first row is the best-scoring run.
runs = mlflow.search_runs(
    experiment_names=["Domain Classification"],
    order_by=["metrics.f1 DESC"],
)

# Later cells read the first row with `.iloc[0]`; fail here with an actionable
# message instead of an opaque IndexError further down the notebook.
if runs.empty:
    raise RuntimeError(
        'No runs found for experiment "Domain Classification"; '
        "check the tracking URI and the experiment name."
    )

In [None]:
# Local directory that downloaded run artifacts are written into.
local_dir = "inference/"

# Create the directory if it is missing. `exist_ok=True` makes the cell safe to
# re-run and avoids the check-then-create race of `os.path.exists` + `os.mkdir`.
os.makedirs(local_dir, exist_ok=True)

In [None]:
# Artifact sub-path of the run to download (despite the variable name, this is
# the "models/" directory).
metrics_path = "models/"

# `runs` is ordered by f1 descending, so the first row is the best run.
run_id = runs["run_id"].iloc[0]

# Download the run's "models/" artifacts into `local_dir`; the call returns the
# local path of the downloaded artifact.
local_path = client.download_artifacts(run_id=run_id, path=metrics_path, dst_path=local_dir)

# Build the tokenizer path with os.path.join rather than string formatting:
# `local_path` may already end with a separator (the artifact path is requested
# with a trailing slash), and join keeps the result portable.
# NOTE(review): the ".pkl" extension suggests pickle deserialization inside
# load_artifact_locally — only load artifacts from a trusted tracking server.
tokenizer = load_artifact_locally(
    local_path=os.path.join(local_path, "preprocessing", "tokenizer.pkl")
)

In [None]:
# Turn the raw inference texts into model inputs using the tokenizer that was
# downloaded alongside the selected run.
prep_data = prepare_text_data(
    tokenizer=tokenizer,
    texts=text_inference,
)

In [None]:
# The model type is the part of the best run's name that precedes the first "-"
# (the whole name when it contains no "-").
best_run_name = runs["tags.mlflow.runName"].iloc[0]
model_type = best_run_name.partition("-")[0]

In [None]:
# Load the selected run's model through the generic pyfunc interface, addressed
# by a "runs:/" URI under the run's "models/<model_type>" artifact path.
model_uri = f"runs:/{run_id}/models/{model_type}"
model = mlflow.pyfunc.load_model(model_uri)

In [None]:
# Run the model on the prepared inputs and take the index of the largest value
# along axis 1 of its output as the prediction for each row.
model_output = model.predict(prep_data)
predicted_labels = np.argmax(model_output, axis=1)

# Bare last expression so the notebook renders the predictions table.
pd.DataFrame({"prediction": predicted_labels})