In [None]:
!pip install ntropy-sdk pandas scikit-learn

In [None]:
import json
from sklearn.metrics import confusion_matrix
import pandas as pd
import pickle

In [None]:
# Read the evaluation set and collect its distinct labels, kept in order of
# first appearance, for use as the confusion-matrix axes further down.
df_test = pd.read_csv("data/test.csv")
label_names = df_test["labels"].drop_duplicates().tolist()
print("Got", df_test.shape[0], "test samples with labels", label_names)

In [None]:
# Restore the trained model object from its pickled artifact.
# SECURITY NOTE: unpickling executes arbitrary code embedded in the file, so
# "artifacts/model.pkl" must come from a trusted source (e.g. the training
# step of this same pipeline) — never load a pickle of unknown origin.
with open("artifacts/model.pkl", "rb") as model_file:
    model = pickle.load(model_file)

In [None]:
# Run the loaded model over the test set; the result is later paired
# row-by-row with `df_test.labels`.
# NOTE(review): the whole DataFrame is passed in, including the `labels`
# column — presumably the model only reads its feature columns; confirm to
# rule out label leakage.
predictions = model.predict(df_test)

In [None]:
# Accumulates every output descriptor that ends up in the UI metadata file.
metadata_outputs = []

# Predictions table: one row per test sample, showing the input description,
# the model's prediction and the ground-truth label side by side.
predictions_df = pd.DataFrame(
    {
        "description": df_test["description"].tolist(),
        "prediction": predictions,
        "label": df_test["labels"].tolist(),
    }
)

# The header is supplied separately, so the inline CSV carries data rows only.
predictions_table = {
    "type": "table",
    "storage": "inline",
    "format": "csv",
    "header": predictions_df.columns.tolist(),
    "source": predictions_df.to_csv(header=False, index=False),
}
metadata_outputs.append(predictions_table)

# Confusion matrix
# Index the matrix by every label seen in either the ground truth or the
# predictions. scikit-learn drops samples whose true or predicted label is
# missing from `labels`, so using only the test-set labels would silently
# under-count errors whenever the model predicts a class that does not occur
# in the test set. When all predictions are known labels, `cm_labels` is
# identical to `label_names` and the output is unchanged.
predicted_labels = pd.Series(predictions).unique().tolist()
cm_labels = label_names + [
    label for label in predicted_labels if label not in label_names
]

cm = confusion_matrix(df_test.labels, predictions, labels=cm_labels)

# Flatten the square matrix into long-form (target, predicted, count) rows;
# rows of `cm` are true labels, columns are predicted labels.
cm_rows = [
    [target_label, predicted_label, cm[i, j]]
    for i, target_label in enumerate(cm_labels)
    for j, predicted_label in enumerate(cm_labels)
]
cm_df = pd.DataFrame(cm_rows, columns=["target", "predicted", "count"])
cm_csv = cm_df.to_csv(header=False, index=False)

metadata_outputs.append({
    "type": "confusion_matrix",
    "format": "csv",
    "labels": cm_labels,
    "storage": "inline",
    "source": cm_csv,
    # Column schema of the header-less inline CSV above.
    "schema": [
        {
            "name": "target",
            "type": "CATEGORY",
        },
        {
            "name": "predicted",
            "type": "CATEGORY",
        },
        {
            "name": "count",
            "type": "NUMBER",
        },
    ],
})

# Persist all collected output descriptors as a single JSON document under
# the "outputs" key.
ui_metadata = {"outputs": metadata_outputs}
with open("mlpipeline-ui-metadata.json", mode="w", encoding="utf-8") as metadata_file:
    json.dump(ui_metadata, fp=metadata_file)