# In [12]:
# %load py_src/test_model_and_save_metrics.py
# %load py_src/test_model_and_save_metrics.py


# Keep the %%writefile line below commented out to make saving/loading this cell easier.
# %%writefile py_src/test_model_and_save_metrics.py

from typing import NamedTuple

def test_model_and_save_metrics(args:dict) -> NamedTuple('Output', [('mlpipeline_ui_metadata', 'UI_metadata'),('mlpipeline_metrics', 'Metrics')]) :
    """Evaluate the stored iris classifier and return UI metadata and metrics as JSON.

    The function downloads a joblib-serialized classifier and the iris dataset
    (``data/original/iris.npz``) from a MinIO bucket, predicts on the test
    split, and builds two JSON documents: one holding a confusion matrix and
    a markdown summary, the other holding the accuracy metric.

    Args:
        args: Configuration dictionary. Recognized keys:

            * ``bucket_name`` (required): bucket holding the model and data.
            * ``model_save_prefix``: remote prefix of the model
              (default ``"models/trained/iris"``).
            * ``version``: model version sub-path (default ``"1"``).
            * ``minio_access_key`` / ``minio_secret_key``: MinIO credentials
              (defaults ``"minio"`` / ``"minio123"``).

            Any other keys (e.g. ``device_name``, ``test_batch_size``) are
            ignored.

    Returns:
        A namedtuple ``Output(mlpipeline_ui_metadata, mlpipeline_metrics)``
        whose two fields are JSON strings.

    Raises:
        ValueError: If ``bucket_name`` is missing or empty.
        KeyError: If ``MINIO_SERVICE_SERVICE_HOST`` or
            ``MINIO_SERVICE_SERVICE_PORT_HTTP`` is not set in the environment.
    """
    # Imports are kept function-local, presumably so the function can be
    # shipped as a standalone pipeline component -- confirm before hoisting.
    import json
    import os
    import uuid
    from collections import namedtuple

    import numpy as np
    import pandas as pd
    from joblib import load
    from minio import Minio
    from sklearn.metrics import accuracy_score
    from sklearn.metrics import confusion_matrix as sk_confusion_matrix

    bucket_name = args.get("bucket_name", None)
    if not bucket_name:
        # Fail early with a clear message instead of an opaque client error.
        raise ValueError("args['bucket_name'] is required")
    model_save_prefix = args.get("model_save_prefix", "models/trained/iris")
    version = args.get("version", "1")

    def gen_cm_csv(y_test=None, test_predictions=None, labels=None):
        """Return the confusion matrix as headerless ``target,predicted,count`` CSV text.

        ``labels`` fixes the row/column order of the matrix. When omitted it
        is the sorted union of the true and predicted labels, so a class that
        only appears in the predictions still gets a name.
        """
        if labels is None:
            labels = np.unique(
                np.concatenate([np.ravel(y_test), np.ravel(test_predictions)])
            )
        confusion_matrix = sk_confusion_matrix(y_test, test_predictions, labels=labels)
        data = [
            (labels[target_index], labels[predicted_index], count)
            for target_index, target_row in enumerate(confusion_matrix)
            for predicted_index, count in enumerate(target_row)
        ]
        df_cm = pd.DataFrame(data, columns=['target', 'predicted', 'count'])
        return df_cm.to_csv(header=False, index=False)

    class NpJsonEncoder(json.JSONEncoder):
        """Serializes numpy objects as json."""

        def default(self, obj):
            if isinstance(obj, np.integer):
                return int(obj)
            elif isinstance(obj, np.bool_):
                return bool(obj)
            elif isinstance(obj, np.floating):
                if np.isnan(obj):
                    return None  # Serialized as JSON null.
                return float(obj)
            elif isinstance(obj, np.ndarray):
                return obj.tolist()
            else:
                return super().default(obj)

    # The MinIO endpoint is taken from the service environment variables.
    minio_url = "{}:{}".format(
        os.environ["MINIO_SERVICE_SERVICE_HOST"],
        os.environ["MINIO_SERVICE_SERVICE_PORT_HTTP"],
    )
    # NOTE(review): the defaults are the previously hard-coded credentials;
    # pass real ones through ``args`` outside of development setups.
    minio_client = Minio(
        endpoint=minio_url,
        access_key=args.get("minio_access_key", "minio"),
        secret_key=args.get("minio_secret_key", "minio123"),
        secure=False,
    )

    model_file_name = "iris.model.classifier.joblib"
    model_remote_path = "{}/{}".format(model_save_prefix, version)
    model_saved_path = "/tmp/{}/{}".format(model_save_prefix, version)
    model_script_remote_path = "{}/{}".format(model_remote_path, model_file_name)
    model_script_save_path = "{}/{}".format(model_saved_path, model_file_name)

    # Download the model.
    print(bucket_name,model_script_remote_path,model_script_save_path)
    minio_client.fget_object(bucket_name, model_script_remote_path, model_script_save_path)
    # SECURITY: joblib.load unpickles the file -- only use with a trusted bucket.
    model_classifier = load(model_script_save_path)

    # Download the data into a fresh, uniquely named directory.
    iris_data_dirpath = "/tmp/" + str(uuid.uuid4())
    iris_data_path = iris_data_dirpath + "/iris.npz"
    os.makedirs(iris_data_dirpath, exist_ok=True)
    model_data_remote_path = "data/original/iris.npz"
    minio_client.fget_object(bucket_name, model_data_remote_path, iris_data_path)
    # Close the archive once the arrays are read; only the test split
    # (arr_2 = features, arr_3 = labels) is needed here.
    with np.load(iris_data_path) as npzfile:
        x_test, y_test = npzfile["arr_2"], npzfile["arr_3"]

    # Test the model and estimate its accuracy.
    y_pred = model_classifier.predict(x_test)
    model_accuracy = accuracy_score(y_test, y_pred)

    # Use one label set for both the CSV rows and the UI labels so they
    # always agree, whatever classes the data actually contains.
    labels = np.unique(np.concatenate([np.ravel(y_test), np.ravel(y_pred)]))
    cm_csv = gen_cm_csv(y_test=y_test, test_predictions=y_pred, labels=labels)

    output_confussion_matrix = {
                "type": "confusion_matrix",
                "format": "csv",
                "schema": [
                    {'name': 'target', 'type': 'CATEGORY'},
                    {'name': 'predicted', 'type': 'CATEGORY'},
                    {'name': 'count', 'type': 'NUMBER'},
                  ],
                # NOTE(review): "actual" is not one of the schema column names
                # above ("target" is) -- confirm the consumer ignores this key.
                "target_col" : "actual",
                "predicted_col" : "predicted",
                "source": cm_csv,
                "storage": "inline",
                "labels": labels.tolist()
    }

    output_model_summary = {
                'type': 'markdown',
                'storage': 'inline',
                'source': f'''# Model Overview
## Model Summary

```
It is an SVM classifier.
```

## Model Performance

**Accuracy**: {model_accuracy}


'''
            }

    metadata = {"outputs": [output_confussion_matrix, output_model_summary]}
    metrics = {
      'metrics': [{
          'name': 'model_accuracy',
          'numberValue':  float(model_accuracy),
          'format' : "PERCENTAGE"
        }]}

    output = namedtuple('Output', ['mlpipeline_ui_metadata', 'mlpipeline_metrics'])
    return output(json.dumps(metadata, cls=NpJsonEncoder),json.dumps(metrics, cls=NpJsonEncoder))
