In [1]:
### import libraries
from mlflow.models.signature import infer_signature
import mlflow
from sklearn import datasets
from sklearn import metrics
import requests
import json
import numpy as np 
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from pathlib import Path
# sasctl interface for importing models
import sasctl.pzmm as pzmm 
from sasctl import Session
import warnings
import getpass
from sasctl import Session
warnings.filterwarnings("ignore")

In [2]:
#### Load the digits dataset and split it into train and test sets
digits = datasets.load_digits()  # dataset bundled with scikit-learn
x = digits.data    # features
y = digits.target  # targets

# Put features and target into one DataFrame so every column carries its name.
df = pd.DataFrame(
    data=np.c_[digits['data'], digits['target']],
    columns=digits['feature_names'] + ['target'],
)

# Hold out 20% of the rows for testing; random_state fixes the shuffle.
x_train, x_test, y_train, y_test = train_test_split(
    df[digits['feature_names']], df['target'], test_size=0.2, random_state=42
)

# Keep the preview as the LAST expression: Jupyter only renders the value of a
# cell's final expression, so a mid-cell `df.head()` is silently discarded.
df.head()

In [3]:
## Launch the MLflow tracking server from the command line (run this in a terminal before the next cell):
## mlflow server --backend-store-uri sqlite:///backend.db --default-artifact-root ./mlruns

In [4]:
## Point the MLflow client at the tracking server and select the experiment
import mlflow

TRACKING_URI = "http://127.0.0.1:5000"
EXPERIMENT_NAME = "digits-classification"

# All subsequent mlflow.* logging calls go to this tracking URI.
mlflow.set_tracking_uri(TRACKING_URI)
# Last expression on purpose: the returned Experiment is shown as the cell output.
mlflow.set_experiment(EXPERIMENT_NAME)

<Experiment: artifact_location='file:///C:/code/sascode/python/mlruns/1', creation_time=1695264776384, experiment_id='1', last_update_time=1695264776384, lifecycle_stage='active', name='digits-classification', tags={}>

In [5]:
## Fit the random-forest model
# random_state pins the forest's internal randomness so the score below is
# reproducible on Restart & Run All (the train/test split is already seeded).
model = RandomForestClassifier(
    n_estimators=300, max_depth=20, random_state=42
).fit(x_train, y_train)

## The model signature defines the schema of the model's inputs and outputs
signature = infer_signature(x_train, model.predict(x_train))

## Log the model score on the test split to MLflow
# NOTE: no explicit mlflow.start_run() here; the fluent API is relied on to open
# a run implicitly, and the next cell reads it back through mlflow.active_run().
score = model.score(x_test, y_test)
print(f"Score: {score}")
mlflow.log_metric("score", score)

## Log the fitted model together with its signature
mlflow.sklearn.log_model(model, "model", signature=signature)
# RunInfo.run_uuid is deprecated in MLflow; run_id carries the same value.
print(f"Model saved in run {mlflow.active_run().info.run_id}")

Score: 0.975
Model saved in run 0e6e4a52836848cf8ab2d471a79186a0


In [6]:
## Locate the logged model inside the local ./mlruns artifact store
# Read the experiment id from the active run instead of hard-coding "1", so the
# path stays valid when the experiment is created under a different id.
# Assumes the notebook's working directory is the one that contains ./mlruns
# (the --default-artifact-root passed to `mlflow server`) -- TODO confirm.
run_info = mlflow.active_run().info
mlPath = Path("./mlruns") / run_info.experiment_id / run_info.run_id / "artifacts" / "model"

## Get info about the model variables, inputs and outputs
varDict, inputsDict, outputsDict = pzmm.MLFlowModel.read_mlflow_model_file(mlPath)

In [None]:
# Debug method by jpnpul

import json
from pathlib import Path

class DebugMLFlowModel:
    """Local copy of the MLmodel reader, used to debug signature parsing."""

    # Metadata keys that must be present in the MLmodel file.
    _REQUIRED_KEYS = ("python_version", "serialization_format", "run_id", "model_path")

    @staticmethod
    def _read_scalar(m_lines, key):
        """Return the text after ``key:`` on the first line defining it, else None."""
        prefix = key + ":"
        for line in m_lines:
            stripped = line.strip()
            if stripped.startswith(prefix):
                value = stripped[len(prefix):].strip()
                return value or None
        return None

    @staticmethod
    def _read_json_field(m_lines, key):
        """Return the decoded JSON stored under ``key`` (None if absent or null)."""
        prefix = key + ":"
        for start, line in enumerate(m_lines):
            if line.strip().startswith(prefix):
                break
        else:
            return None

        # A long value is folded over several lines that are indented deeper
        # than the key itself; the value ends at the first line that is blank
        # or not indented deeper. This keeps sibling keys written after
        # ``outputs:`` (for example ``params: null``) out of the JSON text.
        key_indent = len(line) - len(line.lstrip())
        pieces = [line.strip()[len(prefix):].strip()]
        for follow in m_lines[start + 1:]:
            text = follow.strip()
            if not text or len(follow) - len(follow.lstrip()) <= key_indent:
                break
            pieces.append(text)

        # A folded line break stands for a single space, so join with " "
        # (joining with "" would glue together names that contain spaces).
        raw = " ".join(pieces)
        if len(raw) >= 2 and raw[0] == raw[-1] == "'":
            # Single-quoted YAML scalar: drop the quotes, un-double inner quotes.
            raw = raw[1:-1].replace("''", "'")
        try:
            return json.loads(raw)
        except json.JSONDecodeError as err:
            raise ValueError(
                f"Could not parse the '{key}' signature entry as JSON."
            ) from err

    @classmethod
    def read_mlflow_model_file(cls, m_path=None):
        """
        Read and return model metadata and input/output variables as dictionaries from
        an MLFlow model directory.

        Current implementation only handles simple pickled models. Future feature work
        is required to include more types of MLFlow models.

        Parameters
        ----------
        m_path : str or Path object, optional
            Directory path of the MLFlow model files. Default is the current working
            directory, resolved when the method is called.

        Returns
        -------
        var_dict : dict
            Model properties and metadata
        inputs_dict : list of dicts
            Model input variables
        outputs_dict : list of dicts
            Model output variables

        Raises
        ------
        ValueError
            If a required metadata key is missing, if the signature has no usable
            inputs/outputs entries, or if a signature entry is not valid JSON.
        """
        if m_path is None:
            # Resolve at call time; a ``Path.cwd()`` default in the signature
            # would be frozen at the moment the class is defined.
            m_path = Path.cwd()

        with open(Path(m_path) / "MLmodel", "r", encoding="utf-8") as m_file:
            m_lines = m_file.readlines()

        # Read in metadata and properties from the MLFlow model
        var_dict = {}
        for key in cls._REQUIRED_KEYS:
            value = cls._read_scalar(m_lines, key)
            if value is None:
                raise ValueError("This MLFlow model type is not currently supported.")
            var_dict[key] = value
        var_dict["mlflowPath"] = m_path

        # Read in the input and output variables
        inputs_dict = cls._read_json_field(m_lines, "inputs")
        outputs_dict = cls._read_json_field(m_lines, "outputs")
        if inputs_dict is None or outputs_dict is None:
            raise ValueError(
                "Improper or unset signature values for model. No input or output "
                "dicts could be generated. "
            )
        return var_dict, inputs_dict, outputs_dict

In [None]:
# Repeat the MLmodel read with the local DebugMLFlowModel copy instead of
# pzmm.MLFlowModel, rebinding varDict, inputsDict and outputsDict.
varDict, inputsDict, outputsDict = DebugMLFlowModel.read_mlflow_model_file(mlPath)

In [None]:
# Show the output variables returned by the debug reader as the cell result.
outputsDict