In [1]:
### import libraries
from mlflow.models.signature import infer_signature
import mlflow
from sklearn import datasets
from sklearn import metrics
import requests
import json
import numpy as np 
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from pathlib import Path
# sasctl interface for importing models
import sasctl.pzmm as pzmm 
from sasctl import Session
import warnings
import getpass
from sasctl import Session
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation and input-validation warnings raised
# by the libraries used below — consider narrowing the filter.
warnings.filterwarnings("ignore")


## Launch the MLflow tracking server
#### In a terminal (cmd), run:
 mlflow server --backend-store-uri sqlite:///backend.db --default-artifact-root ./mlruns



In [2]:
## Set up the MLflow experiment
# Point the client at the locally running tracking server, then select the
# experiment that will hold this notebook's runs (it is created if it does not exist).
tracking_uri = "http://127.0.0.1:5000"
experiment_name = "digits-classification-experiment_sasctl"

mlflow.set_tracking_uri(tracking_uri)
mlflow.set_experiment(experiment_name)

2023/03/14 17:05:48 INFO mlflow.tracking.fluent: Experiment with name 'digits-classification-experiment_sasctl' does not exist. Creating a new experiment.


<Experiment: artifact_location='file:///D:/sasctl/mlruns/1', creation_time=1678827948145, experiment_id='1', last_update_time=1678827948145, lifecycle_stage='active', name='digits-classification-experiment_sasctl', tags={}>

In [3]:
#### Load the dataset and split it into train and test sets
digits = datasets.load_digits()   # scikit-learn's bundled digits dataset
x = digits.data                   # feature matrix
y = digits.target                 # class labels

# Combine features and labels into a single DataFrame with named columns.
# NOTE(review): stacking the labels next to the float features makes the 'target'
# column float (labels/predictions print as 8.0) — confirm this is intended.
feature_cols = list(digits['feature_names'])
df = pd.DataFrame(data=np.c_[digits['data'], digits['target']],
                  columns=feature_cols + ['target'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    df[feature_cols], df['target'], test_size=0.2, random_state=42
)

In [4]:
# Preview the first rows of the training features.
x_train.head()

Unnamed: 0,pixel_0_0,pixel_0_1,pixel_0_2,pixel_0_3,pixel_0_4,pixel_0_5,pixel_0_6,pixel_0_7,pixel_1_0,pixel_1_1,...,pixel_6_6,pixel_6_7,pixel_7_0,pixel_7_1,pixel_7_2,pixel_7_3,pixel_7_4,pixel_7_5,pixel_7_6,pixel_7_7
1734,0.0,0.0,3.0,14.0,1.0,0.0,0.0,0.0,0.0,0.0,...,11.0,0.0,0.0,0.0,3.0,11.0,16.0,13.0,4.0,0.0
855,0.0,0.0,9.0,9.0,4.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,6.0,16.0,14.0,3.0,0.0,0.0
1642,0.0,0.0,0.0,10.0,13.0,3.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,2.0,11.0,13.0,6.0,0.0,0.0
175,0.0,1.0,10.0,16.0,16.0,11.0,0.0,0.0,0.0,5.0,...,4.0,0.0,0.0,1.0,15.0,14.0,11.0,4.0,0.0,0.0
925,0.0,0.0,6.0,14.0,13.0,3.0,0.0,0.0,0.0,0.0,...,2.0,0.0,0.0,0.0,4.0,15.0,16.0,9.0,0.0,0.0


In [5]:
## Define and fit the random-forest model
# random_state pins the forest's internal randomness (bootstrap samples and
# feature sub-sampling) so the fitted model and its score are reproducible
# across Restart & Run All; 42 matches the seed used for the train/test split.
model = RandomForestClassifier(n_estimators=300, random_state=42).fit(x_train, y_train)

In [6]:
## Model evaluation on the held-out test data (score() is mean accuracy for a classifier)
model.score(x_test,y_test)

0.9722222222222222

In [7]:
# Sanity check: predict the label of a single training row.
# Selecting with a list (iloc[[102]]) keeps a one-row DataFrame, so the model
# receives the same column names it was fitted with instead of a bare list of values.
model.predict(x_train.iloc[[102]])

array([8.])

In [8]:
# Ground-truth label of training row 102, for comparison with the prediction.
y_train.iloc[102]

8.0

In [9]:
## The model signature records the schema of the model's inputs and outputs
# Infer it from the training features and the predictions the model makes on them.
train_predictions = model.predict(x_train)
signature = infer_signature(x_train, train_predictions)

In [10]:
# Display the inferred input/output schema.
signature

inputs: 
  ['pixel_0_0': double, 'pixel_0_1': double, 'pixel_0_2': double, 'pixel_0_3': double, 'pixel_0_4': double, 'pixel_0_5': double, 'pixel_0_6': double, 'pixel_0_7': double, 'pixel_1_0': double, 'pixel_1_1': double, 'pixel_1_2': double, 'pixel_1_3': double, 'pixel_1_4': double, 'pixel_1_5': double, 'pixel_1_6': double, 'pixel_1_7': double, 'pixel_2_0': double, 'pixel_2_1': double, 'pixel_2_2': double, 'pixel_2_3': double, 'pixel_2_4': double, 'pixel_2_5': double, 'pixel_2_6': double, 'pixel_2_7': double, 'pixel_3_0': double, 'pixel_3_1': double, 'pixel_3_2': double, 'pixel_3_3': double, 'pixel_3_4': double, 'pixel_3_5': double, 'pixel_3_6': double, 'pixel_3_7': double, 'pixel_4_0': double, 'pixel_4_1': double, 'pixel_4_2': double, 'pixel_4_3': double, 'pixel_4_4': double, 'pixel_4_5': double, 'pixel_4_6': double, 'pixel_4_7': double, 'pixel_5_0': double, 'pixel_5_1': double, 'pixel_5_2': double, 'pixel_5_3': double, 'pixel_5_4': double, 'pixel_5_5': double, 'pixel_5_6': double, '

In [11]:
## Log the model's test score to MLflow
# Evaluate once on the held-out data, echo the value, then record it as the
# "score" metric in MLflow.
score = model.score(x_test, y_test)
print("Score: %s" % (score,))
mlflow.log_metric(key="score", value=score)


The git executable must be specified in one of the following ways:
    - be included in your $PATH
    - be set via $GIT_PYTHON_GIT_EXECUTABLE
    - explicitly set via git.refresh()

All git commands will error until this is rectified.

$GIT_PYTHON_REFRESH environment variable. Use one of the following values:
    - error|e|raise|r|2: for a raised exception

Example:
    export GIT_PYTHON_REFRESH=quiet



Score: 0.9722222222222222


In [12]:
### Log the fitted model (with its signature) as an artifact of the active run
mlflow.sklearn.log_model(model, "model", signature=signature)
# RunInfo.run_id is the supported attribute; run_uuid is its deprecated alias.
print("Model saved in run %s" % mlflow.active_run().info.run_id)

Model saved in run 4fdaa6c1a85b4f59999bfada26bc5cd4


In [13]:
# Local folder holding the logged model's artifacts.
# The experiment id and run id are both read from the active run, so the path
# stays valid if the experiment is recreated under a different id
# (run_id replaces the deprecated run_uuid alias).
# NOTE(review): assumes the notebook's working directory is the one containing
# ./mlruns (the server's --default-artifact-root) — confirm.
active_run_info = mlflow.active_run().info
mlPath = Path(f'./mlruns/{active_run_info.experiment_id}/{active_run_info.run_id}/artifacts/model')

In [14]:
# Display the resolved model artifact path.
mlPath

WindowsPath('mlruns/1/4fdaa6c1a85b4f59999bfada26bc5cd4/artifacts/model')

In [15]:
## Read the logged model's metadata: general model details plus input and output variable specs
mlflow_metadata = pzmm.MLFlowModel.read_mlflow_model_file(mlPath)
varDict, inputsDict, outputsDict = mlflow_metadata

In [16]:
# Display the general model details read from the MLflow model folder.
varDict

{'python_version': '3.10.10',
 'serialization_format': 'cloudpickle',
 'run_id': '4fdaa6c1a85b4f59999bfada26bc5cd4',
 'model_path': 'model.pkl',
 'mlflowPath': WindowsPath('mlruns/1/4fdaa6c1a85b4f59999bfada26bc5cd4/artifacts/model')}

In [17]:
# Display the input variable specs (one name/type entry per feature).
inputsDict

[{'name': 'pixel_0_0', 'type': 'double'},
 {'name': 'pixel_0_1', 'type': 'double'},
 {'name': 'pixel_0_2', 'type': 'double'},
 {'name': 'pixel_0_3', 'type': 'double'},
 {'name': 'pixel_0_4', 'type': 'double'},
 {'name': 'pixel_0_5', 'type': 'double'},
 {'name': 'pixel_0_6', 'type': 'double'},
 {'name': 'pixel_0_7', 'type': 'double'},
 {'name': 'pixel_1_0', 'type': 'double'},
 {'name': 'pixel_1_1', 'type': 'double'},
 {'name': 'pixel_1_2', 'type': 'double'},
 {'name': 'pixel_1_3', 'type': 'double'},
 {'name': 'pixel_1_4', 'type': 'double'},
 {'name': 'pixel_1_5', 'type': 'double'},
 {'name': 'pixel_1_6', 'type': 'double'},
 {'name': 'pixel_1_7', 'type': 'double'},
 {'name': 'pixel_2_0', 'type': 'double'},
 {'name': 'pixel_2_1', 'type': 'double'},
 {'name': 'pixel_2_2', 'type': 'double'},
 {'name': 'pixel_2_3', 'type': 'double'},
 {'name': 'pixel_2_4', 'type': 'double'},
 {'name': 'pixel_2_5', 'type': 'double'},
 {'name': 'pixel_2_6', 'type': 'double'},
 {'name': 'pixel_2_7', 'type': 'do

In [18]:
# Display the output variable specs.
outputsDict

[{'type': 'tensor', 'tensor-spec': {'dtype': 'float64', 'shape': [-1]}}]

In [19]:
## Pickle the model into the folder that will be packaged for SAS Model Manager
modelPrefix = 'RandomForestClassifier'
zipFolder = Path.cwd() / 'MLFlowModels' / modelPrefix
# Create the destination up front so a fresh working directory does not fail
# when the model files are written; exist_ok keeps the cell safe to re-run.
zipFolder.mkdir(parents=True, exist_ok=True)
pzmm.PickleModel.pickle_trained_model(
    trained_model=model,
    model_prefix=modelPrefix,
    pickle_path=zipFolder,
    mlflow_details=varDict,
)

In [20]:
## Write the input and output variable specs to JSON files
J = pzmm.JSONFiles()
# Inputs are written first (isInput=True), then outputs (isInput=False).
for var_specs, is_input in ((inputsDict, True), (outputsDict, False)):
    J.writeVarJSON(var_specs, isInput=is_input, jPath=zipFolder)

inputVar.json was successfully written and saved to D:\sasctl\MLFlowModels\RandomForestClassifier\inputVar.json
outputVar.json was successfully written and saved to D:\sasctl\MLFlowModels\RandomForestClassifier\outputVar.json


In [21]:
# Write the model properties to a JSON file in the model folder.
# NOTE(review): the empty targetVariable/modelPredictors and targetEvent=1 /
# numTargetCategories=1 look like placeholders for a multi-class digits model —
# confirm against the pzmm documentation.
model_properties = dict(
    modelName=modelPrefix,
    modelDesc='MLFlow Model ',
    targetVariable='',
    modelType='RandomForestClassifier',
    modelPredictors='',
    targetEvent=1,
    numTargetCategories=1,
    eventProbVar='tensor',
    jPath=zipFolder,
    modeler='sasdemo',
)
J.writeModelPropertiesJSON(**model_properties)

# Write the file metadata to a JSON file in the same folder.
J.writeFileMetadataJSON(modelPrefix, jPath=zipFolder)

ModelProperties.json was successfully written and saved to D:\sasctl\MLFlowModels\RandomForestClassifier\ModelProperties.json
fileMetaData.json was successfully written and saved to D:\sasctl\MLFlowModels\RandomForestClassifier\fileMetaData.json


In [22]:
## Prompt for the SAS server username, password and hostname
# getpass keeps all three values out of the notebook source and its saved output.
credential_prompts = ("Username: ", "Password: ", "Hostname: ")
username, password, host = [getpass.getpass(prompt) for prompt in credential_prompts]

Username: ········
Password: ········
Hostname: ········


In [23]:
# Open the session with the SAS server using the credentials entered above.
# NOTE(review): verify_ssl=False turns off TLS certificate verification; enable
# it for anything other than a trusted test environment.
sess = Session(
    host,
    username=username,
    password=password,
    verify_ssl=False,
)



In [24]:
## Register the model with SAS Model Manager
# Packages the files in zipFolder and imports them under the model name
# modelPrefix into the project 'MLFlowTest' (the project is created when missing,
# per the recorded output).
# NOTE(review): '{}.predict({})' appears to be the template for the prediction call
# in the generated score code, and force=True presumably overwrites an existing
# project/model — confirm against the pzmm ImportModel documentation.
I = pzmm.ImportModel()
I.pzmmImportModel(zipFolder, modelPrefix, 'MLFlowTest', inputsDict, None, '{}.predict({})', metrics=['tensor'], force=True)

All model files were zipped to D:\sasctl\MLFlowModels\RandomForestClassifier.
A new project named MLFlowTest was created.
Model was successfully imported into SAS Model Manager as RandomForestClassifier with UUID: 969a7450-c8b0-4a0d-a50d-a0b1f9e17f70.
Model score code was written successfully to D:\sasctl\MLFlowModels\RandomForestClassifier\RandomForestClassifierScore.py and uploaded to SAS Model Manager
