In [1]:
import pandas as pd
import seaborn as sns
#
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor
#
import mlflow
from mlflow.models.signature import infer_signature
#
import logging
import json 
import os
from sys import version_info
import time

In [2]:
mlflow_uri = "http://mlflow-server:8888"
mlflow.set_tracking_uri(mlflow_uri)
time.sleep(5)
#mlflow.set_experiment(mlflow_uri)
mlflow.set_experiment("v5_unify_model_artifact")

<Experiment: artifact_location='mlflow-artifacts:/850079440052878919', creation_time=1746329653095, experiment_id='850079440052878919', last_update_time=1746329653095, lifecycle_stage='active', name='v5_unify_model_artifact', tags={}>

In [3]:
logging.getLogger("mlflow").setLevel(logging.FATAL)

In [5]:
diamonds_df = sns.load_dataset('diamonds').drop(['cut', 'color', 'clarity'], axis=1)
#
X_train, X_test, y_train, y_test = train_test_split(diamonds_df.drop(["price"], axis=1), diamonds_df["price"], random_state=42)

X_train.head()

Unnamed: 0,carat,depth,table,x,y,z
35965,0.25,64.9,58.0,3.95,3.97,2.57
52281,0.84,61.8,56.0,6.04,6.07,3.74
6957,1.05,61.1,58.0,6.56,6.51,3.99
9163,1.02,60.7,56.0,6.53,6.5,3.95
50598,0.61,61.8,57.0,5.43,5.47,3.37


### Sklearn model

In [7]:
class sklearn_model(mlflow.pyfunc.PythonModel):
    def __init__(self, params):
        self.params = params
        self.rf_model = None
        self.config = None


    def load_context(self,context = None,config_path=None):
        """ When loading a pyfunc, this method runs automatically with the related
        context. This method is designed to perform the same functionality when
        run in a notebook or a downstream operation (like a REST endpoint).
        If the `context` object is provided, it will load the path to a config from 
        that object (this happens with `mlflow.pyfunc.load_model()` is called).
        If the `config_path` argument is provided instead, it uses this argument
        in order to load in the config. """
        #
        if context: # This block executes for server run
            config_path = context.artifacts["config_path"]
        else:       # This block executes for notebook run
            pass
        self.config = json.load(open(config_path))

        

    def fit(self, X_train, y_train):
        rf_model = RandomForestRegressor(**self.params)
        rf_model.fit(X_train, y_train)
        self.rf_model = rf_model

    def predict(self, context, model_input):
        return self.rf_model.predict(model_input)




In [8]:
params_sklearn = {
    "n_estimators": 15, 
    "max_depth": 5
}
#
# Designate a path
config_path_sklearn = "data_sklearn.json"
#
# Save the results
with open(config_path_sklearn, "w") as f:
    json.dump(params_sklearn, f)
#
# Generate an artifact object to saved
# All paths to the associated values will be copied over when saving
artifacts_sklearn = {"config_path": config_path_sklearn}

In [9]:
model_sk = sklearn_model(params_sklearn)
#
model_sk.load_context(config_path=config_path_sklearn) 
#
# Confirm the config has loaded
model_sk.config

{'n_estimators': 15, 'max_depth': 5}

In [10]:
model_sk.fit(X_train, y_train)

In [11]:
predictions_sklearn = model_sk.predict(context=None, model_input=X_test)
pd.DataFrame({'actual prices': list(y_test), 'predictions': list(predictions_sklearn)}).head(5)

Unnamed: 0,actual prices,predictions
0,559,583.298207
1,2201,1830.791949
2,1238,965.162314
3,1304,1028.927971
4,6901,10756.61359


In [12]:
signature_sklearn = infer_signature(X_test, predictions_sklearn)
signature_sklearn

inputs: 
  ['carat': double (required), 'depth': double (required), 'table': double (required), 'x': double (required), 'y': double (required), 'z': double (required)]
outputs: 
  [Tensor('float64', (-1,))]
params: 
  None

In [13]:
conda_env_sklearn = {
    "channels": ["defaults"],
    "dependencies": [
        f"python={version_info.major}.{version_info.minor}.{version_info.micro}",
        "pip",
        {"pip": ["mlflow",
                 f"scikit-learn=={sklearn.__version__}"]
        },
    ],
    "name": "sklearn_env"
}
conda_env_sklearn

{'channels': ['defaults'],
 'dependencies': ['python=3.12.10',
  'pip',
  {'pip': ['mlflow', 'scikit-learn==1.6.1']}],
 'name': 'sklearn_env'}

In [19]:
artifacts_sklearn

{'config_path': 'data_sklearn.json'}

In [18]:
with mlflow.start_run() as run:
    mlflow.pyfunc.log_model(
        "sklearn_RFR", 
        python_model=model_sk, 
        artifacts=artifacts_sklearn,
        conda_env=conda_env_sklearn,
        signature=signature_sklearn,
        input_example=X_test[:3] 
  )

Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 1704.31it/s]

üèÉ View run able-mare-537 at: http://mlflow-server:8888/#/experiments/850079440052878919/runs/42d208a03d1041a99bbedee6aab6e492
üß™ View experiment at: http://mlflow-server:8888/#/experiments/850079440052878919





In [6]:
#mlflow_pyfunc_model_path_sk = f"runs:/{run.info.run_id}/sklearn_RFR"
mlflow_pyfunc_model_path_sk = f"runs:/42d208a03d1041a99bbedee6aab6e492/sklearn_RFR"


print("model_uri : ",mlflow_pyfunc_model_path_sk)
loaded_preprocess_model_sk = mlflow.pyfunc.load_model(mlflow_pyfunc_model_path_sk)

  from .autonotebook import tqdm as notebook_tqdm


model_uri :  runs:/42d208a03d1041a99bbedee6aab6e492/sklearn_RFR


Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8/8 [00:00<00:00, 3980.36it/s] 


In [7]:
import pandas as pd
loaded_preprocess_model_sk.predict(pd.DataFrame(X_test))

array([ 583.29820674, 1830.79194908,  965.16231425, ..., 7007.66914078,
       5515.83513632, 5486.8803837 ])

#### Tensorflow model

In [14]:
!pip install tensorflow

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [8]:

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Normalization

print("TF version : ",tf.__version__)

    

TF version :  2.19.0


In [9]:
print("X_train : ",X_train.shape)
print("y_train : ",y_train.shape)

X_train :  (40455, 6)
y_train :  (40455,)


In [13]:
class custom_tf_model(mlflow.pyfunc.PythonModel):
    def __init__(self, params):
        self.params = params
        self.tf_model = None
        self.config = None


    def load_context(self,context = None,config_path=None):
        """ When loading a pyfunc, this method runs automatically with the related
        context. This method is designed to perform the same functionality when
        run in a notebook or a downstream operation (like a REST endpoint).
        If the `context` object is provided, it will load the path to a config from 
        that object (this happens with `mlflow.pyfunc.load_model()` is called).
        If the `config_path` argument is provided instead, it uses this argument
        in order to load in the config. """
        #
        if context: # This block executes for server run
            config_path = context.artifacts["config_path"]
            #self.tf_model = tf.keras.models.load_model(config_path)
        else:       # This block executes for notebook run
            pass
        self.config = json.load(open(config_path))

    def fit(self, X_train, y_train):
        tf_model = self.buildAndCompile_tf_regression_mode(input_shape=self.params["input_shape"])
        tf_model.fit(X_train,y_train,epochs= self.params["epoch"])
        self.tf_model = tf_model

    def predict(self, context, model_input):
        return self.tf_model.predict(model_input)
    

    def buildAndCompile_tf_regression_mode(self, input_shape):
        model = Sequential([
            Normalization(input_shape=input_shape),
            Dense(64, activation='relu'),
            Dense(64, activation='relu'),
            Dense(1)
        ])

        model.compile(loss='mean_absolute_error',
                        optimizer=tf.keras.optimizers.Adam(0.001))
        return model
    

    




In [14]:
params_tf = {
    "input_shape": [6], 
    "epoch": 3
}

config_path_tf = "data_tf.json"

# Save the results
with open(config_path_tf, "w") as f:
    json.dump(params_tf, f)
artifacts_tf = {"config_path": config_path_tf} 

In [15]:
model_tf = custom_tf_model(params_tf)
#
model_tf.load_context(config_path=config_path_tf) 
#
# Confirm the config has loaded
model_tf.config

{'input_shape': [6], 'epoch': 3}

In [16]:
model_tf.fit(X_train,y_train)

  super().__init__(**kwargs)


Epoch 1/3
[1m1265/1265[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m1s[0m 549us/step - loss: 3025.8635
Epoch 2/3
[1m1265/1265[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m1s[0m 592us/step - loss: 1753.0099
Epoch 3/3
[1m1265/1265[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m1s[0m 577us/step - loss: 819.2953


In [17]:
conda_env_tf = {
    "channels": ["defaults"],
    "dependencies": [
        f"python={version_info.major}.{version_info.minor}.{version_info.micro}",
        "pip",
        {"pip": ["mlflow",
                 f"tensorflow=={tf.__version__}"]
        },
    ],
    "name": "tf_env"
}
conda_env_tf

{'channels': ['defaults'],
 'dependencies': ['python=3.12.10',
  'pip',
  {'pip': ['mlflow', 'tensorflow==2.19.0']}],
 'name': 'tf_env'}

In [18]:
predictions_tf = model_tf.predict(context=None, model_input=X_test)
pd.DataFrame({'actual prices': list(y_test), 'predictions': list(predictions_tf)}).head(5)


[1m422/422[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 595us/step


Unnamed: 0,actual prices,predictions
0,559,[488.8025]
1,2201,[2020.9375]
2,1238,[936.3761]
3,1304,[1087.1653]
4,6901,[10265.899]


In [19]:
signature_tf = infer_signature(X_test, predictions_tf)
signature_tf

inputs: 
  ['carat': double (required), 'depth': double (required), 'table': double (required), 'x': double (required), 'y': double (required), 'z': double (required)]
outputs: 
  [Tensor('float32', (-1, 1))]
params: 
  None

In [20]:
artifacts_tf

{'config_path': 'data_tf.json'}

In [21]:
with mlflow.start_run() as run:
    mlflow.pyfunc.log_model(
        "tf_regressor", 
        python_model=model_tf, 
        artifacts=artifacts_tf,
        conda_env=conda_env_tf,
        signature=signature_tf,
        input_example=X_test[:3] 
  )

Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 853.89it/s] 

[1m1/1[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 35ms/step
üèÉ View run serious-stoat-470 at: http://mlflow-server:8888/#/experiments/850079440052878919/runs/1619b229bbf84cd6a3e95e35d1df8057
üß™ View experiment at: http://mlflow-server:8888/#/experiments/850079440052878919





In [23]:
mlflow_pyfunc_model_path_tf = f"runs:/{run.info.run_id}/tf_regressor"
print("mlflow_pyfunc_model_path_tf : ",mlflow_pyfunc_model_path_tf)



loaded_preprocess_model_tf = mlflow.pyfunc.load_model(mlflow_pyfunc_model_path_tf)


mlflow_pyfunc_model_path_tf :  runs:/1619b229bbf84cd6a3e95e35d1df8057/tf_regressor


Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8/8 [00:00<00:00, 239.73it/s]  


In [24]:
#
y_pred_tf = loaded_preprocess_model_tf.predict(X_test)
#
pd.DataFrame({'actual prices': list(y_test), 'predictions': list(y_pred_tf)}).head(5)

[1m422/422[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 410us/step


Unnamed: 0,actual prices,predictions
0,559,[488.8025]
1,2201,[2020.9375]
2,1238,[936.3761]
3,1304,[1087.1653]
4,6901,[10265.899]
