<a href="https://colab.research.google.com/github/tmoura/softexIA/blob/main/MLflow-5-Customization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install mlflow --quiet
!pip install pyngrok --quiet

In [None]:
from sklearn import tree
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics

import pandas as pd

url = "https://raw.githubusercontent.com/tmoura/machinelearning/master/datasets/iris.data"

# Carregar base de dados
# DataFrame
dataset = pd.read_csv(url, header=None)

columns = len(dataset.columns)

y = dataset[0] # extrai a primeira coluna, que é o label
X = dataset.loc[:,1:columns-1]

yNew = y.copy()
yNew[yNew == 1] = 0
yNew[yNew == 2] = 1
yNew[yNew == 3] = 2

In [19]:
X_train, X_test, y_train, y_test = train_test_split(X, yNew, test_size=0.2, random_state=None, stratify=y) # 80% treino e 20% teste

# *Model Customization*

In [None]:
import mlflow
import mlflow.sklearn

# Novos pacotes:
import sklearn
import joblib
import cloudpickle
mlflow.end_run()

exp1 = mlflow.set_experiment(experiment_name="experimento_modelcustomization")

mlflow.start_run(run_name="Run1.3",experiment_id=exp1.experiment_id)
model = tree.DecisionTreeClassifier(criterion="entropy",max_depth=2)
model = model.fit(X_train, y_train)

result = model.predict(X_test)
acc = metrics.accuracy_score(result, y_test)

mlflow.sklearn.autolog(
    log_input_examples=False,
    log_model_signatures=False,
    log_models=False
)

#mlflow.sklearn.log_model(model, "treemodel", signature=signature)

######################
#
# Model Customization
#
######################
joblib.dump(model, "sklearn_model.pkl")

artifacts = {
    "sklearn_model" : "sklearn_model.pkl",
}

class SklearnWrapper(mlflow.pyfunc.PythonModel):
    def load_context(self, context):
        self.sklearn_model = joblib.load(context.artifacts["sklearn_model"])

    def predict(self, context, model_input):
        return self.sklearn_model.predict(model_input.values)


# Create a Conda environment for the new MLflow Model that contains all necessary dependencies.
conda_env = {
    "channels": ["defaults"],
    "dependencies": [
        "python={}".format(3.10),
        "pip",
        {
            "pip": [
                "mlflow=={}".format(mlflow.__version__),
                "scikit-learn=={}".format(sklearn.__version__),
                "cloudpickle=={}".format(cloudpickle.__version__),
            ],
        },
    ],
    "name": "sklearn_env",
}

mlflow.pyfunc.log_model(
    artifact_path="sklear_mlflow_pyfunc",
    python_model=SklearnWrapper(),
    artifacts=artifacts,
    conda_env=conda_env
)

mlflow.end_run()

get_ipython().system_raw("mlflow ui --port 5000 &") # run tracking UI in the background

# *XGBoost Model*

In [None]:
!pip install xgboost

import xgboost as xgb

In [35]:
import mlflow

# Novos pacotes:
import joblib
import cloudpickle
mlflow.end_run()

exp1 = mlflow.set_experiment(experiment_name="experimento_modelcustomization_xgboost")

mlflow.start_run(run_name="Run1.1",experiment_id=exp1.experiment_id)
model = xgb.XGBClassifier()
model.fit(X_train, y_train)

result = model.predict(X_test)
acc = metrics.accuracy_score(result, y_test)

mlflow.sklearn.autolog(
    log_input_examples=False,
    log_model_signatures=False,
    log_models=False
)

######################
#
# Model Customization
#
######################
joblib.dump(model, "xgboost_model.pkl")

artifacts = {
    "xgboost_model" : "xgboost_model.pkl",
}

class XGBoostWrapper(mlflow.pyfunc.PythonModel):
    def load_context(self, context):
        self.xgboost_model = joblib.load(context.artifacts["xgboost_model"])

    def predict(self, context, model_input):
        return self.xgboost_model.predict(model_input.values)

conda_env = {
    "channels": ["defaults"],
    "dependencies": [
        "python={}".format(3.10),
        "pip",
        {
            "pip": [
                "mlflow=={}".format(mlflow.__version__),
                "scikit-learn=={}".format(xgb.__version__),
                "cloudpickle=={}".format(cloudpickle.__version__),
            ],
        },
    ],
    "name": "xgb_env",
}

mlflow.pyfunc.log_model(
    artifact_path="xgboost_mlflow_pyfunc3",
    python_model=XGBoostWrapper(),
    artifacts=artifacts,
    conda_env=conda_env,
)

mlflow.end_run()

get_ipython().system_raw("mlflow ui --port 5000 &") # run tracking UI in the background

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

 - scikit-learn (current: 1.2.2, required: scikit-learn==2.0.3)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.


# *Model Sklearn*

In [None]:
mlflow.end_run()

exp1 = mlflow.set_experiment(experiment_name="experimento_regularmodel")

mlflow.start_run(run_name="Run1.1",experiment_id=exp1.experiment_id)
model = tree.DecisionTreeClassifier()
model = model.fit(X_train, y_train)

result = model.predict(X_test)
acc = metrics.accuracy_score(result, y_test)

mlflow.log_param("Criterion", "entropy")
mlflow.log_param("MAX_DEPTH", 2)
mlflow.log_metric("Acurácia", acc)
mlflow.sklearn.log_model(model, "treemodelENTROPY")

##### Informações sobre o experimento

print("Nome: {}".format(exp1.name))
print("ID do experimento: {}".format(exp1.experiment_id))
print("Local dos Artefatos: {}".format(exp1.artifact_location))
print("Tags: {}".format(exp1.tags))
print("Estágio do Ciclo de Vida: {}".format(exp1.lifecycle_stage))
print("Tempo de criação: {}".format(exp1.creation_time))

mlflow.end_run()

# *Load Model*

In [None]:
# LOAD MODEL SkLEARN

modelNEW = mlflow.sklearn.load_model(model_uri="runs:/094463a8265741528086fbeb5d1f812a/treemodelENTROPY")

result = modelNEW.predict(X_test)
acc = metrics.accuracy_score(result, y_test)

show = round(acc * 100)

print("Acurácia de: {}%".format(show))

#####################

# LOAD MODEL CUSTOM

modelCUSTOM = mlflow.pyfunc.load_model(model_uri="runs:/625f06daa3a345318ad51b940a21d9a4/sklear_mlflow_pyfunc")

result = modelCUSTOM.predict(X_test)
acc = metrics.accuracy_score(result, y_test)

show = round(acc * 100)

print("Acurácia de: {}%".format(show))

# *Model Evaluate*

In [None]:
mlflow.end_run()

exp1 = mlflow.set_experiment(experiment_name="experimento_modelevaluate")

mlflow.start_run(run_name="Run1.1",experiment_id=exp1.experiment_id)
model = tree.DecisionTreeClassifier()
model = model.fit(X_train, y_train)

result = model.predict(X_test)
acc = metrics.accuracy_score(result, y_test)

mlflow.log_param("Criterion", "entropy")
mlflow.log_metric("Acurácia", acc)
mlflow.sklearn.log_model(model, "treemodelENTROPY")

artifacts_uri = mlflow.get_artifact_uri()

eval_data = X_test
eval_data["label"] = y_test

# Evaluate the logged model
result = mlflow.evaluate(
    artifacts_uri,
    eval_data,
    targets="label",
    model_type="classifier", # model_type="regressor"
    evaluators=["default"],
)

mlflow.end_run()

# Uso do NGROK

O Ngrok é uma ferramenta que permite criar um túnel seguro para conectar um servidor local a um servidor remoto, tornando possível expor uma aplicação local para a Internet. Essa ferramenta é muito útil para desenvolvedores que precisam testar suas aplicações localmente antes de colocá-las em produção.

In [28]:
from pyngrok import ngrok

# Terminate open tunnels if exist
ngrok.kill()

# Setting the authtoken (optional)
# Get your authtoken from https://dashboard.ngrok.com/auth
NGROK_AUTH_TOKEN = "2fbGojiXWPL7flUUKztsxbdTdrP_3DXnMDuQVRd5G5n6Fhr6A"
ngrok.set_auth_token(NGROK_AUTH_TOKEN)

# Open an HTTPs tunnel on port 5000 for http://localhost:5000
ngrok_tunnel = ngrok.connect(addr="5000", proto="http", bind_tls=True)
print("Endereço do MLflow:", ngrok_tunnel.public_url)

Endereço do MLflow: https://47fc-34-48-106-85.ngrok-free.app
