# Pipeline Example: Experimentation based on CaliforniaHousing Data

## 0) Setting up Modeva

In [None]:
## =============================================================
## Install or update packages(recommended to run in Terminal)
## =============================================================
!pip show modeva
# !pip uninstall modeva
#!pip install modeva

## 1) Build a Target Model without Modeva 

In [None]:
from xgboost import XGBRegressor
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

## Fetch the California Housing data; features are in `data.data`, the target in `data.target`.
data = fetch_california_housing()

## Hold out 3000 samples to represent the OOT dataset.
in_x, oot_x, in_y, oot_y = train_test_split(
    data.data,
    data.target,
    test_size=3000,
    random_state=0,
)

## Split the remaining samples into training and testing (33% for testing).
train_x, test_x, train_y, test_y = train_test_split(
    in_x,
    in_y,
    test_size=0.33,
    random_state=0,
)

## Fit the target model on the training split; the fit call is the cell's last expression.
xgb_model = XGBRegressor(max_depth=8, n_estimators=100)
xgb_model.fit(train_x, train_y)

In [None]:
## =============================================================
## Save the model into file system for future use
## =============================================================
import pickle
file_name = "ch_xgb.pkl"
## The context manager flushes and closes the file even if pickling raises.
with open(file_name, "wb") as model_file:
    pickle.dump(xgb_model, model_file)

In [None]:
## =============================================================
## Save the data into file system for future use
## =============================================================
import pandas as pd

## Each split is written the same way: feature columns followed by the target
## column, without the row index (`index=False`).
for split_x, split_y, csv_path in (
    (train_x, train_y, "ch_train.csv"),
    (test_x, test_y, "ch_test.csv"),
    (oot_x, oot_y, "ch_oot.csv"),
):
    split_features = pd.DataFrame(split_x, columns=data.feature_names)
    split_target = pd.DataFrame(split_y, columns=data.target_names)
    pd.concat([split_features, split_target], axis=1).to_csv(csv_path, index=False)

## 2) Load Data and Model into Modeva

In [None]:
## Create an empty Modeva DataSet instance; the train/test/oot data are loaded
## into this `ds` object in the next cell.
from modeva import DataSet
ds = DataSet()

In [None]:
## =============================================================
## Register the train, test and oot data with Modeva
## =============================================================

## a) read the three CSV files back into DataFrames (train, test, oot - in that order)
train, test, oot = (
    pd.read_csv(csv_path)
    for csv_path in ("ch_train.csv", "ch_test.csv", "ch_oot.csv")
)

## b) the train and test frames initialize the DataSet
ds.load_dataframe_train_test(train=train, test=test)

## c) the oot frame is attached as extra data under the name "oot"
ds.set_raw_extra_data(name="oot", data=oot)

## d) target feature
ds.set_target(feature="MedHouseVal")

## e) task type
ds.set_task_type('Regression')

In [None]:
## ----------------------------------------------------------------
## Target Model Wrapping: e.g. pre-trained Sklearn-style model
## ----------------------------------------------------------------

from modeva.models.wrappers.api import modeva_sklearn_regressor

## NOTE: unpickling can execute arbitrary code - only load pickle files you
## created yourself or otherwise trust.
## The context manager closes the file handle as soon as the estimator is loaded.
with open(file_name, "rb") as model_file:
    estimator = pickle.load(model_file)
model_target = modeva_sklearn_regressor(name="WrappedXGB", estimator=estimator)

## 4) Explainability of Target Model

In [None]:
## ----------------------------------------------------------------
## Post-hoc Explainability
##    fs.explain_pfi: permutation feature importance
##    fs.explain_hstatistic: H-statistic for each pair of features
##    fs.explain_pdp: 1D and 2D PDP
##    fs.explain_ale: 1D and 2D ALE
##    fs.explain_lime: LIME for local explanation
##    fs.explain_shap: SHAP for local explanation
## ----------------------------------------------------------------

## Create a FactSheet that bundles the dataset (`ds`) with the wrapped target
## model (`model_target`); the explain_* calls in the following cells go through `fs`.
from modeva import FactSheet
fs = FactSheet(ds, model_target)

In [None]:
## Post-hoc permutation feature importance (PFI) for the target model in `fs`.
## No arguments are passed, so Modeva's defaults apply.
result = fs.explain_pfi()
result.plot(figsize=(6, 4))

In [None]:
## Post-hoc H-statistic for pairs of features, requested with
## sample_size=1000 and grid_resolution=10.
result = fs.explain_hstatistic(sample_size=1000, grid_resolution=10)
result.plot(figsize=(6, 5))

In [None]:
## Post-hoc partial dependence plots: first 1D for "MedInc", then 2D for the
## ("Latitude", "Longitude") pair. `result` is rebound by the second call, so
## only the 2D result remains available after this cell.
result = fs.explain_pdp(features="MedInc")
result.plot(figsize=(6, 4))
result = fs.explain_pdp(features=("Latitude", "Longitude"))
result.plot(figsize=(6, 5))

In [None]:
## Post-hoc accumulated local effects (ALE) for the ("Longitude", "Latitude")
## pair, computed on the extra dataset registered as "oot".
result = fs.explain_ale(features=("Longitude", "Latitude"), dataset="oot")
result.plot(figsize=(6, 5))

In [None]:
## Post-hoc local explainability (LIME and SHAP) for the sample at index 0 of
## the test dataset. `result` is rebound, so it holds the SHAP result afterwards.
result = fs.explain_lime(dataset="test", sample_index=0, centered=False)
result.plot(figsize=(6.5, 4))
result = fs.explain_shap(dataset="test", sample_index=0)
result.plot(figsize=(6.5, 4))

## 5) Build Interpretable Benchmark Model using Modeva

In [None]:
## ----------------------------------------------------------------
## Model Training: e.g. LGBM
## ----------------------------------------------------------------

from modeva.models import MoLGBMRegressor
## Shallow trees (max_depth=2) with 500 estimators.
## verbose=-1 presumably silences training logs - confirm against Modeva docs.
model_lgbm = MoLGBMRegressor(name="LGBM", max_depth=2, n_estimators=500, verbose=-1)
## The target is flattened with .ravel() before fitting.
model_lgbm.fit(ds.train_x, ds.train_y.ravel())

In [None]:
## ----------------------------------------------------------------
## Model Training: Modeva's native MoE model
## ----------------------------------------------------------------

from modeva.models import MoMoERegressor
## Untuned baseline with max_depth=2 and n_estimators=200; the tuned variant
## is built from the random search in the following cells.
model_moe = MoMoERegressor(name="MOE", max_depth=2, n_estimators=200, verbose=-1)
## The target is flattened with .ravel() before fitting.
model_moe.fit(ds.train_x, ds.train_y.ravel())

In [None]:
## ----------------------------------------------------------------
## Model Tuning: e.g. Random Search
## ----------------------------------------------------------------

from modeva.models.tune import ModelTuneRandomSearch

## Candidate values for each tuned hyperparameter.
hyperspace = {
    "n_clusters": [2, 4, 6, 8, 10],
    "n_estimators": [50, 100, 200],
    "max_depth": [1, 2],
}

## Random search over `hyperspace`, scored by MSE. n_iter=10 and cv=5 are
## presumably the number of sampled configurations and CV folds - confirm.
hpo = ModelTuneRandomSearch(dataset=ds, model=MoMoERegressor(verbose=-1))
result = hpo.run(
    param_distributions=hyperspace,
    metric="MSE",
    n_iter=10,
    cv=5,
)
result.table

In [None]:
## ----------------------------------------------------------------
## Refit the model using selected hyperparameter
## ----------------------------------------------------------------
import numpy as np

## The best configuration is the one with the smallest rank (rank 1).
## np.argmin returns the first such position and, unlike `== 1` followed by
## np.where, also works when the rank column is a plain Python list.
best_param_idx = int(np.argmin(result.value["rank_test_MSE"]))
model_moe_tuned = MoMoERegressor(**result.value["params"][best_param_idx],
                                 name="MoE-Tuned",
                                 verbose=-1)
## Flatten the target with .ravel(), as the other fit calls in this notebook do.
model_moe_tuned.fit(ds.train_x, ds.train_y.ravel())
model_moe_tuned

## 6) Compare Models

In [None]:
## ----------------------------------------------------------------
## Model benchmarking/comparison tests:
##    fsc.compare_accuracy_table
##    fsc.compare_robustness
##    fsc.compare_reliability
##    fsc.compare_resilience
##    fsc.compare_slicing_accuracy
##    fsc.compare_slicing_overfit
##    fsc.compare_slicing_robustness
##    fsc.compare_slicing_reliability
## ----------------------------------------------------------------

## Create a FactSheet that bundles the dataset with all four models: the LGBM
## benchmark, the untuned and tuned MoE models, and the wrapped target model.
fsc = FactSheet(ds, models=[model_lgbm, model_moe, model_moe_tuned, model_target])

In [None]:
## Accuracy comparison (MAE) of all models in `fsc`: train split vs. test split.
result = fsc.compare_accuracy_table(train_dataset="train", test_dataset="test", 
                                    metric="MAE")
result.plot(figsize=(5, 4))

In [None]:
## Accuracy comparison (MAE) with the "oot" dataset used as the test set.
result = fsc.compare_accuracy_table(train_dataset="train", test_dataset="oot", 
                                    metric="MAE")
result.plot(figsize=(5, 4))

In [None]:
## Reliability comparison; the test split is passed as both train_dataset and
## test_dataset, with test_size=0.5 and alpha=0.1.
## NOTE(review): presumably the split is divided 50/50 internally and alpha is
## the miscoverage level - confirm against the Modeva documentation.
result = fsc.compare_reliability(train_dataset='test', test_dataset='test',
                                 test_size=0.5, alpha=0.1)
result.plot(figsize=(6, 4))

In [None]:
## Resilience comparison on the test split using the 'worst-sample' method, scored by MAE.
result = fsc.compare_resilience(dataset='test', metric="MAE", method='worst-sample')
result.plot(figsize=(6, 4))

In [None]:
## Robustness comparison on the test split: MAE under "normal" perturbation
## at noise levels 0.01, 0.02, 0.03 and 0.04.
result = fsc.compare_robustness(dataset="test",
                                noise_levels=(0.01, 0.02, 0.03, 0.04), 
                                perturb_method="normal", metric="MAE")
result.plot(figsize=(6, 4))

In [None]:
## Sliced accuracy comparison: MAE on the test split, sliced on "MedInc"
## with method="uniform" and bins=5.
result = fsc.compare_slicing_accuracy(dataset="test",
                                      features="MedInc", method="uniform", bins=5, metric="MAE")
result.plot(figsize=(6, 5))

In [None]:
## Sliced overfit comparison on "MedInc" (method="uniform", bins=5, MAE) with
## the test split as test_dataset; train_dataset is left at its default.
result = fsc.compare_slicing_overfit(test_dataset="test", 
                                     features="MedInc", method="uniform", bins=5, metric="MAE")
result.plot(figsize=(6, 5))

In [None]:
## Sliced robustness comparison on the "oot" dataset: sliced on "MedInc"
## (method="uniform", bins=5), "quantile" perturbation at noise level 0.2, scored by MAE.
result = fsc.compare_slicing_robustness(dataset="oot",
                                        features="MedInc", method="uniform", bins=5, 
                                        noise_levels=0.2, 
                                        perturb_method="quantile", metric="MAE")
result.plot(figsize=(6, 4))

In [None]:
## Sliced reliability comparison with "oot" passed as both train_dataset and
## test_dataset: sliced on "MedInc" (method="uniform", bins=5), test_size=0.5, alpha=0.1.
result = fsc.compare_slicing_reliability(train_dataset="oot",
                                         test_dataset="oot",
                                         features="MedInc", method="uniform", bins=5, 
                                         test_size=0.5, alpha=0.1)
result.plot(figsize=(6, 4))

## 7) Wrap the Above Steps into Pipeline

In [None]:
## ----------------------------------------------------------------
## Define step functions
## ----------------------------------------------------------------

import pickle
import numpy as np
import pandas as pd

from modeva import DataSet
from modeva import ModelZoo
from modeva import FactSheet
from modeva.models import MoLGBMRegressor
from modeva.models import MoMoERegressor
from modeva.models.tune import ModelTuneRandomSearch
from modeva.models.wrappers.api import modeva_sklearn_regressor

from modeva.automation.pipeline import Pipeline

def load_data():
    """Build the Modeva DataSet for the pipeline from the saved CSV splits.

    A fresh ``DataSet`` is created on every call, so this step neither depends
    on nor mutates a notebook-level ``ds`` variable left over from earlier cells.

    Returns
    -------
    DataSet
        Dataset with the train/test frames loaded, the OOT frame registered as
        extra data named "oot", target "MedHouseVal" and task type "Regression".
    """
    ds = DataSet()
    train = pd.read_csv("ch_train.csv")
    test = pd.read_csv("ch_test.csv")
    oot = pd.read_csv("ch_oot.csv")
    ds.load_dataframe_train_test(train=train, test=test)
    ds.set_raw_extra_data(name="oot", data=oot)
    ds.set_target(feature="MedHouseVal")
    ds.set_task_type('Regression')
    return ds

def load_target_model(ds):
    """Load the pickled XGBoost estimator and wrap it for Modeva.

    Parameters
    ----------
    ds : DataSet
        Not used in the body; accepted so the pipeline can map the parent
        step's output onto this function.

    Returns
    -------
    The estimator from "ch_xgb.pkl" wrapped by ``modeva_sklearn_regressor``
    under the name "WrappedXGB".
    """
    # NOTE: unpickling can execute arbitrary code - only load trusted files.
    # The context manager closes the file handle once the estimator is loaded.
    with open("ch_xgb.pkl", "rb") as model_file:
        estimator = pickle.load(model_file)
    model = modeva_sklearn_regressor(name="WrappedXGB", estimator=estimator)
    return model

def train_lgbm(ds):
    """Fit the "LGBM" benchmark regressor on the training split of ``ds``.

    Parameters
    ----------
    ds : DataSet
        Dataset exposing ``train_x`` and ``train_y``.

    Returns
    -------
    MoLGBMRegressor
        The fitted model (max_depth=2, n_estimators=500).
    """
    lgbm = MoLGBMRegressor(name="LGBM", max_depth=2, n_estimators=500, verbose=-1)
    # The target is flattened before being handed to fit.
    features, target = ds.train_x, ds.train_y.ravel()
    lgbm.fit(features, target)
    return lgbm

def train_moe(ds):
    """Fit the untuned "MOE" regressor on the training split of ``ds``.

    Uses n_estimators=200 so the pipeline step builds the same "MOE" model as
    the stand-alone MoE training cell earlier in this notebook.

    Parameters
    ----------
    ds : DataSet
        Dataset exposing ``train_x`` and ``train_y``.

    Returns
    -------
    MoMoERegressor
        The fitted model (max_depth=2, n_estimators=200).
    """
    model = MoMoERegressor(name="MOE", max_depth=2, n_estimators=200, verbose=-1)
    # The target is flattened before being handed to fit.
    model.fit(ds.train_x, ds.train_y.ravel())
    return model

def train_moe_tuned(ds):
    """Tune a MoE regressor by random search and refit the best configuration.

    Parameters
    ----------
    ds : DataSet
        Dataset handed to the tuner and exposing ``train_x`` and ``train_y``.

    Returns
    -------
    MoMoERegressor
        Model named "MoE-Tuned", built from the top-ranked hyperparameters and
        fitted on the training split.
    """
    hyperspace = dict(n_clusters=[2, 4, 6, 8, 10],
                      n_estimators=[50, 100, 200],
                      max_depth=[1, 2])
    hpo = ModelTuneRandomSearch(dataset=ds,
                              model=MoMoERegressor(verbose=-1))
    result = hpo.run(param_distributions=hyperspace,
                     metric="MSE",
                     n_iter=10,
                     cv=5)
    # The best configuration has the smallest rank (rank 1). np.argmin returns
    # its first position and also works when the rank column is a plain list,
    # where `== 1` followed by np.where would find nothing.
    best_param_idx = int(np.argmin(result.value["rank_test_MSE"]))
    model = MoMoERegressor(**result.value["params"][best_param_idx],
                           name="MoE-Tuned",
                           verbose=-1)
    # Flatten the target, consistent with the other training steps.
    model.fit(ds.train_x, ds.train_y.ravel())
    return model

def explain_model(ds, model):
    """Run and plot four post-hoc explanations for one model.

    Parameters
    ----------
    ds : DataSet
        Dataset bundled with the model in the FactSheet.
    model
        Model to explain.

    Returns
    -------
    tuple
        The PFI, H-statistic, PDP ("MedInc") and LIME (test sample 0) results,
        in that order.
    """
    sheet = FactSheet(ds, model=model)

    # Permutation feature importance
    pfi_result = sheet.explain_pfi()
    pfi_result.plot(figsize=(6, 4))

    # H-statistic
    hstat_result = sheet.explain_hstatistic(sample_size=1000, grid_resolution=10)
    hstat_result.plot(figsize=(6, 5))

    # 1D partial dependence of MedInc
    pdp_result = sheet.explain_pdp(features="MedInc")
    pdp_result.plot(figsize=(6, 5))

    # LIME for the first test sample
    lime_result = sheet.explain_lime(dataset="test", sample_index=0, centered=False)
    lime_result.plot(figsize=(6, 4))

    return pfi_result, hstat_result, pdp_result, lime_result

def compare_models(ds, model1, model2, model3, model4):
    """Run and plot four comparison tests across four models.

    Parameters
    ----------
    ds : DataSet
        Dataset bundled with the models in the FactSheet.
    model1, model2, model3, model4
        Models to compare, passed to the FactSheet in this order.

    Returns
    -------
    tuple
        The accuracy-table, reliability, robustness and sliced-accuracy
        results, in that order.
    """
    sheet = FactSheet(ds, models=[model1, model2, model3, model4])

    # Accuracy on train vs. test, for both MSE and MAE
    accuracy_result = sheet.compare_accuracy_table(
        train_dataset="train",
        test_dataset="test",
        metric=("MSE", "MAE"),
    )
    accuracy_result.plot(figsize=(6.5, 4))

    # Reliability, with the test split passed as both datasets
    reliability_result = sheet.compare_reliability(
        train_dataset='test',
        test_dataset='test',
        test_size=0.5,
        alpha=0.1,
    )
    reliability_result.plot(figsize=(6, 4))

    # Robustness under "normal" perturbation (dataset left at its default)
    robustness_result = sheet.compare_robustness(
        noise_levels=(0.01, 0.02, 0.03, 0.04),
        perturb_method="normal",
        metric="MAE",
    )
    robustness_result.plot(figsize=(6.5, 4))

    # MAE sliced on MedInc (dataset left at its default)
    slicing_result = sheet.compare_slicing_accuracy(
        features="MedInc",
        method="uniform",
        bins=5,
        metric="MAE",
    )
    slicing_result.plot(figsize=(6, 5))

    return accuracy_result, reliability_result, robustness_result, slicing_result

In [None]:
## ----------------------------------------------------------------
## Construct Pipeline with Step Functions
## ----------------------------------------------------------------

exp = Pipeline(name='CH-Pipeline')

## Root step: takes no inputs and saves the data it returns.
exp.add_step(
    name='load_data',
    func=load_data,
    func_inputs={},
    save_data=True,
)

## The four model-producing steps share one shape: each is a child of
## 'load_data', leaves func_inputs empty and saves its model.
for step_name, step_func in (
    ('load_target_model', load_target_model),
    ('train_lgbm', train_lgbm),
    ('train_moe', train_moe),
    ('train_moe_tuned', train_moe_tuned),
):
    exp.add_step(
        name=step_name,
        parent='load_data',
        func=step_func,
        func_inputs={},  # auto map from parent steps
        save_model=True,
    )

## Explain the wrapped target model.
exp.add_step(
    name='explain_model',
    parent=['load_data', 'load_target_model'],
    func=explain_model,
    func_inputs={},  # auto map from parent steps
    save_factsheet=True,
)

## Compare all four models.
## NOTE(review): the parent outputs are presumably mapped, in list order, onto
## compare_models(ds, model1, ..., model4) - confirm against the Modeva docs.
exp.add_step(
    name='compare_model',
    parent=['load_data', 'train_lgbm', 'train_moe', 'train_moe_tuned', 'load_target_model'],
    func=compare_models,
    func_inputs={},  # auto map from parent steps
    save_factsheet=True,
)

In [None]:
## ----------------------------------------------------------------
## Run Pipeline
## ----------------------------------------------------------------
## Runs the pipeline `exp` whose steps were added in the previous cell.
exp.run()

In [None]:
## =============================================================
## Factsheet-export API (to be merged to Modeva in next release)
## =============================================================

import json
import pandas as pd
from modeva.dashboard.utils.report import create_html_reprt

def export_report(fs, path: str = "report.html"):
    """Export every registered test of a FactSheet to a single HTML report.

    Each record handed to the HTML renderer carries the name of the registered
    test it came from. NaN values in tables and plot options are written as
    JSON ``null``; string content is left untouched.

    Parameters
    ----------
    fs : FactSheet
        Factsheet providing ``list_registered_tests``, ``load_registered_test``
        and ``name``.
    path : str, optional
        The export path, by default "report.html"
    """

    def _nan_to_none(value):
        """Recursively replace float NaN with None so json.dumps emits null."""
        # NaN is the only float that is not equal to itself.
        if isinstance(value, float) and value != value:
            return None
        if isinstance(value, dict):
            return {key: _nan_to_none(item) for key, item in value.items()}
        if isinstance(value, (list, tuple)):
            return [_nan_to_none(item) for item in value]
        return value

    test_names = fs.list_registered_tests().Name.unique().tolist()
    records = []
    for test_name in test_names:
        entry = fs.load_registered_test(name=test_name)

        # 'options' is either one plot option (it has a 'chart_id' key) or a
        # mapping whose values are the individual plot options.
        options = entry['options']
        plots = []
        if options:
            if 'chart_id' in options:
                plots.append(options)
            else:
                plots.extend(options.values())

        if entry['table'] is not None:
            table = entry['table'].round(6).to_dict(orient="split")
        else:
            table = {}

        records.append({
            "name": test_name,
            "data": json.dumps(entry['data']),
            "model": json.dumps(entry['model']),
            "inputs": json.dumps(entry['inputs']),
            "table": json.dumps(_nan_to_none(table)),
            "plots": json.dumps(_nan_to_none(plots)),
        })

    html_str = create_html_reprt(fs.name, records)
    with open(path, 'w', encoding='utf-8') as report_file:
        report_file.write(html_str)

In [None]:
## NOTE(review): this rebinds `fs` (earlier the target-model FactSheet) to a
## FactSheet created from a name only - presumably the factsheet saved by the
## pipeline steps; confirm that 'CH-Pipeline-FactSheet' is the right name.
fs = FactSheet(name='CH-Pipeline-FactSheet')
## Write the registered tests of `fs` to report.html.
export_report(fs, path="report.html")