This notebook shows:
1. How to save and reload a FlexCode model so that it does not have to be re-trained every time;
2. How to select the best bump removal parameter from an array of potential values.

In [None]:
import flexcode
import numpy as np
import xgboost as xgb
from flexcode.regression_models import XGBoost, CustomModel

from matplotlib import pyplot as plt
%matplotlib inline

## Data Creation

In [None]:
def generate_data(n_draws, seed=None):
    """Draw a toy sample in which z is x plus unit-variance Gaussian noise.

    Parameters
    ----------
    n_draws : int
        Number of (x, z) pairs to draw.
    seed : int, optional
        Seed for a dedicated ``np.random.default_rng`` generator, which makes
        the draw reproducible. With the default ``None`` the global NumPy
        random state is used, exactly as before.

    Returns
    -------
    x : np.ndarray
        ``n_draws`` samples from a normal distribution with mean 0 and
        standard deviation 1.
    z : np.ndarray
        ``n_draws`` samples where ``z[i]`` is drawn from a normal distribution
        with mean ``x[i]`` and standard deviation 1.
    """
    # Both the legacy ``np.random`` module and a ``Generator`` expose ``normal``
    # with the same (loc, scale, size) signature.
    rng = np.random if seed is None else np.random.default_rng(seed)
    x = rng.normal(0, 1, n_draws)
    z = rng.normal(x, 1, n_draws)
    return x, z

# Three independent samples of 1000 points each, drawn in this order:
# training, validation and test.
(x_train, z_train), (x_validation, z_validation), (x_test, z_test) = (
    generate_data(1000) for _ in range(3)
)

## Saving and Reloading the Model

In [None]:
# Parameterize model
model = flexcode.FlexCodeModel(
    XGBoost,
    max_basis=31,
    basis_system="cosine",
    # 'reg:squarederror' is the current name of XGBoost's squared-error
    # objective; the former alias 'reg:linear' is deprecated.
    regression_params={'max_depth': 3, 'learning_rate': 0.5, 'objective': 'reg:squarederror'},
)

# Fit model and predict on test data
model.fit(x_train, z_train)
cdes_predict_xgb, z_grid = model.predict(x_test, n_grid=200)

# Print the maximum of two rows of the predictions (rows 7 and 42) so they
# can be compared with the reloaded model's output later on
print(np.max(cdes_predict_xgb[7, :]))
print(np.max(cdes_predict_xgb[42, :]))

In [None]:
# Display the attributes stored on the fitted model
vars(model)

In [None]:
import pickle

# Saving the model. The context manager closes the file handle as soon as the
# dump finishes, so the pickle is fully flushed to disk before it is read back.
with open('flexcode_model.pkl', 'wb') as model_file:
    pickle.dump(obj=model, file=model_file, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Reload the model. NOTE: unpickling can execute arbitrary code, so only load
# pickle files that come from a trusted source.
with open('flexcode_model.pkl', 'rb') as model_file:
    model_reloaded = pickle.load(model_file)

# Predict again
cdes_predict_xgb_reloaded, z_grid = model_reloaded.predict(x_test, n_grid=200)

In [None]:
# Maxima of rows 7 and 42 again, this time from the reloaded model's
# predictions; expected to match the values printed before saving
print(
    np.max(cdes_predict_xgb_reloaded[7, :]),
    np.max(cdes_predict_xgb_reloaded[42, :]),
    sep="\n",
)

In [None]:
# Display the attributes stored on the reloaded model
vars(model_reloaded)

## Spurious Bump Removal - Tune Using Validation Data

In [None]:
# Candidate bump-removal values to tune over:
# 20 evenly spaced points spanning 0.01 to 0.2 (both ends included)
bump_removal_grid = np.linspace(start=0.01, stop=0.2, num=20)
print(bump_removal_grid)

In [None]:
# Parameterize model
model = flexcode.FlexCodeModel(
    XGBoost,
    max_basis=31,
    basis_system="cosine",
    # 'reg:squarederror' is the current name of XGBoost's squared-error
    # objective; the former alias 'reg:linear' is deprecated.
    regression_params={'max_depth': 3, 'learning_rate': 0.5, 'objective': 'reg:squarederror'},
)

# Fit model
model.fit(x_train, z_train)

In [None]:
# Tune the model on the validation sample; the grid of candidate
# bump-removal values is handed straight to `tune`
model.tune(
    x_validation,
    z_validation,
    bump_threshold_grid=bump_removal_grid,
)

Under the hood, `tune` selects the bump removal value from the grid that gives the smallest CDE loss on the validation data.

The best value is accessible among the attributes of the model, as below:

In [None]:
# Show the bump threshold stored on the model after the `tune` call
model.bump_threshold