In [None]:
import warnings
warnings.filterwarnings('ignore') # prob not best practice (fix later)

from pathlib import Path
import pickle
from tqdm.notebook import tqdm

import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from skopt import BayesSearchCV
from joblib import parallel_backend

from duqling_interface import DuqlingInterface
from model_search_spaces import get_models
from plot_performance import plot_bayes_cv_rmse, heatmap

In [None]:
# Interface object used below to list the available functions and to generate data.
duq = DuqlingInterface()

# `fname` entries of the functions listed with response_type='uni'; the later
# cells iterate over these names and use them as the "function" axis labels.
# NOTE(review): presumably a pandas Series of function names -- confirm.
univariate_funcs = duq.list_functions(response_type='uni').fname

In [None]:
def record_performance(bayes_search, X_test:np.ndarray, y_test:np.ndarray):
    """
    Compute the test RMSE, standard deviation (sigma), and Pearson's R value
    and record them in the cross validation results dictionary.

    Parameters
    ----------
    bayes_search
        Fitted search object; its `best_estimator_` is used for prediction and
        its `cv_results_` mapping is updated in place.
    X_test
        Held-out inputs passed to `best_estimator_.predict`.
    y_test
        Held-out targets. Any array-like is accepted; it is flattened to 1-D.

    Notes
    -----
    Three scalar entries are added to `bayes_search.cv_results_`:
    'test_rmse', 'sigma' (population std of `y_test`, ddof=0) and 'r_val'.
    'r_val' is NaN when either the predictions or the targets are constant.
    """
    # Flatten both sides so a column-vector target, shape (n, 1), and a 1-D
    # prediction are compared element-wise; np.corrcoef would otherwise treat
    # each row of the 2-D input as a separate variable.
    y_pred = np.asarray(bayes_search.best_estimator_.predict(X_test)).ravel()
    y_true = np.asarray(y_test).ravel()

    rmse  = np.sqrt(mean_squared_error(y_true, y_pred))
    sigma = y_true.std(ddof=0)
    r     = np.corrcoef(y_pred, y_true)[0, 1]

    bayes_search.cv_results_['test_rmse'] = rmse
    bayes_search.cv_results_['sigma']     = sigma
    bayes_search.cv_results_['r_val']     = r

In [None]:
def save_cv_results(cv_results:dict, savepath:"Path | str"):
    """
    Pickle the cross validation results to `savepath`.

    Parameters
    ----------
    cv_results
        The results mapping to serialize.
    savepath
        Destination file, given as a `pathlib.Path`, a string, or any other
        os.PathLike. Missing parent directories are created; an existing file
        at that location is overwritten.
    """
    # Normalize first so string paths work with the `.parent` lookup below.
    savepath = Path(savepath)
    savepath.parent.mkdir(parents=True, exist_ok=True)
    with open(savepath, 'wb') as f:
        pickle.dump(cv_results, f)

In [None]:
# Run one Bayesian hyper-parameter search per (function, model) pair, then
# score the best estimator on the held-out split, plot and pickle the results.
# Only the slice [3:5] of the function list is processed here.
for duqling_func_name in univariate_funcs[3:5]:
    X, y = duq.generate_data(duqling_func_name, n_samples=100, seed=42)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=42
    )
    n_features = X.shape[1]
    models = get_models(n_features)
    for model_name, search_kwargs in models.items():

        # Configure the tqdm progress bar. Using it as a context manager
        # guarantees it is closed even when the fit raises.
        total_iters = search_kwargs['n_iter']
        with tqdm(
            total=total_iters,
            desc=f"{model_name.upper()} on {duqling_func_name}",
            unit="iter",
        ) as bar:

            def update_tqdm_bar(_):
                # Deliberately returns None: tqdm's update() can return True,
                # and a truthy callback result tells skopt to stop the search.
                bar.update()

            bayes_search = BayesSearchCV(
                **search_kwargs,
                cv=3,
                scoring='neg_mean_squared_error',
                random_state=42,
                verbose=0,
                n_jobs=-1
            )
            with parallel_backend('threading', n_jobs=-1):
                bayes_search.fit(X_train, y_train, callback=update_tqdm_bar)

        # Adds 'test_rmse', 'sigma' and 'r_val' to bayes_search.cv_results_.
        record_performance(bayes_search, X_test, y_test)

        plot_bayes_cv_rmse(bayes_search.cv_results_, model_name.upper(), duqling_func_name)

        # Stored as models/<model>/<function>/cv_results.pkl; the summary cell
        # further down reads the files back from the same layout.
        savepath = Path("models", model_name, duqling_func_name, "cv_results.pkl")
        save_cv_results(bayes_search.cv_results_, savepath)

## Visualize Performance Metrics

In [None]:
import xarray as xr

metrics     = ["test_rmse", "sigma", "r_val"]
model_names = list(get_models(1).keys())

# (metric, model, function) cube, pre-filled with NaN so that combinations
# without a saved results file simply stay missing.
arr = np.full((len(metrics), len(model_names), len(univariate_funcs)), np.nan)

for model_idx, model_name in enumerate(model_names):
    for func_idx, func_name in enumerate(univariate_funcs):
        results_path = Path('models', model_name, func_name, 'cv_results.pkl')
        # The existence check can be removed when cube3_rotate gets fixed.
        if results_path.exists():
            # These pickles are written by this notebook; do not point this
            # at files from an untrusted source.
            with results_path.open("rb") as fh:
                saved_results = pickle.load(fh)
            for metric_idx, metric_name in enumerate(metrics):
                arr[metric_idx, model_idx, func_idx] = saved_results[metric_name]

# Label every axis so single metrics can be selected by name later on.
summary = xr.DataArray(
    arr,
    dims=["metric", "model", "function"],
    coords={"metric": metrics, "model": model_names, "function": univariate_funcs},
)

In [None]:
def metric_df(metric: str) -> pd.DataFrame:
    """Select one metric from `summary` and return it as a model-by-function DataFrame."""
    metric_slice = summary.sel(metric=metric)
    return metric_slice.to_pandas()

def print_win_rate(df: pd.DataFrame, metric: str, low_wins: bool = True):
    """
    Print, for one metric, how many columns each row label wins.

    Parameters
    ----------
    df
        Score table; one winner (row label) is picked per column. In this
        notebook rows are models and columns are functions.
    metric
        Display name printed as the underlined heading.
    low_wins
        If True the smallest value in a column wins, otherwise the largest.

    Notes
    -----
    Columns that are NaN for every row have no winner and are skipped. Ties
    go to the first row holding the best value (idxmin / idxmax behaviour).
    """
    print(f"\n\033[4m{metric}\033[0m")
    print("  \033[1mWin rate\033[0m:")

    # Drop columns without any result up front: idxmin/idxmax on an all-NaN
    # column is deprecated in recent pandas and is slated to raise ValueError.
    scored = df.dropna(axis=1, how="all")
    if scored.empty:
        return

    idxfunc = scored.idxmin if low_wins else scored.idxmax
    winners = idxfunc(axis=0)
    for model, count in winners.value_counts().items():
        print(f"    {model}: {count}")

In [None]:
# One model-by-function table per recorded metric.
df_rmse, df_r, df_std = (
    metric_df(metric_key) for metric_key in ('test_rmse', 'r_val', 'sigma')
)

# (table, heading, whether the smallest value wins) for each win-rate report.
win_rate_reports = (
    (df_r, "Pearson's R", False),
    (df_rmse, "Test RMSE", True),
    (df_rmse / df_std, "Test RMSE \u00F7 \u03C3", True),
)
for report_df, report_title, report_low_wins in win_rate_reports:
    print_win_rate(report_df, report_title, low_wins=report_low_wins)

In [None]:
# Functions excluded from every heatmap below.
# NOTE(review): presumably the const_* functions have sigma == 0, which makes
# RMSE / sigma undefined -- confirm.
cols_to_drop = [
    # Other candidates for exclusion:
    # 'circuit', 'cantilever_S', 'banana', 'cube3_rotate', 'steel_column',
    'const_fn', 'const_fn3', 'const_fn15', 'cube3_rotate'
]

# Build each table once instead of re-deriving RMSE / sigma for every figure.
rmse_kept       = df_rmse.drop(cols_to_drop, axis=1)
rmse_over_sigma = (df_rmse/df_std).drop(cols_to_drop, axis=1)

# Keep only entries where the RMSE exceeds sigma; all other cells become NaN.
df_filtered = rmse_over_sigma[rmse_over_sigma > 1]
# Functions for which no model exceeds sigma are removed for the last figure.
drop_cols = df_filtered.columns[df_filtered.isnull().all()]
df_filtered_nonempty = df_filtered.drop(drop_cols, axis=1)

fig1 = heatmap(rmse_kept, "Test RMSE")
fig2 = heatmap(rmse_over_sigma, "Test RMSE / \u03C3")
fig3 = heatmap(df_filtered, "(Test RMSE / \u03C3) > 1")
fig4 = heatmap(df_filtered_nonempty, "(Test RMSE / \u03C3) > 1")

fig1.show()
fig2.show()
fig3.show()
fig4.show()