In [16]:
import numpy as np
import time
import logging
import toolz as fp
import pandas as pd

from sklearn.model_selection import KFold
from datetime import datetime
from scipy import stats

from bayesian_uncertainty.metrics import nlpd, rmse, auc_rmse, auc_rmse_norm
from bayesian_uncertainty.uncertainty_calibration import UncertaintyCalibrator

In [26]:
from bayesian_uncertainty.datasets import make_regression_datasets, concrete
from bayesian_uncertainty.evaluation import eval_dataset_model
from bayesian_uncertainty.shallow_models import LGBMUncertainty

In [12]:
logger = logging.getLogger("bayesian_uncertainty")

In [18]:
datasets = make_regression_datasets(make_year=False, make_flight=False)

In [47]:
lgb = LGBMUncertainty(n_estimators=100, max_depth=6, subsample=0.85)

In [48]:
def eval_dataset_model(dataset, model, calibrate_uncertainty=False, uncertainty_calibrator_cv=None):
    X, y, splits = dataset()

    cv_metrics = []

    print(dataset.__name__)
    
    if type(X) is pd.DataFrame:
        X = X.values
        y = y.values
        
    if calibrate_uncertainty:
        if uncertainty_calibrator_cv is None:
            uncertainty_calibrator_cv = KFold(n_splits=5)
        model = UncertaintyCalibrator(model, uncertainty_calibrator_cv)

    for train_index, test_index in splits:
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        start = time.time()
        model.fit(X_train, y_train)
        train_time = time.time() - start

        start = time.time()
        pred_mean, pred_std = model.predict(X_test)
        test_time = time.time() - start

        cv_metrics.append({
            "train_time": train_time,
            "test_time": test_time,
            "nlpd": nlpd(y_test, pred_mean, pred_std),
            "rmse": rmse(y_test, pred_mean),
            "auc_rmse": auc_rmse(y_test, pred_mean, pred_std),
            "auc_rmse_norm": auc_rmse_norm(y_test, pred_mean, pred_std)
        })

    metrics_df = pd.DataFrame(cv_metrics)
    
    metrics_mean = metrics_df.mean(axis=0)
    metrics_stderr = metrics_df.sem(axis=0)

    results = {
        "current_time": str(datetime.now()),
        "dataset": dataset.__name__,
        "model": type(model).__name__,
        "shape": X.shape,
        "metrics_df": metrics_df.to_dict(),
        "metrics_mean": metrics_mean.to_dict(),
        "metrics_stderr": metrics_stderr.to_dict()
    }

    print(fp.dissoc(results, "metrics_df"))

    return results

In [49]:
results = []
for d in datasets:
    results.append(eval_dataset_model(d, lgb))

boston
{'current_time': '2018-09-16 01:11:15.122984', 'dataset': 'boston', 'model': 'LGBMUncertainty', 'shape': (506, 13), 'metrics_mean': {'auc_rmse': 2.6376590054867592, 'auc_rmse_norm': 0.8392294773525956, 'nlpd': 2.7772280422453854, 'rmse': 3.217213046460972, 'test_time': 0.013479268550872803, 'train_time': 0.08499497771263123}, 'metrics_stderr': {'auc_rmse': 0.10016953649050223, 'auc_rmse_norm': 0.0267455481591808, 'nlpd': 0.08090836813411836, 'rmse': 0.12429380350363005, 'test_time': 2.925744127508729e-05, 'train_time': 0.001028124395576544}}
concrete
{'current_time': '2018-09-16 01:11:20.264351', 'dataset': 'concrete', 'model': 'LGBMUncertainty', 'shape': (1030, 8), 'metrics_mean': {'auc_rmse': 3.5131147910565126, 'auc_rmse_norm': 0.7960911712737291, 'nlpd': 2.9581663555915494, 'rmse': 4.440301187276676, 'test_time': 0.026188218593597413, 'train_time': 0.09676451086997986}, 'metrics_stderr': {'auc_rmse': 0.09297123721889518, 'auc_rmse_norm': 0.01645299749096413, 'nlpd': 0.036651

In [52]:
str(datetime.now().strftime("%Y_%m_%d_%H_%M"))

'2018_09_16_20_15'

In [54]:
str(LGBMUncertainty(n_estimators=100, max_depth=3, learning_rate=0.1, subsample=0.85))


'LGBMUncertainty()'

In [56]:
gb_models = [
    LGBMUncertainty(n_estimators=100, max_depth=3, learning_rate=0.1, subsample=0.85)
]

In [57]:
gb_models

[LGBMUncertainty()]

In [59]:
import inspect
lines = inspect.getsource(gb_models)

TypeError: [LGBMUncertainty()] is not a module, class, method, function, traceback, frame, or code object

In [60]:
from sklearn.ensemble import RandomForestClassifier

In [84]:
from sklearn.base import BaseEstimator, RegressorMixin


In [115]:
class LGBMUncertainty(BaseEstimator, RegressorMixin):

    def __init__(self, n_estimators, **kwargs):
        self.lgb = LGBMRegressor(**kwargs)
        self.n_estimators = n_estimators
        
    def get_params(self, deep=True):
        return self.lgb.get_params()
    
    def set_params(self, **params):
        if "a" in params:
            print(params["a"])
        self.lgb.set_params(**params)
        return self

In [118]:
b=LGBMUncertainty(n_estimators=666).set_params(learning_rate=0.123, a=2).get_params()

2


In [121]:
b

{'boosting_type': 'gbdt',
 'class_weight': None,
 'colsample_bytree': 1.0,
 'learning_rate': 0.123,
 'max_depth': -1,
 'min_child_samples': 20,
 'min_child_weight': 0.001,
 'min_split_gain': 0.0,
 'n_estimators': 100,
 'n_jobs': -1,
 'num_leaves': 31,
 'objective': None,
 'random_state': None,
 'reg_alpha': 0.0,
 'reg_lambda': 0.0,
 'silent': True,
 'subsample': 1.0,
 'subsample_for_bin': 200000,
 'subsample_freq': 0,
 'a': 2}

In [3]:
names = ["r"]
visited = {}
[name for name in names if not visited[name]]

KeyError: 'r'

In [7]:
import numpy as np
np.exp(-9)

0.00012340980408667956