In [1]:
%load_ext autoreload
%matplotlib inline

In [28]:
from sklearn.model_selection import cross_val_predict, ShuffleSplit, KFold, RepeatedKFold

In [23]:
%autoreload 2
from datasets import make_datasets

In [24]:
datasets = make_datasets()

In [16]:
%autoreload 2
from metrics import normal_nll, rmse, mae, auc_rmse, auc_mae

In [None]:
%autoreload 2
from shallow_models import LinearRegression, BayesianLinearRegression, GBTQuantile, XGBaseline, XGBLogLikelihood
models = LinearRegression, BayesianLinearRegression, GBTQuantile, XGBaseline, XGBLogLikelihood

In [None]:
results = []

Results = namedtuple('Results', 'dataset model shape normal_nll rmse mae auc_rmse auc_mae')

for d, (X,y) in datasets.items():
    if d == 'year':
        cv = ShuffleSplit(1, test_size=0.1)
    elif d == 'protein':
        cv = KFold(n_splits=10)
    elif d.startswith('make'):
        cv = KFold(n_splits=2)
    else:
        cv = RepeatedKFold(n_splits=10, n_repeats=1)
    
    for m in models:
        #pred_mean, pred_std = cross_val_predict(m, X, y, cv=cv)
        reg = m()
        reg.fit(X, y)
        pred_mean, pred_std = reg.predict(X)
        r = Results(
                d, 
                type(m).__name__,
                X.shape,
                normal_nll(y, pred_mean, pred_std),
                rmse(y, pred_mean),
                mae(y, pred_mean),
                auc_rmse(y, pred_mean, pred_std),
                auc_mae(y, pred_mean, pred_std)
            )
        print(r)
        results.append(r)

Results(dataset='boston', model='type', shape=(506, 13), normal_nll=2.96213114625243, rmse=4.679506300635516, mae=3.272944637996936, auc_rmse=4.506588688366445, auc_mae=3.9287349258629796)
Results(dataset='boston', model='type', shape=(506, 13), normal_nll=3.0617501684814825, rmse=4.764112951314464, mae=3.319204466868282, auc_rmse=5.207513332432892, auc_mae=3.586023623364604)
Results(dataset='boston', model='type', shape=(506, 13), normal_nll=2.5514602388358814, rmse=2.8478705778310087, mae=1.4400138963886413, auc_rmse=1.4944086690039522, auc_mae=0.9160432296240739)
Results(dataset='boston', model='type', shape=(506, 13), normal_nll=1.833344382336918, rmse=1.5134712432474189, mae=1.1707054096719494, auc_rmse=1.7435924282794146, auc_mae=1.5128116537331093)
Results(dataset='boston', model='type', shape=(506, 13), normal_nll=1.8738467966956123, rmse=1.5134712432474189, mae=1.1707054096719494, auc_rmse=1.1445712778921473, auc_mae=0.9594783307919768)
Results(dataset='concrete', model='type'