In [1]:
import sys

# setting path
sys.path.append('..')

import numpy as np
import time

from src.models.bocs.LinReg import LinReg
from src.models.GPr import GPr
from ngboost import NGBRegressor
from ngboost.distns import LogNormal, Normal, Exponential
from ngboost.scores import CRPS, LogScore
from xgboost import XGBRegressor

# Learners
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LassoCV
from sklearn.ensemble import RandomForestRegressor

from src.problems.contamination import Contamination
from src.problems.rna import RNA
from src.problems.bqp import BQP
from src.problems.latin_square import LatinSquare

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

from src.uncertainty_metrics import *

import pandas as pd


  warn(
  warn(


In [2]:
def compute_empirical_coverage(opt, n_exp, test_set_size=0.2, nb=30):
    
    
    names  = ["GPr", "BOCS", "NGBlinCV", "NGBdec"]
        
    X = opt.X
    y = opt.y
    
    exp     = np.array([])
    alg     = np.array([])
    score_m = np.array([])
    rmse_m  = np.array([])
    mae_m   = np.array([])
    q_m     = np.array([])
    eq_m    = np.array([])
    
    for j in range(n_exp):
        
        print("Exp", j)
        
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_set_size)
        
        m1 = GPr()
        m2 = LinReg(nVars=25, order=2)
        learner = LassoCV(cv=5)
        m3 = NGBRegressor(Base=learner)
        learner = DecisionTreeRegressor(criterion='friedman_mse', max_depth=5)
        m4 = NGBRegressor(Base=learner)

        models = [m1, m2, m3, m4]
        
        for i, m in enumerate(models):
            score, rmse, mae, q, eq = get_results_model(m, X_train, y_train, X_test, y_test, nb)
            
            exp     = np.hstack((exp, j*np.ones(nb)))
            alg     = np.hstack((alg, np.array(nb * [names[i]] )))
            score_m = np.hstack((score_m, score*np.ones(nb)))
            rmse_m  = np.hstack((rmse_m, rmse*np.ones(nb)))
            mae_m   = np.hstack((mae_m, mae*np.ones(nb)))
            q_m     = np.hstack((q_m, q))
            eq_m    = np.hstack((eq_m, eq))
            

    df = pd.DataFrame({"Experiment" : exp,
                       "Algorithm"  : alg,
                       "R2"         : score_m,
                       "RMSE"       : rmse_m,
                       "MAE"        : mae_m,
                       "Quantile"   : q_m,
                       "E-quantile" : eq_m})
    
    return df
        

def get_results_model(m, X_train, y_train, X_test, y_test, nb=30):
   
    m.fit(X_train, y_train)
    y_pred = m.predict(X_test)
    y_pred_d = m.pred_dist(X_test)
    
    # Output R^2, RMSE and MAE on the test set
    score = r2_score(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mae = mean_absolute_error(y_test, y_pred)
    
    scorer = CoveragePlot()
    q, eq = scorer.compute(y_test, y_pred_d, num_bins=nb)
    
    return score, rmse, mae, q, eq
    

# Accuracy

# Contamination problem

In [3]:
N = 50
opt = Contamination(n=N, lamda=0.0001)
df = compute_empirical_coverage(opt, 10, test_set_size=0.2, nb=30)
df.to_csv("acc_CON_ss50.csv", index=False)

Exp 0


  warn(


Attempt Gibbs 1
Iter Gibbs 0
Iter Gibbs 100
Iter Gibbs 200
Iter Gibbs 300
Iter Gibbs 400
Iter Gibbs 500
Iter Gibbs 600
Iter Gibbs 700
Iter Gibbs 800
Iter Gibbs 900
Iter Gibbs 1000
Iter Gibbs 1100
Iter Gibbs 1200
Iter Gibbs 1300
Iter Gibbs 1400
Iter Gibbs 1500
Iter Gibbs 1600
Iter Gibbs 1700
Iter Gibbs 1800
Iter Gibbs 1900
[iter 0] loss=0.8798 val_loss=0.0000 scale=2.0000 norm=1.4328
[iter 100] loss=0.0889 val_loss=0.0000 scale=2.0000 norm=0.9209
[iter 200] loss=0.0113 val_loss=0.0000 scale=2.0000 norm=1.0451
[iter 300] loss=0.0085 val_loss=0.0000 scale=2.0000 norm=1.0952
[iter 400] loss=0.0085 val_loss=0.0000 scale=2.0000 norm=1.1030
[iter 0] loss=0.8798 val_loss=0.0000 scale=2.0000 norm=1.4328
[iter 100] loss=-0.3332 val_loss=0.0000 scale=2.0000 norm=0.9565
[iter 200] loss=-1.3522 val_loss=0.0000 scale=4.0000 norm=1.9846
[iter 300] loss=-3.3519 val_loss=0.0000 scale=4.0000 norm=1.9996
[iter 400] loss=-6.6519 val_loss=0.0000 scale=8.0000 norm=4.0000
Exp 1
Attempt Gibbs 1
Iter Gibbs 0
I

Iter Gibbs 1600
Iter Gibbs 1700
Iter Gibbs 1800
Iter Gibbs 1900
[iter 0] loss=0.9279 val_loss=0.0000 scale=2.0000 norm=1.4474
[iter 100] loss=0.4030 val_loss=0.0000 scale=2.0000 norm=1.0342
[iter 200] loss=0.3825 val_loss=0.0000 scale=2.0000 norm=1.1005
[iter 300] loss=0.3820 val_loss=0.0000 scale=2.0000 norm=1.1165
[iter 400] loss=0.3820 val_loss=0.0000 scale=2.0000 norm=1.1188
== Quitting at iteration / GRAD 471
[iter 0] loss=0.9279 val_loss=0.0000 scale=2.0000 norm=1.4474
[iter 100] loss=-0.2807 val_loss=0.0000 scale=2.0000 norm=0.9594
[iter 200] loss=-1.3408 val_loss=0.0000 scale=4.0000 norm=1.9872
[iter 300] loss=-3.3406 val_loss=0.0000 scale=4.0000 norm=1.9997
[iter 400] loss=-6.8606 val_loss=0.0000 scale=8.0000 norm=4.0000
Exp 9
Attempt Gibbs 1
Iter Gibbs 0
Iter Gibbs 100
Iter Gibbs 200
Iter Gibbs 300
Iter Gibbs 400
Iter Gibbs 500
Iter Gibbs 600
Iter Gibbs 700
Iter Gibbs 800
Iter Gibbs 900
Iter Gibbs 1000
Iter Gibbs 1100
Iter Gibbs 1200
Iter Gibbs 1300
Iter Gibbs 1400
Iter Gibbs

In [4]:
N = 200
opt = Contamination(n=N, lamda=0.0001)

df = compute_empirical_coverage(opt, 10, test_set_size=0.2, nb=30)

df.to_csv("acc_CON_ss200.csv", index=False)

Exp 0
Attempt Gibbs 1
Iter Gibbs 0
Iter Gibbs 100
Iter Gibbs 200
Iter Gibbs 300
Iter Gibbs 400
Iter Gibbs 500
Iter Gibbs 600
Iter Gibbs 700
Iter Gibbs 800
Iter Gibbs 900
Iter Gibbs 1000
Iter Gibbs 1100
Iter Gibbs 1200
Iter Gibbs 1300
Iter Gibbs 1400
Iter Gibbs 1500
Iter Gibbs 1600
Iter Gibbs 1700
Iter Gibbs 1800
Iter Gibbs 1900
[iter 0] loss=0.7873 val_loss=0.0000 scale=2.0000 norm=1.3413
[iter 100] loss=0.2622 val_loss=0.0000 scale=2.0000 norm=0.9626
[iter 200] loss=0.2301 val_loss=0.0000 scale=2.0000 norm=1.0339
[iter 300] loss=0.2294 val_loss=0.0000 scale=2.0000 norm=1.0535
[iter 400] loss=0.2293 val_loss=0.0000 scale=2.0000 norm=1.0564
== Quitting at iteration / GRAD 481
[iter 0] loss=0.7873 val_loss=0.0000 scale=1.0000 norm=0.6706
[iter 100] loss=0.1947 val_loss=0.0000 scale=1.0000 norm=0.4568
[iter 200] loss=-0.6467 val_loss=0.0000 scale=2.0000 norm=0.9006
[iter 300] loss=-1.5797 val_loss=0.0000 scale=2.0000 norm=0.9295
[iter 400] loss=-2.5123 val_loss=0.0000 scale=2.0000 norm=0.

Iter Gibbs 900
Iter Gibbs 1000
Iter Gibbs 1100
Iter Gibbs 1200
Iter Gibbs 1300
Iter Gibbs 1400
Iter Gibbs 1500
Iter Gibbs 1600
Iter Gibbs 1700
Iter Gibbs 1800
Iter Gibbs 1900
[iter 0] loss=0.8313 val_loss=0.0000 scale=2.0000 norm=1.3814
[iter 100] loss=0.2422 val_loss=0.0000 scale=2.0000 norm=0.9844
[iter 200] loss=0.1989 val_loss=0.0000 scale=2.0000 norm=1.0795
[iter 300] loss=0.1976 val_loss=0.0000 scale=2.0000 norm=1.1074
[iter 400] loss=0.1976 val_loss=0.0000 scale=2.0000 norm=1.1115
== Quitting at iteration / GRAD 494
[iter 0] loss=0.8313 val_loss=0.0000 scale=1.0000 norm=0.6907
[iter 100] loss=0.2385 val_loss=0.0000 scale=1.0000 norm=0.4662
[iter 200] loss=-0.5177 val_loss=0.0000 scale=2.0000 norm=0.8993
[iter 300] loss=-1.4378 val_loss=0.0000 scale=2.0000 norm=0.9193
[iter 400] loss=-2.3721 val_loss=0.0000 scale=2.0000 norm=0.9330
Exp 9
Attempt Gibbs 1
Iter Gibbs 0
Iter Gibbs 100
Iter Gibbs 200
Iter Gibbs 300
Iter Gibbs 400
Iter Gibbs 500
Iter Gibbs 600
Iter Gibbs 700
Iter Gibbs

In [5]:
N = 400
opt = Contamination(n=N, lamda=0.0001)

df = compute_empirical_coverage(opt, 10, test_set_size=0.2, nb=30)

df.to_csv("acc_CON_ss400.csv", index=False)

Exp 0
Attempt Gibbs 1
Iter Gibbs 0
Iter Gibbs 100
Iter Gibbs 200
Iter Gibbs 300
Iter Gibbs 400
Iter Gibbs 500
Iter Gibbs 600
Iter Gibbs 700
Iter Gibbs 800
Iter Gibbs 900
Iter Gibbs 1000
Iter Gibbs 1100
Iter Gibbs 1200
Iter Gibbs 1300
Iter Gibbs 1400
Iter Gibbs 1500
Iter Gibbs 1600
Iter Gibbs 1700
Iter Gibbs 1800
Iter Gibbs 1900
[iter 0] loss=0.7643 val_loss=0.0000 scale=2.0000 norm=1.3545
[iter 100] loss=0.1567 val_loss=0.0000 scale=2.0000 norm=0.9565
[iter 200] loss=0.1089 val_loss=0.0000 scale=2.0000 norm=1.0630
[iter 300] loss=0.1074 val_loss=0.0000 scale=2.0000 norm=1.0944
[iter 400] loss=0.1074 val_loss=0.0000 scale=2.0000 norm=1.0991
== Quitting at iteration / GRAD 498
[iter 0] loss=0.7643 val_loss=0.0000 scale=1.0000 norm=0.6773
[iter 100] loss=0.2677 val_loss=0.0000 scale=1.0000 norm=0.4630
[iter 200] loss=-0.1141 val_loss=0.0000 scale=1.0000 norm=0.4220
[iter 300] loss=-0.4878 val_loss=0.0000 scale=1.0000 norm=0.4051
[iter 400] loss=-0.8517 val_loss=0.0000 scale=1.0000 norm=0.

Iter Gibbs 1600
Iter Gibbs 1700
Iter Gibbs 1800
Iter Gibbs 1900
[iter 0] loss=0.7473 val_loss=0.0000 scale=2.0000 norm=1.3433
[iter 100] loss=0.1782 val_loss=0.0000 scale=2.0000 norm=0.9590
[iter 200] loss=0.1368 val_loss=0.0000 scale=2.0000 norm=1.0511
[iter 300] loss=0.1356 val_loss=0.0000 scale=2.0000 norm=1.0782
[iter 400] loss=0.1356 val_loss=0.0000 scale=2.0000 norm=1.0822
== Quitting at iteration / GRAD 492
[iter 0] loss=0.7473 val_loss=0.0000 scale=1.0000 norm=0.6716
[iter 100] loss=0.2464 val_loss=0.0000 scale=1.0000 norm=0.4586
[iter 200] loss=-0.1388 val_loss=0.0000 scale=1.0000 norm=0.4139
[iter 300] loss=-0.5902 val_loss=0.0000 scale=1.0000 norm=0.3924
[iter 400] loss=-0.9540 val_loss=0.0000 scale=1.0000 norm=0.3776
Exp 9
Attempt Gibbs 1
Iter Gibbs 0
Iter Gibbs 100
Iter Gibbs 200
Iter Gibbs 300
Iter Gibbs 400
Iter Gibbs 500
Iter Gibbs 600
Iter Gibbs 700
Iter Gibbs 800
Iter Gibbs 900
Iter Gibbs 1000
Iter Gibbs 1100
Iter Gibbs 1200
Iter Gibbs 1300
Iter Gibbs 1400
Iter Gibbs 

# TRASH - Accuracy More

In [16]:
N = 500
opt = Contamination(n=N, lamda=0.0001)
X = opt.X
y = opt.y
test_set_size = 0.2
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_set_size)

## GP

In [42]:
m1 = GPr()
m1.fit(X_train, y_train)
y_pred = m1.predict(X_test)
y_pred_d = m1.pred_dist(X_test)

# Output R^2, RMSE and MAE on the test set
score = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)

print("\nTest R^2: {:.3f}".format(score))
print("Test RMSE: {:.3f} units".format(rmse))
print("Test MAE: {:.3f} units".format(mae))


Test R^2: 0.754
Test RMSE: 0.227 units
Test MAE: 0.181 units


In [142]:
scorer = CVPPDiagram()
qs, Cqs = scorer.compute(y_test, y_pred_d.mean(), y_pred_d.std(), num_bins=20)
print(qs)
print(Cqs)

[0.         0.05263158 0.10526316 0.15789474 0.21052632 0.26315789
 0.31578947 0.36842105 0.42105263 0.47368421 0.52631579 0.57894737
 0.63157895 0.68421053 0.73684211 0.78947368 0.84210526 0.89473684
 0.94736842 1.        ]
[0.   0.08 0.16 0.26 0.35 0.45 0.51 0.59 0.68 0.76 0.81 0.84 0.85 0.87
 0.94 0.96 0.96 0.98 1.   1.  ]


In [44]:
scorer = AbsoluteMiscalibrationArea()
scorer.compute(y_test, y_pred_d.mean(), y_pred_d.var, num_bins=20)

0.11412973222530007

In [23]:
scorer = CoveragePlot()
scorer.compute(y_test, y_pred_d, num_bins=20)

(array([0.        , 0.05263158, 0.10526316, 0.15789474, 0.21052632,
        0.26315789, 0.31578947, 0.36842105, 0.42105263, 0.47368421,
        0.52631579, 0.57894737, 0.63157895, 0.68421053, 0.73684211,
        0.78947368, 0.84210526, 0.89473684, 0.94736842, 1.        ]),
 array([0.03, 0.09, 0.15, 0.23, 0.4 , 0.45, 0.51, 0.61, 0.69, 0.74, 0.81,
        0.82, 0.85, 0.91, 0.93, 0.96, 0.96, 0.99, 1.  , 1.  ]))

In [91]:
def compute_intervals(predictions, q):

    low = np.zeros(len(predictions))
    up = np.zeros(len(predictions))

    for i, pred in enumerate(predictions):
        sample = pred.sample(10**3)
        low[i] = np.quantile(sample, (1.0-q)/2.0)
        up[i]  = np.quantile(sample, 1.0 - (1.0-q)/2.0)

    return low, up

def empirical_coverage(y_true, y_pred_d, q):

    low, up = compute_intervals(y_pred_d, q)
    comp = np.logical_and((y_true < up) , (y_true > low))

    return np.mean(comp)

def compute(y_true, y_pred_d, num_bins=10):
    qs = np.linspace(0, 1, num_bins)
    Cqs = np.empty(qs.shape)
    for ix, q in enumerate(qs):
        Cqs[ix] = empirical_coverage(y_true, y_pred_d, q)

    return qs, Cqs


In [89]:
compute(y_test, y_pred_d, num_bins=10)

KeyboardInterrupt: 

In [128]:
empirical_coverage(y_test, y_pred_d, 0.2)

0.33

In [138]:
import scipy.stats

low = np.zeros(len(y_pred_d))
up = np.zeros(len(y_pred_d))
q = 0.5

for i in range(len(y_pred_d)): 

    m = y_pred_d[i].mean()
    st = y_pred_d[i].std()

    low[i] = scipy.stats.norm.ppf( (1 - q)/2, loc=m, scale=st)
    up[i]  = scipy.stats.norm.ppf( 1 - (1 - q)/2, loc=m, scale=st)



In [139]:
np.mean(np.logical_or(y_test < low, y_test > up))

0.22

In [140]:
np.mean(np.logical_and(y_test > low, y_test<up))

0.78

In [84]:
compute_intervals(y_pred_d[:2], 0.9)

(array([-0.07610014,  1.40723512]), array([1.57532338, 2.53376406]))

## BOCS

In [14]:
m2 = LinReg(nVars=25, order=2)
m2.fit(X_train, y_train)
y_pred = m2.predict(X_test)
y_pred_d = m2.pred_dist(X_test)

# Output R^2, RMSE and MAE on the test set
score = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)

print("\nTest R^2: {:.3f}".format(score))
print("Test RMSE: {:.3f} units".format(rmse))
print("Test MAE: {:.3f} units".format(mae))

Attempt Gibbs 1
Iter Gibbs 0
Iter Gibbs 100
Iter Gibbs 200
Iter Gibbs 300
Iter Gibbs 400
Iter Gibbs 500
Iter Gibbs 600
Iter Gibbs 700
Iter Gibbs 800
Iter Gibbs 900
Iter Gibbs 1000
Iter Gibbs 1100
Iter Gibbs 1200
Iter Gibbs 1300
Iter Gibbs 1400
Iter Gibbs 1500
Iter Gibbs 1600
Iter Gibbs 1700
Iter Gibbs 1800
Iter Gibbs 1900

Test R^2: 0.364
Test RMSE: 0.428 units
Test MAE: 0.366 units


In [15]:
scorer = CoveragePlot()
scorer.compute(y_test, y_pred_d)

(array([0.        , 0.11111111, 0.22222222, 0.33333333, 0.44444444,
        0.55555556, 0.66666667, 0.77777778, 0.88888889, 1.        ]),
 array([0. , 0.1, 0.2, 0.2, 0.4, 0.4, 0.6, 0.7, 0.9, 1. ]))

## NGBoost - LinCV

In [26]:
learner = LassoCV(cv=5)
learner = DecisionTreeRegressor(criterion='friedman_mse', max_depth=5)  
m3 = NGBRegressor(Base=learner)
m3.fit(X_train, y_train)

y_pred = m3.predict(X_test)

# Output R^2, RMSE and MAE on the test set
score = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)

print("\nTest R^2: {:.3f}".format(score))
print("Test RMSE: {:.3f} units".format(rmse))
print("Test MAE: {:.3f} units".format(mae))

[iter 0] loss=0.7373 val_loss=0.0000 scale=1.0000 norm=0.6722
[iter 100] loss=0.2726 val_loss=0.0000 scale=1.0000 norm=0.4678
[iter 200] loss=-0.0798 val_loss=0.0000 scale=1.0000 norm=0.4156
[iter 300] loss=-0.4203 val_loss=0.0000 scale=1.0000 norm=0.3880
[iter 400] loss=-0.7433 val_loss=0.0000 scale=1.0000 norm=0.3668

Test R^2: 0.674
Test RMSE: 0.315 units
Test MAE: 0.255 units


In [27]:
y_pred_d = m3.pred_dist(X_test)
scorer = CVPPDiagram()
qs, Cqs = scorer.compute(y_test, y_pred_d.mean(), y_pred_d.std(), num_bins=20)
print(Cqs)

[0.   0.   0.02 0.03 0.04 0.06 0.07 0.09 0.14 0.17 0.2  0.24 0.24 0.28
 0.31 0.36 0.39 0.45 0.51 1.  ]


In [28]:
scorer = AbsoluteMiscalibrationArea()
scorer.compute(y_test, y_pred_d.mean(), y_pred_d.std(), num_bins=20)

0.286359649122807

In [29]:
scorer = CoveragePlot()
scorer.compute(y_test, y_pred_d)

(array([0.        , 0.11111111, 0.22222222, 0.33333333, 0.44444444,
        0.55555556, 0.66666667, 0.77777778, 0.88888889, 1.        ]),
 array([0.  , 0.03, 0.04, 0.08, 0.16, 0.21, 0.26, 0.35, 0.43, 0.81]))

In [24]:
y_pred_d = m3.pred_dist(X_test)

y_pred_d[0].sample(10000).var()

3.5141965434704472e-12

# BQP problem

In [None]:
N = 200
opt = BQP(n=N)
X = opt.X
y = opt.y + 10.0
test_set_size = 0.2
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_set_size, random_state=0)

## NGBoost - LinCV

In [None]:
linCV = LassoCV(cv=5)
lin = LinearRegression()
dec = DecisionTreeRegressor(criterion='friedman_mse', max_depth=5)
xgb = XGBRegressor()
m3 = NGBRegressor(Base=lin)
m3 = NGBRegressor(Base=xgb, learning_rate=0.1, verbose_eval=5, n_estimators=30, Dist=Normal, Score=LogScore)

m3.fit(X_train, y_train)

y_pred = m3.predict(X_test)

# Output R^2, RMSE and MAE on the test set
score = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)

print("\nTest R^2: {:.3f}".format(score))
print("Test RMSE: {:.3f} units".format(rmse))
print("Test MAE: {:.3f} units".format(mae))

In [None]:
import matplotlib.pyplot as plt
plt.scatter(y_test, y_pred)

## GP

In [None]:
m1 = GPr()
m1.fit(X_train, y_train)
y_pred = m1.predict(X_test)

# Output R^2, RMSE and MAE on the test set
score = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)

print("\nTest R^2: {:.3f}".format(score))
print("Test RMSE: {:.3f} units".format(rmse))
print("Test MAE: {:.3f} units".format(mae))

In [None]:
import matplotlib.pyplot as plt
plt.scatter(y_test, y_pred)

## BOCS

In [None]:
m2 = LinReg(nVars=10, order=2,  nGibbs=500)
m2.fit(X_train, y_train)
y_pred = m2.predict(X_test)

# Output R^2, RMSE and MAE on the test set
score = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)

print("\nTest R^2: {:.3f}".format(score))
print("Test RMSE: {:.3f} units".format(rmse))
print("Test MAE: {:.3f} units".format(mae))