In [6]:
import numpy as np

# Normalize the data.
from sklearn import preprocessing
from numpy.linalg import cholesky, det, lstsq
from scipy.optimize import minimize
import scipy.spatial.distance as spdist

def pass_arg(nsim, tr_size):

    # Compute the RMSE
    def root_mean_squared_error(y_true, y_pred):
        return np.sqrt(np.mean((y_pred-y_true)**2))
    
    print("tr_Size:",tr_size)
    # Load labeled data
    data = np.loadtxt('../data/labeled_data.dat')
    x_labeled = data[:, :2].astype(np.float64) # -2 because we do not need porosity predictions
    y_labeled = data[:, -2:-1].astype(np.float64) # dimensionless bond length and porosity measurements

    # normalize dataset with MinMaxScaler
    scaler = preprocessing.MinMaxScaler(feature_range=(0.0, 1.0))
    x_labeled = scaler.fit_transform(x_labeled)
    # y_labeled = scaler.fit_transform(y_labeled)

    tr_size = int(tr_size)

    # train and test data
    trainX, trainY = x_labeled[:tr_size,:], y_labeled[:tr_size]
    testX, testY = x_labeled[30:,:], y_labeled[30:]
    
    def covSEard(hyp=None, x=None, z=None):
        ''' Squared Exponential covariance function with Automatic Relevance Detemination
         (ARD) distance measure. The covariance function is parameterized as:

         k(x^p,x^q) = sf2 * exp(-(x^p - x^q)' * inv(P) * (x^p - x^q)/2)

         where the P matrix is diagonal with ARD parameters ell_1^2,...,ell_D^2, where
         D is the dimension of the input space and sf2 is the signal variance.

         The hyperparameters are:

         hyp = [ log(ell_1)
                 log(ell_2)
                 ...
                 log(ell_D)
                 log(sqrt(sf2)) ]
        '''

        [n, D] = x.shape
        ell = 1/np.array(hyp[0:D])        # characteristic length scale
        
        
        sf2 = np.array(hyp[D])**2         # signal variance
        tmp = np.dot(np.diag(ell),x.T).T
        A = spdist.cdist(np.dot(np.diag(ell),x.T).T, np.dot(np.diag(ell),z.T).T, 'sqeuclidean') # cross covariances

        A = sf2*np.exp(-0.5*A)  

        return A


    def posterior_predictive(X_s, X_train, Y_train, l1=.1, l2=.1, sigma_f=.1, sigma_y=1e-5):
        '''  
        Computes the suffifient statistics of the GP posterior predictive distribution 
        from m training data X_train and Y_train and n new inputs X_s.

        Args:
            X_s: New input locations (n x d).
            X_train: Training locations (m x d).
            Y_train: Training targets (m x 1).
            l: Kernel length parameter.
            sigma_f: Kernel vertical variation parameter.
            sigma_y: Noise parameter.

        Returns:
            Posterior mean vector (n x d) and covariance matrix (n x n).
        '''


        K = covSEard(hyp=[l1,l2,sigma_f], x=X_train, z=X_train) + sigma_y**2 * np.eye(len(X_train))
        K_s = covSEard(hyp=[l1,l2,sigma_f], x=X_train, z=X_s)
        K_ss = covSEard(hyp=[l1,l2,sigma_f], x=X_s, z=X_s)  + 1e-8 * np.eye(len(X_s))
#         K_inv = inv(K)
        K_inv = np.linalg.pinv(K)
    
        # Equation (4)
        mu_s = K_s.T.dot(K_inv).dot(Y_train)

        # Equation (5)
        cov_s = K_ss - K_s.T.dot(K_inv).dot(K_s)
        
        return mu_s, cov_s


    def nll_fn(X_train, Y_train, noise=0, naive=False):
        '''
        Returns a function that computes the negative log marginal
        likelihood for training data X_train and Y_train and given 
        noise level.

        Args:
            X_train: training locations (m x d).
            Y_train: training targets (m x 1).
            noise: known noise level of Y_train.
            naive: if True use a naive implementation of Eq. (7), if 
                   False use a numerically more stable implementation. 

        Returns:
            Minimization objective.
        '''

        def nll_stable(theta):
            # Numerically more stable implementation of Eq. (7) as described
            # in http://www.gaussianprocess.org/gpml/chapters/RW2.pdf, Section
            # 2.2, Algorithm 2.1.
            K = covSEard(hyp=[theta[0],theta[1],theta[2]], x=X_train, z=X_train) + \
                theta[3]**2 * np.eye(len(X_train))
            
            K += 1e-6 * np.eye(*K.shape)
            L = cholesky(K)
            return np.sum(np.log(np.diagonal(L))) + \
                   0.5 * Y_train.T.dot(lstsq(L.T, lstsq(L, Y_train)[0])[0]) + \
                   0.5 * len(X_train) * np.log(2*np.pi)

        if naive:
            return nll_naive
        else:
            return nll_stable

    
    # Optimization
    res = minimize(nll_fn(trainX, trainY), x0 = [.1, .1, .1, 1e-3], 
                   bounds=((1e-5, None), (1e-5, None), (1e-5, None), (1e-7, None)),
                    method='L-BFGS-B')
    
#     print(f'After parameter optimization: l1={res.x[0]:.5f} l2={res.x[1]:.5f} sigma_f={res.x[2]:.5f}')
#     print(np.exp(res.x[0]),np.exp(res.x[1]), np.exp(res.x[2]))
    mu_s, cov_s = posterior_predictive(testX, trainX, trainY, *res.x)
    
    RMSE = []
    for ii in range(int(nsim)):
        samples = np.random.multivariate_normal(mu_s.ravel(), cov_s, 1)
        RMSE.append(root_mean_squared_error(testY, samples))
        
        print("RMSE:", root_mean_squared_error(testY, samples))

#     return samples, RMSE
    return RMSE

In [7]:
mean_rmses=[]
std_rmses=[]
for ii in ([5,10,15,20,30]):
    test_rmse = pass_arg(50, ii)
    mean_rmse = np.mean(test_rmse)
    std_rmse = np.std(test_rmse)
    mean_rmses.append(mean_rmse)
    std_rmses.append(std_rmse)

tr_Size: 5




RMSE: 0.027027905901676058
RMSE: 0.01714394393147273
RMSE: 0.018545586619206814
RMSE: 0.026281620536819317
RMSE: 0.025066555641211057
RMSE: 0.014465376641390918
RMSE: 0.018769846549247193
RMSE: 0.014526540341766899
RMSE: 0.016365160727556394
RMSE: 0.016511860479849906
RMSE: 0.027987180153448896
RMSE: 0.018958127624924662
RMSE: 0.014730910939789364
RMSE: 0.018437527328018635
RMSE: 0.02318305687190542
RMSE: 0.026344662196104568
RMSE: 0.023858438584830617
RMSE: 0.01884229440746558
RMSE: 0.02437960781678391
RMSE: 0.01803862394141083
RMSE: 0.02531082362141573
RMSE: 0.021258782124905964
RMSE: 0.015511124577019121
RMSE: 0.01570715995258283
RMSE: 0.01668747656103418
RMSE: 0.018590139739929908
RMSE: 0.01783084296255611
RMSE: 0.020174365204823432
RMSE: 0.024397798088720193
RMSE: 0.01850981121813804
RMSE: 0.01698690115194551
RMSE: 0.01675384917754963
RMSE: 0.017792831295161696
RMSE: 0.0179966263151936
RMSE: 0.014531580018729437
RMSE: 0.019609596880988577
RMSE: 0.025140387281164363
RMSE: 0.0168600



RMSE: 0.03227088308838807
RMSE: 0.02133180417634371
RMSE: 0.018913413770027834
RMSE: 0.030405486445009264
RMSE: 0.02235869523776532
RMSE: 0.03824211489353301
RMSE: 0.024956249777480552
RMSE: 0.02984183520811936
RMSE: 0.027122073103034387
RMSE: 0.02229559545815276
RMSE: 0.024493398678648238
RMSE: 0.04001555964434458
RMSE: 0.023897530012853208
RMSE: 0.02629964105718791
RMSE: 0.027943782111428936
RMSE: 0.0263037619797892
RMSE: 0.020697116056345593
RMSE: 0.020478312632853473
RMSE: 0.025183182676376266
RMSE: 0.03488966899576814
RMSE: 0.03576241716274692
RMSE: 0.025809197715454195
RMSE: 0.027308596868947474
RMSE: 0.026241134852645658
RMSE: 0.023279153123987043
RMSE: 0.027597441154120488
RMSE: 0.03723843030490467
RMSE: 0.024396961212250403
RMSE: 0.0280481701816023
RMSE: 0.026748386733876854
RMSE: 0.025160669045666526
RMSE: 0.028467517987904627
RMSE: 0.025134615369947787
RMSE: 0.024885451550663032
RMSE: 0.030954349471669796
RMSE: 0.028012328081711998
RMSE: 0.024887386622501756
RMSE: 0.02909631



RMSE: 0.01999862749495391
RMSE: 0.020491960875719382
RMSE: 0.019202944881323188
RMSE: 0.020296122773223173
RMSE: 0.01938416757152528
RMSE: 0.02075265807153201
RMSE: 0.020187516161089013
RMSE: 0.020264117032933238
RMSE: 0.01920952794989154
RMSE: 0.02039569026944138
RMSE: 0.020964649666218693
RMSE: 0.02226380774969205
RMSE: 0.020342440148583753
RMSE: 0.022529368447364116
RMSE: 0.022041328655954828
RMSE: 0.020979564789495763
RMSE: 0.0201535273499614
RMSE: 0.021004962882560332
RMSE: 0.021004776439301775
RMSE: 0.020198295957516266
RMSE: 0.01879535075504482
RMSE: 0.01837057407131159
RMSE: 0.019531245214521233
RMSE: 0.021729067646561497
RMSE: 0.020747687110352266
RMSE: 0.019195461046746198
RMSE: 0.022388398207899573
RMSE: 0.02357938865605702
RMSE: 0.023547658796892145
RMSE: 0.020636281453993235
RMSE: 0.021382951024947413
RMSE: 0.022102507076600678
RMSE: 0.01952984418752677
RMSE: 0.024052823956076542
RMSE: 0.018707351180565737
RMSE: 0.0211352833846946
RMSE: 0.01944673005891606
RMSE: 0.02242617



RMSE: 0.020910482978315322
RMSE: 0.018977195873950654
RMSE: 0.018366764714599408
RMSE: 0.020163062635177
RMSE: 0.020149532464518557
RMSE: 0.02022074231543613
RMSE: 0.02081351214209011
RMSE: 0.020057549818527188
RMSE: 0.020840944907328934
RMSE: 0.021122290941176066
RMSE: 0.019702462433910946
RMSE: 0.028095144408096993
RMSE: 0.01995904415610856
RMSE: 0.01956207838312998
RMSE: 0.020865922344448173
RMSE: 0.017160756055775772
RMSE: 0.01957828621409918
RMSE: 0.02075798422178368
RMSE: 0.02195502413998216
RMSE: 0.02851931234584972
RMSE: 0.02094272341420805
RMSE: 0.022478933643817832
RMSE: 0.023542850545723
RMSE: 0.02018750313184136
RMSE: 0.02308758580492878
RMSE: 0.02364681357582168
RMSE: 0.02147952660446386
RMSE: 0.024328254021404226
RMSE: 0.020375608792353287
RMSE: 0.026045690741460646
RMSE: 0.020012181341121323
RMSE: 0.01860220214890335
RMSE: 0.02274005018592045
RMSE: 0.019376319928583453
RMSE: 0.02236202076259398
RMSE: 0.022692670315440266
RMSE: 0.019885893153351958
RMSE: 0.021022747927306



RMSE: 0.0201648547292034
RMSE: 0.020604764218976432
RMSE: 0.021426451107107895
RMSE: 0.01835456384587132
RMSE: 0.019900471171553982
RMSE: 0.019450823626707477
RMSE: 0.021155971165086586
RMSE: 0.02089063626834277
RMSE: 0.02045450001062229
RMSE: 0.02238353102478848
RMSE: 0.01940303581187608
RMSE: 0.020266158298408728
RMSE: 0.020268972556636823
RMSE: 0.019014745829929076
RMSE: 0.02072353910581799
RMSE: 0.019276093902063324
RMSE: 0.018478313893957602
RMSE: 0.020365201056754198
RMSE: 0.020658346792378313
RMSE: 0.020286956995121553
RMSE: 0.019124536700807267
RMSE: 0.019167671701481627
RMSE: 0.020769374896299306
RMSE: 0.019144592241191537
RMSE: 0.022150920992936026
RMSE: 0.02054584345806228
RMSE: 0.020660877803744013
RMSE: 0.02053572494428795
RMSE: 0.020753557024939646
RMSE: 0.01788476065668469
RMSE: 0.02031066215268216
RMSE: 0.0180626441010984
RMSE: 0.018905675307131765
RMSE: 0.017990957823301144
RMSE: 0.01952103912726786
RMSE: 0.019280905469486204
RMSE: 0.018954394367180417
RMSE: 0.02014029

In [8]:
mean_rmses

[0.01986586605177601,
 0.026897626171632667,
 0.020921003864365052,
 0.021282411381359514,
 0.019950627477809842]

In [9]:
std_rmses

[0.003690854091086057,
 0.004739962702273939,
 0.0014428756784224052,
 0.0021704610408837953,
 0.001011121563293683]

In [22]:
Xx = np.random.uniform(size=(1, 2))
ss, rmse = pass_arg(Xx, 100, 30)

tr_Size: 30




RMSE: 0.020927216653475734
RMSE: 0.018441895119178184
RMSE: 0.019271874735847325
RMSE: 0.019980323115343465
RMSE: 0.020794386542645333
RMSE: 0.019890442340386325
RMSE: 0.020534284069476876
RMSE: 0.018888610532204704
RMSE: 0.021266036277031938
RMSE: 0.020651745011600157
RMSE: 0.0205613334349761
RMSE: 0.01910176355986001
RMSE: 0.018871909291490303
RMSE: 0.01986569859217515
RMSE: 0.01967895101831376
RMSE: 0.01872113803789104
RMSE: 0.02077159112523238
RMSE: 0.019381342147298242
RMSE: 0.01987038397350114
RMSE: 0.018772186965917
RMSE: 0.019157390766193425
RMSE: 0.0205449178007946
RMSE: 0.022111427056225763
RMSE: 0.019713421343422424
RMSE: 0.01840591548761856
RMSE: 0.01979945357780922
RMSE: 0.02031470664153936
RMSE: 0.018441229878817803
RMSE: 0.021041937567796595
RMSE: 0.020289436292011652
RMSE: 0.020731274708010332
RMSE: 0.01906711227001113
RMSE: 0.020848201042930073
RMSE: 0.02098213361843857
RMSE: 0.017446400055922972
RMSE: 0.020488030857892546
RMSE: 0.021296020927538708
RMSE: 0.01929681131

In [23]:
np.mean(rmse)

0.019855815085648133

In [None]:
Xx = np.random.uniform(size=(3, 2))
ss = pass_arg(Xx, 1, 30)
# print(ss)