In [1]:
import numpy as np
import numpy.polynomial as P
import scipy as sp
from matplotlib import pyplot as plt
from tqdm import tqdm
#from sklearn.preprocessing import PolynomialFeatures
from multiprocessing import Pool
import multiprocessing
import ZVnbrosse
from potentials import GaussPotential,GaussMixture,GausMixtureIdent,GausMixtureSame,potentialRegression
from samplers import MCMC_sampler,Generate_train,ULA_light
from baselines import set_function,construct_ESVM_kernel,GenerateSigma,standartize
from martingale import approx_q,test_traj
from optimize import Run_eval_test,optimize_parallel_new 
from utils import *
import pandas as pd
import copy

In [2]:
dataset = "swiss" # Switch between "eeg" and "susy" 
intercept = False # Do we include the intercept
degree = 1 #order of CV's: 1 or 2 expected
typ = "logistic" #logistic or probit are expected

sampler = {"sampler":"ULA","burn_type":"full","main_type":"full"} # Sampling method

# Switch between "posterior_prob_point", "posterior_prob_mean", "posterior_prob_variance", "posterior_mean"
if typ == "logistic":
    f_type = "sum"
elif typ == "probit":
    f_type = "posterior_prob_mean_probit"

### Loading data

In [3]:
if (dataset == "swiss"):
    data = pd.read_csv("./data/swiss.csv",header=None)
    outliers_inds = np.array([])
    Y = data.iloc[:,-1]
    X = data.iloc[:,:-1]
elif (dataset == "eeg"):   
    data = pd.read_csv("../iZAV_code/data/eeg.csv",header=None)
    outliers_inds = np.array([13179,11509,898,10386])
    Y = data.iloc[:,-1]
    X = data.iloc[:,:-1]
elif (dataset == "pima"):
    data = pd.read_csv("../iZAV_code/data/pima.csv",header=None)
    outliers_inds = np.array([])
    Y = data.iloc[:,-1]
    X = data.iloc[:,:-1]
elif (dataset == "susy"): 
    data = pd.read_csv("data/susy.csv",header=None)
    outliers_inds = np.array([267630])
    Y = data.iloc[:,0]
    X = data.iloc[:,1:]

### Preprocessing data

In [4]:
# Removing the outliers
if (outliers_inds.size!=0):
    X_processed = np.delete(np.asarray(X),outliers_inds,0)
    mask = np.ones(len(Y),dtype = bool)
    mask[outliers_inds] = False
    Y_processed = Y[mask]
    Y_processed = np.asarray(Y_processed)
    X_processed = np.asarray(X_processed)
else:
    Y_processed = np.asarray(Y)
    X_processed = np.asarray(X)

In [5]:
#X_train,X_train = standartize(X_processed,X_processed,intercept=intercept)
#Y_train = Y_processed

In [6]:
X_train = copy.deepcopy(X_processed)
Y_train = copy.deepcopy(Y_processed)
#poor man's normalization
X_train = np.dot(X_train - np.mean(X_train, axis=0), np.diag(1./np.std(X_train, axis=0)))

In [7]:
print(X_train)
print(Y_train)

[[-0.25558323  2.43945206  2.83704322 -0.28973195]
 [-0.78804829 -1.17043716 -0.63638092 -0.91430383]
 [-0.25558323 -1.17043716 -0.63638092 -0.49792258]
 [-0.25558323 -1.17043716 -0.88448265 -1.33068508]
 [ 0.27688183 -1.44812094 -0.63638092  0.6818243 ]
 [ 2.14050953  1.88408449  1.34843287 -0.28973195]
 [ 1.60804448 -1.72580473 -0.63638092 -1.05309758]
 [-1.05428082 -1.44812094 -1.87688955 -1.5388757 ]
 [ 0.0106493  -2.00348852 -0.63638092 -0.84490695]
 [ 0.80934689  0.77334934  0.85222942 -0.1509382 ]
 [ 1.07557942  0.77334934  0.85222942 -1.05309758]
 [ 0.54311436 -1.72580473 -0.88448265 -1.19189133]
 [ 0.80934689  1.88408449 -0.88448265 -1.05309758]
 [-0.52181576 -1.17043716 -0.63638092 -1.19189133]
 [ 0.54311436 -0.61506959 -0.63638092 -1.19189133]
 [-1.05428082 -0.89275337 -0.3882792  -0.08154133]
 [-0.78804829 -0.61506959  0.35602597 -0.84490695]
 [ 0.27688183 -0.61506959 -0.63638092 -0.28973195]
 [ 0.80934689 -1.44812094 -0.88448265 -1.40008195]
 [-0.52181576  0.21798177 -0.14

### Creating potential

In [9]:
# Creating potential
Cur_pot = potentialRegression(Y_train, X_train, typ, print_info = True)
d = Cur_pot.d
print(d)

4


### Data generation

In [10]:
N_burn = 1*10**3 # Burn in period
N_train = 5*10**4 # Number of samples on which we optimize
N_test = 1*10**3 # Number of samples
step = 0.1 # Step size
n_traj_train = 5
n_traj_test = 24 # Number of independent MCMC trajectories for test
#f_type = "sum"
K_max = 2 #max degree of Hermite polynomial
S_max = 2 #max degree of polynomial during regression stage
lag = 20 #maximal lag order
b_n_train = 10 #lag-window size
b_n_test = int(np.round(N_test**(0.33)))
print(b_n_test)
degree = 2
x0 = np.zeros(d, dtype = float)
fixed_start = True

10


### Generate data

In [11]:
r_seed = 777
traj = np.zeros((n_traj_train,N_train,d),dtype = float)
for i in range(n_traj_train):
    cur_traj = ULA_light(r_seed+i,Cur_pot,step, N_burn, N_train, d, return_noise = False, x0 = x0, fixed_start = fixed_start)
    traj[i] = copy.deepcopy(cur_traj)
print(traj.shape)
#traj = np.expand_dims(traj, axis=0)
#rint(traj.shape)

(5, 50000, 4)


In [12]:
inds_arr = np.array([0]) # Taking the second index (not intercept)
params = None    
f_vals = set_function(f_type,traj,inds_arr,params) 
#f_vals = traj[:,:,0]
#f_vals = np.expand_dims(f_vals, axis=2)
print(f_vals.shape)

(5, 50000, 1)


### Evaluate baselines (EVM and ESVM methods)

In [13]:
if sampler["sampler"] == "ULA":
    res = Generate_train(1, sampler, Cur_pot, step, N_burn, N_train, d)
    res = np.asarray(res)
    traj_evm,traj_grad = res[:,0,:,:],res[:,1,:,:]
else:
    raise "You should use ULA!"

ncores =  12


In [14]:
print(traj.shape)
inds_arr = np.array([0])#Taking the second index
params = None
f_vals_evm = set_function(f_type,traj_evm,inds_arr,params)

(5, 50000, 4)


In [15]:
print(f_vals_evm)
print(f_vals)

[[[5.86418853]
  [1.61118385]
  [8.07010493]
  ...
  [7.16677461]
  [4.44797531]
  [6.19278468]]]
[[[ 0.        ]
  [15.75033592]
  [12.71266701]
  ...
  [ 7.16677461]
  [ 4.44797531]
  [ 6.19278468]]

 [[ 0.        ]
  [18.00881206]
  [14.61843094]
  ...
  [ 4.89078705]
  [ 3.0732205 ]
  [ 5.72044547]]

 [[ 0.        ]
  [16.61485678]
  [12.8697776 ]
  ...
  [ 4.3657963 ]
  [ 1.55385609]
  [ 9.25187056]]

 [[ 0.        ]
  [16.61055218]
  [13.9851717 ]
  ...
  [ 4.67871897]
  [ 5.8192349 ]
  [ 7.89063468]]

 [[ 0.        ]
  [15.94480239]
  [11.9729956 ]
  ...
  [ 3.64900268]
  [ 2.64915295]
  [ 7.76775168]]]


In [16]:
W_train_spec = construct_ESVM_kernel(N_train,b_n_train) #weight matrix for train
W_test_spec = construct_ESVM_kernel(N_test,b_n_test) #weight matrix for test
opt_structure_train = {
    "W":W_train_spec,
    "n_restarts": 3, # Number of restarts during optimization,
    "sigma": 1.0, # Deviation of starting points
    "tol": 1e-5, # Tolerance (for the norm of gradient)
    "alpha": 0.0, # Ridge penalty for 2nd order control functionals
    "beta": 10000.0 # smoothing parameter in the softmax
}
methods = ["ESVM","EVM"]

In [17]:
coef_dict = optimize_parallel_new(degree,inds_arr,f_vals_evm,traj_evm,traj_grad,opt_structure_train,methods)
print(coef_dict)

2 degree optimization terminated succesfully
Jacobian matrix at termination: 
[ 7.95308328e-09  1.38607916e-08  3.67231255e-09  1.14090530e-09
 -2.57578029e-08 -3.54702211e-09  2.76165481e-08  3.77193953e-08
 -3.54702211e-09 -3.36968217e-09  2.10648457e-08  1.47192501e-08
  2.76165481e-08  2.10648457e-08  3.06880775e-08 -5.38894609e-10
  3.77193953e-08  1.47192501e-08 -5.38894609e-10 -1.70323040e-08]
2 degree optimization terminated succesfully
Jacobian matrix at termination: 
[-3.06460437e-08  1.31345100e-08  1.41270642e-08  3.47587115e-08
 -1.62606039e-07 -1.78902916e-07 -1.45801285e-07 -1.11254553e-08
 -1.78902916e-07 -1.67636199e-07 -1.16133364e-07  6.33199611e-08
 -1.45801285e-07 -1.16133364e-07 -8.00510431e-08  8.54476185e-08
 -1.11254553e-08  6.33199611e-08  8.54476185e-08  1.87343382e-07]
{'ESVM': array([[ 0.07367765, -0.04268702, -0.06515394, -0.00942727,  0.02130477,
        -0.00202012,  0.00161007, -0.00151837, -0.00202012,  0.03007818,
         0.00161021,  0.01279422,  0.

In [18]:
#Create a dictionary and put respective matrices into it
test_params = {
    "W":W_test_spec,
    "step":step,
    "burn_in":N_burn,
    "n_test":N_test,
    "dim":d
}

nbcores = multiprocessing.cpu_count()
trav = Pool(nbcores)
res = trav.starmap(Run_eval_test, [(i,degree,sampler,methods,inds_arr,Cur_pot,test_params,coef_dict,params,f_type) for i in range (n_traj_test)])
trav.close()

In [19]:
methods_enh = ['Vanilla'] + methods
print(methods_enh)
ints_result = {key: [] for key in methods_enh}
vars_result = {key: [] for key in methods_enh}

['Vanilla', 'ESVM', 'EVM']


In [20]:
for i in range(len(res)):
    for j in range(len(methods_enh)):
        ints_result[methods_enh[j]].append(res[i][0][methods_enh[j]][0])
        vars_result[methods_enh[j]].append(res[i][1][methods_enh[j]][0])
for key in methods_enh:
    ints_result[key] = np.asarray(ints_result[key])
    vars_result[key] = np.asarray(vars_result[key])

### Bernoulli:: Optimize coefficients by solving regression with polynomial features

In [21]:
#polynomial coefficients
coefs_poly = approx_q(traj,f_vals,n_traj_train,lag,S_max)
print(coefs_poly.shape)

dimension =  50000
(250000, 15)
(249995, 15)
(249990, 15)
(249985, 15)
(249980, 15)
(249975, 15)
(249970, 15)
(249965, 15)
(249960, 15)
(249955, 15)
(249950, 15)
(249945, 15)
(249940, 15)
(249935, 15)
(249930, 15)
(249925, 15)
(249920, 15)
(249915, 15)
(249910, 15)
(249905, 15)
(20, 15)


In [None]:
from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures(S_max)
X_features = poly.fit_transform(traj[0])
poly_1 = PolynomialFeatures(1)
X_features_1 = poly_1.fit_transform(traj[0])

In [22]:
print(X_features.shape)

NameError: name 'X_features' is not defined

Test our regressors

In [None]:
cur_lag = 1
N_pts = 100
plt.figure(figsize=(10, 10))
plt.title("Testing regression model",fontsize=20)
plt.plot(f_vals[0,cur_lag:N_pts,0],color='r',label='true function')
plt.plot(X_features[:(N_pts-cur_lag),:] @ coefs_poly[cur_lag,:],color='g',label = '2-order approximation')
plt.plot(X_features_1[:(N_pts-cur_lag),:] @ coefs_poly_1[cur_lag,:],color='b',label = '1-order approximation')
plt.legend(loc = 'lower right',fontsize = 16)
plt.show()

In [None]:
print("4-nd order error: ",np.linalg.norm(f_vals[0,cur_lag:N_pts,0]-X_features[:(N_pts-cur_lag),:] @ coefs_poly[cur_lag,:]))
print("1-st order error: ",np.linalg.norm(f_vals[0,cur_lag:N_pts,0]-X_features_1[:(N_pts-cur_lag),:] @ coefs_poly_1[cur_lag,:]))

In [None]:
test_seed = 1453
nbcores = multiprocessing.cpu_count()
trav = Pool(nbcores)
res = trav.starmap(test_traj, [(Cur_pot,coefs_poly,step,test_seed+i,lag,K_max,S_max,N_burn,N_test,d,f_type,inds_arr,params, x0, fixed_start) for i in range (n_traj_test)])
trav.close()

[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]


In [None]:
res_new = np.asarray(res)
print(res_new.shape)

In [None]:
vars_vanilla = np.var(res_new[:,0,:],axis = 0)
vars_adj = np.var(res_new[:,1,:],axis = 0)
#print(vars_vanilla)
#print(vars_adj)
print(np.mean(vars_adj[1:]/vars_vanilla[1:]))

### Comparison plots

In [None]:
title = ""
labels = ['Vanilla\n ULA', 'ULA \nwith MDCV', 'ULA \nwith EVM','ULA\nwith ESVM']

In [None]:
data = [ints_result['Vanilla'][:,0],res_new[:,1,-1],ints_result['EVM'][:,0],ints_result['ESVM'][:,0]] 
boxplot_ind(data, title, labels)

In [None]:
title = ""
labels = ['ULA \nwith MDCV', 'ULA \nwith EVM','ULA\nwith ESVM']

In [None]:
data = [res_new[:,1,-1],ints_result['EVM'][:,0],ints_result['ESVM'][:,0]] 
boxplot_ind(data, title, labels)