In [1]:
#import modules and set up environment 
import os
import sys
path = "../../src/"

sys.path.append(path)
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import jax.numpy as jnp
from jax import jit, grad, vmap, hessian, scipy, random

#import sgmcmc code 
import models.logistic_regression.logistic_regression as lr
import samplers.sgd as sgd
import samplers.sgld as sgld
import samplers.sgldps as sgldps

from metrics import imq_KSD

In [2]:
# Read the covertype train/test splits and build the regression inputs.
file_path = "../../data/real/covtype_train.csv"
test_path = "../../data/real/covtype_test.csv"

# Training split: column 0 is used as the response vector; every remaining
# column goes into the design matrix behind a leading column of ones
# (presumably the intercept term -- confirm against the model code).
data = pd.read_csv(file_path)
dat_array = data.values
N = len(dat_array)
y = dat_array[:, 0]
x = np.column_stack((np.ones(N), dat_array[:, 1:]))

# Test split: same layout as the training split.
test_data = pd.read_csv(test_path)
test_array = test_data.values
N_test = len(test_array)
y_test = test_array[:, 0]
x_test = np.column_stack((np.ones(N_test), test_array[:, 1:]))

# Model dimension = number of design-matrix columns (ones column included).
dim = x.shape[1]

# Prior hyper-parameters.
mu_0 = np.zeros(dim)            # prior mean
lambda_0 = np.eye(dim) * 10.0   # prior covariance matrix

### Importing csv files

In [3]:
# Load the parameter samples written by each sampler run, dropping the
# leading burn-in rows.
# `methods` and `sgld_batches` are matched position by position; the batch
# values are presumably minibatch sizes as a fraction of the data -- confirm.
methods = ["sgld_1", "sgldps_1", "sgld_5", "sgldps_5","sgld_10", "sgldps_10"]
sgld_batches =[0.01, 0.01, 0.05, 0.05, 0.1, 0.1]
samples_csv = dict()
for method, batch_frac in zip(methods, sgld_batches):
    samples_file = f"./out/lrb_{method}_samples.csv"
    # Fail with the offending path instead of an IndexError from an empty glob.
    if not os.path.isfile(samples_file):
        raise FileNotFoundError(f"Sample file not found: {samples_file}")
    # Number of leading rows to discard. round() before the integer cast
    # protects against 500/batch_frac landing a hair below a whole number,
    # which plain truncation would turn into an off-by-one cut.
    burn_in = int(round(500 / batch_frac)) + 1
    samples_csv[method] = (
        pd.read_csv(samples_file).iloc[burn_in:].reset_index(drop=True)
    )

In [4]:
# Load the gradient traces that accompany each sampler run.
grads_csv = dict()

# SGLD runs: discard the leading burn-in rows (500/batch + 1 of them).
sgld_methods = ["sgld_1", "sgld_5", "sgld_10"]
grad_batches =[0.01, 0.05, 0.1]
for method, batch_frac in zip(sgld_methods, grad_batches):
    grads_file = f"./out/lrb_{method}_grads.csv"
    # Fail with the offending path instead of an IndexError from an empty glob.
    if not os.path.isfile(grads_file):
        raise FileNotFoundError(f"Gradient file not found: {grads_file}")
    # round() before the integer cast protects against 500/batch_frac landing
    # a hair below a whole number, which truncation would cut one row early.
    burn_in = int(round(500 / batch_frac)) + 1
    grads_csv[method] = (
        pd.read_csv(grads_file).iloc[burn_in:].reset_index(drop=True)
    )

# SGLD-PS runs: the files are loaded whole, with no rows dropped.
# NOTE(review): the SGLD-PS *samples* are trimmed by burn-in when loaded, so
# this only lines up row-for-row if these gradient files were already written
# without the burn-in rows -- confirm against the code that produced them.
sgldps_methods = ["sgldps_1", "sgldps_5", "sgldps_10"]
for method in sgldps_methods:
    grads_file = f"./out/lrb_{method}_grads.csv"
    if not os.path.isfile(grads_file):
        raise FileNotFoundError(f"Gradient file not found: {grads_file}")
    grads_csv[method] = pd.read_csv(grads_file)


### KSD evaluation

In [5]:
# Settings for the KSD evaluation.
# Number of replicate chains evaluated per method.
Nrep = 10

# Per-method KSD results, keyed by method name.
ksd = {}

# Sampler configurations and the batch value paired with each, by position.
methods = [
    "sgld_1", "sgldps_1",
    "sgld_5", "sgldps_5",
    "sgld_10", "sgldps_10",
]
sgld_batches = [
    0.01, 0.01,
    0.05, 0.05,
    0.1, 0.1,
]

In [6]:
# For every method and replicate, evaluate log10 of `imq_KSD` (presumably the
# inverse-multiquadric kernel Stein discrepancy -- see `metrics`) on growing
# prefixes of the chain, then save one CSV per method.
# The recorded run of this cell took roughly 56 hours in total.
for method, batch_frac in tqdm(zip(methods, sgld_batches), total=len(methods), desc= "Methods"):
    # Prefix lengths at which the KSD is evaluated: an evenly spaced grid
    # ending at 500/batch_frac, shifted down by one. The values are used as
    # slice ends, so each prefix is one row short of a multiple of the stride.
    # NOTE(review): confirm the trailing -1 is intended for a slice bound.
    n_rows_max = np.int64(500/batch_frac)
    stride = np.int64(10/batch_frac)
    idx = np.arange(n_rows_max, 0, -stride)[::-1]-1

    # Convert to ndarrays once per method rather than once per KSD call.
    sample_mat = samples_csv[method].values
    grad_mat = grads_csv[method].values

    ksd_arr = np.zeros((idx.shape[0], Nrep))
    for j in range(Nrep):
        # Columns belonging to replicate j. The first `dim` columns are never
        # read. NOTE(review): confirm the CSVs carry a leading block of `dim`
        # columns that is meant to be skipped.
        rep_cols = slice(dim*(j+1), dim*(j+2))
        rep_samples = sample_mat[:, rep_cols]
        rep_grads = grad_mat[:, rep_cols]
        for i, n_rows in enumerate(idx):
            ksd_arr[i, j] = np.log10(imq_KSD(rep_samples[:n_rows], rep_grads[:n_rows]))

    # Rows follow the prefix grid, columns the replicates.
    ksd_arr_df = pd.DataFrame(ksd_arr)
    ksd[method] = ksd_arr_df
    ksd_arr_df.to_csv(f"./out/ksdcover_{method}.csv", index=False)

Methods: 100%|██████████| 6/6 [56:08:18<00:00, 33683.08s/it]    
