In [36]:
# In this tutorial, we implement a simple linear regression model
# trained with the PAC-Bayesian SGD method.
import numpy as np
# Seed NumPy's global random generator so the random draws below repeat across runs.
np.random.seed(41)

In [41]:
# Experiment configuration
#   n_sample: number of samples in the training set
#   dim:      length of the feature vector of each sample
n_sample, dim = 100, 5

In [64]:
# Section 1: Data preparation
# X: n samples of dimension d drawn from N(0, diag(sigma_1^2, sigma_2^2, ..., sigma_d^2))
# y: n scalar targets, y = X . w* + epsilon, with epsilon ~ N(0, I)

# Zero mean vector for the feature distribution.
mean_x = np.zeros(dim, dtype=np.float32)

# Diagonal covariance: draw d standard deviations uniformly from [0, 1),
# sort them in ascending order, and place their squares on the diagonal.
sigmas = np.sort(np.random.rand(dim))
diag_x = np.zeros((dim, dim), dtype=np.float32)
diag_x[np.diag_indices(dim)] = [s ** 2 for s in sigmas]
print(diag_x)

[[0.02416408 0.         0.         0.         0.        ]
 [0.         0.02430416 0.         0.         0.        ]
 [0.         0.         0.41832593 0.         0.        ]
 [0.         0.         0.         0.43920645 0.        ]
 [0.         0.         0.         0.         0.57919186]]


In [65]:
# Draw n_sample feature vectors from N(mean_x, diag_x) and peek at the result.
X = np.random.multivariate_normal(mean=mean_x, cov=diag_x, size=n_sample)
print(X.shape)
print(X[:2])

(100, 5)
[[ 0.13642804 -0.04913764 -0.25911475 -0.22302076  0.75739196]
 [-0.10383092 -0.12020316 -0.81173074 -0.55669558 -0.27395614]]


In [66]:
# Ground-truth weight vector: uniform draws from [0, 1), scaled by 3.
w_star = np.random.rand(dim) * 3
print(w_star)

[2.4660421  0.47616529 0.6545285  2.61863652 0.24577484]


In [67]:
# Observation noise: n_sample draws from a normal distribution with mean 0 and std 1.
epsilon = np.random.normal(loc=0, scale=1, size=n_sample)

In [68]:
# Targets: linear response of every sample to w_star, plus the noise term.
y = X.dot(w_star) + epsilon
print(y[:10])

[-0.24868233 -1.18693009 -2.72361888  2.04369675  3.5666234   4.00559659
 -0.31415208  1.06377222 -0.59170569  2.64931045]


In [None]:
# Section 2: 