In [None]:
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
%matplotlib inline

# Linear Regression

# Univariate case

## 1D data generation

Define linear model $f(x) = ax + b$.

Observe data $y = f(x) + \epsilon$ with Gaussian noise $\epsilon \sim \mathcal{N}(0,1)$.

In [None]:
def f1(x,a,b):
    """Evaluate the univariate linear model ``a*x + b``.

    ``x`` may be a scalar or a NumPy array; with an array the model is
    applied element-wise and the result has the same shape as ``x``.
    """
    scaled = a * x
    return scaled + b

# Ground-truth parameters of the line y = a*x + b.
a = 5.
b = 3.

# Dense, noise-free grid used only to draw the true line.
x = np.linspace(-1,1,100)
y = f1(x,a,b)

# Seed the global NumPy RNG so the noisy observations below (and every number
# printed by later cells) are identical on each Restart & Run All.
np.random.seed(0)

# n noisy observations: y = f(x) + eps with eps ~ N(0, 1).
n = 10
x_data = np.linspace(-1,1,n)
y_data = f1(x_data,a,b) + np.random.randn(n)

# Overlay the observations on the true line.
plt.plot(x,y,"k-",label="Ground Truth")
plt.plot(x_data,y_data,"r.",label="Data")
plt.legend()
plt.xlim([-1,1])
plt.ylim([-2,8])
plt.show()

## Tensorflow Graph Construction

Define tensorflow graph for training linear regressor

- placeholder : $x, y$, shape (None,)

- tensorflow variables : $\hat{a},\hat{b}$, shape ()

- linear model : $\hat{y} = \hat{a}x + \hat{b}$

- loss : $\frac{1}{2N}\sum_{i=1}^{N} (y_{i} - \hat{y}_{i})^{2}$

- optimizer : gradient descent


In [None]:
# Reset the default graph before (re)building the model in this cell.
tf.reset_default_graph()

# TODO(exercise): create the input and target placeholders. The spec in the
# markdown above asks for shape (None,) for both.
x_ph = 
y_ph = 

# TODO(exercise): create the trainable parameters a_hat and b_hat. The spec
# in the markdown above asks for scalar variables, shape ().
a_hat = 
b_hat = 

# Linear model: y_pred = a_hat * x + b_hat.
y_pred = tf.multiply(x_ph,a_hat) + b_hat

# Half of the mean squared difference between prediction and target.
loss = 0.5 * tf.reduce_mean( tf.square(y_pred - y_ph) )

# define optimizer
# Plain gradient descent with learning rate 0.1; `train` is the op that
# minimizes `loss`.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
train = optimizer.minimize(loss)

# Sanity report: Python type and static shape of one placeholder, one
# variable and the prediction tensor.
print("Tensorflow graph is contructed!!!")
print("x_ph is:")
print("type: "+str(type(x_ph)))
print("shape: "+str(x_ph.shape))
print("a_hat is:")
print("type: "+str(type(a_hat)))
print("shape: "+str(a_hat.shape))
print("y_pred is:")
print("type: "+str(type(y_pred)))
print("shape: "+str(y_pred.shape))

## Initialize TF variables

In [None]:
# initialize variables
# TODO(exercise): build the variable-initializer op. The multivariate cell
# further down this notebook uses tf.global_variables_initializer().
init = 

# TODO(exercise): open a session. The multivariate cell further down this
# notebook uses tf.Session().
sess = 
sess.run(init)

# Fetch the freshly initialized parameters and print them.
a_np, b_np = sess.run([a_hat,b_hat])
print("Randomly initialized parameters")
print("a: %f"%a_np)
print("b: %f"%b_np)

## Optimize linear model ( Find the best parameters )

In [None]:
# Number of training iterations; the same feed_dict is used for every one.
nepoch = 3000

# TODO(exercise): fill in the arrays fed to the placeholders -- presumably
# the noisy observations x_data / y_data generated earlier (confirm).
feed_dict = {x_ph:    ,y_ph:    }

for epoch in range(nepoch):
    # TODO(exercise): run one training step and fetch the current loss. The
    # kernel-regression training cell at the end of the notebook shows the
    # pattern: sess.run([loss,train],feed_dict=feed_dict).
    loss_np,_ =
    # Report progress every 300 epochs.
    if (epoch%300) == 0:
        print("[%d/%d] loss : %f"%(epoch,nepoch,loss_np))
print()
    
# Compare the fitted parameters with the ones used to generate the data.
a_np, b_np = sess.run([a_hat,b_hat])
print("Optimized parameters")
print("a: %f"%a_np)
print("b: %f\n"%b_np)
print("Original parameters")
print("a: %f"%a)
print("b: %f"%b)

# Evaluate the fitted model on the dense grid x (no targets are needed for
# prediction, so only x_ph is fed).
feed_dict = {x_ph: x}
y_pred_np = sess.run(y_pred, feed_dict=feed_dict)

# Overlay truth, observations and the fitted line on a single set of axes.
fig, ax = plt.subplots()
ax.plot(x, y, "k-", label="Ground Truth")
ax.plot(x_data, y_data, "r.", label="Data")
ax.plot(x, y_pred_np, "b-", label="Linear Regression")
ax.legend()
ax.set_xlim(-1, 1)
ax.set_ylim(-2, 8)
plt.show()

# Multivariate case

## 2D data generation

Now $x$ is a $d$-dimensional vector.

Define linear model $f(x) = a^{t}x + b$.

Observe data $y = f(x) + \epsilon$ with Gaussian noise $\epsilon \sim \mathcal{N}(0,1)$.

In [None]:
def f2(x,a,b):
    """Evaluate the multivariate linear model: matrix product of ``x`` and ``a``, plus ``b``.

    The product is computed with ``np.matmul``; ``b`` is added to every
    element of the result.
    """
    projection = np.matmul(x, a)
    return projection + b

# Ground-truth parameters: a is a (2,1) column of weights, b a scalar bias.
a = np.array([[2.],[1.]])
b = 1.

# 100x100 grid over [-1,1]^2. The grid is flattened to a (10000,2) matrix so
# f2 can evaluate every point at once, then reshaped back for plot_surface.
x1 = np.linspace(-1,1,100)
x2 = np.linspace(-1,1,100)
X1,X2 = np.meshgrid(x1,x2)
x = np.concatenate([np.reshape(X1,[-1,1]),np.reshape(X2,[-1,1])],axis=1)
y = f2(x,a,b)
Y = np.reshape(y,[100,100])

# Seed the global NumPy RNG so the sampled inputs and noise below are
# identical on each Restart & Run All.
np.random.seed(0)

# n observations at uniformly random inputs, with N(0, 1) noise on the target.
n = 50
x_data = np.random.uniform(-1,1,size=(n,2))
y_data = f2(x_data,a,b) + np.random.randn(n,1)

fig = plt.figure()
# Figure.gca() no longer accepts projection=... (keyword arguments were
# removed in Matplotlib 3.6); add_subplot(111, projection='3d') creates the
# same single 3-D axes on both old and new Matplotlib versions.
ax = fig.add_subplot(111, projection='3d')

ax.plot_surface(X1,X2,Y)
ax.plot3D(x_data[:,0].flatten(),x_data[:,1].flatten(),y_data.flatten(),'r.',label="Data")
plt.legend()
ax.set_xlim3d(-1,1)
ax.set_ylim3d(-1,1)
ax.set_zlim3d(-3,4)
plt.show()

## Tensorflow Graph Construction

Define tensorflow graph for training linear regressor

- placeholder : $x, y$, shape: $x$ (None,2), $y$ (None,1)

- tensorflow variables : $\hat{a},\hat{b}$, shape: $a$ (2,1), $b$ ()

- linear model : $\hat{y} = \hat{a}^{t}x + \hat{b}$ (batched: $\hat{Y} = X\hat{a} + \hat{b}$)

- loss : $\frac{1}{2N}\sum_{i=1}^{N} (y_{i} - \hat{y}_{i})^{2}$

- optimizer : gradient descent

## Initialize graph
## Train it

In [None]:
# Reset the default graph before (re)building the multivariate model.
tf.reset_default_graph()

# TODO(exercise): placeholders. The spec in the markdown above asks for
# x of shape (None,2) and y of shape (None,1).
x_ph = 
y_ph = 

# TODO(exercise): trainable variables. The spec in the markdown above asks
# for a_hat of shape (2,1) and b_hat of shape ().
a_hat = 
b_hat =

# TODO(exercise): prediction of the linear model from x_ph, a_hat and b_hat.
y_pred =

# TODO(exercise): scalar loss comparing y_pred with y_ph.
loss =

# define optimizer
# TODO(exercise): gradient-descent optimizer and its training op; the
# univariate graph-construction cell earlier in the notebook shows the pattern.
optimizer =
train =

# initialize variables
init = tf.global_variables_initializer()

sess = tf.Session()
sess.run(init)

# Fetch and print the parameters right after initialization.
a_np, b_np = sess.run([a_hat,b_hat])
print("Randomly initialized parameters")
print("a: {:}".format(a_np.flatten()))
print("b: %f\n"%b_np)

# Number of training iterations; the same feed_dict is used for every one.
nepoch = 10000
# TODO(exercise): fill in the arrays to feed -- presumably the observations
# x_data / y_data from the 2D data-generation cell (confirm).
feed_dict = {x_ph:,y_ph:}
for epoch in range(nepoch):
    # TODO(exercise): run one training step and fetch the current loss.
    loss_np,_ =
    # Report progress every 1000 epochs.
    if (epoch%1000) == 0:
        print("[%d/%d] loss : %f"%(epoch,nepoch,loss_np))
    
# Compare the fitted parameters with the ones used to generate the data.
a_np, b_np = sess.run([a_hat,b_hat])
print("Optimized parameters")
print("a: {:}".format(a_np.flatten()))
print("b: %f\n"%b_np)

print("Original parameters")
print("a: {:}".format(a.flatten()))
print("b: %f"%b)

# Prediction
# TODO(exercise): feed the inputs to predict on and run y_pred. The result is
# reshaped to [100,100] below, so presumably this is the flattened grid `x`
# from the data-generation cell (confirm).
feed_dict = {x_ph:}
y_pred_np = 
# Put the predictions back into a 100x100 array for plot_surface.
Y_pred_np= np.reshape(y_pred_np,[100,100])

# Draw two 3-D figures over the same noisy samples: first the ground-truth
# plane Y (black), then the fitted plane Y_pred_np (blue). Both figures share
# axis limits so they can be compared by eye.
for surface, color in ((Y, "k"), (Y_pred_np, "b")):
    fig = plt.figure()
    # Figure.gca() no longer accepts projection=... (keyword arguments were
    # removed in Matplotlib 3.6); add_subplot(111, projection='3d') creates
    # the same single 3-D axes on both old and new Matplotlib versions.
    ax = fig.add_subplot(111, projection='3d')

    ax.plot_surface(X1,X2,surface,color=color)
    ax.plot3D(x_data[:,0].flatten(),x_data[:,1].flatten(),y_data.flatten(),'r.',label="Data")
    ax.legend()
    ax.set_xlim3d(-1,1)
    ax.set_ylim3d(-1,1)
    ax.set_zlim3d(-3,4)
    plt.show()

# Kernel Regression (Non-linear Regression)

## Non-linear 1D data generation

Now, our underlying model is not a linear model

In [None]:
def f3(x):
    """Non-linear ground-truth function: ``np.sinc`` applied to ``x``.

    ``np.sinc`` is the normalized sinc, ``sin(pi*x) / (pi*x)``, with the
    value 1 at ``x = 0``.
    """
    response = np.sinc(x)
    return response

# Dense, noise-free grid over [-2*pi, 2*pi] used only to draw the true curve.
x = np.linspace(-2*np.pi,2*np.pi,100)
y = f3(x)

# Seed the global NumPy RNG so the jittered inputs and noisy targets below
# are identical on each Restart & Run All.
np.random.seed(0)

# n observations: evenly spaced inputs with small Gaussian jitter (std 0.05),
# and targets with Gaussian noise of the same scale.
n = 500
x_data = np.linspace(-2*np.pi,2*np.pi,n) + 0.05*np.random.randn(n)
y_data = f3(x_data) + 0.05*np.random.randn(n)

# Overlay the observations on the true curve.
plt.plot(x,y,"k-",label="Ground Truth")
plt.plot(x_data,y_data,"r.",label="Data")
plt.legend()
plt.xlim([-2*np.pi,2*np.pi])
plt.ylim([-1,1.5])
plt.show()

## Kernel Regression

<img src="img/kr_img0.jpg">

$K(x,y) \triangleq \exp\left(-0.5\lambda^{-1}(x-y)^{2}\right)$

$f(x) = \sum_{i=1}^{k} w_{i} K(x,\mu_{i})$

### Matrix form

$f(x) = k(x) w $

where $w = [ w_{i} ]_{i=1}^{k}$ and $k(x)$ is $k$-dimensional vector

$[k(x)]_{i} = k_{i}(x)$

$k_{i}(x) = K(x,\mu_{i})$

## Tensorflow Graph Construction

Define tensorflow graph for training kernel regressor

- placeholder : $x, y$

- placeholder : $\mu = [ \mu_{i} ]_{i=1}^{k}, \lambda^{-1}$

- tensorflow variables : $w = [ w_{i} ]_{i=1}^{k}$

- kernel regressor : $\hat{y} = \sum_{i=1}^{k} w_{i} K(x,\mu_{i})$

- loss : $\frac{1}{2N}\sum_{i=1}^{N} (y_{i} - \hat{y}_{i})^{2}$

- optimizer : gradient descent

In [None]:
# Reset the default graph before building the kernel regressor.
tf.reset_default_graph()

# Kernel configuration: n_kernel centres mu spaced evenly over
# [-2*pi, 2*pi], and the inverse-lambda factor used in the kernel exponent.
n_kernel = 20
mu = np.linspace(-2 * np.pi, 2 * np.pi, num=n_kernel)
inv_lambda = 10.0

# Inputs and targets are fed as column vectors with a free batch dimension.
x_ph = tf.placeholder(dtype=tf.float32, shape=[None, 1])
y_ph = tf.placeholder(dtype=tf.float32, shape=[None, 1])

# The centres (as an (n_kernel, 1) column) and the scalar inverse lambda are
# fed at run time instead of being baked into the graph.
mu_ph = tf.placeholder(dtype=tf.float32, shape=[n_kernel, 1])
inv_lambda_ph = tf.placeholder(dtype=tf.float32, shape=[])

print("Define place holders")

### Vector Form

Let $\hat{Y} = [\hat{y_{i}}] = [k(x_{i})w]$.

Then,

$\hat{Y} = Kw$ where $K_{ij} = k(x_{i},\mu_{j})$

### Distance Matrix $D$

$D$ is an $n\times k$ matrix whose elements are

$D_{ij} = (x_i - \mu_j)^{2}$

### Tensorflow style

Let $X = [x_{i}]^{n}_{i=1}$ (vector)

$D = X^{2} - 2X\mu^{t} + (\mu^{2})^{t}$

Then, the kernel matrix is $K=\exp(-0.5\lambda^{-1}D)$

In [None]:
# Build the distance matrix D from the expansion in the markdown above:
# D = X^2 - 2 X mu^t + (mu^2)^t.
# TODO(exercise): the squared terms, computed from x_ph and mu_ph.
x_norm = # X^2
mu_norm = # mu^2

# Reshape so the two squared terms broadcast against each other:
# x_norm becomes a column (one row per sample), mu_norm a row (one column
# per kernel centre).
x_norm = tf.reshape(x_norm, [-1, 1])
mu_norm = tf.reshape(mu_norm, [1, -1])

# TODO(exercise): combine the three terms into the distance matrix.
squared_dist = # X^2 - 2 X mu + mu^2

### Define Regressor and Loss function

In [None]:
# TODO(exercise): kernel matrix K = exp(-0.5 * inv_lambda * D), using
# inv_lambda_ph and squared_dist.
kernel = # exp(-0.5*inv_lambda*D)
    
# TODO(exercise): trainable weight vector w, one weight per kernel centre.
w_hat = # define w as tensorflow variable

# TODO(exercise): prediction Y_hat = K w.
y_pred = # kernel * w_hat

# TODO(exercise): scalar loss comparing y_pred with y_ph.
loss = # loss

# Plain gradient descent with learning rate 0.1; `train` is the op that
# minimizes `loss`.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
train = optimizer.minimize(loss)

# Train

In [None]:
# Create the initializer op, open a session and initialize all variables.
init = tf.global_variables_initializer()

sess = tf.Session()
sess.run(init)

# Fetch and print the weights right after initialization.
w_np = sess.run(w_hat)
print("Randomly initialized parameters")
print("w: {:}\n".format(w_np.flatten()))

# Number of training iterations; the same feed_dict is used for every one.
nepoch = 10000
# x_data / y_data are 1-D, so a trailing axis is added to match the (None,1)
# placeholders.
# TODO(exercise): fill in the values for mu_ph and inv_lambda_ph; the
# prediction feed_dict later in this cell passes mu[:,np.newaxis] and
# inv_lambda for the same keys.
feed_dict = {x_ph:x_data[:,np.newaxis],y_ph:y_data[:,np.newaxis],mu_ph:   ,inv_lambda_ph:   }

for epoch in range(nepoch):
    # One gradient-descent step; the loss is fetched in the same run call.
    loss_np,_ = sess.run([loss,train],feed_dict=feed_dict)
    # Report progress every 1000 epochs.
    if (epoch%1000) == 0:
        print("[%d/%d] loss : %f"%(epoch,nepoch,loss_np))

# Fetch and print the trained weights.
w_np = sess.run(w_hat)
print("\nOptimized parameters")
print("w: {:}".format(w_np.flatten()))

# Evaluate the trained regressor on the dense grid x. The grid and the kernel
# centres are 1-D, so each gets a trailing axis to match its placeholder.
feed_dict = {
    x_ph: x[:, np.newaxis],
    mu_ph: mu[:, np.newaxis],
    inv_lambda_ph: inv_lambda,
}
y_pred_np = sess.run(y_pred, feed_dict=feed_dict)

# Overlay truth, observations and the fitted curve on a single set of axes.
fig, ax = plt.subplots()
ax.plot(x, y, "k-", label="Ground Truth")
ax.plot(x_data, y_data, "r.", label="Data")
ax.plot(x, y_pred_np, "b-", label="Kernel Regression")
ax.legend()
ax.set_xlim(-2 * np.pi, 2 * np.pi)
ax.set_ylim(-1, 1.5)
plt.show()