In [None]:
import numpy as np
from sklearn import datasets
import theano
import theano.tensor as T
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns

theano.config.floatX = 'float32'

%matplotlib inline

In [None]:
# Create a synthetic 1-feature regression problem: y = coef * x + bias + noise.
n_samples = 100
n_features = 1
n_out_dim = 1
true_bias = 100
# random_state is fixed so the generated data set is identical on every run;
# coef=True makes make_regression also return the ground-truth coefficient.
train_X, train_y, true_coef = datasets.make_regression(n_samples=n_samples,
                                                  n_features=n_features,
                                                  n_targets=n_out_dim,
                                                  n_informative=1,
                                                  noise=10,
                                                  bias=true_bias,
                                                  coef=True,
                                                  random_state=0)
# make_regression returns a flat (n_samples,) target vector when n_targets == 1.
# Reshape it into an explicit (n_samples, n_out_dim) matrix so it lines up with
# the model output; using n_out_dim instead of a literal 1 keeps this line
# consistent with the n_targets setting above.
train_y = train_y.reshape(n_samples, n_out_dim)

# Plot the raw data: one feature on the x-axis, the noisy target on the y-axis.
fig, ax = plt.subplots(figsize=(10, 6))
ax.set_title('Synthetic data for linear regression of 2-dim points')
# Label the axes so the figure can be read on its own.
ax.set_xlabel('x (feature)')
ax.set_ylabel('y (target)')
# Trailing semicolon keeps the PathCollection repr out of the cell output.
ax.scatter(train_X, train_y, color='blue');

In [None]:
# Define model: y_hat = X.a + b, trained by minimising the mean squared error.

# The training data lives in shared variables, so the compiled functions
# need no explicit inputs.
X = theano.shared(train_X.astype('float32'), name='X')
y = theano.shared(train_y.astype('float32'), name='y')

# Seeded local generator: the initial weights (and therefore the whole
# training run) are reproducible across kernel restarts, and the global
# np.random state is left untouched.
init_rng = np.random.RandomState(0)
a = theano.shared(init_rng.randn(n_features, n_out_dim).astype('float32'), name='a')
b = theano.shared(np.zeros(n_out_dim).astype('float32'), name='b')

# b has shape (n_out_dim,) and is broadcast over the rows of X.dot(a).
y_hat = X.dot(a) + b
# Mean squared error over all samples.
loss = T.mean(T.sqr(y-y_hat))

In [None]:
# Gradient descent setup.
# Symbolic gradients of the loss with respect to both parameters.
da, db = T.grad(loss, [a, b])

# Learning rate; presumably kept as float32 so the update expressions
# match the float32 shared variables -- TODO confirm.
epsilon = np.float32(0.01)

# One call to gradient_step() moves a and b one step against their gradients.
# It takes no inputs and returns nothing; its only effect is the updates.
param_updates = [(a, a - epsilon * da),
                 (b, b - epsilon * db)]
gradient_step = theano.function([], updates=param_updates)

# Zero-argument accessors that evaluate the prediction and the loss
# on the shared training data with the current parameter values.
f_y_hat = theano.function([], y_hat)
f_loss = theano.function([], loss)

In [None]:
# Train the model: run n_epochs gradient-descent steps on the shared data.
n_epochs = 500
# NOTE(review): timeit is not used in the visible cells; the import is kept
# in case a later cell relies on it.
import timeit
# range + single-argument print(...) run unchanged on Python 2 and Python 3.
for i in range(n_epochs):
    gradient_step()
    # Report every 100th epoch, plus the last one so the final loss is shown.
    if i % 100 == 0 or i == n_epochs - 1:
        # %s formats the loss via str(), matching the old print statement.
        print('%d loss= %s' % (i, f_loss()))

In [None]:
# Plot the training data together with the model's current predictions.
fig, ax = plt.subplots(figsize=(10, 6))
ax.set_title('Linear fit')
# Label the axes so the figure can be read on its own.
ax.set_xlabel('x (feature)')
ax.set_ylabel('y (target)')
ax.scatter(train_X, train_y,  color='b', label='2-dimensional data')
# f_y_hat() evaluates the model on the shared training inputs.
ax.plot(train_X, f_y_hat(), color='r', label='linear fit')
# Trailing semicolon keeps the Legend repr out of the cell output.
ax.legend();

In [None]:
# Compare the learned parameters a, b with those used for data generation.
# Zero-argument functions that return the current values of the shared parameters.
f_a = theano.function([], a)
f_b = theano.function([], b)
# a has shape (n_features, n_out_dim) and b has shape (n_out_dim,), so
# [0][0] and [0] pick out the scalars of this 1-feature, 1-output model.
# Single-argument print(...) produces identical output on Python 2 and 3.
print('Model coefficients: a={}, b={}'.format(f_a()[0][0], f_b()[0]))
print('True coefficients:  a={}, b={}'.format(true_coef, true_bias))