In [None]:
import matplotlib.pyplot as plt
import numpy as np
import sklearn
import sklearn.datasets
import matplotlib
import pydot
import graphviz
import pydot
import theano
import theano.tensor as T
from IPython.display import Image
from IPython.display import SVG
import timeit

In [2]:
# Generate the toy dataset: 200 two-class samples from scikit-learn's make_moons
# with noise = 0.2. NumPy's global RNG is seeded first.
np.random.seed(0)
train_X, train_y = sklearn.datasets.make_moons(200, noise = 0.2)
# Cast the features to float32 and the labels to int32.
train_X = train_X.astype(np.float32)
train_y = train_y.astype(np.int32)
# Scatter the two feature columns against each other, colouring each point by its label.
plt.scatter(train_X[:,0], train_X[:,1], s = 40, c = train_y, cmap = plt.cm.Spectral)


<matplotlib.collections.PathCollection at 0x8a749d0>

In [3]:
# Helper function to plot a decision boundary.
# If you don't fully understand this function don't worry, it just generates the contour plot.
def plot_decision_boundary(pred_func, X=None, y=None, h=0.01):
    """Draw the decision regions of a classifier together with the data points.

    Parameters
    ----------
    pred_func : callable
        Called once with an array of shape (n_grid_points, 2) holding the grid
        coordinates; its result must support ``.reshape`` to the grid shape.
    X : array-like with two columns, optional
        Points to overlay on the contour plot. Defaults to the notebook-level
        ``train_X``.
    y : array-like, optional
        Values used to colour the points. Defaults to the notebook-level
        ``train_y``.
    h : float, optional
        Spacing between neighbouring grid points (default 0.01).

    Returns
    -------
    None. The contour and the scatter plot are drawn on the current
    matplotlib axes.
    """
    # Fall back to the notebook's training data when no data is passed in,
    # so existing calls plot_decision_boundary(pred_func) behave as before.
    if X is None:
        X = train_X
    if y is None:
        y = train_y
    # Set min and max values and give it some padding
    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    # Generate a grid of points with distance h between them
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    # Predict the function value for the whole grid (one row per grid point)
    Z = pred_func(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    # Plot the contour and training examples
    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Spectral)

In [4]:
# Size definitions
num_examples = len(train_X) # training set size
nn_input_dim = 2 # input layer dimensionality
nn_output_dim = 2 # output layer dimensionality
nn_hdim = 100 # hidden layer dimensionality

# Gradient descent parameters (I picked these by hand)
epsilon = 0.01 # learning rate for gradient descent
reg_lambda = 0.01 # regularization strength

In [None]:
# Define the symbolic input variables with Theano. They hold no data themselves;
# values are supplied when an expression is evaluated or a compiled function is called.
X = T.matrix('X')  # symbolic matrix named 'X'
y = T.lvector('y')  # symbolic vector named 'y' (the 'l' prefix is Theano's int64 type)

# Quick sanity check: evaluate the expression X * 2 with a concrete 2x2 value for X.
(X * 2).eval({X:[[1,1],[2,2]]})

In [None]:
# Define the network parameters as Theano shared variables.
# W1 and W2 start as standard-normal draws and b1 and b2 as zeros;
# build_model() later overwrites all four through set_value().
W1 = theano.shared(np.random.randn(nn_input_dim, nn_hdim), name = 'W1')
b1 = theano.shared(np.zeros(nn_hdim),name = 'b1')
W2 = theano.shared(np.random.randn(nn_hdim, nn_output_dim),name = 'W2')
b2 = theano.shared(np.zeros(nn_output_dim),name = 'b2')

In [None]:
# Forward propagation: only the symbolic expressions are defined here.
# Nothing is evaluated in this cell.
z1 = X.dot(W1) + b1
a1 = T.tanh(z1)
z2 = a1.dot(W2) + b2
y_hat = T.nnet.softmax(z2)

# The regularization term: sum of squared entries of W1 and W2 (the biases are
# not included), scaled by reg_lambda / (2 * num_examples)
loss_reg = 1./num_examples * reg_lambda/2*(T.sum(T.sqr(W1)) + T.sum(T.sqr(W2)))
# Total loss: mean categorical cross-entropy between y_hat and y, plus the regularization term
loss = T.nnet.categorical_crossentropy(y_hat, y).mean() + loss_reg

# Class prediction: index of the largest entry of y_hat along axis 1
prediction = T.argmax(y_hat, axis = 1)

In [None]:
# Theano functions that can be called from our Python code.
# In theano.function(inputs, outputs) the first argument lists the symbolic
# variables whose values the caller supplies, and the second argument is the
# expression that is computed from them.
forward_prop = theano.function([X], y_hat)
calculate_loss = theano.function([X,y],loss)
predict = theano.function([X],prediction)

In [None]:
# Visualize the computational graph of forward_prop.
# First call: write the graph as a PNG to img/nn-theano-forward_prop.png.
# NOTE(review): presumably the img/ directory must already exist - confirm.
theano.printing.pydotprint(forward_prop, var_with_name_simple = True, compact = True, 
                           outfile = 'img/nn-theano-forward_prop.png', format = 'png')

# Second call: request the graph as SVG image data (return_image = True) and
# display it inline through IPython's SVG wrapper.
SVG(theano.printing.pydotprint(forward_prop, var_with_name_simple = True, compact = True, return_image = True, 
                              format = 'svg'))

In [None]:
# Print out the textual description of the computational graph of the
# compiled forward_prop function.
theano.printing.debugprint(forward_prop)

In [None]:
# Define the gradients of the loss symbolically.
# The first argument of T.grad is the expression to differentiate (here the loss);
# the second is the parameter with respect to which the derivative is taken.
dW2 = T.grad(loss, W2)
db2 = T.grad(loss, b2)
dW1 = T.grad(loss, W1)
db1 = T.grad(loss, b1)

In [None]:
# Compile one gradient-descent step as a Theano function.
# It takes (X, y) as inputs and declares no output expression; its `updates`
# replace each shared parameter with (parameter - epsilon * gradient).
gradient_step = theano.function(
[X,y], updates = ((W2, W2 - epsilon*dW2),
                 (W1, W1 - epsilon*dW1),
                 (b2, b2 - epsilon*db2),
                 (b1, b1 - epsilon*db1)))

In [None]:
## Train a neural network with the gradient_step function

def build_model(num_passes = 20000, print_loss = False):
    """Train the network by repeated gradient-descent steps on the training set.

    The shared parameters W1, b1, W2 and b2 are first re-initialized from a
    fixed seed, so every call starts from the same weights. ``gradient_step``
    is then applied ``num_passes`` times to ``(train_X, train_y)``.

    Parameters
    ----------
    num_passes : int
        Number of gradient-descent steps to run (default 20000).
    print_loss : bool
        If True, print the loss on the training set every 1000 iterations.

    Returns
    -------
    None. The trained parameters are left in the shared variables.
    """
    # Reset the parameters; the weight matrices are scaled by 1/sqrt(fan-in).
    np.random.seed(0)
    W1.set_value(np.random.randn(nn_input_dim, nn_hdim) / np.sqrt(nn_input_dim))
    b1.set_value(np.zeros(nn_hdim))
    W2.set_value(np.random.randn(nn_hdim, nn_output_dim) / np.sqrt(nn_hdim))
    b2.set_value(np.zeros(nn_output_dim))

    # Gradient descent. `range` and the single-argument, parenthesized `print`
    # below behave the same on Python 2 and Python 3.
    for i in range(num_passes):
        gradient_step(train_X, train_y)

        if print_loss and i % 1000 == 0:
            print("Loss after iteration %i = %f" % (i, calculate_loss(train_X, train_y)))

In [None]:
# Build a model whose hidden layer has nn_hdim units and report the loss while training
build_model(print_loss = True)

# Plot the decision regions of the trained network. The title is derived from
# nn_hdim so that it always matches the configured hidden layer size.
plot_decision_boundary(predict)
plt.title("Decision Boundary for the hidden layer size %d" % nn_hdim)