3.2 Classification and regression with a two-layer perceptron




Generate some data 

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import random
from tqdm import trange
from abc import ABCMeta, abstractmethod

def generate_data(mA=(1, 1), mB=(5, 1), sigmaA=1, sigmaB=1, type='random'):
    """Build a shuffled two-class Gaussian data set.

    Each class contributes 256 two-dimensional points: class A is drawn
    around ``mA`` with spread ``sigmaA`` and labelled ``+1``; class B is
    drawn around ``mB`` with spread ``sigmaB`` and labelled ``-1``.

    The global NumPy random state is re-seeded with 100 on every call, so
    identical arguments always give identical output.

    ``type`` is accepted but not used by this function.

    Returns:
        A ``(samples, labels)`` pair: a ``(512, 2)`` array of points and a
        ``(512,)`` array of ``+1``/``-1`` labels, shuffled with the same
        permutation.
    """
    np.random.seed(100)
    points_per_class = 256

    # Draw order matters for reproducibility: class A first, then class B.
    class_a = np.random.randn(points_per_class, 2) * sigmaA + mA
    class_b = np.random.randn(points_per_class, 2) * sigmaB + mB

    samples = np.concatenate((class_a, class_b))
    labels = np.concatenate((np.ones(len(class_a)), -np.ones(len(class_b))))

    # One shared permutation keeps every point aligned with its label.
    order = np.random.permutation(len(class_a) + len(class_b))
    return samples[order, :], labels[order]

samples, labels = generate_data()

# Plot each class with a single vectorised call instead of issuing one
# plt.plot call per point: label -1 in red, everything else in blue.
negative = labels == -1
plt.plot(samples[negative, 0], samples[negative, 1], 'r.')
plt.plot(samples[~negative, 0], samples[~negative, 1], 'b.')
plt.show()

Definition of the neural network (NN)

In [None]:
# def sigmoid(x):
#     return 2 / (1 + np.exp(-x))-1

# def sigmoid_derivative(x):	
#     return (1+sigmoid(x))* (1 - sigmoid(x))/2

# class NN:
#     def __init__(self,num_nodes=[2,4,4,1]):
#         self.weights= []
#         self.activation = sigmoid
#         self.activation_deriv = sigmoid_derivative
#         self.num_nodes = num_nodes
#         self.bias=[]
#         self.hout_batch=[]
#         for i in range(1,len(num_nodes)):
#             self.weights.append(np.random.randn(num_nodes[i-1],num_nodes[i]))
#             self.bias.append(np.random.randn(num_nodes[i]))
                
#         return 
#     def forward_pass(self,x):
#         result = x
#         hout= []
#         for i,matrix in enumerate(self.weights):
#             result=self.activation(np.dot(result,matrix))+self.bias[i]
#             hout.append(result)
#         return result,hout
#     def train(self,X,Y,learning_rate = 0.4, epochs=5):
#         n = len(Y)
#         for k in trange(epochs):
#             learning_rate*=0.5
#             for x,y in zip(X,Y):
#                 result,hout = self.forward_pass(x)
#                 deltas =[(hout[-1]-y)*self.activation_deriv(result)).item()]

#                 layer_num = len(hout)-2
#                 for j in range(layer_num,0,-1):
#                     deltas.append(np.dot(deltas[-1],self.weights[j].T)* self.activation_deriv(hout[j]))
#                 deltas.reverse()
#                 for i in range(len(self.weights)):
#                     layer = np.atleast_2d(hout[i])
#                     delta = np.atleast_2d(deltas[i])
#                     self.weights[i] += learning_rate * layer.T.dot(delta)
#                     self.bias[i] += learning_rate * deltas[i]
#         return 
#     def show(self):
#         for i in self.weights:
#             print(i.shape)

#     def predict(self,x):
#         result=x
#         for i,matrix in enumerate(self.weights):
#             result=self.activation(np.dot(result,matrix))+self.bias[i]
#         return np.sign(result)

In [None]:

class Parameter(object):
    """Holder that pairs a value with its gradient and optimiser flags.

    Attributes:
        data: the wrapped value, stored exactly as passed in.
        grad: gradient slot; starts as ``None``.
        requires_grad: flag stored as given by the caller.
        skip_decay: flag stored as given by the caller (default ``False``).
    """

    def __init__(self, data, requires_grad, skip_decay=False):
        self.data, self.grad = data, None
        self.requires_grad = requires_grad
        self.skip_decay = skip_decay

    @property
    def T(self):
        """Return ``self.data.T`` (the transpose of the wrapped value)."""
        wrapped = self.data
        return wrapped.T

class SGD(object):
    """Plain gradient-descent optimiser with optional multiplicative decay.

    Args:
        parameters: iterable of objects exposing ``data``, ``grad``,
            ``requires_grad`` and ``skip_decay``; only those whose
            ``requires_grad`` is truthy are kept.
        lr: step size applied to each gradient.
        decay: fraction removed from ``data`` on every update; stored as
            the multiplier ``1.0 - decay``.
    """

    def __init__(self, parameters, lr, decay=0):
        self.parameters = [p for p in parameters if p.requires_grad]
        self.lr = lr
        self.decay_rate = 1.0 - decay

    def update(self):
        """Apply one in-place update step to every tracked parameter.

        Parameters whose ``grad`` is still ``None`` (no backward pass has
        filled it in yet) are left untouched instead of raising a
        ``TypeError`` on ``lr * None``.
        """
        for p in self.parameters:
            if p.grad is None:
                continue
            # Shrink the value first, unless decay is off or the parameter opts out.
            if self.decay_rate < 1 and not p.skip_decay:
                p.data *= self.decay_rate
            p.data -= self.lr * p.grad


class MSE:
    """Halved mean-squared-error loss that remembers its most recent inputs.

    Calling the instance stores the prediction in ``self.a`` and the target,
    reshaped to a column, in ``self.y``; ``gradient()`` then reads both.
    """

    def __call__(self, output, target, requires_acc=True):
        """Return the loss, plus the sign accuracy when ``requires_acc`` is set.

        The loss is ``0.5 * mean((output - target) ** 2)``. The accuracy is
        the number of entries where ``sign(output)`` equals the target,
        divided by ``output.shape[0]``.
        # assumes output has shape (N, 1) so it lines up with the reshaped
        # target — TODO confirm against callers
        """
        self.a = output
        self.y = np.reshape(target, (-1, 1))

        residual = self.a - self.y
        loss = 0.5 * np.multiply(residual, residual).mean()
        if not requires_acc:
            return loss

        hits = np.sum(np.sign(output) == self.y)
        acc = hits / output.shape[0]
        return loss, acc

    def gradient(self):
        """Return ``output - target`` for the inputs of the latest call."""
        return self.a - self.y

class Layer(metaclass=ABCMeta):
    """Abstract base for network layers.

    Subclasses must implement both ``forward`` and ``backward``. Using
    ``ABCMeta`` as the metaclass is what makes ``@abstractmethod`` take
    effect: without it the decorators are inert and an incomplete layer
    could be instantiated silently.
    """

    @abstractmethod
    def forward(self, *args):
        """Compute the layer output from its input(s)."""

    @abstractmethod
    def backward(self, *args):
        """Propagate the incoming gradient back through the layer."""

class Tanh(Layer):
    """Element-wise hyperbolic-tangent activation layer."""

    def forward(self, x):
        """Return ``tanh(x)`` and cache it in ``self.y`` for ``backward``.

        ``np.tanh`` is used rather than the explicit
        ``(e^x - e^-x) / (e^x + e^-x)`` quotient, whose exponentials
        overflow for large ``|x|`` and turn the result into ``nan``.
        """
        self.y = np.tanh(x)
        return self.y

    def backward(self, eta):
        """Scale the incoming gradient ``eta`` by the tanh derivative.

        Uses the output cached by the last ``forward`` call:
        ``d tanh(x)/dx = 1 - y**2 = (1 - y) * (1 + y)``.
        """
        return (1 - self.y) * (1 + self.y) * eta

class Linear(Layer):
    """Affine layer computing ``x . W`` plus an optional bias."""

    def __init__(self, shape, requires_grad=True, bias=True, **kwargs):
        """Create the weight matrix and, if requested, the bias vector.

        Args:
            shape: ``(in_size, out_size)`` of the weight matrix.
            requires_grad: stored on the layer and forwarded to each
                ``Parameter``.
            bias: when falsy, no bias parameter is created (``self.b`` is
                ``None``).
            **kwargs: accepted and ignored.
        """
        in_size, out_size = shape[0], shape[-1]
        # Standard-normal weights scaled by 2 / sqrt(in_size).
        init_scale = 2 / in_size**0.5
        weights = np.random.randn(*shape) * init_scale

        self.W = Parameter(weights, requires_grad)
        self.b = None
        if bias:
            self.b = Parameter(np.zeros(out_size), requires_grad)
        self.require_grad = requires_grad

    def forward(self, x):
        """Return ``x . W`` (plus the bias when present).

        The input is cached in ``self.x`` only when gradients are needed,
        because ``backward`` reads it to build the weight gradient.
        """
        if self.require_grad:
            self.x = x
        projected = np.dot(x, self.W.data)
        if self.b is None:
            return projected
        return projected + self.b.data

    def backward(self, eta):
        """Store parameter gradients and return the gradient w.r.t. the input.

        When gradients are enabled, ``W.grad`` and ``b.grad`` are set to
        values divided by ``eta.shape[0]``.
        """
        if self.require_grad:
            n_rows = eta.shape[0]
            self.W.grad = np.dot(self.x.T, eta) / n_rows
            if self.b is not None:
                self.b.grad = np.sum(eta, axis=0) / n_rows
        return np.dot(eta, self.W.T)
class Net(Layer):
    """Sequential stack of layers built from a list of config dicts.

    Each config is a dict with a ``'type'`` key (``'linear'`` or ``'tanh'``).
    A ``'linear'`` config is passed whole to ``Linear(**config)``. The
    trainable parameters of every linear layer are collected in
    ``self.parameters`` in creation order.
    """

    def __init__(self, layer_configures):
        self.layers = []
        self.parameters = []
        for config in layer_configures:
            self.layers.append(self.createLayer(config))

    def createLayer(self, config):
        """Build one layer from ``config``; delegates to ``getDefaultLayer``."""
        return self.getDefaultLayer(config)

    def getDefaultLayer(self, config):
        """Instantiate a built-in layer type and register its parameters.

        Raises:
            ValueError: if ``config['type']`` is not a known layer type
                (previously this fell through to an ``UnboundLocalError``).
        """
        t = config['type']
        if t == 'linear':
            layer = Linear(**config)
            self.parameters.append(layer.W)
            if layer.b is not None:
                self.parameters.append(layer.b)
        elif t == 'tanh':
            layer = Tanh()
        else:
            raise ValueError("unknown layer type: %r" % (t,))
        return layer

    def forward(self, x):
        """Feed ``x`` through every layer in order and return the result."""
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def backward(self, eta):
        """Push the gradient ``eta`` through the layers in reverse order."""
        for layer in reversed(self.layers):
            eta = layer.backward(eta)
        return eta

    def train(self, X, Y, optimizer, batch_size=16, epochs=500, loss=None):
        """Run mini-batch training over ``X``/``Y`` for ``epochs`` passes.

        Args:
            X: inputs, sliced along the first axis into batches.
            Y: targets, sliced the same way as ``X``.
            optimizer: object whose ``update()`` is called after every
                backward pass.
            batch_size: number of samples per batch; trailing samples that
                do not fill a complete batch are skipped.
            epochs: number of passes over the data.
            loss: callable returning ``(loss, accuracy)`` and exposing
                ``gradient()``. Defaults to a fresh ``MSE()`` per call, so
                no stateful loss instance is shared between calls.

        Per-batch progress is printed on every 40th epoch (0, 40, 80, ...).
        The previous condition ``epoch % 40`` was inverted and printed on
        all the other epochs instead.
        """
        if loss is None:
            loss = MSE()
        n = len(Y)
        for epoch in trange(epochs):
            for start in range(0, n - batch_size + 1, batch_size):
                stop = start + batch_size
                x, y = X[start:stop], Y[start:stop]
                output = self.forward(x)
                batch_loss, batch_acc = loss(output, y)
                self.backward(loss.gradient())
                optimizer.update()
                if epoch % 40 == 0:
                    # Batches are reported 1-based.
                    print("epoch: %d, batch: %5d, batch_acc:    %.2f,batch loss: %.2f" %
                          (epoch, start // batch_size + 1, batch_acc * 100, batch_loss))



In [None]:

X, Y = generate_data()

# Three linear layers (2 -> 6 -> 6 -> 1), each followed by a tanh activation.
layers = []
for shape in [(2, 6), (6, 6), (6, 1)]:
    layers.append({'type': 'linear', 'shape': shape})
    layers.append({'type': 'tanh'})

net = Net(layers)
opt = SGD(net.parameters, lr=1e-3)
net.train(X, Y, opt)



epoch: 128, batch:    25, batch_acc:    93.75,batch loss: 0.11
epoch: 128, batch:    26, batch_acc:    100.00,batch loss: 0.04
epoch: 128, batch:    27, batch_acc:    100.00,batch loss: 0.03
epoch: 128, batch:    28, batch_acc:    100.00,batch loss: 0.07
epoch: 128, batch:    29, batch_acc:    100.00,batch loss: 0.06
epoch: 128, batch:    30, batch_acc:    87.50,batch loss: 0.16
epoch: 128, batch:    31, batch_acc:    93.75,batch loss: 0.09
epoch: 128, batch:    32, batch_acc:    93.75,batch loss: 0.09
epoch: 129, batch:     1, batch_acc:    87.50,batch loss: 0.15
epoch: 129, batch:     2, batch_acc:    100.00,batch loss: 0.02
epoch: 129, batch:     3, batch_acc:    93.75,batch loss: 0.08
epoch: 129, batch:     4, batch_acc:    100.00,batch loss: 0.06
epoch: 129, batch:     5, batch_acc:    81.25,batch loss: 0.26
epoch: 129, batch:     6, batch_acc:    93.75,batch loss: 0.11
epoch: 129, batch:     7, batch_acc:    87.50,batch loss: 0.19
epoch: 129, batch:     8, batch_acc:    93.75,ba

In [None]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import SGD

X, Y = generate_data()

# Keras model for comparison: 2 inputs -> 4 -> 4 -> 1, tanh on every layer.
model = Sequential([
    Dense(4, input_dim=2, activation='tanh'),
    Dense(4, activation='tanh'),
    Dense(1, activation='tanh'),
])

# NOTE(review): `SGD` here is the Keras optimizer imported just above; that
# import rebinds the name used by the hand-written SGD class earlier in the
# file, so re-running the earlier training cell afterwards would pick up
# the Keras class instead.
# NOTE(review): newer Keras releases spell the step-size argument
# `learning_rate` and may reject `lr`/`decay` — confirm against the
# installed version.
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='MSE', optimizer=sgd, metrics=['accuracy'])
model.fit(X, Y, epochs=200, batch_size=16)