In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils import shuffle

In [2]:
from util import getBinaryData, sigmoid, sigmoid_cost, error_rate, relu

In [5]:
class ANN(object):
    """Binary classifier with one tanh hidden layer and a sigmoid output unit.

    Parameters are learned with full-batch gradient descent; every weight and
    bias update includes an L2 (ridge) penalty term ``reg * parameter``.

    Attributes created by ``fit``:
        W1, b1: input-to-hidden weights ``(D, M)`` and biases ``(M,)``.
        W2, b2: hidden-to-output weights ``(M,)`` and scalar bias.
    """

    def __init__(self, M):
        """Remember the number of hidden units ``M``."""
        self.M = M

    def fit(self, X, Y, learning_rate = 5*10e-7, reg = 1.0, epochs = 10000, show_fig = False):
        """Train the network on ``X`` / ``Y``.

        The data is shuffled and the last 1000 samples are held out for
        validation; the remainder is used for training. Every 100 epochs the
        validation cost and error rate are printed, and the lowest error rate
        seen is printed once training ends.

        Args:
            X: 2-D array of samples, one row per sample.
            Y: 1-D array of targets aligned with the rows of ``X``.
            learning_rate: gradient-descent step size.
                NOTE(review): ``10e-7`` equals ``1e-6``, so the default is
                about ``5e-6`` -- confirm that ``5e-7`` was not intended.
            reg: L2 penalty strength applied to weights and biases.
            epochs: number of full-batch gradient steps.
            show_fig: when true, plot the recorded validation costs after
                training.

        Note:
            The hold-out size is fixed at 1000, so inputs with 1000 rows or
            fewer leave no training data.
        """
        X, Y = shuffle(X, Y)
        Xvalid, Yvalid = X[-1000:], Y[-1000:]
        X, Y = X[:-1000], Y[:-1000]

        D = X.shape[1]
        self.W1 = np.random.randn(D, self.M) / np.sqrt(D + self.M)
        self.b1 = np.zeros(self.M)
        self.W2 = np.random.randn(self.M) / np.sqrt(self.M)
        self.b2 = 0

        costs = []
        best_validation_error = 1
        for i in range(epochs):
            pY, Z = self.forward(X)

            # Output-layer error signal: prediction minus target.
            pY_Y = pY - Y

            # Compute every gradient from the *current* parameters before any
            # of them is updated; the hidden-layer gradient depends on W2, so
            # updating W2 first would back-propagate through the wrong weights.
            grad_W2 = Z.T.dot(pY_Y) + reg*self.W2
            grad_b2 = pY_Y.sum() + reg*self.b2
            dZ = np.outer(pY_Y, self.W2) * (1 - Z*Z)  # tanh'(a) = 1 - tanh(a)^2
            grad_W1 = X.T.dot(dZ) + reg*self.W1
            grad_b1 = np.sum(dZ, axis = 0) + reg*self.b1

            # Gradient-descent step (L2 penalty already folded into the gradients).
            self.W2 -= learning_rate*grad_W2
            self.b2 -= learning_rate*grad_b2
            self.W1 -= learning_rate*grad_W1
            self.b1 -= learning_rate*grad_b1

            if i%100 == 0:
                pYvalid, _ = self.forward(Xvalid)
                c = sigmoid_cost(Yvalid, pYvalid)
                costs.append(c)
                e = error_rate(Yvalid, np.round(pYvalid))
                print("i:",i,"cost:",c,"error:",e)
                if e<best_validation_error:
                    best_validation_error = e
        print("best error:",best_validation_error)

        if show_fig:
            # Local import: plotting is only needed when a figure is requested.
            import matplotlib.pyplot as plt
            plt.plot(costs)
            plt.xlabel("validation check (every 100 epochs)")
            plt.ylabel("validation cost")
            plt.show()

    def forward(self, X):
        """Run a forward pass.

        Returns:
            A tuple ``(pY, Z)``: the sigmoid output for each row of ``X`` and
            the tanh hidden-layer activations.
        """
        Z = np.tanh(X.dot(self.W1) + self.b1)
        return sigmoid(Z.dot(self.W2) + self.b2), Z

    def predict(self, X):
        """Return the rounded (0/1) network output for each row of ``X``."""
        # forward() returns (output, hidden activations); only the output is rounded.
        pY, _ = self.forward(X)
        return np.round(pY)

    def score(self, X, Y):
        """Return ``1 - error_rate(Y, predictions)`` for the samples in ``X``."""
        prediction = self.predict(X)
        return 1 - error_rate(Y, prediction)


def main():
    """Load the binary dataset, oversample class 1, and train a 100-unit ANN.

    Rows labelled 1 are repeated nine times before training -- presumably to
    offset class imbalance in the data returned by ``getBinaryData``; confirm
    against that helper.
    """
    features, labels = getBinaryData()

    negatives = features[labels == 0, :]
    positives = np.repeat(features[labels == 1, :], 9, axis=0)

    # Rebuild the dataset: all class-0 rows followed by the repeated class-1 rows.
    X = np.vstack([negatives, positives])
    Y = np.array([0] * len(negatives) + [1] * len(positives))

    network = ANN(100)
    network.fit(X, Y, show_fig=True)

# Entry point: run the training demo when this code executes as the top-level module.
if __name__ == '__main__':
    main()


i: 0 cost: 907.8751994052423 error: 0.519
i: 100 cost: 667.5915214736713 error: 0.431
i: 200 cost: 649.3395538601296 error: 0.393
i: 300 cost: 639.8385758571776 error: 0.369
i: 400 cost: 632.0090405309058 error: 0.361
i: 500 cost: 624.7495642287267 error: 0.361
i: 600 cost: 617.6699455518261 error: 0.354
i: 700 cost: 610.6422743556225 error: 0.348
i: 800 cost: 603.6538415783082 error: 0.345
i: 900 cost: 596.7091023699525 error: 0.341
i: 1000 cost: 589.7532763395357 error: 0.341
i: 1100 cost: 582.7830654582108 error: 0.331
i: 1200 cost: 575.80502279928 error: 0.323
i: 1300 cost: 568.8013894047479 error: 0.317
i: 1400 cost: 561.744569606612 error: 0.306
i: 1500 cost: 554.6147670557027 error: 0.301
i: 1600 cost: 547.405986090305 error: 0.297
i: 1700 cost: 540.1205568760452 error: 0.294
i: 1800 cost: 532.7592194980574 error: 0.288
i: 1900 cost: 525.3169194531662 error: 0.281
i: 2000 cost: 517.7831036208745 error: 0.275
i: 2100 cost: 510.14259891105155 error: 0.261
i: 2200 cost: 502.3796341