In [1]:
import numpy as np 
import math

In [2]:
def relu(X):
    return np.maximum(X, 0)

In [3]:
def build_model(X, hidden_nodes , output_dim=2):
    model = {} 
    input_dim = X.shape[1]
    print(input_dim)    
    model['W1'] = np.random.randn(input_dim, hidden_nodes) / np.sqrt(input_dim)
    model['b1'] = np.zeros((1, hidden_nodes))
    model['W2'] = np.random.randn(hidden_nodes, output_dim) / np.sqrt(hidden_nodes)
    model['b2'] = np.zeros((1, output_dim))
    return model

In [4]:
def feed_forward(model, x):
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    z1 = x.dot(W1) + b1
    a1 = relu(z1)
    z2 = a1.dot(W2) + b2
    exp_scores = np.exp(z2)
    out = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    return z1, a1, z2, out

In [5]:
def calculate_loss(model,X,y,reg_lambda):
    num_examples = X.shape[0]
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    z1, a1, z2, out = feed_forward(model, X)
    probs = out / np.sum(out, axis=1, keepdims=True)    
    corect_logprobs = -np.log(probs[range(num_examples), y])
    loss = np.sum(corect_logprobs)    
####Add regulatization term to loss (optional)
    loss = loss + reg_lambda/2 * (np.sum(np.square(W1)) + np.sum(np.square(W2)))
    return 1./num_examples * loss

In [6]:
def relu_derivative(X):
    return 1. * (X > 0)

In [7]:
def backprop(X,y,model,z1,a1,z2,output,reg_lambda):
    delta3 = output
    delta3[range(X.shape[0]), y] -= 1      #yhat - y
    dW2 = (a1.T).dot(delta3)
    db2 = np.sum(delta3, axis=0, keepdims=True)
    
    delta2 = delta3.dot(model['W2'].T) * relu_derivative(a1) 
    dW1 = np.dot(X.T, delta2)
    db1 = np.sum(delta2, axis=0)
   
    # Add regularization terms
    dW2 = dW2 + reg_lambda * model['W2']
    dW1 = dW1 + reg_lambda * model['W1']
    return dW1, dW2, db1, db2

In [8]:
def train(model, X, y, num_passes=10000, reg_lambda = .1, learning_rate=0.1):
    # Batch gradient descent
    done = False
    previous_loss = float('inf')
    i = 0
    losses = []
    
    while done == False:  
        z1,a1,z2,output = feed_forward(model, X)
        dW1, dW2, db1, db2 = backprop(X,y,model,z1,a1,z2,output,reg_lambda)
        
        model['W1'] -= learning_rate * dW1
        model['b1'] -= learning_rate * db1
        model['W2'] -= learning_rate * dW2
        model['b2'] -= learning_rate * db2
        
        if i % 1000 == 0:
            loss = calculate_loss(model, X, y, reg_lambda)
            losses.append(loss)
            print( "Loss after iteration %i: %f" %(i, loss)  )
            if (previous_loss-loss)/previous_loss < 0.01:
                   done = True
                    #print i
            previous_loss = loss
        i += 1
    return model, losses

In [9]:
X = np.array( [[ 1 , 1], [ 2 , 2],[ 7 , 7], [ 8 , 8], [ 9 , 9]] )
y = np.array([0,0,1,1,1])

##train
model = build_model(X , 3 , 2)
model, losses = train ( model, X , y , reg_lambda = 0.01, learning_rate = 0.01 )
output = feed_forward(model, X)
preds  = np.argmax(output[3] , axis=1)
preds

2
Loss after iteration 0: 0.800472
Loss after iteration 1000: 0.014265
Loss after iteration 2000: 0.009723


  if (previous_loss-loss)/previous_loss < 0.01:


Loss after iteration 3000: 0.008285
Loss after iteration 4000: 0.007401
Loss after iteration 5000: 0.006741
Loss after iteration 6000: 0.006227
Loss after iteration 7000: 0.005820
Loss after iteration 8000: 0.005484
Loss after iteration 9000: 0.005229
Loss after iteration 10000: 0.005022
Loss after iteration 11000: 0.004850
Loss after iteration 12000: 0.004703
Loss after iteration 13000: 0.004575
Loss after iteration 14000: 0.004464
Loss after iteration 15000: 0.004366
Loss after iteration 16000: 0.004281
Loss after iteration 17000: 0.004205
Loss after iteration 18000: 0.004137
Loss after iteration 19000: 0.004077
Loss after iteration 20000: 0.004023
Loss after iteration 21000: 0.003975
Loss after iteration 22000: 0.003931
Loss after iteration 23000: 0.003891
Loss after iteration 24000: 0.003854


array([0, 0, 1, 1, 1])

In [10]:
from sklearn.neural_network import MLPClassifier
import numpy as np 

model = MLPClassifier(hidden_layer_sizes=(8) , max_iter=110 , alpha=1e-4, solver='sgd', random_state=1 
                     ,verbose=True , learning_rate_init=.1 )
##train
X_train = np.array( [[ 1 , 1], [3 , 3],[ 6 , 6], [ 7 , 7], [ 9 , 9]] )
y_train = np.array([0,0,1,1,1])
model.fit(X_train, y_train);
print(model.score(X_train, y_train))

##test data
X_test = np.array( [[ 2 , 2] , [ 0 , 0], [ 11 , 11]] )
y_test = np.array ([0 , 0 , 1])
y_pred = model.predict(X_test)
print(model.score(X_test, y_test))

Iteration 1, loss = 0.77242729
Iteration 2, loss = 0.68148155
Iteration 3, loss = 0.62225463
Iteration 4, loss = 0.59184064
Iteration 5, loss = 0.55574346
Iteration 6, loss = 0.52604574
Iteration 7, loss = 0.49736578
Iteration 8, loss = 0.46831510
Iteration 9, loss = 0.43823781
Iteration 10, loss = 0.40642844
Iteration 11, loss = 0.37520567
Iteration 12, loss = 0.34415212
Iteration 13, loss = 0.31474519
Iteration 14, loss = 0.28753417
Iteration 15, loss = 0.26192975
Iteration 16, loss = 0.23746029
Iteration 17, loss = 0.21458947
Iteration 18, loss = 0.19340743
Iteration 19, loss = 0.17390211
Iteration 20, loss = 0.15610620
Iteration 21, loss = 0.13998697
Iteration 22, loss = 0.12547184
Iteration 23, loss = 0.11246640
Iteration 24, loss = 0.10086182
Iteration 25, loss = 0.09054159
Iteration 26, loss = 0.08138709
Iteration 27, loss = 0.07328203
Iteration 28, loss = 0.06611547
Iteration 29, loss = 0.05978374
Iteration 30, loss = 0.05419142
Iteration 31, loss = 0.04925177
Iteration 32, los