In [1]:
import numpy as np
from sklearn import datasets
%config IPCompleter.greedy=True

In [21]:
def sigmoid(x):
    x = np.dot(x, -1)
    return 1/(np.exp(x) + 1)

def relu(x):
    return np.maximum(x, 0)

def sigmoid_derivative(x):
    sig_x = sigmoid(x)
    return sig_x*(1-sig_x)

def relu_derivative(x):
    y = x
    y[x>=0] = 1
    y[y<0] = 0
    return y


[0.73105858 0.88079708 0.95257413 0.00669285 0.98201379] [1 2 3 0 4] [0.19661193 0.10499359 0.04517666 0.00664806 0.01766271] [1 1 1 0 1]


In [4]:
iris = datasets.load_iris()

In [261]:
def initialize_params(l_dims, b_with_zero, m):
    W = []
    B = []
    for i in range(1, len(l_dims)):
        w = np.random.randn(l_dims[i], l_dims[i-1])*0.01
        if b_with_zero:
            b = np.zeros((l_dims[i], 1))
        else:
            b = np.random.randn(l_dims[i], 1)*0.01
        W.append(w)
        B.append(b)
    params = {
        'W': W,
        'B': B
    }
    return params

In [262]:
def forward(X, params):
    caches = []
    A = X
    L = len(l_dims)
    for l in range(1, L):
        W = params['W'][l-1]
        B = params['B'][l-1]
        Z = np.dot(W, A) + B
        
        if l == L - 1:
            A = sigmoid(Z)
        else:
            A = relu(Z)
        caches.append((Z, A))
    return A, caches

In [263]:
def cost(A, Y, m):
    log_A = np.log(A)
    x = np.dot(Y, log_A) + np.dot((1-Y), (1-log_A))
    return -x/m

In [264]:
def backward(dA, caches, params, l_dims, m, alpha=0.0075):
    L = len(l_dims)
    for l in reversed(range(L)):
        if l ==0 :
            continue
        if l == L-1:
            derivative = sigmoid_derivative
        else:
            derivative = relu_derivative
        
        Z = caches[l-1][0]
        A_prev = caches[l-2][1]
        dZ = dA*derivative(Z)
        dW = np.dot(dZ, A_prev.T) / m
        dB = np.sum(dZ, axis=1, keepdims=True)/m
        dA = np.dot(params['W'][l-1].T, dZ)
        params['W'][l-1] = params['W'][l-1] - alpha*dW
        params['B'][l-1] = params['B'][l-1] - alpha*dB
    return True

In [271]:
def train(x_train, y_train, l_dims, params, v = 300):
    costs = []
    for i in range(iter_num):
        A, caches = forward(x_train, params)
        costs.append(cost(A.T, y_train.T, x_train.shape[1]))
        dA = y_train - A[0].reshape(A.shape[1], 1)
        backward(dA.T, caches, params,  l_dims,x_train.shape[0])
    return costs

In [289]:
data = iris.data[:100]
target = iris.target[:100]
def unison_shuffled_copies(a, b):
    assert len(a) == len(b)
    p = np.random.permutation(len(a))
    return a[p], b[p]
data, target = unison_shuffled_copies(data, target)

x_train = np.array(data[:60])
y_train = np.array(target[:60]).reshape(60, 1)
x_test = np.array(data[61:])
y_test = np.array(target[61:]).reshape(39, 1)
m = x_train.shape[0]

l_dims = [x_train.shape[1],4, 3, 1]
params = initialize_params(l_dims, True, x_train.shape[1])
costs = train(x_train.T, y_train, l_dims, params, iter_num=250)

print(costs)

A, caches = forward(x_test.T, params)


[array([[-0.65908373]]), array([[-0.6589587]]), array([[-0.6588329]]), array([[-0.65870634]]), array([[-0.658579]]), array([[-0.65845089]]), array([[-0.65832201]]), array([[-0.65819235]]), array([[-0.65806191]]), array([[-0.65793068]]), array([[-0.65779867]]), array([[-0.65766586]]), array([[-0.65753226]]), array([[-0.65739786]]), array([[-0.65726266]]), array([[-0.65712665]]), array([[-0.65698983]]), array([[-0.65685221]]), array([[-0.65671376]]), array([[-0.6565745]]), array([[-0.65643442]]), array([[-0.65629351]]), array([[-0.65615178]]), array([[-0.65600922]]), array([[-0.65586582]]), array([[-0.65572158]]), array([[-0.65557651]]), array([[-0.65543059]]), array([[-0.65528382]]), array([[-0.65513621]]), array([[-0.65498775]]), array([[-0.65483843]]), array([[-0.65468825]]), array([[-0.65453721]]), array([[-0.65438531]]), array([[-0.65423254]]), array([[-0.65407891]]), array([[-0.6539244]]), array([[-0.65376902]]), array([[-0.65361276]]), array([[-0.65345562]]), array([[-0.6532976]])