In [21]:
import numpy as np
np.random.seed(1)
X=np.random.randn(2,3)
print(X)

[[ 1.62434536 -0.61175641 -0.52817175]
 [-1.07296862  0.86540763 -2.3015387 ]]


In [22]:
def sigmoid(z):
        s=1/(1+np.exp(-z))
        return s

In [23]:
s=sigmoid(X)
print(s)

[[0.83539354 0.35165864 0.3709434 ]
 [0.25483894 0.70378922 0.09099561]]


In [24]:
def tan(z):    
    s = (np.exp(z)-np.exp(-z))/(np.exp(z)+np.exp(-z))
    return s

In [25]:
t=tan(X)
print(t)

[[ 0.92525207 -0.5453623  -0.48398233]
 [-0.79057703  0.69903334 -0.98015695]]


In [26]:
def relu(z):
    s = np.maximum(0,z)
    return s

In [27]:
r=relu(X)
print(r)

[[1.62434536 0.         0.        ]
 [0.         0.86540763 0.        ]]


In [28]:
def lrelu(z):
    s = np.maximum(0.01,z)
    return s

In [29]:
ler=lrelu(X)
print(ler)

[[1.62434536 0.01       0.01      ]
 [0.01       0.86540763 0.01      ]]


In [30]:
def softmax(vector):
    e = np.exp(vector)
    s= e / e.sum()
    return s

In [31]:
sof=softmax(X)
print(sof)

[[0.56232214 0.06009779 0.06533695]
 [0.03789279 0.26325869 0.01109163]]


In [32]:
##derivatives
def dsig(s):
    das = (s)*(1-s)
    return das

In [33]:
ds=dsig(s)
print(ds)

[[0.13751118 0.22799484 0.23334439]
 [0.18989606 0.20846995 0.08271541]]


In [34]:
def dthan(s):
    dat = (1-s**2)
    return dat

In [35]:
dt=dthan(t)
print(t)

[[ 0.92525207 -0.5453623  -0.48398233]
 [-0.79057703  0.69903334 -0.98015695]]


In [36]:
def drelu(s):
    dar=(np.int64(s>0))
    return dar

In [37]:
dr = drelu(r)
print(dr)

[[1 0 0]
 [0 1 0]]


In [38]:
def dlrelu(s,alpha=0.01):
    dal=np.where(s >0, 1, alpha)
    return dal

In [39]:
dler=dlrelu(ler,0.01)
print(dler)

[[1. 1. 1.]
 [1. 1. 1.]]


# Neural network model

In [None]:
#1.Input X and Y
np.random.seed(1)
X=np.random.randn(2,3)
Y=(np.random.randn(1,3)>0)

In [41]:
#2.Define shape
def layer_sizes(X, Y,layers):
    n_x=X.shape[0]
    n_h=layers
    n_y=Y.shape[0]
    return (n_x,n_h,n_y)

In [42]:
n_x,n_h,n_y = layer_sizes(X, Y,4)

In [43]:
print(n_x,n_h,n_y)

2 4 1


In [44]:
#3. Initialize the parameters
def initialize_parameters(n_x, n_h, n_y):
    np.random.seed(2)
    
    W1 = np.random.randn(n_h,n_x)*0.01
    b1 = np.zeros((n_h,1))          
    W2 = np.random.randn(n_y,n_h)*0.01   
    b2 = np.zeros((n_y,1))            

    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}
    return parameters

In [45]:
parameters=initialize_parameters(n_x, n_h, n_y)
print(parameters)

{'W1': array([[-0.00416758, -0.00056267],
       [-0.02136196,  0.01640271],
       [-0.01793436, -0.00841747],
       [ 0.00502881, -0.01245288]]), 'b1': array([[0.],
       [0.],
       [0.],
       [0.]]), 'W2': array([[-0.01057952, -0.00909008,  0.00551454,  0.02292208]]), 'b2': array([[0.]])}


In [46]:
#4. Forward Propagation
def forward_propagation(X, parameters):
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]

    Z1 = np.dot(W1,X)+b1
    A1 = np.tanh(Z1)
    Z2 = np.dot(W2,A1)+b2
    A2 = sigmoid(Z2)

    cache = {"Z1": Z1,
             "A1": A1,
             "Z2": Z2,
             "A2": A2}
    
    return A2, cache

In [47]:
A2, cache=forward_propagation(X, parameters)
print(A2, cache)

[[0.5002307  0.49985831 0.50023963]] {'Z1': array([[-0.00616586,  0.00206261,  0.0034962 ],
       [-0.05229879,  0.02726335, -0.02646868],
       [-0.0200999 ,  0.00368691,  0.02884556],
       [ 0.02153008, -0.01385323,  0.02600471]]), 'A1': array([[-0.00616578,  0.0020626 ,  0.00349619],
       [-0.05225116,  0.02725659, -0.0264625 ],
       [-0.0200972 ,  0.00368689,  0.02883756],
       [ 0.02152676, -0.01385234,  0.02599885]]), 'Z2': array([[ 0.00092281, -0.00056678,  0.00095853]]), 'A2': array([[0.5002307 , 0.49985831, 0.50023963]])}


In [48]:
#5.Compute Cost
def compute_cost(A2, Y, parameters):
    m=Y.shape[1]
    logprobs = np.multiply(np.log(A2),Y)+np.multiply(np.log(1-A2),(1-Y))
    cost = -np.sum(logprobs)/m
    cost = float(np.squeeze(cost))
    return cost

In [49]:
compute_cost(A2, Y, parameters)

0.6927392477233995

In [50]:
#6. Backward Propagation
def BWP(parameters,cache,X,Y):
    W1=parameters["W1"]
    b1=parameters["b1"]
    W2=parameters["W2"]
    b2=parameters["b2"]

    A1=cache["A1"]
    A2=cache["A2"]

    m=Y.size

    dZ2=A2-Y
    dW2=np.dot(dZ2,A1.T)/m
    db2=np.sum(dZ2,axis=1,keepdims=True)/m
    dA1=np.dot(W2.T,dZ2)
    dZ1=dA1*(1-np.power(A1,2))
    dW1=np.dot(dZ1,X.T)/m
    db1=np.sum(dZ1,axis=1,keepdims=True)/m

    grades={"dW2":dW2,"db2":db2,"dW1":dW1,"db1":db1}

    return grades


In [51]:
grades=BWP(parameters,cache,X,Y)
print(grades)

{'dW2': array([[ 0.00078841,  0.01765429, -0.00084166, -0.01022527]]), 'db2': array([[-0.16655712]]), 'dW1': array([[ 0.00301023, -0.00747267],
       [ 0.00257968, -0.00641288],
       [-0.00156892,  0.003893  ],
       [-0.00652037,  0.01618243]]), 'db1': array([[ 0.00176201],
       [ 0.00150995],
       [-0.00091736],
       [-0.00381422]])}


In [52]:
#7.Update Grades
def update(parameters,grades,lr=0.01):
    W1=parameters["W1"]
    b1=parameters["b1"]
    W2=parameters["W2"]
    b2=parameters["b2"]

    dW1=grades["dW1"]
    db1=grades["db1"]
    dW2=grades["dW2"]
    db2=grades["db2"]


    W1=W1-lr*dW1
    b1=b1-lr*db1
    W2=W2-lr*dW2
    b2=b2-lr*db2

    parameters={"W1":W1,"b1":b1,"W2":W2,"b2":b2}

    return parameters


In [53]:
parameters=update(parameters,grades,lr=0.01)
print(parameters)

{'W1': array([[-0.00419768, -0.00048794],
       [-0.02138776,  0.01646684],
       [-0.01791867, -0.0084564 ],
       [ 0.00509402, -0.01261471]]), 'b1': array([[-1.76201370e-05],
       [-1.50994736e-05],
       [ 9.17363463e-06],
       [ 3.81421789e-05]]), 'W2': array([[-0.01058741, -0.00926662,  0.00552296,  0.02302433]]), 'b2': array([[0.00166557]])}


In [54]:
#8.creat own NN
def NN(X,Y,layers,itr=10000,print_cost=False):
    np.random.seed(3)
    n_x=layer_sizes(X, Y,layers)[0]
    n_y=layer_sizes(X, Y,layers)[2]
    n_h=layer_sizes(X, Y,layers)[1]

    parameters=initialize_parameters(n_x, n_h, n_y)

    for i in range (0,itr):
        A2, cache = forward_propagation(X, parameters)
        cost=compute_cost(A2, Y, parameters)
        grades=BWP(parameters,cache,X,Y)
        parameters=update(parameters,grades,lr=0.01)

        if print_cost and i%100==0:
            print("cost % i:%f" %(i,cost))

    return parameters


In [55]:
NN(X,Y,4,itr=1000,print_cost=True)

cost  0:0.692739
cost  100:0.669215
cost  200:0.649677
cost  300:0.619729
cost  400:0.555289
cost  500:0.447120
cost  600:0.328614
cost  700:0.234149
cost  800:0.169726
cost  900:0.127562


{'W1': array([[-0.21830665,  0.43559748],
        [-0.38633057,  0.73732683],
        [ 0.11624791, -0.28270074],
        [ 0.39639168, -0.77368548]]),
 'b1': array([[ 0.03393657],
        [ 0.09586648],
        [-0.01517238],
        [-0.10360464]]),
 'W2': array([[-0.52988031, -1.04262947,  0.31610115,  1.11019047]]),
 'b2': array([[0.34409584]])}