# Part 2: Logistic Regression with a Neural Network mindset

## 1 - Packages

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import h5py
import scipy
from PIL import Image
from scipy import ndimage
# from lr_utils import load_dataset

#  % matplotlib inline

### File: lr_utils, function: load_dataset()

In [None]:
def load_dataset():
    """Load the cat / non-cat image data set from its two HDF5 files.

    Reads ``datasets/train_catvnoncat.h5`` and ``datasets/test_catvnoncat.h5``
    (paths are relative to the current working directory).

    Returns:
        train_set_x_orig -- contents of "train_set_x", copied into a numpy array
        train_set_y_orig -- contents of "train_set_y", reshaped to a row
                            vector of shape (1, number_of_training_labels)
        test_set_x_orig -- contents of "test_set_x", copied into a numpy array
        test_set_y_orig -- contents of "test_set_y", reshaped to a row
                           vector of shape (1, number_of_test_labels)
        classes -- contents of "list_classes" from the test file, as a numpy array
    """
    # The ``with`` blocks close the HDF5 handles as soon as the data has been
    # read.  ``[:]`` plus ``np.array`` copies everything into memory first, so
    # the returned arrays stay valid after the files are closed.
    with h5py.File("datasets/train_catvnoncat.h5", "r") as train_dataset:
        train_set_x_orig = np.array(train_dataset["train_set_x"][:])
        train_set_y_orig = np.array(train_dataset["train_set_y"][:])

    with h5py.File("datasets/test_catvnoncat.h5", "r") as test_dataset:
        test_set_x_orig = np.array(test_dataset["test_set_x"][:])
        test_set_y_orig = np.array(test_dataset["test_set_y"][:])
        classes = np.array(test_dataset["list_classes"][:])

    # Store the labels as row vectors so that each column is one example.
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes

## 2 - Overview of the Problem set

In [None]:
# Load the raw images and labels. The "_orig" suffix marks the image arrays
# exactly as returned by load_dataset(), before the flattening and scaling
# done in later cells.
train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = load_dataset()

In [None]:
# Example of a picture: display training image `index` and print its label.
index = 25
plt.imshow(train_set_x_orig[index])
plt.show()
# train_set_y is a (1, m) row vector, so train_set_y[:, index] is a length-1
# array; np.squeeze reduces it to a scalar usable as an index into `classes`.
# The class names are decoded with utf-8 before string concatenation.
print("y = " + str(train_set_y[:, index]) + ", it's a '" + 
      classes[np.squeeze(train_set_y[:, index])].decode("utf-8") + "' picture")

In [None]:
# Axis 0 of the image arrays indexes the examples; axis 1 is used as the
# image side length.
# NOTE(review): the messages below assume square images with 3 colour
# channels -- confirm against the printed shapes.
m_train = train_set_x_orig.shape[0]
m_test = test_set_x_orig.shape[0]
num_px = train_set_x_orig.shape[1]

# Report the data set sizes and array shapes.
print ("Number of training examples: m_train = " + str(m_train))
print ("Number of testing examples: m_test = " + str(m_test))
print ("Height/Width of each image: num_px = " + str(num_px))
print ("Each image is of size: (" + str(num_px) + ", " + str(num_px) + ", 3)")
print ("train_set_x shape: " + str(train_set_x_orig.shape))
print ("train_set_y shape: " + str(train_set_y.shape))
print ("test_set_x shape: " + str(test_set_x_orig.shape))
print ("test_set_y shape: " + str(test_set_y.shape))

In [None]:
# Flatten every image into a single vector: reshape(m, -1) gives one row per
# example, and the transpose turns that into one COLUMN per example, which is
# the layout the model functions below expect (m = X.shape[1]).
train_set_x_flatten = train_set_x_orig.reshape(m_train, -1).T
test_set_x_flatten = test_set_x_orig.reshape(m_test, -1).T

# Report the new shapes and the first five values of the first example.
print ("train_set_x_flatten shape: " + str(train_set_x_flatten.shape))
print ("train_set_y shape: " + str(train_set_y.shape))
print ("test_set_x_flatten shape: " + str(test_set_x_flatten.shape))
print ("test_set_y shape: " + str(test_set_y.shape))
print ("sanity check after reshaping: " + str(train_set_x_flatten[0:5,0]))

In [None]:
# Scale the features by 1/255.
# NOTE(review): presumably pixel values are 0-255, which would map them into
# [0, 1] -- confirm against the data set's dtype.
train_set_x = train_set_x_flatten / 255.
test_set_x = test_set_x_flatten / 255.

## 3 - General Architecture of the learning algorithm


\begin{equation}
\mathcal{L} (a^{(i)}, y^{(i)}) = - y^{(i)} \log (a^{(i)}) - (1- y^{(i)}) \log(1- a^{(i)})
\end{equation}

\begin{equation}
J = \frac{1}{m} \sum_{i=1}^{m} \mathcal{L} (a^{(i)}, y^{(i)}    ) 
\end{equation}

1. 初始化参数
2. 通过最小化代价函数来学习参数
3. 使用学习到的参数来对测试集进行预测
4. 分析结果

## 4 - Building the parts of our algorithm

### 4.1 Helper functions

In [None]:
def sigmoid(z):
    """Compute the logistic function 1 / (1 + e^(-z)) element-wise.

    Argument:
        z -- a scalar or numpy array

    Returns:
        sigmoid(z), with the same shape as z
    """
    denominator = 1 + np.exp(-z)
    return 1.0 / denominator

# print ("sigmoid([0, 2]) = " + str(sigmoid(np.array([0,2]))))

### 4.2 Initializing parameters

In [None]:
def initialize_with_zeros(dim):
    """Build zero-valued starting parameters for logistic regression.

    Argument:
        dim -- number of weights to create (length of the w column vector)

    Returns:
        w -- numpy array of zeros with shape (dim, 1)
        b -- the bias, initialised to the integer 0
    """
    b = 0
    w = np.zeros(shape=(dim, 1))  # shape is a tuple: a column vector

    # Sanity checks on the freshly created parameters.
    assert isinstance(b, (float, int))
    assert w.shape == (dim, 1)

    return w, b

# dim = 2
# w, b = initialize_with_zeros(dim)
# print ("w = " + str(w))
# print ("b = " + str(b))

### 4.3 - 前向和反向传播

\begin{equation}
\frac{\partial{J}}{\partial{\omega}} =  \frac{1}{m} X {(A-Y)}^T
\end{equation}

\begin{equation}
\frac{\partial{J}}{\partial{b}} =  \frac{1}{m} \sum_{i=1}^{m} ( a^{(i)} - y^{(i)})
\end{equation}

In [None]:
def propagate(w, b, X, Y):
    """Run one forward and one backward pass of logistic regression.

    Arguments:
        w -- weights; combined with the data as w.T . X, and dw is asserted
             to have the same shape
        b -- bias, added to every entry of the linear output
        X -- data with one example per column (m = X.shape[1])
        Y -- labels, combined element-wise with the activations
             (presumably a (1, m) row of 0/1 values -- confirm with caller)

    Returns:
        grads -- dictionary holding "dw" (gradient of the cost with respect
                 to w) and "db" (gradient of the cost with respect to b)
        cost -- negative log-likelihood cost J, squeezed to a 0-d value
    """
    num_examples = X.shape[1]
    scale = 1.0 / num_examples

    # Forward pass: activations, then the cross-entropy cost.
    activations = sigmoid(np.dot(w.T, X) + b)
    log_likelihood = Y * np.log(activations) + (1 - Y) * np.log(1 - activations)
    cost = -scale * np.sum(log_likelihood)

    # Backward pass: both gradients are built from the error A - Y.
    error = activations - Y
    dw = scale * np.dot(X, error.T)
    db = scale * np.sum(error)

    assert dw.shape == w.shape
    assert db.dtype == float
    cost = np.squeeze(cost)
    assert cost.shape == ()

    return {"dw": dw, "db": db}, cost
    
# w, b, X, Y = np.array([[1.],[2.]]), 2., np.array([[1.,2.,-1.],[3.,4.,-3.2]]), np.array([[1,0,1]])
# grads, cost = propagate(w, b, X, Y)
# print ("dw = " + str(grads["dw"]))
# print ("db = " + str(grads["db"]))
# print ("cost = " + str(cost))     

### 4.4 - Optimization

In [None]:
def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost=False):
    """
    This function optimizes w and b by running a gradient descent algorithm

    Arguments:
        w -- initial weights, passed to propagate() and updated every iteration
        b -- initial bias, passed to propagate() and updated every iteration
        X -- training data, forwarded unchanged to propagate()
        Y -- training labels, forwarded unchanged to propagate()
        num_iterations -- number of iterations of the optimization loop
        learning_rate -- learning rate of the gradient descent update rule
        print_cost -- True to print the cost every 100 steps

    Returns:
        params -- dictionary containing the weights w and bias b
        grads -- dictionary containing the gradients "dw" and "db" returned by
                the last propagate() call (i.e. computed just before the
                final update); both are None when num_iterations is 0
        costs -- list of the costs recorded every 100 iterations,
                this will be used to plot the learning curve.
    """

    costs = []

    # Placeholders so that a run with zero iterations returns cleanly instead
    # of failing on unbound names when the result dictionary is built.
    dw, db = None, None

    for i in range(num_iterations):
        grads, cost = propagate(w, b, X, Y)
        dw = grads["dw"]
        db = grads["db"]

        # Gradient descent step.
        w = w - learning_rate * dw
        b = b - learning_rate * db

        # Sample (and optionally report) the cost once per 100 iterations.
        if i % 100 == 0:
            costs.append(cost)
            if print_cost:
                print("Cost after iterition {}: {}".format(i, cost))

    params = {"w": w,
              "b": b}

    grads = {"dw": dw,
             "db": db}

    return params, grads, costs


# params, grads, costs = optimize(w, b, X, Y,
#                                 num_iterations= 500, 
#                                 learning_rate = 0.009, 
#                                 print_cost = False)
# print ("w = " + str(params["w"]))
# print ("b = " + str(params["b"]))
# print ("dw = " + str(grads["dw"]))
# print ("db = " + str(grads["db"]))
# print ("costs = " + str((np.array(costs)).round(3)))

### predict

In [None]:
def predict(w, b, X):
    """
    Predict a 0/1 label for every example in X with logistic regression.

    Arguments:
        w -- weights; reshaped to (X.shape[0], 1) before use
        b -- bias, added to every entry of the linear output
        X -- data with one example per column (m = X.shape[1])

    Return:
        Y_prediction -- a numpy array of shape (1, m) containing all
            predictions (0.0 or 1.0) for the examples in X.
    """

    m = X.shape[1]
    w = w.reshape((X.shape[0], 1))

    A = sigmoid(np.dot(w.T, X) + b)

    # Vectorised threshold instead of a Python loop over the examples:
    # activations strictly greater than 0.5 become 1.0, all others 0.0.
    Y_predictions = (A > 0.5).astype(float)

    assert(Y_predictions.shape == (1, m))

    return Y_predictions


# w = np.array([[0.1124579],[0.23106775]])
# b = -0.3
# X = np.array([[1.,-1.1,-3.2],[1.2,2.,0.1]])
# print ("predictions = " + str(predict(w, b, X)))             

## 5 - Merge all functions into a model

In [None]:
def model(X_train, Y_train, X_test, Y_test,
          num_iterations=2000, learning_rate=0.5, print_cost=False):
    """
    Train logistic regression on the training set and evaluate it on both sets.

    Arguments:
        X_train -- training data; X_train.shape[0] sets the number of weights
        Y_train -- training labels, compared element-wise with the predictions
        X_test -- test data, passed to predict()
        Y_test -- test labels, compared element-wise with the predictions
        num_iterations -- number of gradient descent iterations
        learning_rate -- step size forwarded to optimize()
        print_cost -- forwarded to optimize(); True prints the cost periodically

    Returns:
        d -- dictionary containing information about the model: the recorded
             costs, the predictions on both sets, the learned w and b, and
             the hyper-parameters used.
    """
    # Start from all-zero parameters and fit them with gradient descent.
    w_init, b_init = initialize_with_zeros(X_train.shape[0])
    parameters, _, costs = optimize(w_init, b_init, X_train, Y_train,
                                    num_iterations, learning_rate, print_cost)
    w, b = parameters["w"], parameters["b"]

    Y_prediction_test = predict(w, b, X_test)
    Y_prediction_train = predict(w, b, X_train)

    # Accuracy in percent = 100 minus the mean absolute prediction error in percent.
    train_accuracy = 100 - np.mean(np.abs(Y_prediction_train-Y_train)) * 100
    print("训练准确率：{} %".format(train_accuracy))
    test_accuracy = 100 - np.mean(np.abs(Y_prediction_test - Y_test)) * 100
    print("测试准确率: {} %".format(test_accuracy))

    return {
        "costs": costs,
        "Y_prediction_test": Y_prediction_test,
        "Y_prediction_train": Y_prediction_train,
        "w": w,
        "b": b,
        "learning_rate": learning_rate,
        "num_iterations": num_iterations,
    }


# Train on the flattened, scaled data. learning_rate is passed explicitly
# here (0.005) rather than relying on model()'s default of 0.5.
d = model(train_set_x, train_set_y, test_set_x, test_set_y, num_iterations = 2000, learning_rate = 0.005, print_cost = True)

In [None]:
# Show one test image next to its true label and the model's prediction.
# NOTE(review): the original caption called this "a picture that was wrongly
# classified"; whether index 1 really is misclassified depends on the trained
# model -- confirm from the printed output.
index = 1
plt.imshow(test_set_x[:,index].reshape((num_px, num_px, 3)))
plt.show()

# Predictions are stored as floats in a (1, m) array, so take the scalar at
# [0, index] and cast it to int before indexing `classes`.  (A slice such as
# [0:, index] yields a 1-element array, and calling int() on an array with
# ndim > 0 is deprecated in recent NumPy releases.)
print ("y = " + str(test_set_y[0,index])
       + ", you predicted that it is a \""
       + classes[int(d["Y_prediction_test"][0, index])].decode("utf-8")
       +  "\" picture.")


In [None]:
# Plot the learning curve. optimize() records one cost per 100 iterations,
# so position k on the x axis corresponds to iteration 100 * k.
costs = np.squeeze(d['costs'])
plt.plot(costs)
plt.ylabel("cost")
plt.xlabel("iterations (per hundreds)")
plt.title("Learning rate = " + str(d['learning_rate']))
plt.show()

## 6 - Further analysis (optional)

In [None]:
# Train one model per learning rate and compare their learning curves.
learning_rates = [0.01, 0.001, 0.0001]
models = {}
for rate in learning_rates:
    print("learning rate: {}".format(rate))
    # Results are keyed by the string form of the learning rate.
    models[str(rate)] = model(train_set_x, train_set_y, test_set_x, test_set_y,
                              num_iterations=1500, learning_rate=rate,
                              print_cost=False)
    print ('\n' + "-------------------------------------------------------" + '\n')

for rate in learning_rates:
    plt.plot(np.squeeze(models[str(rate)]["costs"]), label=str(models[str(rate)]["learning_rate"]))

plt.ylabel("cost")
# Costs are sampled once every 100 iterations, so the x axis counts hundreds
# of iterations (same label as the single-model learning curve).
plt.xlabel("iterations (per hundreds)")

legend = plt.legend(loc='upper center', shadow=True)
frame = legend.get_frame()
frame.set_facecolor('0.9')
plt.show()

## 7 - Test with your own image (optional)

In [None]:
# Classify a user-supplied picture with the trained model.
my_image = "isacatornot.jpg"
fname = "images/" + my_image

# scipy.ndimage.imread and scipy.misc.imresize no longer exist in current
# SciPy releases, so the image is loaded and resized with Pillow (already
# imported as `Image`).  convert("RGB") guarantees exactly 3 channels, so the
# reshape below also works for grayscale or RGBA files.
with Image.open(fname) as img:
    image = np.array(img.convert("RGB"))

# Bilinear interpolation matches imresize's former default.
my_image = np.array(Image.fromarray(image).resize((num_px, num_px), Image.BILINEAR))

# Apply the same preprocessing as the training data: flatten to a single
# column and scale by 1/255.  Without the scaling the input would be on a
# different scale from the data the weights were learned on.
my_image = my_image.reshape((num_px**2*3, 1)) / 255.
my_predicted_image = predict(d["w"], d["b"], my_image)

plt.imshow(image)
plt.show()
print("y = " + str(np.squeeze(my_predicted_image)) 
      + ", your algorithm predicts a \"" 
      + classes[int(np.squeeze(my_predicted_image))].decode("utf-8") 
      +  "\" picture.")