In [2]:
import numpy as np
import matplotlib.pyplot as plt
import h5py
import scipy
from PIL import Image
from scipy import ndimage
from lr_utils import load_dataset
from IPython.core.debugger import set_trace
%matplotlib inline

In [27]:
# Loading the data (cat/non-cat)
train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = load_dataset()
m_train = train_set_x_orig.shape[0]
m_test = test_set_x_orig.shape[0]
num_px = train_set_x_orig.shape[1]
train_set_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T
test_set_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T
train_set_x = train_set_x_flatten/255.
test_set_x = test_set_x_flatten/255.

$sigmoid( w^T x + b) = \frac{1}{1 + e^{-(w^T x + b)}}$ 
<img src="images/320px-Logistic-curve.svg.png">

In [4]:
def sigmoid(z):
    return 1/(1+np.exp(-z))

In [17]:
def weight_initialization(n_feature):
    w = np.zeros((n_feature, 1))
    b = 0
    return w, b

$$A = \sigma(w^T X + b) = (a^{(1)}, a^{(2)}, ..., a^{(m-1)}, a^{(m)})$$
$$J = -\frac{1}{m}\sum_{i=1}^{m}y^{(i)}\log(a^{(i)})+(1-y^{(i)})\log(1-a^{(i)})$$
$$ \frac{\partial J}{\partial w} = \frac{1}{m}X(A-Y)^T$$
$$ \frac{\partial J}{\partial b} = \frac{1}{m} \sum_{i=1}^m (a^{(i)}-y^{(i)})$$

In [44]:
def propagate(w, b, X, Y):
    
    m = X.shape[1] # number of samples
    print("w.T shape:"+str(w.T.shape))
    print("b:"+str(b))
    #print("X shape:"+str(X.shape))
    activation = sigmoid(np.dot(w.T, X) + b) #
    print("activation shape:"+str(activation.shape))
    cost = -(np.sum((Y * np.log(activation))+((1 - Y) * np.log(1 - activation))))/m 
    dw = np.dot(X , (activation - Y).T)/m
    db = np.sum(activation - Y)/m
    print("db shape:"+str(db.shape))
    print("db:"+str(db))
    assert(db.dtype == float)
 
    grads = {
        "dw":dw,
        "db":db
    }
    
    return grads, cost

In [13]:
def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost = False):
     
    costs = None
    dw = None
    db = None
    
    for i in range(num_iterations):
        grads, cost = propagate(w, b, X, Y)
        
        dw = grads["dw"]
        db = grads["dw"]
        
        w = w - learning_rate * dw
        b = b - learning_rate * db
    
    params = {
        "w":w,
        "b":b
    }
    
    grads = {
        "dw":dw,
        "db":db
    }
    return params, grads, costs

In [5]:
def perdict(w, b, X):
    Y_prediction = None
    return Y_prediction

In [23]:
def model(X_train, Y_train, X_test, Y_test, num_iterations = 2000, learning_rate = 0.005, print_cost = False):
    
    n_features = X_train.shape[0] # 209
    w, b = weight_initialization(n_features)
    optimize(w, b, X_train, Y_train, num_iterations, learning_rate)
    
    costs = None
    Y_prediction_test = None
    Y_prediction_train = None
    w = None
    b = None
    
    d = {"costs": costs,
     "Y_prediction_test": Y_prediction_test, 
     "Y_prediction_train" : Y_prediction_train, 
     "w" : w, 
     "b" : b,
     "learning_rate" : learning_rate,
     "num_iterations": num_iterations}
    
    return d

In [45]:
model(train_set_x, train_set_y, test_set_x, test_set_y, num_iterations = 2000, learning_rate = 0.005, print_cost = True)

w.T shape:(1, 12288)
b:0
activation shape:(1, 209)
db shape:()
db:0.15550239234449761
w.T shape:(1, 12288)
b:[[-0.00023604]
 [-0.00031499]
 [-0.00024618]
 ...
 [-0.00025373]
 [-0.00031063]
 [-0.00016226]]
activation shape:(12288, 209)
db shape:()
db:-1888.4753068641428
w.T shape:(12288, 12288)
b:[[ 1.67668245e-04  1.67688877e-04  1.67670893e-04 ...  1.67672867e-04
   1.67687737e-04  1.67648961e-04]
 [ 2.71140712e-05  2.71351066e-05  2.71167710e-05 ...  2.71187833e-05
   2.71339442e-05  2.70944097e-05]
 [ 9.39655865e-05  9.39836167e-05  9.39679006e-05 ...  9.39696254e-05
   9.39826204e-05  9.39487339e-05]
 ...
 [ 5.00422763e-05  5.00601301e-05  5.00445678e-05 ...  5.00462757e-05
   5.00591436e-05  5.00255886e-05]
 [-5.10511161e-05 -5.10324684e-05 -5.10487228e-05 ... -5.10469389e-05
  -5.10334988e-05 -5.10685459e-05]
 [ 1.41142553e-04  1.41156776e-04  1.41144379e-04 ...  1.41145739e-04
   1.41155990e-04  1.41129259e-04]]


ValueError: operands could not be broadcast together with shapes (12288,209) (12288,12288) 

In [22]:
train_set_x.shape

(12288, 209)