In [1]:
import numpy as np
import matplotlib.pyplot as plt
import scipy
from PIL import Image
from scipy import ndimage
from sklearn.datasets import load_breast_cancer as load_dataset
from sklearn import preprocessing

In [2]:
split = .8;
data = load_dataset()
split = int(len(data.data) * .8)
train_set_x_orig = data.data[:int(len(data.data) * .8)]
train_set_y_orig = data.target[:int(len(data.data) * .8)]
test_set_x_orig = data.data[int(len(data.data) * .8):]
test_set_y_orig = data.target[int(len(data.data) * .8):]

In [3]:
m_train = train_set_x_orig.shape[0]
m_test = test_set_x_orig.shape[0]
num_px = train_set_x_orig.shape[1]

In [4]:
#scale data to [0,1]
min_max_scaler = preprocessing.MinMaxScaler()
X_train = min_max_scaler.fit_transform(train_set_x_orig)
X_train = X_train.reshape(num_px, m_train)
X_test = min_max_scaler.fit_transform(test_set_x_orig)
X_test = X_test.reshape(num_px, m_test)
y_train = np.expand_dims(train_set_y_orig, axis=0)
y_test = np.expand_dims(test_set_y_orig, axis=0)

In [5]:
# Sigmoid of Z
def sigmoid(z):
    return 1 / (1 + np.exp(-z))


def initialize(dim):
    w = np.expand_dims(np.random.rand(dim), axis = 1)
    b = 0
    return w, b

def propagate(w, b, X, Y):

    m = X.shape[1]
    A = sigmoid(np.dot(w.T, X) + b)    
    cost = (-1/m) * (np.sum((Y * np.log(A)) + ((1 - Y) * np.log(1 - A))))   
    
    dw = (1/m) * np.dot(X, (np.subtract(A,Y)).T)
    db = (1/m) * np.sum(A - Y)

    
    cost = np.squeeze(cost)

    grads = {"dw": dw,
             "db": db}
    
    return grads, cost

def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost = False):
    
    costs = []
    
    for i in range(num_iterations):
        
        grads, cost = propagate(w, b, X, Y)

        dw = grads["dw"]
        db = grads["db"]
        
        w = w - (learning_rate * dw)
        b = b - (learning_rate * db)
        
        if i % 100 == 0:
            costs.append(cost)
        
        if print_cost and i % 100 == 0:
            print ("Cost after iteration %i: %f" %(i, cost))
    
    params = {"w": w,
              "b": b}
    
    grads = {"dw": dw,
             "db": db}
    
    return params, grads, costs

def predict(w, b, X):
    
    m = X.shape[1]
    Y_prediction = np.zeros((1,m))
    w = w.reshape(X.shape[0], 1)
    
    A = sigmoid(np.dot(w.T, X) + b)
    
    A[A > 0.5] = 1
    A[A <= 0.5] = 0
    Y_prediction = A

    return Y_prediction

def model(X_train, Y_train, X_test, Y_test, num_iterations = 2000, learning_rate = 0.5, print_cost = False):

    w, b = initialize(X_train.shape[0])

    parameters, grads, costs = optimize(w, b, X_train, Y_train, num_iterations, learning_rate, print_cost)
    
    w = parameters["w"]
    b = parameters["b"]
    
    Y_prediction_test = predict(w, b, X_test)
    Y_prediction_train = predict(w, b, X_train)

    print("train accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_train - Y_train)) * 100))
    print("test accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_test - Y_test)) * 100))

    d = {"costs": costs,
         "Y_prediction_test": Y_prediction_test, 
         "Y_prediction_train" : Y_prediction_train, 
         "w" : w, 
         "b" : b,
         "learning_rate" : learning_rate,
         "num_iterations": num_iterations}
    
    return d

d = model(X_train, y_train, X_test, y_test, num_iterations = 10000, learning_rate = 0.001, print_cost = True)

Cost after iteration 0: 1.552904
Cost after iteration 100: 1.510827
Cost after iteration 200: 1.469437
Cost after iteration 300: 1.428791
Cost after iteration 400: 1.388949
Cost after iteration 500: 1.349972
Cost after iteration 600: 1.311922
Cost after iteration 700: 1.274862
Cost after iteration 800: 1.238855
Cost after iteration 900: 1.203961
Cost after iteration 1000: 1.170238
Cost after iteration 1100: 1.137742
Cost after iteration 1200: 1.106522
Cost after iteration 1300: 1.076623
Cost after iteration 1400: 1.048082
Cost after iteration 1500: 1.020929
Cost after iteration 1600: 0.995186
Cost after iteration 1700: 0.970865
Cost after iteration 1800: 0.947969
Cost after iteration 1900: 0.926491
Cost after iteration 2000: 0.906414
Cost after iteration 2100: 0.887714
Cost after iteration 2200: 0.870357
Cost after iteration 2300: 0.854301
Cost after iteration 2400: 0.839498
Cost after iteration 2500: 0.825894
Cost after iteration 2600: 0.813430
Cost after iteration 2700: 0.802044
Cost