In [2]:
import numpy as np
import tensorflow as tf #importing tensorflow to load mnist dataset from tf.keras

In [3]:
print(tf.__version__)

2.0.0-rc0


In [4]:
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [6]:
train_filter = np.where((y_train == 0 ) | (y_train == 1))
test_filter = np.where((y_test == 0) | (y_test == 1))   #using only 1 and 0 for dataset

In [7]:
x_train, y_train = x_train[train_filter], y_train[train_filter]
x_test, y_test = x_test[test_filter], y_test[test_filter]

In [8]:
x_train

array([[[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       ...,

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 

In [9]:
y_train

array([0, 1, 1, ..., 1, 0, 1], dtype=uint8)

In [10]:
x_train.shape

(12665, 28, 28)

In [11]:
m=x_train.shape[0]

In [12]:
m

12665

In [15]:
m_test=x_test.shape[0]

In [16]:
m_test

2115

In [22]:
x_train_flatten = x_train.reshape(x_train.shape[0], -1).T 
x_test_flatten = x_test.reshape(x_test.shape[0], -1).T 

In [23]:
x_train_flatten.shape

(784, 12665)

In [24]:
x_train_set=x_train_flatten/255
x_test_set=x_test_flatten/255

In [25]:
def sigmoid(z):
    s = 1/(1+np.exp(-z))
    return s
    

In [26]:
def initialize_with_zeros(dim):
    w=np.zeros((dim,1))
    b=0
    
    assert(w.shape == (dim, 1))
    assert(isinstance(b, float) or isinstance(b, int))
    
    return w, b

In [27]:
def propagte(w,b,X,Y):
    
    m=X.shape[1]
    A=sigmoid(np.dot(w.T,X)+b)
    cost=-(1/m)*np.sum((Y*np.log(A)+(1-Y)*np.log(1-A)))
    
    dw = (1 / m) * np.dot(X, (A - Y).T)
    db = (1 / m) * np.sum(A - Y)
    
    assert(dw.shape == w.shape)
    assert(db.dtype == float)
    cost = np.squeeze(cost)
    assert(cost.shape == ())
    
    grads = {"dw": dw,
             "db": db}
    
    return grads, cost

In [40]:

def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost = False):
    
    costs=[]
    
    
    for i in range(num_iterations):
        grads,cost=propagte(w,b,X,Y)
        dw=grads["dw"]
        db=grads["db"]
        
        w=w-learning_rate*dw
        b=b-learning_rate*db
        
        if i%10==0:
            costs.append(cost)
        
        if print_cost and i % 10 == 0:
            print ("Cost after iteration %i: %f" %(i, cost))
    
    params = {"w": w,
              "b": b}
    
    grads = {"dw": dw,
             "db": db}
    
    return params, grads, costs
    

In [41]:
def predict(w,b,X):
    
    m=X.shape[1]
    
    Y_prediction=np.zeros((1,m))
    
    w=w.reshape(X.shape[0],1)
    
    A=sigmoid(np.dot(w.T,X)+b)
    
    Y_prediction= A>=0.5
    
    assert(Y_prediction.shape==(1,m))
    
    return Y_prediction

In [42]:
w = np.array([[0.1124579],[0.23106775]])
b = -0.3
X = np.array([[1.,-1.1,-3.2],[1.2,2.,0.1]])
print ("predictions = " + str(predict(w, b, X)))

predictions = [[ True  True False]]


In [45]:
def model(X_train, Y_train, X_test, Y_test, num_iterations = 2000, learning_rate = 0.5, print_cost = False):
    
    w,b=initialize_with_zeros(X_train.shape[0])
    
    params,grads,cost=optimize(w,b,X_train,Y_train,num_iterations,learning_rate,print_cost=False)
    
    w=params["w"]
    b=params["b"]
    
    Y_prediction_test = predict(w, b, X_test)
    Y_prediction_train = predict(w, b, X_train)
    
    print("train accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_train - Y_train)) * 100))
    print("test accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_test - Y_test)) * 100))

    
    d = {"costs": cost,
         "Y_prediction_test": Y_prediction_test, 
         "Y_prediction_train" : Y_prediction_train, 
         "w" : w, 
         "b" : b,
         "learning_rate" : learning_rate,
         "num_iterations": num_iterations}
    
    return d
    

In [46]:
d=model(x_train_set,y_train,x_test_set,y_test,num_iterations=2000,learning_rate=0.5,print_cost=True)

train accuracy: 89.90130280300039 %
test accuracy: 99.95271867612293 %
