In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
def softmax(z):
    """
        Row-wise softmax, returned transposed.

        :param z: 2-D array of scores; each row is normalised independently.
                  The array is NOT modified.
        :return: array of shape (z.shape[1], z.shape[0]); every column sums to 1
    """
    z = np.asarray(z)
    # Shift each row by its own maximum. Softmax is invariant to a per-row
    # constant, and this guarantees one exp() term equals 1 in every row, so
    # a row sum can never underflow to zero (which would yield NaN).
    # The subtraction allocates a new array, leaving the caller's data intact.
    shifted = z - np.max(z, axis=1, keepdims=True)
    exp_z = np.exp(shifted)
    # Transpose first so the per-row sums broadcast along the last axis.
    sm = exp_z.T / np.sum(exp_z, axis=1)
    return sm

In [3]:
def initialize(dim1, dim2):
    """
        Create zero-initialised parameters.

        :param dim1: number of rows of w (model() passes X_train.shape[0])
        :param dim2: number of columns of w (model() passes Y_train.shape[0])
        :return: tuple (w, b) with w of shape (dim1, dim2) and b of shape (dim2, 1)
    """
    w = np.zeros(shape=(dim1, dim2))
    # One bias per column of w, so that b broadcasts against np.dot(w.T, X)
    # for any dim2 rather than only for dim2 == 10.
    b = np.zeros(shape=(dim2, 1))
    return w, b

In [4]:
def propagate(w, b, X, Y):
    """
        Compute the cross-entropy cost and its gradients for one pass.

        :param w: weights, shape (no. of features, no. of classes)
        :param b: bias, broadcastable to (no. of classes, no. of examples)
        :param X: data, shape (no. of features, no. of examples)
        :param Y: true labels with the same shape as the softmax output,
                  (no. of classes, no. of examples) -- presumably one-hot
                  encoded; confirm against the caller
        :return: (grads, cost); grads is {"dw": ..., "db": ...} where dw has
                 the shape of w and db has shape (no. of classes, 1)
    """
    m = X.shape[1]  # number of examples (columns of X)

    ## Forward propagation
    A = softmax((np.dot(w.T, X) + b).T)
    # Floor the probabilities at the smallest positive normal float so that
    # a 0 * log(0) term cannot inject NaN into the cost.
    floor = np.finfo(A.dtype).tiny
    cost = -np.sum(Y * np.log(np.maximum(A, floor))) / m

    ## Backward Propagation
    error = A - Y
    dw = np.dot(X, error.T) / m
    # Sum over examples only. Summing over every entry collapses to ~0
    # (each column of A sums to 1, as does each one-hot column of Y), which
    # would leave the bias untrained.
    db = np.sum(error, axis=1, keepdims=True) / m

    cost = np.squeeze(cost)
    grads = {"dw": dw,
             "db": db}
    return grads, cost
    

In [5]:
def optimize(w, b, X, Y, num_iters, alpha, print_cost = False):
    """
        Run batch gradient descent on w and b.

        :param w: initial weights
        :param b: initial bias
        :param X: data (number of features, number of examples)
        :param Y: true labels, passed through to propagate()
        :param num_iters: number of gradient-descent iterations
        :param alpha: learning rate (step size multiplying each gradient)
        :param print_cost: when True, print the cost every 50 iterations
        :return: (params, grads, costs) where params is {"w": ..., "b": ...},
                 grads holds the gradients from the last iteration (both None
                 when num_iters is 0) and costs is the list of costs recorded
                 every 50 iterations

    """
    costs = []
    # Pre-bind so that num_iters == 0 returns cleanly instead of raising
    # NameError when the grads dict is built below.
    dw, db = None, None
    for i in range(num_iters):
        grads, cost = propagate(w, b, X, Y)
        dw = grads["dw"]
        db = grads["db"]

        # Gradient-descent step
        w = w - alpha * dw
        b = b - alpha * db

        # Record (and optionally print) the cost every 50 iterations
        if i % 50 == 0:
            costs.append(cost)
            if print_cost:
                print("Cost after iteration %i: %f" %(i, cost))

    params = {"w": w,
              "b": b}

    grads = {"dw": dw,
             "db": db}

    return params, grads, costs
        

In [6]:
def predict(w, b, X):
    """
        Pick the most probable class for every column of X.

        :param w: weights; w.T is multiplied with X
        :param b: bias added to np.dot(w.T, X)
        :param X: data; assumed (no. of features, no. of examples) -- TODO confirm
        :return: array holding, per column of X, the index of the largest
                 softmax output
    """
    scores = np.dot(w.T, X) + b
    # softmax() normalises each row of its input and returns the result
    # transposed, so the class axis of the output is axis 0.
    probabilities = softmax(scores.T)
    return np.argmax(probabilities, axis=0)

In [7]:
def model(X_train, Y_train, Y ,X_test, Y_test, num_iters, alpha, print_cost):

    """
        Train the classifier, report train/test accuracy and return a summary.

        :param X_train: training data; X_train.shape[0] sizes the rows of w
        :param Y_train: training labels used for the cost; Y_train.shape[0]
                        sizes the columns of w (presumably one-hot -- confirm)
        :param Y: training labels compared element-wise against the predicted
                  class indices (presumably integer class ids -- confirm)
        :param X_test: test data
        :param Y_test: test labels compared element-wise against the
                       predicted class indices
        :param num_iters: number of gradient-descent iterations
        :param alpha: learning rate
        :param print_cost: forwarded to optimize() to print the cost periodically
        :return: dict with keys "costs", "Y_prediction_test",
                 "Y_prediction_train", "w", "b", "learning_rate",
                 "num_iterations"
    """

    print("Logistic Regression Result")
    w, b = initialize(X_train.shape[0], Y_train.shape[0])
    # The final gradients returned by optimize() are not needed here.
    parameters, _, costs = optimize(w, b, X_train, Y_train, num_iters, alpha, print_cost)

    w = parameters["w"]
    b = parameters["b"]

    y_prediction_train = predict(w, b, X_train)
    y_prediction_test = predict(w, b, X_test)

    train_accuracy = np.sum(y_prediction_train == Y) / float(len(Y)) * 100
    test_accuracy = np.sum(y_prediction_test == Y_test) / float(len(Y_test)) * 100
    # Use str.format so the value is substituted into the placeholder;
    # passing it as a second print() argument leaves a literal "{} %".
    print("Train_accuracy: {} %".format(train_accuracy))
    print("Test accuracy: {} %".format(test_accuracy))

    d = {"costs": costs,
         "Y_prediction_test": y_prediction_test,
         "Y_prediction_train": y_prediction_train,
         "w": w,
         "b": b,
         "learning_rate": alpha,
         "num_iterations": num_iters}

    return d

In [8]:
def pri(X_test, y_prediction_test, index=2, image_shape=(28, 28)):
    """
        Print the prediction for one example and display it as an image.

        :param X_test: 2-D array read as X_test[index, :], i.e. one example
                       per ROW here
        :param y_prediction_test: predicted labels, indexed by the same index
        :param index: which example to show (defaults to 2)
        :param image_shape: shape the example is reshaped to before display
                            (defaults to 28x28)
    """
    # NOTE(review): the other functions in this notebook document X as
    # (features, examples); this one reads a row as an example -- confirm the
    # caller passes the (examples, features) layout.
    example = X_test[index, :]
    print("Prediction for the example is ", y_prediction_test[index])
    plt.imshow(np.reshape(example, image_shape))
    plt.show()