In [None]:
import numpy as np
from datetime import datetime
from ann_structure import *
from gradient_checking import dict_to_vector, approximate_gradient, gradient_difference, vector_to_dict

%load_ext autoreload
%autoreload 2

## Helper functions:

In [None]:
def predict(X, parameters):
    '''
    Converts the network output for X into one-hot class predictions.

    Arguments:
    X -- input data; forwarded unchanged to L_forward
    parameters -- network parameters; forwarded unchanged to L_forward

    Returns:
    S -- array with the same shape and dtype as the network output AL, in which
         every column holds a single 1 at the row of its largest activation
         (the first such row on ties) and 0 everywhere else
    '''
    AL, _ = L_forward(X, parameters)

    # np.argmax returns the first maximal row of each column, so ties resolve
    # to the lowest row index.
    winners = np.argmax(AL, axis=0)

    # Build the one-hot matrix with one fancy-indexed assignment instead of a
    # Python-level loop over every column.
    S = np.zeros_like(AL)
    S[winners, np.arange(AL.shape[1])] = 1

    return S


def accuracy(Y_hat, Y):
    '''
    Fraction of columns of Y_hat that match the corresponding column of Y exactly.

    Arguments:
    Y_hat -- predicted labels, one column per data entry
    Y -- reference labels, same shape as Y_hat

    Returns:
    Python float: the number of columns in which every entry agrees, divided by
    the number of columns

    Raises:
    AssertionError -- if the two shapes differ
    ZeroDivisionError -- if there are no columns
    '''
    assert Y_hat.shape == Y.shape

    m = Y.shape[1] # number of data entries

    # A column counts as correct only when all of its entries agree.
    matches = np.all(Y_hat == Y, axis=0)

    # int(...) keeps the result a plain Python float, as with the original c/m.
    return int(np.count_nonzero(matches)) / m


def standard_score_rescaling(X):
    '''
    Rescales the features to their Z-score

    Arguments:
    X -- data with one feature per row and one data entry per column

    Returns:
    new array of the same shape in which every row has been centred on its mean
    and divided by its population standard deviation; a constant feature (zero
    standard deviation) is only centred, so it comes back as (numerically) zero
    instead of NaN
    '''
    m = X.shape[1]
    mu = (1 / m) * np.sum(X, axis=1, keepdims=True)

    centered = X - mu

    # Population (ddof=0) standard deviation of every feature.
    std = np.sqrt(centered.var(axis=1, keepdims=True))

    # Detect constant features from the raw values (max == min) rather than from
    # std == 0, which rounding in the mean could miss. Dividing those rows by 1
    # avoids the 0/0 = NaN the unguarded division would produce.
    is_constant = np.max(X, axis=1, keepdims=True) == np.min(X, axis=1, keepdims=True)
    std = np.where(is_constant, 1.0, std)

    return centered / std

def feature_rescaling(X):
    '''
    Rescales the features to the interval [0, 1]

    Arguments:
    X -- data with one feature per row and one data entry per column

    Returns:
    new array of the same shape in which every row has been shifted by its
    minimum and divided by its range (max - min); a constant feature (zero
    range) is mapped to 0 instead of NaN
    '''
    mn = np.min(X, axis=1, keepdims=True)
    mx = np.max(X, axis=1, keepdims=True)

    # A constant feature has mx == mn; dividing by 1 there maps it to 0 rather
    # than producing 0/0 = NaN.
    span = mx - mn
    span = np.where(span == 0, 1, span)

    return (X - mn) / span

# MLP Implementation:

In [None]:
def L_layer_model(X, Y, layer_dims, learning_rate =0.05, num_iterations=60, print_cost=True, grad_check=False):
    '''
    Runs num_iterations rounds of forward pass, cost evaluation, backward pass
    and parameter update on the whole of X and Y.

    Arguments:
    X -- training inputs, handed to L_forward on every iteration
    Y -- training targets, handed to cost_total and L_backward
    layer_dims -- layer sizes, handed to init_params (with kind='small')
    learning_rate -- step size handed to update_parameters
    num_iterations -- number of iterations to run
    print_cost -- if True, prints the cost on the first and on every 10th iteration
    grad_check -- accepted for compatibility; the gradient-checking code below is
                  commented out, so this flag currently has no effect

    Returns:
    parameters -- the result of the last update_parameters call (the freshly
                  initialised parameters if num_iterations is 0)
    '''
    # Cost of every iteration; collected locally only, it is not returned.
    cost_history = []

    parameters = init_params(layer_dims, kind='small')

    for step in range(1, num_iterations + 1):
        # forward pass and cost on the current parameters
        AL, caches = L_forward(X, parameters)
        cost = cost_total(AL, Y)
        cost_history.append(cost)

        # backward pass
        grads = L_backward(AL, Y, caches)

        # Gradient checking is disabled; the sketch below is kept for reference only.
        #if grad_check:
        #    grad_approx = approximate_gradient(parameters, X, Y, epsilon=1e-7)
        #    grad_exact, _ = dict_to_vector(grads)
        #    dif = gradient_difference(grad_approx, grad_exact)

        parameters = update_parameters(parameters, grads, learning_rate)

        # Report the cost computed before this step's update.
        if print_cost and (step == 1 or step % 10 == 0):
            print(f'Cost after {step} iterations is {cost!s} at {datetime.now()!s}')

    return parameters

# Train and Test:

In [24]:
# Load the pre-saved training inputs and targets from the local data/ directory.
# NOTE(review): predict() and accuracy() index data entries along axis 1, so X is
# presumably (features, samples) and Y (classes, samples) — confirm against the .npy files.
X = np.load('data/covtype_train_x.npy')
Y = np.load('data/covtype_train_y.npy')

In [28]:
# Layer sizes handed to L_layer_model, which passes them on to init_params.
# NOTE(review): presumably 54 input features, seven hidden layers and 7 output
# classes — confirm against the data and init_params.
layer_dims = (54, 100, 70, 70, 50, 30, 20, 10, 7)

In [None]:
# Train on all of X/Y for 1000 iterations at learning rate 0.3; the cost is printed
# on the first and every 10th iteration. grad_check is passed explicitly but
# L_layer_model currently ignores it (its gradient-checking code is commented out).
params = L_layer_model(X, Y, layer_dims, learning_rate =0.3, num_iterations=1000, print_cost=True, grad_check=False)

In [None]:
AL, _ = L_forward(X, params)

# Threshold every column to a 0/1 mask marking its largest activation(s): entries
# equal to the column maximum become 1 (all of them on ties), the rest become 0.
# One vectorised comparison replaces a Python loop over every sample, and the
# maximum is taken once, before anything is overwritten.
col_max = AL.max(axis=0, keepdims=True)
AL = (AL == col_max).astype(AL.dtype)

AL

In [None]:
# True only if every entry in row 1 of the thresholded AL is nonzero, i.e. every
# sample has row 1 marked as its maximum.
# NOTE(review): presumably a check for the model collapsing onto one class — confirm intent.
np.all(AL[1, :])

In [None]:
# One-hot predictions on the training data and the fraction of samples whose
# predicted column matches Y exactly. No squeeze(): accuracy() requires the same
# 2-D shape as Y, so squeezing could only leave the array unchanged or break
# that shape (e.g. with a single sample).
out = predict(X, params)
a2 = accuracy(out, Y)

In [None]:
# `a1` is never assigned anywhere in this notebook, so displaying it raised
# NameError on a fresh kernel; show only the training accuracy `a2`.
# TODO: evaluate on a held-out test set and report that accuracy alongside it.
a2