In [165]:
import numpy as np
import pprint
from sklearn.model_selection import train_test_split
np.set_printoptions(precision=4, linewidth=100, suppress=True)

In [166]:
def normalize(data, method='min-max'):
    
    '''
        Normalize `data` using `method`. Computes statistics assuming each sample is a row.

        Arguments:
            data:       data to be normalized
            method:     if 'min-max' then normalization is used, else standardization is used

        Returns:
            Normalized data   
    '''
    
    if (method == 'min-max'):
        numerator = data - np.min(data, axis=0)
        denominator = np.max(data, axis=0) - np.min(data, axis=0)
        return numerator/denominator
    
    if (method == 'standardization'):
        numerator = data - np.mean(data, axis=0)
        denominator = np.std(data, axis=0)
        return numerator/denominator
    
def segregate_target(data):

    '''
        Segregates `data` into (X, t) tuple where `X` has each example as a column and `t`
        is the corresponding class label.
    '''
    
    X = data[:, :-1]
    t = data[:, -1:]
    
    return X, t

def train_test_validation_split(X, t, test_ratio=0.33):

    '''
        Make use of sklearn's `train_test_split` to split `X` into train, test and validation sets.
    '''
    
    X_train, X_test, t_train, t_test = train_test_split(X, t, test_size=test_ratio, random_state=42)
    X_valid, X_test, t_valid, t_test = train_test_split(X_test, t_test, test_size=0.5, random_state=42)
    
    data = {
        'X_train': X_train,
        't_train': t_train,
        'X_valid': X_valid,
        't_valid': t_valid,
        'X_test': X_test,
        't_test': t_test
    }
    
    return data

def load_data(path):

    '''
        Load .npy data specified at `path`.
    '''
    
    data = np.load(path)
    return data

In [240]:
class Model:
    
    def __init__(self, L_, n_, w_={}, b_={}, activation_='relu', learning_rate_=0.0001, max_iters_=1000):
        
        """
            A Model is a deep neural network whose configuration is contained in `n`. Its paramters are
            defined in `w` and biases in `b`. The activation functions for hidden layers can be specified as
            'relu' or 'sigmoid'. The output layer always uses 'sigmoid' activation.
            
            Arguments:
                L_:             number of layers in neural network (excluding input layer)
                n_:             neural network configuration, specifically `n_[i]` is the number of nodes in layer i
                w_:             weight parameters for each layer, specifically `w_[i]` is the weight matrix of layer i
                b_:             biases for each layer, specifically `b_[i]` is the bias vector for layer i
                activation_:    activation for each hidden layer
                learning_rate_: learning rate for updation of weights
                max_iters_:     maximum number of iterations during training
        """
        
        self.L = L_
        self.n = n_
        self.w = w_
        self.b = b_
        self.activation = activation_
        self.learning_rate = learning_rate_
        self.max_iters = max_iters_
        self.initialize_parameters()
        
    
    def initialize_parameters(self):
    
        # the ith hidden layer should be of shape (n[i - 1], n[i])
        for i in range(1, self.L + 1):
            self.w[i] = np.random.randn(self.n[i - 1], self.n[i])
            self.b[i] = np.random.randn(self.n[i], 1)
            
            
    def sigmoid(self, a):

        x = np.copy(a)
        x[x < -15] = -15
        x[x > 15] = 15

        return 1/(1 + np.exp(-x))
    

    def relu(self, a):

        x = np.copy(a)
        x[x < 0] = 0

        return x


    def activation_function(self, a):

        if activation == 'relu':
            return self.relu(a)

        if activation == 'sigmoid':
            return self.sigmoid(a)
        

    def derivative(self, a):

        if self.activation == 'relu':
            x = np.copy(a)
            x[x > 0] = 1
            x[x < 0] = 0
            return x

        if self.activation == 'sigmoid':
            return self.sigmoid(a)*self.sigmoid(1 - a)
        
    
    def forward_propagate(self, X):

        z = {}
        a = {}
        z[0] = X
        M = X.shape[1]
        for i in range(1, self.L):
            a[i] = np.matmul(self.w[i].T, z[i - 1]).reshape(self.n[i], M) + self.b[i]
            z[i] = self.activation_function(a[i])
        a[self.L] = np.dot(self.w[self.L].T, z[self.L - 1]) + self.b[self.L]
        z[self.L] = self.sigmoid(a[self.L])

        return a, z
    
    
    def back_propagate(self, a, z, t):

        delta = {}
        dw = {}
        db = {}

        delta[self.L] = z[self.L] - t
        dw[self.L] = np.matmul(z[self.L - 1], delta[self.L].T)
        db[self.L] = np.sum(delta[self.L], axis=1, keepdims=True)

        for i in range(self.L - 1, 0, -1):

            delta[i] = self.derivative(a[i])*np.matmul(self.w[i + 1], delta[i + 1])
            dw[i] = np.matmul(z[i - 1], delta[i].T)
            db[i] = np.sum(delta[i], axis=1, keepdims=True)

        return dw, db
    
    
    def update(self, dw, db):

        for i in range(1, self.L + 1):
            
            self.w[i] = self.w[i] - eta*dw[i]
            self.b[i] = self.b[i] - eta*db[i]
            
    
    def predict(self, X):
    
        a, z = self.forward_propagate(X)
        preds = np.copy(z[L])
        preds[preds > 0.5] = 1
        preds[preds < 0.5] = 0

        return preds
    
    def fit(self, X, t):
        
        for i in range(self.max_iters):
            
            a, z = self.forward_propagate(X)
            dw, db = self.back_propagate(a, z, t)
            self.update(dw, db)
            
        preds = self.predict(X)
        training_accuracy = calculate_accuracy(preds, t)
        
        return training_accuracy

In [167]:
data = load_data('data/data.npy')
data = normalize(data)
X = data[0]
t = X[-1]
X = X[:-1]

In [168]:
## Some global parameters

# number of hidden layers
L = 0
# number of nodes in each layer; n[i] is number of nodes in ith layer (numbering starts from 0)
n = []
# parameters w
w = {}
# parameters b
b = {}
# choice of activation function
activation = 'relu'
# learning rate
eta = 0.0001

In [235]:
def Models(L_, n_, eta_, activation_='relu'):
    
    global L, n, w, b, activation
    
    L = L_
    n = n_
    activation = activation_
    eta = eta_
    w = {}
    b = {}

In [170]:
def initialize_parameters():
    
    global L, n, w, b, activation
    
    # the ith hidden layer should be of shape (n[i - 1], n[i])
    for i in range(1, L + 1):
        w[i] = np.random.randn(n[i - 1], n[i])
        b[i] = np.random.randn(n[i], 1)
        # w[i] = np.ones((n[i - 1], n[i]))
        # b[i] = np.ones((n[i], 1))

In [180]:
def sigmoid(a):

    x = np.copy(a)
    x[x < -15] = -15
    x[x > 15] = 15
    
    return 1/(1 + np.exp(-x))

def relu(a):
    
    x = np.copy(a)
    x[x < 0] = 0
    
    return x


def activation_function(a):
    
    global activation
    
    if activation == 'relu':
        return relu(a)
    
    if activation == 'sigmoid':
        return sigmoid(a)
    
def derivative(a):
    
    global activation
    
    if activation == 'relu':
        x = np.copy(a)
        x[x > 0] = 1
        x[x < 0] = 0
        return x
    
    if activation == 'sigmoid':
        return sigmoid(a)*sigmoid(1 - a)

In [181]:
def forward_propagate(X):
    
    global L, n, w, b, activation
    
    z = {}
    a = {}
    z[0] = X
    M = X.shape[1]
    for i in range(1, L):
        a[i] = np.matmul(w[i].T, z[i - 1]).reshape(n[i], M) + b[i]
        z[i] = activation_function(a[i])
    a[L] = np.dot(w[L].T, z[L - 1]) + b[L]
    z[L] = sigmoid(a[L])
    
    return a, z

In [182]:
def back_propagate(a, z, t):
    
    global L, n, w, b, activation
    
    delta = {}
    dw = {}
    db = {}
    
    # print(z[L].shape, t.shape)
    
    delta[L] = z[L] - t
    #print('delta[',L,']:', '\n',delta[L])
    dw[L] = np.matmul(z[L - 1], delta[L].T)
    db[L] = np.sum(delta[L], axis=1, keepdims=True)
    #print('dw[',L,']:', '\n',dw[L])
    #print('db[',L,']:', '\n',db[L])
    
    for i in range(L - 1, 0, -1):
        
        delta[i] = derivative(a[i])*np.matmul(w[i + 1], delta[i + 1])
        #print('derivative: ', derivative(a[i]))
        #print('other term: ', np.matmul(w[i + 1], delta[i + 1]))
        #print('delta[',i,']:', '\n',delta[i])
        dw[i] = np.matmul(z[i - 1], delta[i].T)
        #print('dw[',i,']:', '\n',dw[i])
        db[i] = np.sum(delta[i], axis=1, keepdims=True)
        #print('db[',i,']:', '\n',db[i])
        
    return dw, db

In [183]:
def update(dw, db):
    
    global w, b, eta
    
    for i in range(1, L + 1):
        w[i] = w[i] - eta*dw[i]
        b[i] = b[i] - eta*db[i]

In [184]:
def predict(X):
    
    a, z = forward_propagate(X)
    preds = np.copy(z[L])
    preds[preds > 0.5] = 1
    preds[preds < 0.5] = 0
    
    return preds

def calculate_accuracy(y, t):
    
    return 100 - (np.mean(np.abs(y - t))*100)

In [236]:
L_ = 2
n_ = [10, 20, 1]
activation_ = 'relu'
eta_ = 0.001
Models(L_, n_, eta_, activation_)
initialize_parameters()

pp = pprint.PrettyPrinter(indent=4)
#pp.pprint(w)
#pp.pprint(b)

data = load_data('data/data.npy')
t = data[:,-1]
X = data[:,:-1]
X = normalize(X)
X = X.T
t = t.reshape(1, -1)
# X = np.array([[1, 2], [0, 1]])
# t = np.array([[1, 0]])
# print(X.shape, t.shape)

for i in range(5000):
    #print('Iteration :', i)
    a,z = forward_propagate(X)
    #pp.pprint(a)
    #pp.pprint(z)
    dw, db = back_propagate(a, z, t)
    update(dw, db)
    #pp.pprint('\n')

preds = predict(X)
print(calculate_accuracy(preds, t))

90.13698630136986


In [207]:
print(X[:,0].reshape(-1, 1))

[[0.0334]
 [0.6667]
 [0.5   ]
 [0.1401]
 [0.6667]
 [0.5   ]
 [0.375 ]
 [0.5   ]
 [0.    ]
 [0.3865]]


In [247]:
L_ = 2
n_ = [10, 20, 1]
activation_ = 'relu'
eta_ = 0.01
model = Model(L_, n_, activation_='relu', learning_rate_=eta, max_iters_=5000)

In [248]:
data = load_data('data/data.npy')
t = data[:,-1]
X = data[:,:-1]
X = normalize(X)
X = X.T
t = t.reshape(1, -1)

In [249]:
model.fit(X, t)

90.68493150684931