In [1]:
import numpy as np

In [2]:
np.random.seed(20)

In [3]:
#input values
X_num_row, X_num_col = [2,2000]

In [4]:
X_raw = np.random.rand(X_num_row, X_num_col) * 100
X_raw

array([[58.81308011, 89.77137279, 89.15307295, ..., 81.51663924,
        67.21352033, 86.30981922],
       [10.05276968, 82.39145514, 76.93178003, ..., 39.11792936,
        62.60018814, 20.48588988]])

In [5]:
X_raw.shape

(2, 2000)

In [6]:
#Output values
Y_raw = np.concatenate(([(X_raw[0,:] + X_raw[1,:])], [(X_raw[0,:] - X_raw[1,:])], np.abs([(X_raw[0,:] - X_raw[1,:])])))

In [7]:
Y_num_row, Y_num_col = Y_raw.shape
Y_raw.shape

(3, 2000)

In [8]:
Train_ratio = 0.7

In [9]:
#Selecting 70% of the data to train
Num_train_datum = int(Train_ratio * X_num_col)
X_raw_train = X_raw[:,0:Num_train_datum]
X_raw_test = X_raw[:,Num_train_datum:]
Y_raw_train = Y_raw[:,0:Num_train_datum]
Y_raw_test = Y_raw[:,Num_train_datum:]

In [10]:
#Standardization. zero mean and unit variance.
class scalar:
    def __init__(self,mean,std):
        self.mean = mean
        self.std = std
def get_scalar(row):
    mean = np.mean(row)
    std = np.std(row)
    return scalar(mean,std)
def standardize(data,scalar):
    return (data - (scalar.mean)/scalar.std)
def unstandardize(data,scalar):
    return (data * scalar.std)+scalar.mean

In [11]:
#Constructing scalars from training set before constructing the neural network
X_scalars = [get_scalar(X_raw_train[row,:]) for row in range(X_num_row)]
X_train = np.array([standardize(X_raw_train[row,:], X_scalars[row]) for row in range(X_num_row)])
Y_scalars = [get_scalar(Y_raw_train[row,:]) for row in range(Y_num_row)]
Y_train = np.array([standardize(Y_raw_train[row,:], Y_scalars[row]) for row in range(Y_num_row)])

In [12]:
X_test = np.array([standardize(X_raw_test[row,:], X_scalars[row]) for row in range(X_num_row)])
Y_test = np.array([standardize(Y_raw_test[row,:], Y_scalars[row]) for row in range(Y_num_row)])

In [13]:
print([X_train[row,:].mean() for row in range(X_num_row)])
print([X_train[row,:].std() for row in range(X_num_row)])
print([Y_train[row,:].mean() for row in range(Y_num_row)])
print([Y_train[row,:].std() for row in range(Y_num_row)])

[47.98095742822924, 49.74505071599517]
[28.89937670411455, 28.98437383047892]
[98.72675138335849, -1.7778078139387639, 32.412321337547404]
[40.543788044538964, 41.31267422307638, 23.774224792986935]


In [14]:
class layer:
    def __init__(self, layer_index, is_output, input_dim, output_dim, activation):
        self.layer_index = layer_index
        self.is_output = is_output
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.activation = activation
        
        if layer_index != 0:
            self.w = np.random.randn(output_dim, input_dim) * np.sqrt(2/input_dim)
            self.b = np.random.randn(output_dim, 1)*np.sqrt(2/input_dim)

In [15]:
layers_dim = [X_num_row,4,4,Y_num_row]
Neural_net = []

for layer_index in range(len(layers_dim)):
    if layer_index == 0:
        Neural_net.append(layer(layer_index, False, 0, layers_dim[layer_index], 'irrelevant'))
    elif layer_index + 1 == len(layers_dim):
        Neural_net.append(layer(layer_index, True, layers_dim[layer_index - 1], layers_dim[layer_index], activation='linear'))
    else:
        Neural_net.append(layer(layer_index, False, layers_dim[layer_index - 1], layers_dim[layer_index], activation='relu'))

In [16]:
pred_n_param = sum([(layers_dim[layer_index]+1)*layers_dim[layer_index+1] for layer_index in range(len(layers_dim)-1)])
act_n_param = sum([Neural_net[layer_index].w.size + Neural_net[layer_index].b.size for layer_index in range(1,len(layers_dim))])
print(f'Predicted number of hyperparameters: {pred_n_param}')
print(f'Actual number of hyperparameters: {act_n_param}')
print(f'Number of data: {X_num_col}')

if act_n_param >= X_num_col:
    raise Exception("It will overfit.")

Predicted number of hyperparameters: 47
Actual number of hyperparameters: 47
Number of data: 2000


In [17]:
def activation(input_, act_function):
    if act_function == 'relu':
        return np.maximum(input_, np.zeros(input_.shape))
    elif act_function == 'linear':
        return input_
    else:
        raise Exception('Activation function is not defined')

In [18]:
def forward_prop(input_vec, layers_dim = layers_dim, Neural_net = Neural_net):
    Neural_net[0].a = input_vec
    for layer_index in range(1,len(layers_dim)):
        Neural_net[layer_index].z = np.add(np.dot(Neural_net[layer_index].w, Neural_net[layer_index-1].a), Neural_net[layer_index].b)
        Neural_net[layer_index].a = activation(Neural_net[layer_index].z, Neural_net[layer_index].activation)
    return Neural_net[layer_index].a

In [19]:
forward_prop(X_train).shape == Y_train.shape

True

In [20]:
def get_loss(Y,Y_hat, metric = 'mse'):
    if metric == 'mse':
        individual_loss = 0.5 * (Y_hat - Y)**2
        return np.mean([np.linalg.norm(individual_loss[:,col],2) for col in range (individual_loss.shape[1])])
    else:
        raise Exception('Loss metric is not defined')
def get_dz_from_loss(Y,Y_hat,metric):
    if metric == 'mse':
        return Y_hat - Y
    else:
        raise Exception('Loss metric is not defined')
def get_deactivation(a, act_function):
    if act_function == 'relu':
        return np.maximum(np.sign(a), np.zeros(a.shape))
    elif act_function == 'linear':
        return np.ones(a.shape)
    else:
        raise Exception('Activation function is not defined')

In [21]:
def backward_prop(Y, Y_hat,metric = 'mse', layers_dim = layers_dim, Neural_net = Neural_net, Num_train_datum = Num_train_datum):
    for layer_index in range(len(layers_dim)-1,0,-1):
        if layer_index+1 == len(layers_dim):
            dz = get_dz_from_loss(Y, Y_hat, metric)
        else:
            dz = np.multiply(np.dot(Neural_net[layer_index + 1].w.t, dz), get_deactivation(Neural_net[layer_index].a, Neural_net[layer_index].activation))
        
        dw = np.dot(dz, Neural_net[layer_index-1].a.t) / Num_train_datum
        db = np.sum(dz, axis = 1, keepdims = True) / Num_train_datum
        
        Neural_net[layer_index].dw = dw
        Neural_net[layer_index].db = db

In [22]:
print(get_loss(Y_test, forward_prop(X_test)))

5614.781215435073


In [23]:
def predict(X_raw_any):
    X_any = np.array([standardize(X_raw_any[row,:], X_scalars[row]) for row in range(X_num_row)])
    Y_hat = forward_prop(X_any)
    Y_hat_any = np.array([unstandardize(Y_hat[row,:], Y_scalars[row]) for row in range(Y_num_row)])
    return Y_hat_any

In [24]:
predict(np.array([[40,60],[60,40],[5,9],[766,354]]))

array([[ 163.22428687,  302.42540662],
       [  69.38298228,  174.47180656],
       [  32.65643104, -129.27313089]])