In [1]:
import pandas as pd
import numpy as np
import os.path
from datetime import date, datetime
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, max_error
import matplotlib.pyplot as plt

## Funções de ativação

In [2]:
def linear(x, derivative=False):
    """Identity activation.

    Returns ``x`` unchanged, or an array of ones with the shape and dtype of
    ``x`` when ``derivative`` is true.
    """
    if derivative:
        return np.ones_like(x)
    return x

def sigmoid(x, derivative=False):
    """Logistic sigmoid activation, 1 / (1 + exp(-x)).

    With ``derivative=True`` returns s * (1 - s), where s is the sigmoid of
    ``x`` (``x`` is the pre-activation, not the activation output).
    """
    activation = 1.0/(1.0 + np.exp(-x))
    if not derivative:
        return activation
    return activation*(1 - activation)

def tanh(x, derivative=False):
    """Hyperbolic tangent activation.

    x: scalar or array of pre-activations.
    derivative: when true, return 1 - tanh(x)**2 instead of tanh(x).
    """
    # np.tanh saturates at +/-1 for large |x|. The explicit
    # (e^x - e^-x)/(e^x + e^-x) ratio overflowed to inf/inf = nan there.
    y = np.tanh(x)
    if derivative:
        return 1 - y**2
    return y

def relu(x, derivative=False):
    """Rectified linear unit, max(0, x).

    With ``derivative=True`` returns 0 where ``x <= 0`` and 1 elsewhere.
    """
    if not derivative:
        return np.maximum(0, x)
    non_positive = x <= 0
    return np.where(non_positive, 0, 1)

def leaky_relu(x, derivative=False, alpha=0.1):
    """Leaky ReLU: ``x`` for positive inputs, ``alpha * x`` otherwise.

    x: array of pre-activations.
    derivative: when true, return the slope (``alpha`` where ``x <= 0``,
        1 elsewhere) instead of the activation.
    alpha: slope used for non-positive inputs; defaults to the previously
        hard-coded 0.1.
    """
    non_positive = x <= 0
    if derivative:
        return np.where(non_positive, alpha, 1)
    return np.where(non_positive, alpha*x, x)

def elu(x, derivative=False, alpha=1.0):
    """Exponential linear unit: ``x`` for positive inputs,
    ``alpha * (exp(x) - 1)`` otherwise.

    x: array of pre-activations.
    derivative: when true, return the slope (``elu(x) + alpha`` where
        ``x <= 0``, 1 elsewhere) instead of the activation.
    alpha: scale of the negative branch; defaults to the previously
        hard-coded 1.0.
    """
    # np.where evaluates both branches, so clamp the exponent at 0: the value
    # is only selected where x <= 0, and clamping stops exp() from overflowing
    # (and warning) on large positive inputs.
    negative_branch = alpha*(np.exp(np.minimum(x, 0)) - 1)
    if derivative:
        return np.where(x <= 0, negative_branch + alpha, 1)
    return np.where(x <= 0, negative_branch, x)

## Funções de custo

In [3]:
def mae(y, y_pred, derivative=False):
    """Mean absolute error between targets ``y`` and predictions ``y_pred``.

    With ``derivative=True`` returns the gradient with respect to ``y_pred``:
    +1 where the prediction is above the target and -1 otherwise (ties
    included), divided by the number of rows in ``y``.
    """
    if not derivative:
        abs_error = np.abs(y - y_pred)
        return np.mean(abs_error)
    n_samples = y.shape[0]
    direction = np.where(y_pred > y, 1, -1)
    return direction / n_samples

def mse(y, y_pred, derivative=False):
    """Half mean squared error, 0.5 * mean((y - y_pred)**2).

    With ``derivative=True`` returns -(y - y_pred) divided by the number of
    rows in ``y``, i.e. the gradient with respect to ``y_pred``.
    """
    residual = y - y_pred
    if not derivative:
        return 0.5*np.mean(residual**2)
    return -residual / y.shape[0]

## Inicialização de pesos

In [4]:
def zeros(rows,cols):
    """Initializer: a ``rows`` x ``cols`` array filled with zeros."""
    shape = (rows, cols)
    return np.zeros(shape)

def ones(rows,cols):
    """Initializer: a ``rows`` x ``cols`` array filled with ones."""
    shape = (rows, cols)
    return np.ones(shape)

def random_normal(rows,cols):
    """Initializer: ``rows`` x ``cols`` samples from the standard normal
    distribution, drawn from NumPy's global random state."""
    shape = (rows, cols)
    return np.random.randn(*shape)

def random_uniform(rows,cols):
    """Initializer: ``rows`` x ``cols`` samples drawn uniformly from [0, 1)
    using NumPy's global random state."""
    shape = (rows, cols)
    return np.random.rand(*shape)

def glorot_normal(rows,cols):
    """Initializer: zero-mean normal samples with standard deviation
    sqrt(2 / (rows + cols)), drawn from NumPy's global random state."""
    fan_sum = rows + cols
    scale = np.sqrt(2.0 / fan_sum)
    samples = np.random.randn(rows, cols)
    return scale * samples

def glorot_uniform(rows,cols):
    """Initializer: samples drawn uniformly from [-limit, limit), where
    limit = sqrt(6 / (rows + cols)).

    rows, cols: shape of the returned array.
    Uses NumPy's global random state.
    """
    limit = np.sqrt(6.0 / (rows + cols))
    # rand() is uniform on [0, 1); scaling by 2*limit and shifting by -limit
    # maps it onto [-limit, limit). The previous randn() call drew from a
    # normal distribution, so values were not bounded by the limit at all.
    return 2*limit*np.random.rand(rows, cols)-limit

## Regularização

In [5]:
def l1_regularization(weights, derivative=False):
    """L1 penalty: the sum of absolute values of ``weights``.

    ``weights`` is iterated along its first axis. With ``derivative=True``
    the result is an array holding -1 for negative entries, +1 for positive
    entries, and the entry itself otherwise (so zeros stay zero).
    """
    if not derivative:
        return np.sum([np.sum(np.abs(w)) for w in weights])

    signs = []
    for w in weights:
        negatives_marked = np.where(w<0, -1, w)
        signs.append(np.where(negatives_marked>0, 1, negatives_marked))
    return np.array(signs)

def l2_regularization(weights, derivative=False):
    """L2 penalty: half the sum of squared ``weights``.

    With ``derivative=True`` the gradient is the ``weights`` object itself,
    returned without copying.
    """
    if not derivative:
        squared = weights**2
        return 0.5 * np.sum(squared)
    return weights

# Implementação da rede neural

In [6]:
class Layer():
    """One fully connected layer: its parameters plus the values cached
    between the forward and backward passes.

    input_dim / output_dim: number of inputs and of units; weights are stored
        with shape (output_dim, input_dim) and biases with shape (1, output_dim).
    activation: callable ``f(x, derivative=False)``.
    weights_initializer / biases_initializer: callables ``f(rows, cols)``
        returning the initial arrays.
    dropout_prob: probability of dropping a unit; must satisfy
        0 <= dropout_prob < 1.
    reg_func: callable ``f(weights, derivative=False)`` giving the penalty or
        its gradient.
    reg_strenght: multiplier applied to the regularization term (the spelling
        is part of the public keyword and is kept as-is).

    Raises ValueError if ``dropout_prob`` is outside [0, 1).
    """
    def __init__(self, input_dim, output_dim, activation=linear, weights_initializer=random_normal, biases_initializer=ones, dropout_prob = 0, reg_func=l2_regularization, reg_strenght=0.0):
        # The forward pass divides the dropout mask by (1 - dropout_prob), so
        # a probability of 1 would divide by zero; fail early instead.
        if not 0.0 <= dropout_prob < 1.0:
            raise ValueError("dropout_prob must be in [0, 1), got {}".format(dropout_prob))

        self.input = None
        self.weights = weights_initializer(output_dim, input_dim)
        self.biases = biases_initializer(1, output_dim)
        self.activation = activation
        self.dropout_prob = dropout_prob

        # Caches filled in by NeuralNetwork during the forward/backward passes.
        self._activ_inp, self._activ_out = None, None
        self._dweights, self._dbiases = None, None
        # `_dropout_mask` is the attribute NeuralNetwork actually reads and
        # writes; it was previously never initialised here.
        self._dropout_mask = None
        # Kept only so code reading the old public attribute does not break.
        self.dropout_mask = None

        self.reg_func = reg_func
        self.reg_strenght = reg_strenght

class NeuralNetwork():
    """Feed-forward network of ``Layer`` objects trained with full-batch
    gradient descent.

    Layers are appended to ``self.layers`` by the caller, in input-to-output
    order, before ``fit`` or ``predict`` is used.
    """
    def __init__(self, cost_func=mse, learning_rate=1e-3):
        """
        cost_func: callable ``f(y, y_pred, derivative=False)`` returning the
            loss, or its gradient with respect to ``y_pred`` when
            ``derivative=True``.
        learning_rate: step size applied to every weight and bias update.
        """
        self.layers = []
        self.cost_func = cost_func
        self.learning_rate = learning_rate

    def fit(self, x_train, y_train, epochs=100, verbose=10):
        """Train on the whole of ``x_train`` / ``y_train`` at every step.

        x_train: array of shape (n_samples, input_dim of the first layer).
        y_train: array whose first axis is n_samples and which is compatible
            with the network output of shape (n_samples, output_dim).
        epochs: the loop runs for epochs 0..``epochs`` inclusive, i.e.
            ``epochs + 1`` parameter updates.
        verbose: print the training loss every ``verbose`` epochs; 0 or None
            disables printing.
        """
        for epoch in range(epochs+1):
            y_pred = self.__feedforward(x_train)
            self.__backprop(y_train, y_pred)

            # Progress report. Checking `verbose` first avoids the
            # ZeroDivisionError that `epoch % 0` raised for verbose=0.
            if verbose and epoch % verbose == 0:
                # Measured through predict(), so dropout is not applied; the
                # regularization penalty is not part of the reported value.
                loss_train = self.cost_func(y_train, self.predict(x_train))
                print("epoch: {0:=4}/{1} loss_train: {2:.8f}".format(epoch, epochs, loss_train))

    def predict(self, x):
        """Return the network output for ``x`` with dropout disabled."""
        return self.__feedforward(x, is_training=False)

    def __feedforward(self, x, is_training=True):
        """Run ``x`` through every layer, caching each layer's input,
        pre-activation, dropout mask and output on the layer object.

        Returns the output of the last layer.
        """
        layer_input = x
        for layer in self.layers:
            layer.input = layer_input
            y = np.dot(layer.input, layer.weights.T) + layer.biases
            # The mask is sampled on every call (keeping the random stream the
            # same as before) but only applied when training. Kept units are
            # scaled by 1/(1 - dropout_prob).
            layer._dropout_mask = np.random.binomial(1, 1.0-layer.dropout_prob, y.shape) / (1.0-layer.dropout_prob)
            layer._activ_inp = y
            layer._activ_out = layer.activation(y) * (layer._dropout_mask if is_training else 1.0)
            # This layer's output feeds the next layer.
            layer_input = layer._activ_out
        return self.layers[-1]._activ_out

    def __backprop(self, y, y_pred):
        """Compute the gradients of every layer from the cached forward pass,
        then apply one gradient-descent step to all weights and biases."""
        # Gradient of the cost with respect to the network output.
        last_delta = self.cost_func(y, y_pred, derivative=True)
        for layer in reversed(self.layers):
            # Gradient with respect to the layer's pre-activation; the dropout
            # mask removes the contribution of units dropped in the forward pass.
            dactivation = layer.activation(layer._activ_inp, derivative=True) * last_delta * layer._dropout_mask
            # Gradient with respect to the layer's input, handed to the previous layer.
            last_delta = np.dot(dactivation, layer.weights)

            layer._dweights = np.dot(dactivation.T, layer.input)
            layer._dbiases = 1.0*dactivation.sum(axis=0, keepdims=True)

        # Weight update. Done in a second pass so every gradient above was
        # computed with the weights used in the forward pass.
        for layer in reversed(self.layers):
            layer._dweights = layer._dweights + (1.0/y.shape[0]) * layer.reg_strenght * layer.reg_func(layer.weights, derivative=True)
            layer.weights = layer.weights - self.learning_rate*layer._dweights
            layer.biases = layer.biases - self.learning_rate*layer._dbiases
        

# Treino

In [7]:
# Read the training features and targets from comma-separated text files.
X_train = np.genfromtxt(fname='xtrain.csv', delimiter=',')
y_train = np.genfromtxt(fname='ytrain.csv', delimiter=',')

In [8]:
# Make the targets a single-column 2-D array, then display the resulting shape.
y_train = np.reshape(y_train, (-1, 1))
y_train.shape

(783, 1)

In [9]:
# Display (n_samples, n_features) of the training matrix.
np.shape(X_train)

(783, 4)

In [10]:
# Width of the first layer's input: one unit per column of X_train.
input_dim = X_train.shape[1]

nn = NeuralNetwork(cost_func=mae, learning_rate=1e-3)

# Other Layer options: weights_initializer, dropout_prob, reg_func, reg_strenght
# Architecture: input_dim -> 18 -> 9 -> 5 -> 1, ReLU on the hidden layers and
# a linear output. Only the first layer sets a non-zero reg_strenght.
nn.layers.extend([
    Layer(input_dim=input_dim, output_dim=18, activation=relu, weights_initializer=glorot_normal, reg_strenght=0.1),
    Layer(input_dim=18, output_dim=9, activation=relu, weights_initializer=glorot_normal),
    Layer(input_dim=9, output_dim=5, activation=relu, weights_initializer=glorot_normal),
    Layer(input_dim=5, output_dim=1, activation=linear, weights_initializer=glorot_normal),
])

# NOTE(review): no random seed is set in the visible cells, so the initial
# weights and the printed losses will differ between runs.
nn.fit(X_train, y_train, epochs=2000, verbose=200)

epoch:    0/2000 loss_train: 12.63578777
epoch:  200/2000 loss_train: 0.30367413
epoch:  400/2000 loss_train: 0.25612771
epoch:  600/2000 loss_train: 0.24501331
epoch:  800/2000 loss_train: 0.23976204
epoch: 1000/2000 loss_train: 0.23563461
epoch: 1200/2000 loss_train: 0.23176489
epoch: 1400/2000 loss_train: 0.22855260
epoch: 1600/2000 loss_train: 0.22573038
epoch: 1800/2000 loss_train: 0.22306935
epoch: 2000/2000 loss_train: 0.22052320
