In [1]:
import numpy as np

def tanh(x):
    return np.tanh(x)

def tanh_deriv(x):
    return 1.0 - np.tanh(x)**2

def logistic(x):
    return 1/(1 + np.exp(-x))

def logistic_derivative(x):
    return logistic(x)*(1-logistic(x))

In [26]:
class NeuralNetwork:
    def __init__(self, layers, activation='tanh'):
        """
        :param layers: A list containing the number of units in each layer.
        Should be at least two values
        :param activation: The activation function to be used. Can be
        "logistic" or "tanh"
        """
        if activation == 'logistic':
            self.activation = logistic
            self.activation_deriv = logistic_derivative
        elif activation == 'tanh':
            self.activation = tanh
            self.activation_deriv = tanh_deriv

        self.weights = []
        for i in range(1, len(layers) - 1):
            self.weights.append((2*np.random.random((layers[i - 1] + 1, layers[i]
                                ))-1)*0.25)
        self.weights.append((2*np.random.random((layers[i] + 1, layers[i +
                            1]))-1)*0.25)
        
    def fit(self, X, y, learning_rate=0.2, epochs=10000):
        X = np.atleast_2d(X)
        temp = np.ones([X.shape[0], X.shape[1]+1])
        temp[:, 0:-1] = X  # adding the bias unit to the input layer
        X = temp
        y = np.array(y)

        for k in range(epochs):
            i = np.random.randint(X.shape[0])
            a = [X[i]]
            print(X.shape[0], a)

            for l in range(len(self.weights)):
                hidden_inputs = np.ones([self.weights[l].shape[1] + 1])
                hidden_inputs[0:-1] = self.activation(np.dot(a[l], self.weights[l]))
                a.append(hidden_inputs)
            error = y[i] - a[-1][:-1]
            deltas = [error * self.activation_deriv(a[-1][:-1])]
            l = len(a) - 2

            # The last layer before the output is handled separately because of
            # the lack of bias node in output
            deltas.append(deltas[-1].dot(self.weights[l].T)*self.activation_deriv(a[l]))

            for l in range(len(a) -3, 0, -1): # we need to begin at the second to last layer
                deltas.append(deltas[-1][:-1].dot(self.weights[l].T)*self.activation_deriv(a[l]))

            deltas.reverse()
            for i in range(len(self.weights)-1):
                layer = np.atleast_2d(a[i])
                delta = np.atleast_2d(deltas[i])
                self.weights[i] += learning_rate * layer.T.dot(delta[:,:-1])
            # Handle last layer separately because it doesn't have a bias unit
            i+=1
            layer = np.atleast_2d(a[i])
            delta = np.atleast_2d(deltas[i])
            self.weights[i] += learning_rate * layer.T.dot(delta)
            
    def predict(self, x):
        a = np.array(x)
        for l in range(0, len(self.weights)):
            temp = np.ones(a.shape[0]+1)
            temp[0:-1] = a
            a = self.activation(np.dot(temp, self.weights[l]))
        return a

In [27]:
nn = NeuralNetwork([2,2,1], 'tanh')
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([0, 1, 1, 0])
#nn.weights = 
nn.fit(X, y, epochs=10)
for i in [[0, 0], [0, 1], [1, 0], [1,1]]:
    print(i,nn.predict(i))
#print(nn.weights)

4 [array([ 0.,  0.,  1.])]
4 [array([ 0.,  1.,  1.])]
4 [array([ 0.,  0.,  1.])]
4 [array([ 0.,  0.,  1.])]
4 [array([ 1.,  0.,  1.])]
4 [array([ 1.,  1.,  1.])]
4 [array([ 0.,  1.,  1.])]
4 [array([ 1.,  1.,  1.])]
4 [array([ 1.,  0.,  1.])]
4 [array([ 0.,  0.,  1.])]
[0, 0] [ 0.36445474]
[0, 1] [ 0.37133924]
[1, 0] [ 0.36517525]
[1, 1] [ 0.36978539]


In [3]:
nn = NeuralNetwork([2,2,2], 'logistic')
X = np.array([[0.05, 0.1]])
y = np.array([[0.01, 0.99]])
nn.fit(X, y, epochs=2)
for i in [[0.05, 0.99]]:
    print(i,nn.predict(i))
print(nn.weights)

camada 1
[array([[ 0.,  0.],
       [ 0.,  0.],
       [ 0.,  0.]])]
[array([[ 0.,  0.],
       [ 0.,  0.],
       [ 0.,  0.]]), array([[ 0.,  0.],
       [ 0.,  0.],
       [ 0.,  0.]])]
[[ 0.05  0.1   1.  ]]
[0.05, 0.99] [ 0.49139796  0.50859299]
[array([[  1.54433148e-06,   1.54433148e-06],
       [  3.08866295e-06,   3.08866295e-06],
       [  3.08866295e-05,   3.08866295e-05]]), array([[-0.01147046,  0.01145839],
       [-0.01147046,  0.01145839],
       [-0.02294091,  0.02291677]])]


In [8]:
import numpy as np
from sklearn.cross_validation import train_test_split 
from sklearn.datasets import load_digits
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.preprocessing import LabelBinarizer

In [9]:
digits = load_digits()
X = digits.data
y = digits.target
X -= X.min() # normalize the values to bring them into the range 0-1
X /= X.max()

In [10]:
print(X)

[[ 0.      0.      0.3125 ...,  0.      0.      0.    ]
 [ 0.      0.      0.     ...,  0.625   0.      0.    ]
 [ 0.      0.      0.     ...,  1.      0.5625  0.    ]
 ..., 
 [ 0.      0.      0.0625 ...,  0.375   0.      0.    ]
 [ 0.      0.      0.125  ...,  0.75    0.      0.    ]
 [ 0.      0.      0.625  ...,  0.75    0.0625  0.    ]]


In [13]:
print(y.shape)
y

(1797,)


array([0, 1, 2, ..., 8, 9, 8])

In [16]:
nn = NeuralNetwork([64,100,10],'tanh')
X_train, X_test, y_train, y_test = train_test_split(X, y)
labels_train = LabelBinarizer().fit_transform(y_train)
labels_test = LabelBinarizer().fit_transform(y_test)

In [30]:
y_train

array([3, 0, 3, ..., 0, 6, 2])

In [18]:
labels_train

array([[0, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ..., 
       [1, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 1, ..., 0, 0, 0]])

In [19]:
labels_train[2]

array([0, 0, 0, 1, 0, 0, 0, 0, 0, 0])

In [20]:
y_train[2]

3

In [21]:
X_train

array([[ 0.    ,  0.    ,  0.375 , ...,  0.375 ,  0.    ,  0.    ],
       [ 0.    ,  0.    ,  0.1875, ...,  0.3125,  0.    ,  0.    ],
       [ 0.    ,  0.    ,  0.3125, ...,  0.875 ,  0.1875,  0.    ],
       ..., 
       [ 0.    ,  0.    ,  0.0625, ...,  0.375 ,  0.    ,  0.    ],
       [ 0.    ,  0.    ,  0.    , ...,  0.6875,  0.0625,  0.    ],
       [ 0.    ,  0.125 ,  0.9375, ...,  1.    ,  0.625 ,  0.    ]])

In [22]:
X_train[0]

array([ 0.    ,  0.    ,  0.375 ,  0.8125,  1.    ,  0.5   ,  0.    ,
        0.    ,  0.    ,  0.3125,  1.    ,  0.9375,  0.875 ,  0.75  ,
        0.    ,  0.    ,  0.    ,  0.5625,  0.75  ,  0.125 ,  0.9375,
        0.5   ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.5625,
        0.75  ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
        0.9375,  1.    ,  0.8125,  0.1875,  0.    ,  0.    ,  0.    ,
        0.    ,  0.1875,  0.5625,  0.9375,  0.6875,  0.    ,  0.    ,
        0.    ,  0.0625,  0.5   ,  0.875 ,  1.    ,  0.5   ,  0.    ,
        0.    ,  0.    ,  0.4375,  1.    ,  0.875 ,  0.375 ,  0.    ,  0.    ])

In [29]:
for sample in X_train:
    print("Sample", sample)

Sample [ 0.      0.      0.375   0.8125  1.      0.5     0.      0.      0.
  0.3125  1.      0.9375  0.875   0.75    0.      0.      0.      0.5625
  0.75    0.125   0.9375  0.5     0.      0.      0.      0.      0.
  0.5625  0.75    0.      0.      0.      0.      0.      0.      0.9375
  1.      0.8125  0.1875  0.      0.      0.      0.      0.1875  0.5625
  0.9375  0.6875  0.      0.      0.      0.0625  0.5     0.875   1.      0.5
  0.      0.      0.      0.4375  1.      0.875   0.375   0.      0.    ]
Sample [ 0.      0.      0.1875  0.9375  0.5625  0.      0.      0.      0.      0.
  0.875   0.5     0.6875  0.3125  0.      0.      0.      0.1875  1.
  0.1875  0.0625  0.875   0.125   0.      0.      0.3125  0.75    0.      0.
  0.75    0.25    0.      0.      0.125   0.75    0.      0.      0.375
  0.5     0.      0.      0.125   0.875   0.      0.      0.75    0.3125
  0.      0.      0.      0.75    0.5     0.3125  0.9375  0.      0.      0.
  0.      0.0625  0.8125  0.875 

In [15]:
nn.fit(X_train,labels_train,epochs=30000)
predictions = []
for i in range(X_test.shape[0]):
    o = nn.predict(X_test[i] )
    predictions.append(np.argmax(o))
print(confusion_matrix(y_test,predictions))
print(classification_report(y_test,predictions))

[[36  0  0  0  4  0  0  0  0  0]
 [ 0 34  1  1  6  0  2  0  0  0]
 [ 0  1 42  0  1  0  1  0  0  0]
 [ 0  0  1 45  1  1  2  0  0  2]
 [ 0  0  0  0 45  0  0  0  0  0]
 [ 0  1  0  0  0 43  1  0  0  0]
 [ 0  1  0  0  0  0 42  0  0  0]
 [ 0  0  0  0  2  0  0 39  0  0]
 [ 0 10  0  1 17  0  5  1  7  4]
 [ 1  0  0  0  4  2  2  0  0 41]]
             precision    recall  f1-score   support

          0       0.97      0.90      0.94        40
          1       0.72      0.77      0.75        44
          2       0.95      0.93      0.94        45
          3       0.96      0.87      0.91        52
          4       0.56      1.00      0.72        45
          5       0.93      0.96      0.95        45
          6       0.76      0.98      0.86        43
          7       0.97      0.95      0.96        41
          8       1.00      0.16      0.27        45
          9       0.87      0.82      0.85        50

avg / total       0.87      0.83      0.81       450

