Adaptado de https://towardsdatascience.com/diy-ai-an-old-school-matrix-nn-401a00021a55

Mas usando gradiente descendente para o treino (https://levelup.gitconnected.com/training-a-single-perceptron-405026d61f4b)

In [31]:
!pip install terminaltables

import numpy
import os
import random
from terminaltables import AsciiTable


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting terminaltables
  Downloading terminaltables-3.1.10-py2.py3-none-any.whl (15 kB)
Installing collected packages: terminaltables
Successfully installed terminaltables-3.1.10


In [33]:
# Read the data file into two matrices:
#   samples - one row per line, built from the first 256 fields (as floats)
#   results - one row per line, built from the following 10 fields
# Rows are collected in plain lists and converted once at the end; calling
# numpy.concatenate for every line would re-copy the whole array each time.
sample_rows = []
result_rows = []

with open('sample_data/semeion.data') as file:
    for line in file:
        # tolerate blank lines (e.g. a trailing newline at the end of the file)
        if not line.strip():
            continue

        # split line to array using space as separator
        numbers = line.split(' ')
        # every field read from the file is a string, so convert the first
        # 256 parts to floats and the following 10 to integers
        sample = [float(x) for x in numbers[0:256]]
        result = [int(x) for x in numbers[256:266]]

        sample_rows.append(sample)
        result_rows.append(result)

# reshape(-1, width) fixes the column count, so an empty file still produces
# arrays of shape (0, 256) and (0, 10); both arrays hold floats
samples = numpy.array(sample_rows, dtype=float).reshape(-1, 256)
results = numpy.array(result_rows, dtype=float).reshape(-1, 10)


print(samples, samples.shape)
print(results, results.shape)

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 1. ... 0. 0. 0.]
 [0. 1. 1. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]] (1593, 256)
[[1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 0. 0. 1.]] (1593, 10)


In [34]:
def sigmoid(x):
    """Logistic function 1 / (1 + e^-x), applied element-wise by numpy.

    Args:
        x: a scalar or a numpy array.

    Returns:
        The logistic value(s) for x, in the same shape as x.
    """
    denominator = numpy.exp(-x) + 1.0
    return 1.0 / denominator

def dsigmoid(x):
    """Derivative of the logistic function, expressed via its output.

    Args:
        x: a value (or array of values) already produced by ``sigmoid``,
            not the raw pre-activation input.

    Returns:
        x * (1 - x), computed element-wise.
    """
    complement = 1.0 - x
    return complement * x

# Small random starting weights: random() draws from 0..1, doubling it gives
# 0..2, subtracting 1 gives -1..1, and dividing by 100 gives -0.01..0.01.
first_layer = (numpy.random.random((256, 256)) * 2 - 1) / 100  # input -> hidden, (256, 256)
second_layer = (numpy.random.random((256, 10)) * 2 - 1) / 100  # hidden -> output, (256, 10)

for layer in (first_layer, second_layer):
    print(layer.shape)

(256, 256)
(256, 10)


In [35]:
# Learning rate: scales every weight update. Smaller values train more slowly
# but in finer steps.
rate = 0.4
# Starts far above desired_error so the loop below runs at least once.
error = 1000.0
# Number of the epoch about to run (1-based).
epoch = 1
# Hard cap on the number of epochs.
epoch_limit = 100
# Training stops early once the worst error of an epoch is no larger than this.
desired_error = 0.1

while error > desired_error and epoch <= epoch_limit:
    # worst absolute output error of each sample processed in this epoch
    errors = []
    for sample_index in range(len(samples)):
        # Indexing gives 1-D vectors; wrap them into single-row matrices so
        # that the transposes used in the weight updates are column vectors.
        sample = numpy.array([samples[sample_index]])
        result = numpy.array([results[sample_index]])

        # Forward pass: input -> hidden activations -> output activations
        first_output = sigmoid(sample.dot(first_layer))  # (1, 256)
        second_output = sigmoid(first_output.dot(second_layer))  # (1, 10)

        # Output error, and its largest component recorded for this sample
        second_error = result - second_output  # (1, 10)
        errors.append(numpy.abs(second_error).max())

        # Output-layer delta: error scaled by the sigmoid slope at the output
        second_delta = dsigmoid(second_output) * second_error  # (1, 10)

        # Push the output delta back through the second layer's weights.
        # This must use second_layer as it was BEFORE the update below.
        first_error = numpy.dot(second_delta, second_layer.T)

        # Hidden-layer delta: back-propagated error scaled by the sigmoid slope
        first_delta = dsigmoid(first_output) * first_error

        # In-place weight updates (the layers are module-level arrays)
        second_layer += rate * first_output.T.dot(second_delta)  # (256, 10)
        first_layer += rate * sample.T.dot(first_delta)

    # the epoch's error is the largest per-sample error seen
    error = max(errors)
    epoch += 1

# report the last completed epoch once training has stopped
print(f'Epoch: {epoch-1}, error: {error} (of desired < {desired_error})')
    

Epoch: 100, error: 0.9999924047564264 (of desired < 0.1)


In [36]:
def compute_result(input_sample):
    """Return the digit the network assigns to a single sample.

    Requires ``first_layer``, ``second_layer`` and ``sigmoid`` to be defined
    globally.

    Args:
        input_sample: one sample of 256 values, either as a flat vector of
            shape (256,) or as a single-row matrix of shape (1, 256).

    Returns:
        The index (as a plain int) of the largest of the 10 output
        activations; the lowest index wins when several are equal.
    """
    # process input vector through both layers of the NN
    l1 = sigmoid(numpy.dot(input_sample, first_layer))
    l2 = sigmoid(numpy.dot(l1, second_layer))

    # Flatten first so a (1, 10) output is indexed the same way as a (10,)
    # one; indexing a (1, 10) array with 0..9 directly would fail at index 1.
    return int(numpy.argmax(numpy.ravel(l2)))

def print_sample(input_sample):
    """Render a 256-value sample as a 16x16 block of characters.

    Args:
        input_sample: numpy array holding 256 values (anything that can be
            reshaped to 16x16).

    Returns:
        A string of 16 lines, 16 characters each, joined by newlines. Cells
        equal to 1 are drawn as '*'; every other value becomes a space.
    """
    grid = input_sample.reshape(16, 16).tolist()
    return '\n'.join(
        ''.join('*' if cell == 1 else ' ' for cell in grid_row)
        for grid_row in grid
    )



In [37]:
print('Actual testing of trained NN')

# header: four (image, detected digit) column pairs
table_data = [['Sample', 'Digit'] * 4]

# three table rows, each showing four randomly chosen samples
for row in range(3):
    cells = []
    for col in range(4):
        # random index into the sample set (both bounds inclusive)
        ri = random.randint(0, samples.shape[0] - 1)
        sample = samples[ri]

        # the rendered image, then the detected digit on the third line of its cell
        cells.append(print_sample(sample))
        cells.append('\n'.join([' ' * 5, ' ' * 5, '  %d' % compute_result(sample)]))
    table_data.append(cells)

table = AsciiTable(table_data)
table.inner_row_border = True

print(table.table)

Actual testing of trained NN
+------------------+-------+------------------+-------+------------------+-------+------------------+-------+
| Sample           | Digit | Sample           | Digit | Sample           | Digit | Sample           | Digit |
+------------------+-------+------------------+-------+------------------+-------+------------------+-------+
|             **** |       |       ********** |       | ***         **   |       |      ****        |       |
|            ****  |       |                  |       | **        ****   |       |      ****        |       |
|           ****** |   1   |     ***          |   5   | *************    |   7   |     ******       |   4   |
|           *****  |       |    ***           |       |     **  ***      |       |     ** ***       |       |
|          *****   |       |  ****            |       |         **       |       |    *** ***       |       |
|       *******    |       | ***              |       |        ***       |       |   ***   