In [27]:
from TPNN.tools.perceptron import*
import numpy as np
import random as rd
import numpy.random as rand

### Test net

In [28]:
# Hand-checkable fixture: a 2 -> 5 -> 1 network whose weights are all ones.
net = Net()
for layer_pos, (layer_size, layer_act) in enumerate([(2, ident), (5, sigmoid), (1, ident)]):
    net.insert_layer(layer_pos, layer_size, layer_act)
print(net.layers_count)

# Integer all-ones weights: (2, 5) into the hidden layer, (5, 1) into the output layer.
net.init_weights(1, np.ones((2, 5), dtype=int))
net.init_weights(2, np.ones((5, 1), dtype=int))
# Output bias is zero, every hidden bias is one.
net.init_biases(2, np.zeros((1, 1), dtype=int))
net.init_biases(1, np.ones((1, 5), dtype=int))

net.print_net_config()

3
layers=3
[0]
 size=2
 act_function=<function ident at 0x062924A8>
 activations=[[0. 0.]]
 biases=[[0. 0.]]
 weights:
     empty
--------------------------
[1]
 size=5
 act_function=<function sigmoid at 0x0A641730>
 activations=[[0. 0. 0. 0. 0.]]
 biases=[[1 1 1 1 1]]
 weights:
     |1 1 1 1 1 |
     |1 1 1 1 1 |

--------------------------
[2]
 size=1
 act_function=<function ident at 0x062924A8>
 activations=[[0.]]
 biases=[[0]]
 weights:
     |1 |
     |1 |
     |1 |
     |1 |
     |1 |

--------------------------


In [29]:
# Forward pass on input (1, 1): with all-ones weights and hidden biases of 1 every
# hidden unit receives 1 + 1 + 1 = 3, so the displayed output of about 4.7629
# equals 5 / (1 + exp(-3)).
net.calc_output(np.array([1, 1]))

array([[4.76287063]])

In [30]:
# Second hand-checkable fixture: a 5 -> 7 -> 7 -> 5 network.
net = Net()
for layer_pos, (layer_size, layer_act) in enumerate([(5, ident), (7, sigmoid), (7, th), (5, ident)]):
    net.insert_layer(layer_pos, layer_size, layer_act)

# Both hidden weight matrices repeat the row 1..7; the second one has a single -4 override.
ramp_row = [1, 2, 3, 4, 5, 6, 7]
hidden1_weights = np.array([ramp_row] * 5)
hidden2_weights = np.array([ramp_row] * 7)
hidden2_weights[2, 3] = -4

# Output weights are all ones except two zeroed connections.
output_weights = np.ones((7, 5), dtype=int)
output_weights[0, 2] = 0
output_weights[2, 3] = 0

net.init_weights(1, hidden1_weights)
net.init_weights(2, hidden2_weights)
net.init_weights(3, output_weights)
net.init_biases(3, np.ones((1, 5), dtype=int))

net.print_net_config()

layers=4
[0]
 size=5
 act_function=<function ident at 0x062924A8>
 activations=[[0. 0. 0. 0. 0.]]
 biases=[[0. 0. 0. 0. 0.]]
 weights:
     empty
--------------------------
[1]
 size=7
 act_function=<function sigmoid at 0x0A641730>
 activations=[[0. 0. 0. 0. 0. 0. 0.]]
 biases=[[0. 0. 0. 0. 0. 0. 0.]]
 weights:
     |1 2 3 4 5 6 7 |
     |1 2 3 4 5 6 7 |
     |1 2 3 4 5 6 7 |
     |1 2 3 4 5 6 7 |
     |1 2 3 4 5 6 7 |

--------------------------
[2]
 size=7
 act_function=<function th at 0x0A641778>
 activations=[[0. 0. 0. 0. 0. 0. 0.]]
 biases=[[0. 0. 0. 0. 0. 0. 0.]]
 weights:
     |1 2 3 4 5 6 7 |
     |1 2 3 4 5 6 7 |
     |1 2 3 -4 5 6 7 |
     |1 2 3 4 5 6 7 |
     |1 2 3 4 5 6 7 |
     |1 2 3 4 5 6 7 |
     |1 2 3 4 5 6 7 |

--------------------------
[3]
 size=5
 act_function=<function ident at 0x062924A8>
 activations=[[0. 0. 0. 0. 0.]]
 biases=[[1 1 1 1 1]]
 weights:
     |1 1 0 1 1 |
     |1 1 1 1 1 |
     |1 1 1 0 1 |
     |1 1 1 1 1 |
     |1 1 1 1 1 |
     |1 1 1 1 1 |
  

In [31]:
# Forward pass through the 5-7-7-5 fixture; the displayed result is a (1, 5) array,
# one value per neuron of the last layer.
net.calc_output(np.array([1,2,0,2,1]))

array([[7.99999833, 7.99999833, 7.        , 6.99999833, 7.99999833]])

### Fill activation derivatives array

In [32]:
def calc_activations_derivatives(net: Net, target_vector):
    """Collect the derivative values of every non-input layer, back to front.

    The output layer's entries come from ``predict_error_der`` applied to the
    output layer's current activations and ``target_vector``.  Each earlier
    hidden layer's entries come from ``net.der_cost_act``, which is given the
    already computed derivative list of the layer that follows it.

    This function does not run a forward pass; it reads whatever activations
    the output layer currently holds.

    Args:
        net: network providing ``layers``, ``layers_count`` and ``der_cost_act``.
        target_vector: expected output, passed through to ``predict_error_der``.

    Returns:
        A list of lists.  For a network with at least two layers it has
        ``net.layers_count - 1`` entries and entry ``k`` belongs to layer
        ``k + 1`` (the input layer has no entry).
    """
    last_layer = net.layers[net.layers_count - 1]

    # Seed the result with the output layer's derivatives.
    activation_der_array = [[
        predict_error_der(last_layer.activations[0], target_vector, i)
        for i in range(last_layer.neuron_count)
    ]]

    # Calculate derivatives on each layer except the last and the first one,
    # walking from the last hidden layer down to layer 1.
    for i in range(net.layers_count - 2, 0, -1):
        # Index 0 always holds the most recently computed (i.e. next) layer.
        next_layer_der_array = activation_der_array[0]
        layer_derivatives_array = [
            net.der_cost_act(i, j, next_layer_der_array)
            for j in range(net.layers[i].neuron_count)
        ]
        # Prepend so the list stays ordered by layer index.
        activation_der_array.insert(0, layer_derivatives_array)

    return activation_der_array

## Net training functions

In [33]:
def get_weight_gradient_matrix(net: Net, layer_idx, activation_der_array):
    """Build the weight-gradient matrix for the connections entering ``layer_idx``.

    Entry ``[src][dst]`` is ``net.der_cost_weigh(layer_idx, src, dst, d)`` where
    ``src`` indexes the previous layer's neurons, ``dst`` this layer's neurons
    and ``d`` is ``activation_der_array[layer_idx - 1][dst]``.

    Returns:
        Float array of shape (previous layer size, current layer size).

    Raises:
        AssertionError: if ``layer_idx`` is the input layer or out of range.
    """
    assert 0 < layer_idx < net.layers_count

    shape = (
        net.layers[layer_idx - 1].neuron_count,
        net.layers[layer_idx].neuron_count,
    )
    grads = np.zeros(shape)

    for src in range(shape[0]):
        for dst in range(shape[1]):
            grads[src, dst] = net.der_cost_weigh(
                layer_idx, src, dst, activation_der_array[layer_idx - 1][dst]
            )
    return grads

def get_bias_gradient_matrix(net: Net, layer_idx, activation_der_array):
    """Build the bias-gradient vector for layer ``layer_idx``.

    Element ``k`` is ``net.der_cost_bias(layer_idx, k, d)`` with
    ``d = activation_der_array[layer_idx - 1][k]``.

    Returns:
        1-D float array with one element per neuron of the layer.

    Raises:
        AssertionError: if ``layer_idx`` is the input layer or out of range.
    """
    assert 0 < layer_idx < net.layers_count

    width = net.layers[layer_idx].neuron_count
    grads = np.zeros(width)
    for neuron in range(width):
        grads[neuron] = net.der_cost_bias(
            layer_idx, neuron, activation_der_array[layer_idx - 1][neuron]
        )
    return grads

def step(net: Net, weight_grad_matrices, biases_grad_matrices):
    """Update the weights and biases of every non-input layer in place.

    ``weight_grad_matrices[k]`` and ``biases_grad_matrices[k]`` are added to the
    parameters of layer ``k + 1``.

    NOTE(review): the gradients are *added*, with no learning rate.  If the
    ``der_cost_*`` helpers return plain derivatives of the cost this moves
    uphill; confirm their sign convention in TPNN.tools.perceptron.
    """
    for layer_idx in range(1, net.layers_count):
        layer = net.layers[layer_idx]
        slot = layer_idx - 1  # the gradient lists have no entry for the input layer
        layer.weights += weight_grad_matrices[slot]
        layer.biases += biases_grad_matrices[slot]

def training_iteration(training_data_item, target_vector, net: Net):
    """Train on a single item: forward pass, gradient computation, one update.

    Args:
        training_data_item: input passed to ``net.calc_output``.
        target_vector: expected output for that input.
        net: network to update in place.
    """
    # Forward pass first: the derivative helpers read the stored activations.
    net.calc_output(training_data_item)
    act_der_array = calc_activations_derivatives(net, target_vector)

    # One (weights, biases) gradient pair per non-input layer, computed layer by layer.
    grad_pairs = [
        (
            get_weight_gradient_matrix(net, layer_idx, act_der_array),
            get_bias_gradient_matrix(net, layer_idx, act_der_array),
        )
        for layer_idx in range(1, net.layers_count)
    ]
    weight_grad_matrices = [pair[0] for pair in grad_pairs]
    biases_grad_matrices = [pair[1] for pair in grad_pairs]

    # Change the net parameters.
    step(net, weight_grad_matrices, biases_grad_matrices)

def calc_metric(net: Net, val_target_vectors, val_data=None):
    """Mean prediction error of ``net`` over a set of target vectors.

    Args:
        net: network providing ``layers``, ``layers_count`` and ``calc_output``.
        val_target_vectors: iterable of expected output vectors.
        val_data: optional sized sequence of inputs aligned with
            ``val_target_vectors``.  When given, ``net.calc_output(item)`` is
            run before each error is measured, so the result reflects the
            network's predictions on the validation inputs.  When omitted
            (the original behaviour) no forward pass is run: every target is
            compared with whatever activations the output layer currently
            holds, i.e. those left by the most recent ``calc_output`` call.

    Returns:
        ``np.mean`` of all ``predict_error`` values (``nan`` for an empty set).

    Raises:
        ValueError: if ``val_data`` is given and its length differs from
            ``val_target_vectors``.
    """
    errors = []

    if val_data is None:
        for target in val_target_vectors:
            last_layer = net.layers[net.layers_count - 1]
            errors.append(np.ravel(predict_error(last_layer.activations, target)))
    else:
        if len(val_data) != len(val_target_vectors):
            raise ValueError("val_data and val_target_vectors must have the same length")
        for item, target in zip(val_data, val_target_vectors):
            # Refresh the output activations for this validation input.
            net.calc_output(item)
            last_layer = net.layers[net.layers_count - 1]
            errors.append(np.ravel(predict_error(last_layer.activations, target)))

    # The leading empty float64 array keeps the dtype and the empty-input result
    # identical to the previous np.append accumulation, without its quadratic copying.
    return np.mean(np.concatenate([np.empty(0)] + errors))


# net training on all training data items (returns metric on validation)
def training(net: Net, training_data, target_vectors, val_target_vectors, val_data=None):
    """Train on every (item, target) pair once, then return the validation metric.

    ``val_data`` is forwarded to ``calc_metric``; see there for what the metric
    means when it is omitted.
    """
    for training_item, target_vector in zip(training_data, target_vectors):
        training_iteration(training_item, target_vector, net)
    return calc_metric(net, val_target_vectors, val_data)

# Re-draw all weights and biases from the standard normal distribution.
def init_net_parameters(net: Net):
    """Replace the weights and biases of every non-input layer with random values.

    The existing arrays are only used for their first two dimensions; new
    arrays of the same (rows, columns) are drawn with ``rand.randn`` and
    assigned to the layer.  The input layer (index 0) is left untouched.
    """
    for layer_idx in range(1, net.layers_count):
        layer = net.layers[layer_idx]
        weight_dims = layer.weights.shape
        bias_dims = layer.biases.shape

        # Weights are drawn before biases for each layer.
        layer.weights = rand.randn(weight_dims[0], weight_dims[1])
        layer.biases = rand.randn(bias_dims[0], bias_dims[1])

## Generation of training data

In [34]:
# Number of pi/2-wide steps generated from each starting point by the generation cell.
periods_count = 100
# Margin kept between the sampled starting points and the ends of [0, pi/2].
epsilon = 0.1
# Number of evenly spaced starting points inside that interval.
st_points_count = 10

# Directory the generated data file is written to and later read from.
dst_dir = "../data_sets/"

In [35]:
# Write the cosine data set, one "<x> <cos(x)>" pair per line.  Every non-zero x
# is written together with its mirror -x.  The context manager guarantees the
# file is flushed and closed even if one of the writes fails.
with open(dst_dir + "cos_values_data", "w") as training_data_file:
    # 1 - the origin, then the non-zero multiples of pi/2
    #     (these are the extrema and the zero crossings of cos)
    training_data_file.write(str(0) + " " + str(1) + "\n")

    for i in range(periods_count - 1):
        x = (np.pi / 2) * (i + 1)

        val1 = np.cos(x)
        val2 = np.cos(-x)
        training_data_file.write(str(x) + " " + str(val1) + "\n" + str(-x) + " " + str(val2) + "\n")

    # 2 - the other points: starting points strictly inside (0, pi/2),
    #     each shifted by successive multiples of pi/2
    st_points = np.linspace(epsilon, np.pi / 2 - epsilon, st_points_count)

    for st_point in st_points:
        for i in range(periods_count):
            x = st_point + (np.pi / 2) * i
            val1 = np.cos(x)
            val2 = np.cos(-x)
            training_data_file.write(str(x) + " " + str(val1) + "\n" + str(-x) + " " + str(val2) + "\n")

## Load training data

In [36]:
# Read all "<x> <cos(x)>" lines; the context manager closes the file, which the
# previous bare open() never did.
with open(dst_dir + "cos_values_data", "r") as training_data_file:
    lines = list(training_data_file)

# Shuffle the lines so that consecutive items are not ordered by x.
# NOTE(review): no seed is set in the visible cells, so the order differs between runs.
print("before shuffle: " + str(lines[0:5]))
rd.shuffle(lines)
print("after shuffle: " + str(lines[0:5]))
print("-------------------------")

# Get training data items and target values.  They are collected in plain lists
# and converted once, instead of re-allocating an array with np.append per line.
parsed_args = []
parsed_values = []
for line in lines:
    # Each line must hold exactly two numeric tokens, otherwise ValueError is raised.
    arg, value = (float(token) for token in line.split())
    parsed_args.append(arg)
    parsed_values.append(value)
items_count = len(lines)

# Reshape data to arrays of 1-d vectors: one row per item.
training_data = np.array(parsed_args, dtype=float).reshape((items_count, 1))
target_vectors = np.array(parsed_values, dtype=float).reshape((items_count, 1))

print("first 5 training items: " + str(training_data[0:5]))
print("first 5 training target vectors: " + str(target_vectors[0:5]))

before shuffle: ['0 1\n', '1.5707963267948966 6.123233995736766e-17\n', '-1.5707963267948966 6.123233995736766e-17\n', '3.141592653589793 -1.0\n', '-3.141592653589793 -1.0\n']
after shuffle: ['-115.6819960738907 -0.8488807426887105\n', '-74.99360228020062 0.9192515044496764\n', '132.65613426268015 0.7588552193149402\n', '18.84955592153876 1.0\n', '66.53037783431729 -0.848880742688718\n']
-------------------------
first 5 training items: [[-115.68199607]
 [ -74.99360228]
 [ 132.65613426]
 [  18.84955592]
 [  66.53037783]]
first 5 training target vectors: [[-0.84888074]
 [ 0.9192515 ]
 [ 0.75885522]
 [ 1.        ]
 [-0.84888074]]


## Split training data into batches

In [37]:
batch_size = 100
data_size = len(training_data)
rem = data_size % batch_size
# Number of batches = ceil(data_size / batch_size): a partial batch counts as one more.
batch_count = data_size // batch_size + (1 if rem != 0 else 0)

training_data_batches = []
target_vectors_batches = []

# Slicing clamps at the end of the array, so the final (possibly shorter) batch
# needs no special case.
for pos in range(0, data_size, batch_size):
    training_data_batches.append(training_data[pos:pos + batch_size])
    target_vectors_batches.append(target_vectors[pos:pos + batch_size])

# Show the batch count and the contents of one sample batch.
print(f"batches' count: {batch_count!s}")
print("---------")
idx = 1
print(f"training data batch {idx!s}:\n{training_data_batches[idx]!s}")
print(f"size={len(training_data_batches[idx])!s}")

print("---------")
print(f"target vectors batch {idx!s}:\n{target_vectors_batches[idx]!s}")
print(f"size={len(target_vectors_batches[idx])!s}")

batches' count: 22
---------
training data batch 1:
[[  70.78583471]
 [ 148.51640823]
 [ -78.13519493]
 [ 152.114933  ]
 [  -5.11701039]
 [-107.67570374]
 [  88.52152641]
 [  58.11946409]
 [  34.00058708]
 [ -28.67895529]
 [ -98.86016859]
 [  47.37620051]
 [ 127.48681317]
 [ -25.53736263]
 [ -83.50451602]
 [-117.90972451]
 [ -77.83057353]
 [  64.95958151]
 [ 148.97334034]
 [ -86.49379797]
 [  92.27236187]
 [   4.00314617]
 [ 105.2433539 ]
 [  81.27678759]
 [  61.66567815]
 [  48.69468613]
 [-153.93804003]
 [   0.55693211]
 [ -63.54109588]
 [  72.66125244]
 [  77.98288423]
 [ -62.57954237]
 [  20.32035225]
 [  96.83244015]
 [  51.12703597]
 [-130.47609512]
 [ -96.83244015]
 [ 115.98661748]
 [ 131.54227004]
 [  86.49379797]
 [ -70.2812133 ]
 [  83.96144813]
 [ 109.70343217]
 [ 149.47796175]
 [  30.70668372]
 [ -36.83755833]
 [-130.37609512]
 [-143.19477644]
 [  16.11258467]
 [   2.73697125]
 [-133.76999848]
 [ -65.56882432]
 [ -33.23903357]
 [ -92.12005117]
 [-132.35151286]
 [  34.657519

# Create net and train it

In [38]:
# Network hyper-parameters.
hidden_layers_size = 10
hidden_layers_count = 1
activation = th

# Topology: 1 input -> `hidden_layers_count` hidden layers -> 1 output.
net = Net()
net.insert_layer(0, 1, ident)
for i in range(hidden_layers_count):
    # All hidden layers are identical, so each one is inserted at position 1.
    net.insert_layer(1, hidden_layers_size, activation)
net.insert_layer(hidden_layers_count + 1, 1, activation)
print(net.layers_count)

# Give every hidden layer zero-filled parameters of the right shape; the first
# hidden layer is fed by the single input neuron, the others by a hidden layer.
for idx in range(1, hidden_layers_count + 1):
    cur_layer = net.layers[idx]
    fan_in = 1 if idx == 1 else hidden_layers_size
    cur_layer.weights = np.zeros((fan_in, hidden_layers_size))
    cur_layer.biases = np.zeros((1, hidden_layers_size))

output_idx = net.layers_count - 1
net.init_weights(output_idx, np.zeros((hidden_layers_size, 1)))
net.init_biases(output_idx, np.zeros((1, 1)))

# Initialise all net parameters with values from the standard normal distribution.
init_net_parameters(net)

net.print_net_config()

3
layers=3
[0]
 size=1
 act_function=<function ident at 0x062924A8>
 activations=[[0.]]
 biases=[[0.]]
 weights:
     empty
--------------------------
[1]
 size=10
 act_function=<function th at 0x0A641778>
 activations=[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
 biases=[[-1.32911375 -1.45747774  0.10639789 -0.60684581  0.6162194  -0.20473125
   0.08784803  1.19751599  0.3093872  -0.949713  ]]
 weights:
     |0.9230829723572704 -1.1286882003775778 0.7155939435449482 0.6646651990550531 -0.17644824313457577 -0.7544506331551675 -0.2751670278127521 -0.670684504071213 -1.2944525895817443 -1.0552538550215833 |

--------------------------
[2]
 size=1
 act_function=<function th at 0x0A641778>
 activations=[[0.]]
 biases=[[-1.29238629]]
 weights:
     |-1.7023570876667418 |
     |1.4838065217835346 |
     |-0.7430153538942152 |
     |-0.606831835002515 |
     |-1.412482318869308 |
     |1.2908637090724704 |
     |1.15545006276706 |
     |0.25302548024565574 |
     |0.07479927188256112 |
     |-1.56137927

In [39]:
# Stop as soon as the reported cost is at or below this threshold.
min_cost = 0.05

# NOTE(review): each pass consumes one batch, so the printed "epoch" number is a
# batch index.  The cost is what training() returns for all of target_vectors;
# no validation inputs are passed, so confirm it measures what is intended.
for i in range(batch_count):
    training_batch, target_vectors_batch = training_data_batches[i], target_vectors_batches[i]

    cost = training(net, training_batch, target_vectors_batch, target_vectors)
    print(f"epoch {i!s}: {cost!s}")

    if cost <= min_cost:
        break

epoch 0: 1.0001639728161256
epoch 1: 0.999545245733181
epoch 2: 0.9995452456950548
epoch 3: 0.999545246213225
epoch 4: 0.999545246425895
epoch 5: 1.0004546812258035
epoch 6: 1.000454681511304
epoch 7: 0.9995452476879604
epoch 8: 0.9995452478382576
epoch 9: 0.9995448699321536
epoch 10: 1.0004547518984996
epoch 11: 0.9995452478398313
epoch 12: 1.0004547518967524
epoch 13: 1.000454751898518
epoch 14: 1.0004547518985183
epoch 15: 0.9995452478393196
epoch 16: 1.0004547518985316
epoch 17: 0.9995452478393196
epoch 18: 0.9995452478393196
epoch 19: 1.000454751898519
epoch 20: 1.000454751898523
epoch 21: 0.9995452478393196
