In [347]:
import copy

from TPNN.tools.perceptron import*
import numpy as np
import numpy.random as rand

### Test net

In [348]:
# Tiny 2-5-1 network with every weight set to 1: small enough to check by hand.
net = Net()
for layer_idx, (layer_size, layer_act) in enumerate([(2, ident), (5, sigmoid), (1, ident)]):
    net.insert_layer(layer_idx, layer_size, layer_act)
print(net.layers_count)

# Weight matrices are laid out as (previous layer size, current layer size).
net.init_weights(1, np.ones((2, 5), dtype=int))
net.init_weights(2, np.ones((5, 1), dtype=int))
net.init_biases(2, np.zeros((1, 1), dtype=int))
net.init_biases(1, np.ones((1, 5), dtype=int))

net.print_net_config()

3
layers=3
[0]
 size=2
 act_function=<function ident at 0x05D42538>
 activations=[[0. 0.]]
 biases=[[0. 0.]]
 weights:
     empty
--------------------------
[1]
 size=5
 act_function=<function sigmoid at 0x0A0F1778>
 activations=[[0. 0. 0. 0. 0.]]
 biases=[[1 1 1 1 1]]
 weights:
     |1 1 1 1 1 |
     |1 1 1 1 1 |

--------------------------
[2]
 size=1
 act_function=<function ident at 0x05D42538>
 activations=[[0.]]
 biases=[[0]]
 weights:
     |1 |
     |1 |
     |1 |
     |1 |
     |1 |

--------------------------


In [349]:
# Forward pass for input (1, 1). The recorded result 4.76287063 is consistent
# with 5 * sigmoid(3): five hidden units, each fed 1 + 1 plus a bias of 1.
net.calc_output(np.array([1, 1]))

array([[4.76287063]])

In [350]:
# 5-7-7-5 network with hand-picked integer weights for a second forward-pass check.
net = Net()
net.insert_layer(0, 5, ident)
net.insert_layer(1, 7, sigmoid)
net.insert_layer(2, 7, th)
net.insert_layer(3, 5, ident)

ramp = np.arange(1, 8)  # the row 1..7 shared by the first two weight matrices

# Layer 1: five identical rows.
net.init_weights(1, np.tile(ramp, (5, 1)))

# Layer 2: seven identical rows except for one negated entry.
layer2_weights = np.tile(ramp, (7, 1))
layer2_weights[2, 3] = -4
net.init_weights(2, layer2_weights)

# Layer 3: all ones apart from two zeroed connections.
layer3_weights = np.ones((7, 5), dtype=int)
layer3_weights[0, 2] = 0
layer3_weights[2, 3] = 0
net.init_weights(3, layer3_weights)
net.init_biases(3, np.ones((1, 5), dtype=int))

net.print_net_config()

layers=4
[0]
 size=5
 act_function=<function ident at 0x05D42538>
 activations=[[0. 0. 0. 0. 0.]]
 biases=[[0. 0. 0. 0. 0.]]
 weights:
     empty
--------------------------
[1]
 size=7
 act_function=<function sigmoid at 0x0A0F1778>
 activations=[[0. 0. 0. 0. 0. 0. 0.]]
 biases=[[0. 0. 0. 0. 0. 0. 0.]]
 weights:
     |1 2 3 4 5 6 7 |
     |1 2 3 4 5 6 7 |
     |1 2 3 4 5 6 7 |
     |1 2 3 4 5 6 7 |
     |1 2 3 4 5 6 7 |

--------------------------
[2]
 size=7
 act_function=<function th at 0x0A0F17C0>
 activations=[[0. 0. 0. 0. 0. 0. 0.]]
 biases=[[0. 0. 0. 0. 0. 0. 0.]]
 weights:
     |1 2 3 4 5 6 7 |
     |1 2 3 4 5 6 7 |
     |1 2 3 -4 5 6 7 |
     |1 2 3 4 5 6 7 |
     |1 2 3 4 5 6 7 |
     |1 2 3 4 5 6 7 |
     |1 2 3 4 5 6 7 |

--------------------------
[3]
 size=5
 act_function=<function ident at 0x05D42538>
 activations=[[0. 0. 0. 0. 0.]]
 biases=[[1 1 1 1 1]]
 weights:
     |1 1 0 1 1 |
     |1 1 1 1 1 |
     |1 1 1 0 1 |
     |1 1 1 1 1 |
     |1 1 1 1 1 |
     |1 1 1 1 1 |
  

In [351]:
# Forward pass through the 5-7-7-5 test network for one 5-element input.
net.calc_output(np.array([1,2,0,2,1]))

array([[7.99999833, 7.99999833, 7.        , 6.99999833, 7.99999833]])

### Fill activation derivatives array

In [352]:
def calc_activations_derivatives(net: Net, target_vector):
    """Collect cost derivatives w.r.t. activations for every non-input layer.

    The output layer is handled first through ``predict_error_der``, using the
    activations currently stored on that layer. The hidden layers are then
    processed from back to front; each one is handed the derivative list of
    the layer that follows it.

    :param net: network providing ``layers``, ``layers_count`` and ``der_cost_act``
    :param target_vector: expected output; its first dimension must be 1
    :return: list of per-layer derivative lists ordered from layer 1 up to the
        output layer (the input layer has no entry)
    """
    assert target_vector.shape[0] == 1

    output_layer = net.layers[net.layers_count - 1]
    output_ders = [
        predict_error_der(output_layer.activations[0], target_vector, neuron)
        for neuron in range(output_layer.neuron_count)
    ]
    per_layer_ders = [output_ders]

    # Walk the hidden layers backwards. Index 0 of the result always holds the
    # derivatives of the layer right after the one being processed.
    for layer_idx in reversed(range(1, net.layers_count - 1)):
        following_ders = per_layer_ders[0]
        hidden_ders = [
            net.der_cost_act(layer_idx, neuron, following_ders)
            for neuron in range(net.layers[layer_idx].neuron_count)
        ]
        per_layer_ders.insert(0, hidden_ders)

    return per_layer_ders

## Net training functions

In [353]:
def get_weight_gradient_matrix(net: Net, layer_idx, activation_der_array):
    """Build the cost gradient for the weights feeding layer ``layer_idx``.

    :param net: network providing ``layers``, ``layers_count`` and ``der_cost_weigh``
    :param layer_idx: index of a non-input layer (``0 < layer_idx < layers_count``)
    :param activation_der_array: per-layer activation derivatives; entry
        ``layer_idx - 1`` is the one used for layer ``layer_idx``
    :return: float array of shape (previous layer size, current layer size)
    """
    assert 0 < layer_idx < net.layers_count
    rows = net.layers[layer_idx - 1].neuron_count
    cols = net.layers[layer_idx].neuron_count

    grad = np.zeros((rows, cols))
    # row-major walk over every (source neuron, destination neuron) pair
    for src, dst in np.ndindex(rows, cols):
        grad[src, dst] = net.der_cost_weigh(
            layer_idx, src, dst, activation_der_array[layer_idx - 1][dst]
        )
    return grad

def get_bias_gradient_matrix(net: Net, layer_idx, activation_der_array):
    """Build the cost gradient for the biases of layer ``layer_idx``.

    :param net: network providing ``layers``, ``layers_count`` and ``der_cost_bias``
    :param layer_idx: index of a non-input layer (``0 < layer_idx < layers_count``)
    :param activation_der_array: per-layer activation derivatives; entry
        ``layer_idx - 1`` is the one used for layer ``layer_idx``
    :return: a one-element list holding a float array with one value per neuron
    """
    assert 0 < layer_idx < net.layers_count
    width = net.layers[layer_idx].neuron_count

    bias_grad = np.zeros(width)
    for neuron in range(width):
        bias_grad[neuron] = net.der_cost_bias(
            layer_idx, neuron, activation_der_array[layer_idx - 1][neuron]
        )

    # wrapped in a list so the result reads as a single row (1 x width)
    return [bias_grad]

def step(net: Net, weight_grad_matrices, biases_grad_matrices, learning_rate=0.001):
    """Apply one gradient-descent update to every non-input layer, in place.

    :param net: network whose layers 1..layers_count-1 hold ``weights`` and
        ``biases`` arrays
    :param weight_grad_matrices: weight gradients; entry ``k`` belongs to layer ``k + 1``
    :param biases_grad_matrices: bias gradients, indexed the same way
    :param learning_rate: factor applied to each gradient before it is
        subtracted; defaults to the previously hard-coded 0.001
    """
    for layer_idx in range(1, net.layers_count):
        layer = net.layers[layer_idx]
        # Plain broadcasting replaces the former np.vectorize(lambda) wrapper,
        # which made one Python call per element and raised on zero-size input.
        layer.weights -= np.asarray(weight_grad_matrices[layer_idx - 1]) * learning_rate
        layer.biases -= np.asarray(biases_grad_matrices[layer_idx - 1]) * learning_rate

def training_iteration(training_data_item, target_vector, net: Net):
    """Train the net on a single sample: forward pass, gradients, one update.

    :param training_data_item: input vector passed to ``net.calc_output``
    :param target_vector: expected output for that input
    :param net: network that is updated in place through ``step``
    """
    # The forward pass comes first; the derivative helper reads the activations
    # stored on the layers (presumably refreshed by calc_output).
    net.calc_output(training_data_item)
    act_der_array = calc_activations_derivatives(net, target_vector)

    weight_grad_matrices, biases_grad_matrices = [], []
    # one weight gradient and one bias gradient per non-input layer, in layer order
    for layer_idx in range(1, net.layers_count):
        weight_grad_matrices.append(get_weight_gradient_matrix(net, layer_idx, act_der_array))
        biases_grad_matrices.append(get_bias_gradient_matrix(net, layer_idx, act_der_array))

    # change net parameters
    step(net, weight_grad_matrices, biases_grad_matrices)

def calc_metric(net: Net, val_input_vectors, val_target_vectors):
    """Average ``predict_error`` over a validation set.

    :param net: network providing ``calc_output``
    :param val_input_vectors: inputs, indexed in step with the targets
    :param val_target_vectors: expected outputs; iteration is driven by these
    :return: mean of all (flattened) per-sample error values
    """
    # Start from an empty float array so the result matches a chain of
    # np.append calls: every error is flattened and joined into one 1-D array.
    collected = [np.ravel([])]

    for position, target_vector in enumerate(val_target_vectors):
        output = net.calc_output(val_input_vectors[position])
        collected.append(np.ravel(predict_error(output, target_vector)))

    return np.mean(np.concatenate(collected))

# one pass over all training items, then the metric on the validation data
def training(net: Net, training_data, target_vectors, val_input_vectors, val_target_vectors):
    """Train on every (item, target) pair once and score the resulting net.

    :param net: network updated in place by ``training_iteration``
    :param training_data: training inputs
    :param target_vectors: expected outputs, paired with ``training_data`` by position
    :param val_input_vectors: inputs used for scoring
    :param val_target_vectors: expected outputs used for scoring
    :return: value of ``calc_metric`` on the validation data after the pass
    """
    for sample in zip(training_data, target_vectors):
        item, expected = sample
        training_iteration(item, expected, net)

    metric = calc_metric(net, val_input_vectors, val_target_vectors)
    return metric

# init weights and biases with start values
def init_net_parameters(net: Net, interval=(-0.5, 0.5)):
    """Replace every weight and bias with a uniform random draw from ``interval``.

    The existing ``weights`` / ``biases`` arrays of layers 1..layers_count-1 are
    only read for their shapes and are then replaced by new arrays of the same
    shape. Values come from ``numpy.random.rand``, so they lie in the half-open
    range ``[interval[0], interval[1])`` and follow the global NumPy seed.

    :param net: network whose non-input layers already carry correctly shaped arrays
    :param interval: ``(low, high)`` bounds of the uniform distribution;
        defaults to the previously hard-coded ``(-0.5, 0.5)``
    """
    # the bounds do not depend on the layer, so compute them once
    low = interval[0]
    span = interval[1] - interval[0]

    for layer_idx in range(1, net.layers_count):
        layer = net.layers[layer_idx]
        # weights are drawn before biases, so a seeded run gives the same values
        layer.weights = rand.rand(*layer.weights.shape) * span + low
        layer.biases = rand.rand(*layer.biases.shape) * span + low

## Generation of training data

In [354]:
# number of pi/2-wide steps generated on each side of zero
periods_count = 50
# margin kept from both ends of [0, pi/2] when placing the interior start points
epsilon = 0.1
# how many interior start points are spread across that interval
st_points_count = 20

# directory the generated data set is written to and later read from
dst_dir = "../data_sets/"

In [355]:
# Write "x cos(x)" pairs, one pair per line, for arguments on both sides of zero.
# The context manager closes the file even if one of the writes fails.
with open(dst_dir + "cos_values_data", "w") as training_data_file:
    # 1 - the origin, then every multiple of pi/2 on both sides
    #     (these are the extrema AND the zero crossings of cos)
    training_data_file.write(str(0) + " " + str(1) + "\n")

    for i in range(periods_count - 1):
        x = (np.pi / 2) * (i + 1)

        val1 = np.cos(x)
        val2 = np.cos(-x)
        training_data_file.write(str(x) + " " + str(val1) + "\n" + str(-x) + " " + str(val2) + "\n")

    # 2 - interior points: a grid inside (0, pi/2), repeated at pi/2 offsets
    st_points = np.linspace(epsilon, np.pi / 2 - epsilon, st_points_count)

    for st_point in st_points:
        for i in range(periods_count):
            x = st_point + (np.pi / 2) * i
            val1 = np.cos(x)
            val2 = np.cos(-x)
            training_data_file.write(str(x) + " " + str(val1) + "\n" + str(-x) + " " + str(val2) + "\n")

## Load training data

In [356]:
# Read the generated "x cos(x)" lines back; the context manager closes the file.
with open(dst_dir + "cos_values_data", "r") as training_data_file:
    lines = list(training_data_file)

# order the samples by argument (ascending x); note that this sorts, it does not shuffle
print("before sorting: " + str(lines[0:5]))
lines = sorted(lines, key=lambda l: float(l.split()[0]))
print("after sorting: " + str(lines[0:5]))
print("-------------------------")

# Parse every line into plain Python lists first and convert to arrays once;
# growing an array with np.append would copy it for each line.
parsed_args = []
parsed_values = []
for line in lines:
    arg, value = line.split()
    arg = float(arg)
    value = float(value)

    parsed_args.append(arg)
    parsed_values.append(value)

items_count = len(lines)

# reshape data to an array of 1-d vectors: one row per sample
training_data = np.array(parsed_args, dtype=float).reshape((items_count, 1))
target_vectors = np.array(parsed_values, dtype=float).reshape((items_count, 1))

print("first 5 training items: \n" + str(training_data[0:5]))
print("first 5 training target vectors: \n" + str(target_vectors[0:5]))

before sorting: ['0 1\n', '1.5707963267948966 6.123233995736766e-17\n', '-1.5707963267948966 6.123233995736766e-17\n', '3.141592653589793 -1.0\n', '-3.141592653589793 -1.0\n']
after sorting: ['-78.43981633974482 -0.9950041652780249\n', '-78.36766916465035 -0.9852192311124913\n', '-78.29552198955588 -0.9703082432473036\n', '-78.22337481446142 -0.9503487829172798\n', '-78.15122763936695 -0.9254446983444491\n']
-------------------------
first 5 training items: 
[[-78.43981634]
 [-78.36766916]
 [-78.29552199]
 [-78.22337481]
 [-78.15122764]]
first 5 training target vectors: 
[[-0.99500417]
 [-0.98521923]
 [-0.97030824]
 [-0.95034878]
 [-0.9254447 ]]


## Split training data into batches

In [357]:
batch_size = 100
data_size = len(training_data)

# number of full batches plus, when there is a remainder, one shorter last batch
batch_count, rem = divmod(data_size, batch_size)
if rem != 0:
    batch_count += 1

training_data_batches = []
target_vectors_batches = []

# Slices running past the end are clipped, so the shorter last batch needs no
# special case (inputs and targets have the same length).
for pos in range(0, batch_count * batch_size, batch_size):
    training_data_batches.append(training_data[pos:pos + batch_size])
    target_vectors_batches.append(target_vectors[pos:pos + batch_size])

print("batches' count: " + str(batch_count))
print("---------")

# preview one batch of inputs and the matching batch of targets
idx = 1
print(f"training data batch {idx}:\n{training_data_batches[idx]!s}")
print(f"size={len(training_data_batches[idx])}")

print("---------")
print(f"target vectors batch {idx}:\n{target_vectors_batches[idx]!s}")
print(f"size={len(target_vectors_batches[idx])}")

batches' count: 21
---------
training data batch 1:
[[-71.00227623]
 [-70.93012906]
 [-70.85798188]
 [-70.78583471]
 [-70.68583471]
 [-70.58583471]
 [-70.51368753]
 [-70.44154036]
 [-70.36939318]
 [-70.29724601]
 [-70.22509883]
 [-70.15295166]
 [-70.08080448]
 [-70.00865731]
 [-69.93651013]
 [-69.86436295]
 [-69.79221578]
 [-69.7200686 ]
 [-69.64792143]
 [-69.57577425]
 [-69.50362708]
 [-69.4314799 ]
 [-69.35933273]
 [-69.28718555]
 [-69.21503838]
 [-69.11503838]
 [-69.01503838]
 [-68.9428912 ]
 [-68.87074403]
 [-68.79859685]
 [-68.72644968]
 [-68.6543025 ]
 [-68.58215533]
 [-68.51000815]
 [-68.43786098]
 [-68.3657138 ]
 [-68.29356663]
 [-68.22141945]
 [-68.14927228]
 [-68.0771251 ]
 [-68.00497793]
 [-67.93283075]
 [-67.86068358]
 [-67.7885364 ]
 [-67.71638923]
 [-67.64424205]
 [-67.54424205]
 [-67.44424205]
 [-67.37209488]
 [-67.2999477 ]
 [-67.22780053]
 [-67.15565335]
 [-67.08350618]
 [-67.011359  ]
 [-66.93921183]
 [-66.86706465]
 [-66.79491748]
 [-66.7227703 ]
 [-66.65062313]
 [-6

# Create net and train it

In [358]:
# net parameters
net = Net()
hidden_layers_size = 10
hidden_layers_count = 2
activation = th

# layout: 1 input -> hidden_layers_count hidden layers -> 1 output
net.insert_layer(0, 1, ident)
for idx in range(1, hidden_layers_count + 1):
    net.insert_layer(idx, hidden_layers_size, activation)
net.insert_layer(hidden_layers_count + 1, 1, ident)

# Give every hidden layer zero-filled placeholders of the right shape. Only the
# shapes matter here, because init_net_parameters replaces the values below.
for idx in range(1, hidden_layers_count + 1):
    cur_layer = net.layers[idx]
    # the first hidden layer is fed by the single input neuron
    fan_in = 1 if idx == 1 else hidden_layers_size
    cur_layer.weights = np.zeros((fan_in, hidden_layers_size))
    cur_layer.biases = np.zeros((1, hidden_layers_size))

net.init_weights(net.layers_count - 1, np.zeros((hidden_layers_size, 1)))
net.init_biases(net.layers_count - 1, np.zeros((1, 1)))

# init all net parameters with uniform random values from [-0.5, 0.5)
init_net_parameters(net)

net.print_net_config()

layers=4
[0]
 size=1
 act_function=<function ident at 0x05D42538>
 activations=[[0.]]
 biases=[[0.]]
 weights:
     empty
--------------------------
[1]
 size=10
 act_function=<function th at 0x0A0F17C0>
 activations=[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
 biases=[[-0.10189921  0.44463151  0.4888648   0.48308052 -0.28123689  0.34229224
   0.43164378  0.17847609  0.22655201  0.03204047]]
 weights:
     |0.49348466741918806 0.4898229810878617 0.19802746092523638 0.41144345146240036 -0.23747814910667042 -0.25468440442996276 0.48971826363897863 0.47331915899125243 -0.326662993488393 0.2069944502949601 |

--------------------------
[2]
 size=10
 act_function=<function th at 0x0A0F17C0>
 activations=[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
 biases=[[-0.19989819 -0.25108238 -0.21081007  0.4418575  -0.13792872 -0.47424723
  -0.33216886 -0.00278209 -0.1831021   0.3762984 ]]
 weights:
     |-0.4681985088187054 -0.48348167813462506 0.3897024452549418 -0.32450654653299904 -0.15626816396342158 0.24179335833046

In [359]:
min_cost = 0.05
saved_nets = []

# One pass over the batches. After each batch the net is scored on the full
# training set and a deep copy is kept so the best state can be chosen later.
for i in range(batch_count):
    training_batch, target_vectors_batch = training_data_batches[i], target_vectors_batches[i]

    cost = training(net, training_batch, target_vectors_batch, training_data, target_vectors)
    snapshot = (copy.deepcopy(net), cost)
    saved_nets.append(snapshot)

    # NOTE: the label reads "epoch", but i counts batches within a single pass
    print(f"epoch {i}: {cost!s}")

    # stop early once the metric is good enough
    if cost <= min_cost:
        break

epoch 0: 0.7252484417831693
epoch 1: 0.6889946983681182
epoch 2: 0.6679087497316731
epoch 3: 0.6646659887466483
epoch 4: 0.6724375635000721
epoch 5: 0.6894869498156289
epoch 6: 0.6865625038684654
epoch 7: 0.6695086322641455
epoch 8: 0.6640936809466762
epoch 9: 0.6665173301905517
epoch 10: 0.6539413962192094
epoch 11: 0.6521812541133866
epoch 12: 0.6510793627563596
epoch 13: 0.6489990011481175
epoch 14: 0.6496087414804866
epoch 15: 0.6586528493880314
epoch 16: 0.6661491590257815
epoch 17: 0.656799879503033
epoch 18: 0.6493111947807878
epoch 19: 0.6491579738592658
epoch 20: 0.6549480565923463


## Get best net config

In [360]:
# Pick the snapshot with the lowest recorded metric. Starting from infinity
# accepts any finite cost (the old bound of 10000 did not), and the running
# minimum no longer lives in a variable called `min`, which shadowed the builtin.
best_cost = float("inf")
result = None

for item in saved_nets:
    if item[1] < best_cost:
        result = item
        best_cost = item[1]

if result is None:
    # nothing was saved, or no cost compared as smaller than infinity (e.g. NaN)
    raise ValueError("no saved net with a usable cost to choose from")

result_net = result[0]

# check net cost function
print("metric of the best net - " + str(calc_metric(result_net, training_data, target_vectors)))

result_net.print_net_config()

# spot checks: cos(pi/2) = 0, cos(pi/4) ~ 0.707, plus one larger argument
print(result_net.calc_output([np.pi/2]))
print(result_net.calc_output([np.pi/4]))
print(result_net.calc_output([23]))

metric of the best net - 0.6489990011481175
layers=4
[0]
 size=1
 act_function=<function ident at 0x05D42538>
 activations=[[78.43981634]]
 biases=[[0.]]
 weights:
     empty
--------------------------
[1]
 size=10
 act_function=<function th at 0x0A0F17C0>
 activations=[[ 1.  1.  1.  1. -1. -1.  1.  1. -1.  1.]]
 biases=[[-0.09944205  0.45015797  0.49127605  0.48249391 -0.27994141  0.34550836
   0.42983199  0.17641664  0.22702225  0.03080921]]
 weights:
     |0.4879690693040836 0.49130288040732056 0.20318483350741942 0.4095923334863581 -0.24217641673475598 -0.25149081512288535 0.4859781075210512 0.4753165107317751 -0.32301203778167803 0.18856866056982396 |

--------------------------
[2]
 size=10
 act_function=<function th at 0x0A0F17C0>
 activations=[[-0.43519084  0.02182684  0.35848235  0.77398232 -0.26804105  0.83995634
   0.86385991  0.8934809   0.7405313   0.82263188]]
 biases=[[-0.21068617 -0.257047   -0.20946733  0.42769027 -0.144449   -0.47292564
  -0.33067507 -0.00205806 -0.18