In [54]:
import copy

from TPNN.tools.perceptron import*
import numpy as np
import numpy.random as rand

### Test net

In [55]:
net = Net()
net.insert_layer(0, 2, ident)
net.insert_layer(1, 5, sigmoid)
net.insert_layer(2, 1, ident)
print(net.layers_count)
net.init_weights(1, np.array([[1,1,1,1,1], [1,1,1,1,1]]))
net.init_weights(2, np.array([[1,1,1,1,1]]).transpose())
net.init_biases(2, np.array([[0]]))
net.init_biases(1, np.array([[1,1,1,1,1]]))

net.print_net_config()

3
layers=3
[0]
 size=2
 act_function=<function ident at 0x061726A0>
 activations=[[0. 0.]]
 biases=[[0. 0.]]
 weights:
     empty
--------------------------
[1]
 size=5
 act_function=<function sigmoid at 0x0A520778>
 activations=[[0. 0. 0. 0. 0.]]
 biases=[[1 1 1 1 1]]
 weights:
     |1 1 1 1 1 |
     |1 1 1 1 1 |

--------------------------
[2]
 size=1
 act_function=<function ident at 0x061726A0>
 activations=[[0.]]
 biases=[[0]]
 weights:
     |1 |
     |1 |
     |1 |
     |1 |
     |1 |

--------------------------


In [56]:
net.calc_output(np.array([1, 1]))

array([[4.76287063]])

In [57]:
net = Net()
net.insert_layer(0, 5, ident)
net.insert_layer(1, 7, sigmoid)
net.insert_layer(2, 7, th)
net.insert_layer(3, 5, ident)

net.init_weights(1, np.array([[1,2,3,4,5,6,7],[1,2,3,4,5,6,7],[1,2,3,4,5,6,7],[1,2,3,4,5,6,7],[1,2,3,4,5,6,7]]))
net.init_weights(2, np.array([[1,2,3,4,5,6,7],[1,2,3,4,5,6,7],[1,2,3,-4,5,6,7],[1,2,3,4,5,6,7],[1,2,3,4,5,6,7],[1,2,3,4,5,6,7],[1,2,3,4,5,6,7]]))
net.init_weights(3, np.array([[1,1,0,1,1],[1,1,1,1,1],[1,1,1,0,1],[1,1,1,1,1],[1,1,1,1,1],[1,1,1,1,1],[1,1,1,1,1]]))
net.init_biases(3, np.array([[1,1,1,1,1]]))

net.print_net_config()

layers=4
[0]
 size=5
 act_function=<function ident at 0x061726A0>
 activations=[[0. 0. 0. 0. 0.]]
 biases=[[0. 0. 0. 0. 0.]]
 weights:
     empty
--------------------------
[1]
 size=7
 act_function=<function sigmoid at 0x0A520778>
 activations=[[0. 0. 0. 0. 0. 0. 0.]]
 biases=[[0. 0. 0. 0. 0. 0. 0.]]
 weights:
     |1 2 3 4 5 6 7 |
     |1 2 3 4 5 6 7 |
     |1 2 3 4 5 6 7 |
     |1 2 3 4 5 6 7 |
     |1 2 3 4 5 6 7 |

--------------------------
[2]
 size=7
 act_function=<function th at 0x0A5207C0>
 activations=[[0. 0. 0. 0. 0. 0. 0.]]
 biases=[[0. 0. 0. 0. 0. 0. 0.]]
 weights:
     |1 2 3 4 5 6 7 |
     |1 2 3 4 5 6 7 |
     |1 2 3 -4 5 6 7 |
     |1 2 3 4 5 6 7 |
     |1 2 3 4 5 6 7 |
     |1 2 3 4 5 6 7 |
     |1 2 3 4 5 6 7 |

--------------------------
[3]
 size=5
 act_function=<function ident at 0x061726A0>
 activations=[[0. 0. 0. 0. 0.]]
 biases=[[1 1 1 1 1]]
 weights:
     |1 1 0 1 1 |
     |1 1 1 1 1 |
     |1 1 1 0 1 |
     |1 1 1 1 1 |
     |1 1 1 1 1 |
     |1 1 1 1 1 |
  

In [58]:
net.calc_output(np.array([1,2,0,2,1]))

array([[7.99999833, 7.99999833, 7.        , 6.99999833, 7.99999833]])

### Fill activation derivatives array

In [59]:
def calc_activations_derivatives(net: Net, target_vector):
    last_layer = net.layers[net.layers_count - 1]
    activation_der_array = []
    last_layer_derivatives_array = []

    for i in range(last_layer.neuron_count):
        der = predict_error_der(last_layer.activations[0], target_vector, i)
        last_layer_derivatives_array.append(der)

    activation_der_array.insert(0, last_layer_derivatives_array)

    # cal derivatives on each layer, except last and first layers
    cur_iteration = 0
    for i in range(net.layers_count - 2, 0, -1):
        cur_layer = net.layers[i]
        layer_derivatives_array = []

        for j in range(cur_layer.neuron_count):
            next_layer_der_array = activation_der_array[0]
            layer_derivatives_array.append(net.der_cost_act(i, j, next_layer_der_array))

        # add derivatives array of the current layer
        activation_der_array.insert(0, layer_derivatives_array)
        cur_iteration += 1

    return activation_der_array

## Net training functions

In [60]:
def get_weight_gradient_matrix(net: Net, layer_idx, activation_der_array):
    assert 0 < layer_idx < net.layers_count
    prev_layer_neuron_count = net.layers[layer_idx - 1].neuron_count
    cur_layer_neuron_count = net.layers[layer_idx].neuron_count

    gradient_matrix = np.zeros((prev_layer_neuron_count, cur_layer_neuron_count))

    for i in range(prev_layer_neuron_count):
        for j in range(cur_layer_neuron_count):
            gradient_matrix[i][j] = net.der_cost_weigh(layer_idx, i, j, activation_der_array[layer_idx - 1][j])
    return gradient_matrix

def get_bias_gradient_matrix(net: Net, layer_idx, activation_der_array):
    assert 0 < layer_idx < net.layers_count
    cur_layer_neuron_count = net.layers[layer_idx].neuron_count
    gradient_vector = np.zeros(cur_layer_neuron_count)

    for j in range(cur_layer_neuron_count):
        gradient_vector[j] = net.der_cost_bias(layer_idx, j, activation_der_array[layer_idx - 1][j])

    return gradient_vector

def step(net: Net, weight_grad_matrices, biases_grad_matrices):
    for i in range(net.layers_count - 1):
        idx = i + 1
        cur_layer = net.layers[idx]
        cur_layer.weights -= weight_grad_matrices[idx - 1]
        cur_layer.biases -= biases_grad_matrices[idx - 1]

def training_iteration(training_data_item, target_vector, net: Net):
    net.calc_output(training_data_item)
    act_der_array = calc_activations_derivatives(net, target_vector)

    weight_grad_matrices = []
    biases_grad_matrices = []

    for i in range(net.layers_count - 1):
        idx = i + 1
        weight_grad_matrices.append(get_weight_gradient_matrix(net, idx, act_der_array))
        biases_grad_matrices.append(get_bias_gradient_matrix(net, idx, act_der_array))
    # change net parameters
    step(net, weight_grad_matrices, biases_grad_matrices)

def calc_metric(net: Net, val_input_vectors, val_target_vectors):
    predict_errors = []

    idx = 0
    for target_vector in val_target_vectors:
        input_vector = val_input_vectors[idx]
        output = net.calc_output(input_vector)
        predict_errors = np.append(predict_errors, predict_error(output, target_vector))

    return np.mean(predict_errors)

# net training on all training data items (returns metric on validation)
def training(net: Net, training_data, target_vectors, val_input_vectors, val_target_vectors):
    for training_item, target_vector in zip(training_data, target_vectors):
        training_iteration(training_item, target_vector, net)
    return calc_metric(net, val_input_vectors, val_target_vectors)

# init weights and biases with start values
def init_net_parameters(net: Net):
    for i in range(net.layers_count - 1):
        idx = i + 1
        w_shape = net.layers[idx].weights.shape
        b_shape = net.layers[idx].biases.shape

        interval = (-0.5, 0.5)
        delta = interval[1] - interval[0]

        net.layers[idx].weights = rand.rand(w_shape[0], w_shape[1]) * delta + interval[0]
        net.layers[idx].biases = rand.rand(b_shape[0], b_shape[1]) * delta + interval[0]

## generation of training data

In [61]:
periods_count = 100
epsilon = 0.1
st_points_count = 10

dst_dir = "../data_sets/"

In [62]:
training_data_file = open(dst_dir + "cos_values_data", "w")

# 1 - write extremum points
training_data_file.write(str(0) + " " + str(1) + "\n")

for i in range(periods_count - 1):
    x = (np.pi / 2) * (i + 1)

    val1 = np.cos(x)
    val2 = np.cos(-x)
    training_data_file.write(str(x) + " " + str(val1) + "\n" + str(-x) + " " + str(val2) + "\n")

# write other points
st_points = np.linspace(epsilon, np.pi / 2 - epsilon, st_points_count)

for st_point in st_points:
    for i in range(periods_count):
        x = st_point + (np.pi / 2) * i
        val1 = np.cos(x)
        val2 = np.cos(-x)
        training_data_file.write(str(x) + " " + str(val1) + "\n" + str(-x) + " " + str(val2) + "\n")

training_data_file.close()

## Load training data

In [63]:
training_data_file = open(dst_dir + "cos_values_data", "r")
training_data = np.array([])
target_vectors = np.array([])
lines = []
items_count = 0

# read lines and shuffle it
for line in training_data_file:
    lines.append(line)

print("before sorting: " + str(lines[0:5]))
lines = sorted(lines, key=lambda l: float(l.split()[0]))
print("after sorting: " + str(lines[0:5]))
print("-------------------------")

# get training data items and target vectors:
for line in lines:
    arg, value = line.split()
    arg = float(arg)
    value = float(value)

    training_data = np.append(training_data, arg)
    target_vectors = np.append(target_vectors, value)
    items_count += 1

# reshape data to array of 1-d vectors
training_data = training_data.reshape((items_count, 1))
target_vectors = target_vectors.reshape((items_count, 1))

print("first 5 training items: " + str(training_data[0:5]))
print("first 5 training target vectors: " + str(target_vectors[0:5]))

before sorting: ['0 1\n', '1.5707963267948966 6.123233995736766e-17\n', '-1.5707963267948966 6.123233995736766e-17\n', '3.141592653589793 -1.0\n', '-3.141592653589793 -1.0\n']
after sorting: ['-156.97963267948967 0.9950041652780265\n', '-156.82732197651245 0.9683381584748817\n', '-156.67501127353523 0.9192515044496723\n', '-156.52270057055804 0.8488807426887266\n', '-156.37038986758083 0.7588552193149422\n']
-------------------------
first 5 training items: [[-156.97963268]
 [-156.82732198]
 [-156.67501127]
 [-156.52270057]
 [-156.37038987]]
first 5 training target vectors: [[0.99500417]
 [0.96833816]
 [0.9192515 ]
 [0.84888074]
 [0.75885522]]


## Split training data on batches

In [64]:
batch_size = 100
data_size = len(training_data)
batch_count = data_size // batch_size
rem = data_size % batch_size
training_data_batches = []
target_vectors_batches = []

if data_size % batch_size != 0:
    batch_count += 1

for i in range(batch_count):
    pos = i * batch_size
    tr_data_batch = []
    t_vectors_batch = []

    if i == batch_count - 1:
        tr_data_batch = training_data[pos:]
        t_vectors_batch = target_vectors[pos:]
    else:
        tr_data_batch = training_data[pos:pos + batch_size]
        t_vectors_batch = target_vectors[pos:pos + batch_size]
    training_data_batches.append(tr_data_batch)
    target_vectors_batches.append(t_vectors_batch)

print("batches' count: " + str(batch_count))
print("---------")
idx = 1
print("training data batch " + str(idx) + ":\n" + str(training_data_batches[idx]))
print("size=" + str(len(training_data_batches[idx])))

print("---------")
print("target vectors batch " + str(idx) + ":\n" + str(target_vectors_batches[idx]))
print("size=" + str(len(target_vectors_batches[idx])))

batches' count: 22
---------
training data batch 1:
[[-142.69015504]
 [-142.53784433]
 [-142.38553363]
 [-142.23322293]
 [-142.08091222]
 [-141.92860152]
 [-141.77629082]
 [-141.62398011]
 [-141.47166941]
 [-141.37166941]
 [-141.27166941]
 [-141.11935871]
 [-140.96704801]
 [-140.8147373 ]
 [-140.6624266 ]
 [-140.5101159 ]
 [-140.35780519]
 [-140.20549449]
 [-140.05318379]
 [-139.90087308]
 [-139.80087308]
 [-139.70087308]
 [-139.54856238]
 [-139.39625168]
 [-139.24394098]
 [-139.09163027]
 [-138.93931957]
 [-138.78700887]
 [-138.63469816]
 [-138.48238746]
 [-138.33007676]
 [-138.23007676]
 [-138.13007676]
 [-137.97776605]
 [-137.82545535]
 [-137.67314465]
 [-137.52083395]
 [-137.36852324]
 [-137.21621254]
 [-137.06390184]
 [-136.91159113]
 [-136.75928043]
 [-136.65928043]
 [-136.55928043]
 [-136.40696973]
 [-136.25465903]
 [-136.10234832]
 [-135.95003762]
 [-135.79772692]
 [-135.64541621]
 [-135.49310551]
 [-135.34079481]
 [-135.1884841 ]
 [-135.0884841 ]
 [-134.9884841 ]
 [-134.836173

# Create net and train it

In [65]:
# net parameters
net = Net()
hidden_layers_size = 10
hidden_layers_count = 3
activation = th

net.insert_layer(0, 1, ident)

for i in range(hidden_layers_count):
    net.insert_layer(i + 1, hidden_layers_size, activation)
net.insert_layer(hidden_layers_count + 1, 1, activation)
#print(net.layers_count)

for i in range(hidden_layers_count):
    idx = i + 1
    cur_layer = net.layers[idx]

    if i == 0:
        cur_layer.weights = np.zeros((1, hidden_layers_size))
    else:
        cur_layer.weights = np.zeros((hidden_layers_size, hidden_layers_size))
    cur_layer.biases = np.zeros((1, hidden_layers_size))

net.init_weights(net.layers_count - 1, np.zeros((hidden_layers_size, 1)))
net.init_biases(net.layers_count - 1, np.zeros((1, 1)))

# init all net parameters with values from standart normal destribution
init_net_parameters(net)

#net.print_net_config()

In [66]:
min_cost = 0.05
saved_nets = []

for i in range(batch_count):
    training_batch = training_data_batches[i]
    target_vectors_batch = target_vectors_batches[i]

    cost = training(net, training_batch, target_vectors_batch, training_data, target_vectors)
    saved_nets.append((copy.deepcopy(net), cost))

    print("epoch " + str(i) + ": " + str(cost))

    if cost <= min_cost:
        break

epoch 0: 0.6397932838694731
epoch 1: 0.9394526132068173
epoch 2: 0.698411583008098
epoch 3: 0.9355425904951062
epoch 4: 0.7859180582560614
epoch 5: 0.8871797430669052
epoch 6: 0.8688935944467461
epoch 7: 0.7003706107269546
epoch 8: 0.901158833364851
epoch 9: 0.6319415395937856
epoch 10: 0.8728017333860119
epoch 11: 0.6333726185163615
epoch 12: 0.7406113482223368
epoch 13: 0.668217485978285
epoch 14: 0.7032543867438426
epoch 15: 0.696823803219905
epoch 16: 0.6939520978505145
epoch 17: 0.6810975354276543
epoch 18: 0.6849934035058579
epoch 19: 0.7335767430840453
epoch 20: 0.636298883441533
epoch 21: 0.6506095069256727


## Get best net config

In [67]:
min = 10000
result = 0

for item in saved_nets:
    if item[1] < min:
        result = item
        min = item[1]

result_net = result[0]

# check net cost function
print("metric of the best net - " + str(calc_metric(result_net, training_data, target_vectors)))

metric of the best net - 0.6319415395937856
