In [145]:
import copy

from TPNN.tools.perceptron import*
import numpy as np
import numpy.random as rand
import plotly.express as px
import plotly.graph_objects as go

### Test net

In [146]:
# Smoke test: a 2-5-1 perceptron (ident -> sigmoid -> ident) with every weight set to one.
net = Net()
for position, (width, act) in enumerate([(2, ident), (5, sigmoid), (1, ident)]):
    net.insert_layer(position, width, act)
print(net.layers_count)

# Weight matrices are shaped (previous layer size, current layer size);
# biases are row vectors of shape (1, current layer size).
net.init_weights(1, np.ones((2, 5), dtype=int))
net.init_weights(2, np.ones((5, 1), dtype=int))
net.init_biases(2, np.zeros((1, 1), dtype=int))
net.init_biases(1, np.ones((1, 5), dtype=int))

net.print_net_config()

3
layers=3
[0]
 size=2
 act_function=<function ident at 0x05F014A8>
 activations=[[0. 0.]]
 biases=[[0. 0.]]
 weights:
     empty
--------------------------
[1]
 size=5
 act_function=<function sigmoid at 0x0B9627C0>
 activations=[[0. 0. 0. 0. 0.]]
 biases=[[1 1 1 1 1]]
 weights:
     |1 1 1 1 1 |
     |1 1 1 1 1 |

--------------------------
[2]
 size=1
 act_function=<function ident at 0x05F014A8>
 activations=[[0.]]
 biases=[[0]]
 weights:
     |1 |
     |1 |
     |1 |
     |1 |
     |1 |

--------------------------


In [147]:
# Evaluate the 2-5-1 test net on the input (1, 1); the returned array is shown as the cell output.
net.calc_output(np.array([1, 1]))

array([[4.76287063]])

In [148]:
# Second smoke test: a 5-7-7-5 perceptron (ident -> sigmoid -> th -> ident) with hand-picked weights.
net = Net()
for position, (width, act) in enumerate([(5, ident), (7, sigmoid), (7, th), (5, ident)]):
    net.insert_layer(position, width, act)

# Layers 1 and 2 use rows of 1..7; layer 2 has a single negated entry.
weights_1 = np.tile(np.arange(1, 8), (5, 1))
weights_2 = np.tile(np.arange(1, 8), (7, 1))
weights_2[2, 3] = -4

# Layer 3 is all ones except for two zeroed connections.
weights_3 = np.ones((7, 5), dtype=int)
weights_3[0, 2] = 0
weights_3[2, 3] = 0

net.init_weights(1, weights_1)
net.init_weights(2, weights_2)
net.init_weights(3, weights_3)
net.init_biases(3, np.ones((1, 5), dtype=int))

net.print_net_config()

layers=4
[0]
 size=5
 act_function=<function ident at 0x05F014A8>
 activations=[[0. 0. 0. 0. 0.]]
 biases=[[0. 0. 0. 0. 0.]]
 weights:
     empty
--------------------------
[1]
 size=7
 act_function=<function sigmoid at 0x0B9627C0>
 activations=[[0. 0. 0. 0. 0. 0. 0.]]
 biases=[[0. 0. 0. 0. 0. 0. 0.]]
 weights:
     |1 2 3 4 5 6 7 |
     |1 2 3 4 5 6 7 |
     |1 2 3 4 5 6 7 |
     |1 2 3 4 5 6 7 |
     |1 2 3 4 5 6 7 |

--------------------------
[2]
 size=7
 act_function=<function th at 0x0B962808>
 activations=[[0. 0. 0. 0. 0. 0. 0.]]
 biases=[[0. 0. 0. 0. 0. 0. 0.]]
 weights:
     |1 2 3 4 5 6 7 |
     |1 2 3 4 5 6 7 |
     |1 2 3 -4 5 6 7 |
     |1 2 3 4 5 6 7 |
     |1 2 3 4 5 6 7 |
     |1 2 3 4 5 6 7 |
     |1 2 3 4 5 6 7 |

--------------------------
[3]
 size=5
 act_function=<function ident at 0x05F014A8>
 activations=[[0. 0. 0. 0. 0.]]
 biases=[[1 1 1 1 1]]
 weights:
     |1 1 0 1 1 |
     |1 1 1 1 1 |
     |1 1 1 0 1 |
     |1 1 1 1 1 |
     |1 1 1 1 1 |
     |1 1 1 1 1 |
  

In [149]:
# Evaluate the 5-7-7-5 test net on a sample input; the returned array is shown as the cell output.
net.calc_output(np.array([1,2,0,2,1]))

array([[7.99999833, 7.99999833, 7.        , 6.99999833, 7.99999833]])

### Fill activation derivatives array

In [150]:
def calc_activations_derivatives(net: Net, target_vector):
    """Collect per-neuron cost derivatives for every layer except the input layer.

    The last layer's entries come from ``predict_error_der`` applied to the
    activations currently stored on that layer, so the net is expected to have
    been evaluated on the matching input beforehand. Each earlier layer's
    entries come from ``net.der_cost_act`` and are computed from the already
    known derivatives of the layer that follows it.

    :param net: net whose layers are inspected
    :param target_vector: expected output; its first dimension must be 1
    :return: list of lists ordered from layer 1 to the last layer, i.e. the
             entry at index ``k`` belongs to layer ``k + 1``
    """
    assert target_vector.shape[0] == 1

    output_layer = net.layers[net.layers_count - 1]
    derivatives_by_layer = [
        [
            predict_error_der(output_layer.activations[0], target_vector, neuron)
            for neuron in range(output_layer.neuron_count)
        ]
    ]

    # Walk the hidden layers from the one before the output down to layer 1;
    # the front of the list always holds the derivatives of the following layer.
    for layer_idx in range(net.layers_count - 2, 0, -1):
        following_layer_ders = derivatives_by_layer[0]
        hidden_ders = [
            net.der_cost_act(layer_idx, neuron, following_layer_ders)
            for neuron in range(net.layers[layer_idx].neuron_count)
        ]
        derivatives_by_layer.insert(0, hidden_ders)

    return derivatives_by_layer

## Net training functions

In [151]:
def get_weight_gradient_matrix(net: Net, layer_idx, activation_der_array):
    """Build the gradient matrix for the weights feeding layer ``layer_idx``.

    :param net: net providing ``der_cost_weigh`` and the layer sizes
    :param layer_idx: index of a non-input layer (``0 < layer_idx < layers_count``)
    :param activation_der_array: per-layer derivative lists where index ``k``
                                 holds the values for layer ``k + 1``
    :return: array of shape (previous layer size, current layer size)
    """
    assert 0 < layer_idx < net.layers_count
    rows = net.layers[layer_idx - 1].neuron_count
    cols = net.layers[layer_idx].neuron_count

    grads = np.zeros((rows, cols))
    # np.ndindex visits (row, col) pairs in row-major order, like two nested loops.
    for src, dst in np.ndindex(rows, cols):
        grads[src, dst] = net.der_cost_weigh(layer_idx, src, dst, activation_der_array[layer_idx - 1][dst])
    return grads

def get_bias_gradient_matrix(net: Net, layer_idx, activation_der_array):
    """Build the gradient of the biases of layer ``layer_idx``.

    :param net: net providing ``der_cost_bias`` and the layer sizes
    :param layer_idx: index of a non-input layer (``0 < layer_idx < layers_count``)
    :param activation_der_array: per-layer derivative lists where index ``k``
                                 holds the values for layer ``k + 1``
    :return: one-element list wrapping a 1-D array with one entry per neuron
    """
    assert 0 < layer_idx < net.layers_count
    width = net.layers[layer_idx].neuron_count

    bias_grads = np.zeros(width)
    for neuron in range(width):
        bias_grads[neuron] = net.der_cost_bias(layer_idx, neuron, activation_der_array[layer_idx - 1][neuron])

    # Wrapped in a list so the result reads as a single-row matrix.
    return [bias_grads]

def step(net: Net, optimizer, weight_grad_matrices, biases_grad_matrices, learning_rate=0.0001):
    """Apply one parameter update to every non-input layer and report the gradient norm.

    Each gradient is scaled by ``learning_rate`` and by the coefficient obtained
    from ``optimizer.get_next_coefficient`` and then subtracted in place from
    the layer's weights and biases.

    :param net: net whose layers 1..layers_count-1 are updated in place
    :param optimizer: object exposing ``get_next_coefficient(weight_grads, bias_grads)``
    :param weight_grad_matrices: weight gradients; index ``k`` belongs to layer ``k + 1``
    :param biases_grad_matrices: bias gradients; index ``k`` belongs to layer ``k + 1``
    :param learning_rate: step size; defaults to the previously hard-coded 0.0001
    :return: value of ``get_norm`` for the supplied gradients
    """
    coefficient = optimizer.get_next_coefficient(weight_grad_matrices, biases_grad_matrices)

    for grad_idx in range(net.layers_count - 1):
        layer = net.layers[grad_idx + 1]
        # Plain array arithmetic replaces the former np.vectorize lambda: same
        # element-wise product, no per-element Python call, and no failure on
        # size-0 arrays. np.asarray also turns the one-element bias list into a
        # (1, n) row.
        # NOTE(review): assumes the optimizer coefficient is a scalar - confirm.
        layer.weights -= np.asarray(weight_grad_matrices[grad_idx]) * learning_rate * coefficient
        layer.biases -= np.asarray(biases_grad_matrices[grad_idx]) * learning_rate * coefficient

    # return gradient norm
    return get_norm(weight_grad_matrices, biases_grad_matrices)

def training_iteration(training_data_item, target_vector, net: Net, optimizer):
    """Run one training step on a single sample.

    Evaluates the net on the sample, derives the activation derivatives,
    builds the weight and bias gradients of every non-input layer and hands
    them to ``step``.

    :param training_data_item: input passed to ``net.calc_output``
    :param target_vector: expected output for the sample
    :param net: net being trained (modified by ``step``)
    :param optimizer: optimizer forwarded to ``step``
    :return: whatever ``step`` returns (the gradient norm)
    """
    net.calc_output(training_data_item)
    act_ders = calc_activations_derivatives(net, target_vector)

    # One (weights, biases) gradient pair per non-input layer, computed in that order.
    layer_grads = [
        (
            get_weight_gradient_matrix(net, layer_idx, act_ders),
            get_bias_gradient_matrix(net, layer_idx, act_ders),
        )
        for layer_idx in range(1, net.layers_count)
    ]
    weight_grads = [pair[0] for pair in layer_grads]
    bias_grads = [pair[1] for pair in layer_grads]

    # change net parameters and return gradient norm
    return step(net, optimizer, weight_grads, bias_grads)

def calc_metric(net: Net, val_input_vectors, val_target_vectors):
    """Return the mean prediction error of ``net`` over a validation set.

    :param net: net to evaluate (``calc_output`` is called once per sample)
    :param val_input_vectors: indexable collection of inputs, aligned with the targets
    :param val_target_vectors: iterable of expected outputs
    :return: ``np.mean`` of all ``predict_error`` values (NaN for an empty set)
    """
    # Collect in a list and flatten once; np.append inside the loop copied the
    # whole array on every iteration.
    errors = []
    for idx, target_vector in enumerate(val_target_vectors):
        output = net.calc_output(val_input_vectors[idx])
        errors.append(predict_error(output, target_vector))

    return np.mean(np.asarray(errors, dtype=float).ravel())

# train the net on every item of the given data (returns metric on validation)
def training(net: Net, optimizer, gradient_change_history: list, training_data, target_vectors, val_input_vectors, val_target_vectors):
    """Train on each (item, target) pair once, then score the net on the validation set.

    :param net: net being trained
    :param optimizer: optimizer forwarded to ``training_iteration``
    :param gradient_change_history: list extended in place with one gradient norm per item
    :param training_data: iterable of training inputs
    :param target_vectors: iterable of expected outputs, paired with ``training_data``
    :param val_input_vectors: validation inputs for ``calc_metric``
    :param val_target_vectors: validation targets for ``calc_metric``
    :return: result of ``calc_metric`` after the pass
    """
    for sample, expected in zip(training_data, target_vectors):
        gradient_change_history.append(training_iteration(sample, expected, net, optimizer))

    return calc_metric(net, val_input_vectors, val_target_vectors)

# init weights and biases with start values
def init_net_parameters(net: Net):
    """Replace the weights and biases of every non-input layer with random start values.

    Values are drawn with ``numpy.random.rand`` and mapped linearly onto the
    interval (-0.8, 0.8); the existing arrays only supply the shapes.

    :param net: net whose layers 1..layers_count-1 are re-initialised in place
    """
    low, high = -0.8, 0.8
    span = high - low

    for layer_idx in range(1, net.layers_count):
        layer = net.layers[layer_idx]
        weights_shape = layer.weights.shape
        biases_shape = layer.biases.shape

        # Weights are drawn before biases so the random stream is consumed in a fixed order.
        layer.weights = rand.rand(weights_shape[0], weights_shape[1]) * span + low
        layer.biases = rand.rand(biases_shape[0], biases_shape[1]) * span + low

## Generation of training data

In [152]:
# Number of pi/2-wide segments generated on each side of zero.
periods_count = 2
# Margin that keeps sampled points away from the segment boundaries.
epsilon = 1e-5
# Number of sample points per segment.
st_points_count = 1000

# Directory where the generated data set is stored.
dst_dir = "../data_sets/"

In [153]:
# Write "x cos(x)" pairs, one per line, for x in (-(pi/2) * periods_count, (pi/2) * periods_count).
# The context manager guarantees the file is closed even if a write fails.
with open(dst_dir + "cos_values_data", "w") as training_data_file:
    # 1 - write the x = 0 point, then the interior boundaries between
    # pi/2-wide segments together with their mirrored negative values
    training_data_file.write(str(0) + " " + str(1) + "\n")

    for i in range(periods_count - 1):
        x = (np.pi / 2) * (i + 1)

        val1 = np.cos(x)
        val2 = np.cos(-x)
        training_data_file.write(str(x) + " " + str(val1) + "\n" + str(-x) + " " + str(val2) + "\n")

    # 2 - write the other points: a grid strictly inside the first segment,
    # shifted into every segment and mirrored to the negative side
    st_points = np.linspace(epsilon, np.pi / 2 - epsilon, st_points_count)

    for st_point in st_points:
        for i in range(periods_count):
            x = st_point + (np.pi / 2) * i
            val1 = np.cos(x)
            val2 = np.cos(-x)
            training_data_file.write(str(x) + " " + str(val1) + "\n" + str(-x) + " " + str(val2) + "\n")

## Load training data

In [154]:
# Read every "x value" line; the context manager closes the file, which the
# previous version never did.
with open(dst_dir + "cos_values_data", "r") as training_data_file:
    lines = training_data_file.readlines()

# sort the lines by argument (the data is ordered here, not shuffled)
print("before sorting: " + str(lines[0:5]))
lines = sorted(lines, key=lambda l: float(l.split()[0]))
print("after sorting: " + str(lines[0:5]))
print("-------------------------")

# get training data items and target vectors; values are collected in plain
# lists and converted once instead of growing an array with np.append per line
args = []
values = []
for line in lines:
    arg, value = line.split()
    arg = float(arg)
    value = float(value)

    args.append(arg)
    values.append(value)
items_count = len(lines)

# reshape data to array of 1-d vectors
training_data = np.array(args, dtype=float).reshape((items_count, 1))
target_vectors = np.array(values, dtype=float).reshape((items_count, 1))

print("first 5 training items: \n" + str(training_data[0:5]))
print("first 5 training target vectors: \n" + str(target_vectors[0:5]))

before sorting: ['0 1\n', '1.5707963267948966 6.123233995736766e-17\n', '-1.5707963267948966 6.123233995736766e-17\n', '1e-05 0.99999999995\n', '-1e-05 0.99999999995\n']
after sorting: ['-3.141582653589793 -0.99999999995\n', '-3.1400103049143224 -0.9999987480865958\n', '-3.1384379562388522 -0.9999950239464388\n', '-3.136865607563382 -0.9999888275387362\n', '-3.135293258887912 -0.999980158878807\n']
-------------------------
first 5 training items: 
[[-3.14158265]
 [-3.1400103 ]
 [-3.13843796]
 [-3.13686561]
 [-3.13529326]]
first 5 training target vectors: 
[[-1.        ]
 [-0.99999875]
 [-0.99999502]
 [-0.99998883]
 [-0.99998016]]


## Split training data into batches

In [155]:
batch_size = 100
data_size = len(training_data)
rem = data_size % batch_size
# Ceiling division: a partially filled trailing batch still counts as a batch.
batch_count = data_size // batch_size + (1 if rem != 0 else 0)

training_data_batches = []
target_vectors_batches = []

for batch_idx in range(batch_count):
    start = batch_idx * batch_size
    # The final batch takes everything that is left.
    stop = None if batch_idx == batch_count - 1 else start + batch_size
    training_data_batches.append(training_data[start:stop])
    target_vectors_batches.append(target_vectors[start:stop])

# Preview one batch of inputs and targets.
separator = "---------"
idx = 1
print("batches' count: " + str(batch_count))
print(separator)
print("training data batch " + str(idx) + ":\n" + str(training_data_batches[idx]))
print("size=" + str(len(training_data_batches[idx])))

print(separator)
print("target vectors batch " + str(idx) + ":\n" + str(target_vectors_batches[idx]))
print("size=" + str(len(target_vectors_batches[idx])))

batches' count: 41
---------
training data batch 1:
[[-2.98434779]
 [-2.98277544]
 [-2.98120309]
 [-2.97963074]
 [-2.97805839]
 [-2.97648604]
 [-2.97491369]
 [-2.97334135]
 [-2.971769  ]
 [-2.97019665]
 [-2.9686243 ]
 [-2.96705195]
 [-2.9654796 ]
 [-2.96390725]
 [-2.9623349 ]
 [-2.96076256]
 [-2.95919021]
 [-2.95761786]
 [-2.95604551]
 [-2.95447316]
 [-2.95290081]
 [-2.95132846]
 [-2.94975612]
 [-2.94818377]
 [-2.94661142]
 [-2.94503907]
 [-2.94346672]
 [-2.94189437]
 [-2.94032202]
 [-2.93874967]
 [-2.93717733]
 [-2.93560498]
 [-2.93403263]
 [-2.93246028]
 [-2.93088793]
 [-2.92931558]
 [-2.92774323]
 [-2.92617089]
 [-2.92459854]
 [-2.92302619]
 [-2.92145384]
 [-2.91988149]
 [-2.91830914]
 [-2.91673679]
 [-2.91516444]
 [-2.9135921 ]
 [-2.91201975]
 [-2.9104474 ]
 [-2.90887505]
 [-2.9073027 ]
 [-2.90573035]
 [-2.904158  ]
 [-2.90258565]
 [-2.90101331]
 [-2.89944096]
 [-2.89786861]
 [-2.89629626]
 [-2.89472391]
 [-2.89315156]
 [-2.89157921]
 [-2.89000687]
 [-2.88843452]
 [-2.88686217]
 [-

# Create net and train it

In [156]:
print_flag = False
# net parameters
net = Net()
hidden_layers_size = 10
hidden_layers_count = 2
activation = th

# Topology: one input neuron, the hidden layers, one linear output neuron.
net.insert_layer(0, 1, ident)
for hidden_idx in range(1, hidden_layers_count + 1):
    net.insert_layer(hidden_idx, hidden_layers_size, activation)
net.insert_layer(hidden_layers_count + 1, 1, ident)

# Zero placeholders only fix the parameter shapes; the values are replaced below.
for hidden_idx in range(1, hidden_layers_count + 1):
    hidden_layer = net.layers[hidden_idx]
    # The first hidden layer is fed by the single input neuron.
    fan_in = 1 if hidden_idx == 1 else hidden_layers_size
    hidden_layer.weights = np.zeros((fan_in, hidden_layers_size))
    hidden_layer.biases = np.zeros((1, hidden_layers_size))

net.init_weights(net.layers_count - 1, np.zeros((hidden_layers_size, 1)))
net.init_biases(net.layers_count - 1, np.zeros((1, 1)))

# init all net parameters with random start values
# (init_net_parameters draws them uniformly from (-0.8, 0.8), not from a normal distribution)
init_net_parameters(net)

if print_flag:
    net.print_net_config()

In [157]:
# Stop as soon as the validation metric drops to this value.
min_cost = 0.05
saved_nets = []
optimizer = Adam()
grad_change_history = []

# A single pass over the batches: the "epoch" label in the log below counts
# batches, and the metric is evaluated on the full training set.
for i in range(batch_count):
    training_batch, target_vectors_batch = training_data_batches[i], target_vectors_batches[i]

    cost = training(
        net, optimizer, grad_change_history,
        training_batch, target_vectors_batch,
        training_data, target_vectors,
    )

    # Keep an independent snapshot so the best net can be picked afterwards.
    snapshot = copy.deepcopy(net)
    saved_nets.append((snapshot, cost))

    print("epoch " + str(i) + ": " + str(cost))

    if cost <= min_cost:
        break

epoch 0: 0.7998041372863636
epoch 1: 0.7339307231578642
epoch 2: 0.6793693856776035
epoch 3: 0.6371806759713707
epoch 4: 0.6092125726214529
epoch 5: 0.5919018778488913
epoch 6: 0.5813295558559957
epoch 7: 0.575074595424393
epoch 8: 0.5715440332562358
epoch 9: 0.5696871843045146
epoch 10: 0.568825104973956
epoch 11: 0.5685617879309549
epoch 12: 0.5685597381347711
epoch 13: 0.5689142613367201
epoch 14: 0.5698976593963266
epoch 15: 0.571248084854155
epoch 16: 0.5727600779993103
epoch 17: 0.5743182164058607
epoch 18: 0.5758823632008206
epoch 19: 0.5774644378478103
epoch 20: 0.5790612686107566
epoch 21: 0.5805708461609945
epoch 22: 0.5817556428237723
epoch 23: 0.5823293045426402
epoch 24: 0.5823181222251792
epoch 25: 0.5806678554994635
epoch 26: 0.5767077681346174
epoch 27: 0.5711568773777068
epoch 28: 0.5644127949433445
epoch 29: 0.5569455461056869
epoch 30: 0.5494051455337892
epoch 31: 0.5423818188170144
epoch 32: 0.536597478796291
epoch 33: 0.5326948352699669
epoch 34: 0.5311457599803527

## Plot gradient change history

In [158]:
y = np.array(grad_change_history)
# One x value per recorded training iteration. Sizing the axis from the
# training set breaks the plot when training stops early, because fewer
# gradient norms than training items have been recorded.
x = np.arange(1, len(y) + 1)

fig = px.line(x=x, y=y, labels={'x':'training iteration', 'y':'grad norm'})
fig.show()

## Get best net config

In [159]:
# Pick the saved (net, cost) pair with the lowest cost without shadowing the
# builtin min() and without relying on a magic upper bound for the cost.
if not saved_nets:
    raise ValueError("no saved nets to choose from; run the training cell first")

# NaN costs are ranked last so they are never preferred over a real value;
# among equal costs the earliest saved net wins.
result = min(saved_nets, key=lambda saved: float("inf") if np.isnan(saved[1]) else saved[1])
result_net = result[0]

# check net cost function
print("metric of the best net - " + str(calc_metric(result_net, training_data, target_vectors)))

if print_flag:
    result_net.print_net_config()

metric of the best net - 0.5311457599803527


## Plot actual cos values and predicted cos values

In [160]:
comparison_graphic = go.Figure()

# Flatten the (data_size, 1) column of arguments into a 1-D axis.
x = training_data.reshape((1, data_size))[0]
actual_y = np.cos(x)
# Evaluate the selected net on every training argument, one item at a time.
predicted_y = np.array([result_net.calc_output([item])[0][0] for item in training_data])

# Draw both curves over the same x axis.
for series, label in ((actual_y, 'actual graphic'), (predicted_y, 'predicted')):
    comparison_graphic.add_trace(go.Scatter(x=x, y=series, mode='lines', name=label))
comparison_graphic.show()