mlp-R-cppad.cpp
#include "cppad/cppad.hpp"
#include <iomanip>
#include <math.h>
using namespace std;
using CppAD::AD;
#define N_SAMPLES 4
#define N_IN 2
#define N_OUT 1
#define LAYERS 2
#define ELEMENTS_LAYER1 2
#define ELEMENTS_LAYER2 1
#define ELEMENTS_LAYER_MAX 2
#define WEIGHTS_LAYER_MAX 6
#define WEIGHTS 3
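// Network layout: a 2-2-1 feed-forward net for XOR. Each neuron carries a
// bias plus one weight per input, so WEIGHTS = 1 + N_IN = 3 and the widest
// layer holds WEIGHTS_LAYER_MAX = ELEMENTS_LAYER1 * WEIGHTS = 6 weights.

// Per-layer shape: n neurons, each holding w weights (bias first).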
struct w_layer {
int n;
int w;};
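// Squared Euclidean norm of an n_x-vector; used below for the 0.5*||e||^2 loss.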
AD<double> magnitude_squared(int n_x, AD<double> *x) {
AD<double> r;
r = 0.0;
int j;
for (j = 0; j<n_x; j++) r += x[j]*x[j];
return r;}
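// One layer: each neuron computes an affine combination of the activities
// (weights[i*n_weights] is the bias) and applies the logistic sigmoid
// 1/(1+exp(-z)).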
void sum_layer_sigmoid(AD<double> *activities, int n_elements, int n_weights,
AD<double> *weights, AD<double> *out) {
int i, j;
for (i = 0; i<n_elements; i++) {
out[i] = weights[i*n_weights];
for (j = 0; j<n_weights-1; j++) {
out[i] += activities[j]*weights[i*n_weights+j+1];}
out[i] = 1.0/(exp(-1.0*out[i])+1.0);}}
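// Propagate an input through every layer, copying each layer's block of the
// flat ws_layers array into scratch storage and ping-ponging activations
// between temp_in and temp_out.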
void forward_pass(int n_ws_layers, AD<double> *ws_layers, int n_layers_format,
struct w_layer *layers_format, int n_in, AD<double> *in,
int n_out, AD<double> *out) {
int i, j, count;
AD<double> temp_in[ELEMENTS_LAYER_MAX];
AD<double> temp_out[ELEMENTS_LAYER_MAX];
AD<double> temp_weights[WEIGHTS_LAYER_MAX];
count = 0;
for (i = 0; i<n_in; i++) temp_in[i] = in[i];
for (i = 0; i<n_layers_format; i++) {
for (j = 0; j<layers_format[i].n*layers_format[i].w; j++){
temp_weights[j] = ws_layers[count];
count++;}
sum_layer_sigmoid(&temp_in[0], layers_format[i].n, layers_format[i].w,
&temp_weights[0], &temp_out[0]);
for (j = 0; j<layers_format[i].n; j++) temp_in[j] = temp_out[j];}
for (i = 0; i<n_out; i++) out[i] = temp_out[i];}
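// Sum-of-squares error over the four XOR patterns:
// E(w) = sum_i 0.5*||net(x_i; w) - t_i||^2.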
AD<double> error_on_dataset(int n_ws_layers, AD<double> *ws_layers,
int n_layers_format,
struct w_layer *layers_format) {
double xor_data[N_SAMPLES][N_IN+N_OUT] = {  // columns: input 1, input 2, target
    {0.0, 0.0, 0.0},
    {0.0, 1.0, 1.0},
    {1.0, 0.0, 1.0},
    {1.0, 1.0, 0.0}};
int i, j;
AD<double> error;
AD<double> in[N_IN];
AD<double> out[N_OUT], absolute_error[N_OUT];
error = 0.0;
for (i = 0; i<N_SAMPLES; i++) {
for (j = 0; j<N_IN; j++) in[j] = xor_data[i][j];
forward_pass(n_ws_layers, ws_layers, n_layers_format, layers_format, N_IN,
&in[0], N_OUT, &out[0]);
for (j = 0; j<N_OUT; j++) absolute_error[j] = out[j]-xor_data[i][j+N_IN];
error += 0.5*magnitude_squared(N_OUT, &absolute_error[0]);}
return error;}
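// Gradient of E(w) via CppAD reverse mode: re-record the tape over one
// evaluation of the error, then pull the seed derror[0] = 1 back through
// the tape with f.Reverse(1, .).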
void weight_gradient(int n_w0, AD<double> *ws, int n_layers,
struct w_layer *w0_format, double *grad_f) {
int i;
CppAD::vector< AD<double> > w0(n_w0);
CppAD::vector< AD<double> > error(1);
for (i = 0; i<n_w0; i++) w0[i] = ws[i];
CppAD::Independent(w0);
error[0] = error_on_dataset(n_w0, &w0[0], n_layers, w0_format);
CppAD::ADFun<double> f(w0, error);
CppAD::vector<double> dws(n_w0);
CppAD::vector<double> derror(1);
derror[0] = 1;
dws = f.Reverse(1, derror);
for (i = 0; i<n_w0; i++) grad_f[i] = dws[i];}
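// Plain ("vanilla") gradient descent: n steps of w -= eta*grad E(w),
// then print the final error.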
void vanilla(int n_w0, AD<double> *ws, int n_layers, struct w_layer *w0_format,
int n, double eta) {
int i, j;
AD<double> error;
double *grad_f;
grad_f = new double[n_w0];
for (i = 0; i<n; i++) {
weight_gradient(n_w0, ws, n_layers, w0_format, grad_f);
for (j = 0; j<n_w0; j++) ws[j] -= eta*grad_f[j];}
error = error_on_dataset(n_w0, &ws[0], n_layers, w0_format);
cout << setprecision(18) << error << endl;
delete[] grad_f;}
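// Build the 2-2-1 layout, seed the nine weights (biases start at zero),
// and train for 1000000 steps with learning rate 0.3.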
int main() {
int i, n_xor_ws0;
struct w_layer xor_ws0_format[LAYERS];
xor_ws0_format[0].n = ELEMENTS_LAYER1;
xor_ws0_format[1].n = ELEMENTS_LAYER2;
xor_ws0_format[0].w = WEIGHTS;
xor_ws0_format[1].w = WEIGHTS;
n_xor_ws0 = 0;
for (i = 0; i<LAYERS; i++) {
n_xor_ws0 += xor_ws0_format[i].n*xor_ws0_format[i].w;}
AD<double> *xor_ws0;
xor_ws0 = new AD<double>[n_xor_ws0];
xor_ws0[0] = 0.0;
xor_ws0[1] = -0.284227;
xor_ws0[2] = 1.16054;
xor_ws0[3] = 0.0;
xor_ws0[4] = 0.617194;
xor_ws0[5] = 1.30467;
xor_ws0[6] = 0.0;
xor_ws0[7] = -0.084395;
xor_ws0[8] = 0.648461;
vanilla(n_xor_ws0, xor_ws0, LAYERS, &xor_ws0_format[0], 1000000, 0.3);
delete[] xor_ws0;
return EXIT_SUCCESS;}
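
// A minimal way to build this, assuming CppAD's headers are on the include
// path (CppAD is header-only for plain AD use like this; the install prefix
// below is a placeholder):
//   g++ -std=c++11 -I/path/to/cppad/include mlp-R-cppad.cpp -o mlp-xor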