layer.go

package neural

import (
	"fmt"

	"github.com/gonum/matrix/mat64"
	"github.com/milosgajdos83/go-neural/pkg/config"
	"github.com/milosgajdos83/go-neural/pkg/helpers"
	"github.com/milosgajdos83/go-neural/pkg/matrix"
)

const (
	// INPUT is the input network layer
	INPUT LayerKind = iota + 1
	// HIDDEN is a hidden network layer
	HIDDEN
	// OUTPUT is the output network layer
	OUTPUT
)

// ActivFunc defines a neuron activation function
type ActivFunc func(int, int, float64) float64

// activations maps activation function names to their actual implementations
var activations = map[string]map[string]ActivFunc{
	"sigmoid": {
		"act":  matrix.SigmoidMx,
		"grad": matrix.SigmoidGradMx,
	},
	"softmax": {
		"act":  matrix.ExpMx,
		"grad": matrix.SigmoidGradMx,
	},
	"tanh": {
		"act":  matrix.TanhMx,
		"grad": matrix.TanhGradMx,
	},
	"relu": {
		"act":  matrix.ReluMx,
		"grad": matrix.ReluGradMx,
	},
}
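
// Note on softmax: its "act" entry (matrix.ExpMx) only exponentiates the
// input; the per-row normalization that completes the softmax is applied in
// FwdOut. Reusing the sigmoid gradient here is consistent with the usual
// softmax and cross-entropy pairing, in which the output delta reduces to
// (output - target).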

// layerKind maps string representations to LayerKind
var layerKind = map[string]LayerKind{
	"input":  INPUT,
	"hidden": HIDDEN,
	"output": OUTPUT,
}

// LayerKind defines the kind of a neural network layer.
// There are three kinds available: INPUT, HIDDEN and OUTPUT.
type LayerKind uint

// String implements the fmt.Stringer interface for readable LayerKind printing
func (l LayerKind) String() string {
	switch l {
	case INPUT:
		return "INPUT"
	case HIDDEN:
		return "HIDDEN"
	case OUTPUT:
		return "OUTPUT"
	default:
		return "UNKNOWN"
	}
}
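
// For example, fmt.Println(HIDDEN) prints "HIDDEN", while any value outside
// the three defined kinds prints "UNKNOWN".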

// Layer represents a neural network layer.
type Layer struct {
	// id is the Layer's unique identifier within the network
	id string
	// kind is the layer kind: input, hidden or output
	kind LayerKind
	// weights matrix holds the layer's neuron weights, one neuron per row
	weights *mat64.Dense
	// deltas matrix holds the output deltas used for backpropagation
	deltas *mat64.Dense
	// act is the neuron activation function
	act ActivFunc
	// actGrad is the derivative of the neuron activation function
	actGrad ActivFunc
	// meta contains layer metadata: currently only the name of the activation function
	meta string
}

// NewLayer creates a new neural network layer and returns it.
// Layer weights are initialized to uniformly distributed random values.
// NewLayer fails with an error if the supplied layer configuration is invalid.
func NewLayer(c *config.LayerConfig, layerIn int) (*Layer, error) {
	// layer input size must be a positive integer
	if layerIn <= 0 {
		return nil, fmt.Errorf("Layer input must be a positive integer: %d", layerIn)
	}
	// layer size must be a positive integer
	if c.Size <= 0 {
		return nil, fmt.Errorf("Layer size must be a positive integer: %d", c.Size)
	}
	// layer kind must be valid
	if _, ok := layerKind[c.Kind]; !ok {
		return nil, fmt.Errorf("Invalid layer kind requested: %s", c.Kind)
	}
	layer := &Layer{}
	layer.id = helpers.PseudoRandString(10)
	layer.kind = layerKind[c.Kind]
	// INPUT layer has neither a weights matrix nor activation functions
	if layer.kind != INPUT {
		// set the activation function and its gradient
		activFunc, ok := activations[c.NeurFn.Activation]
		if !ok {
			return nil, fmt.Errorf("Unsupported activation function: %s",
				c.NeurFn.Activation)
		}
		layer.act = activFunc["act"]
		// tanh output must be rescaled when used in the OUTPUT layer
		if c.NeurFn.Activation == "tanh" && layer.kind == OUTPUT {
			layer.act = matrix.TanhOutMx
		}
		layer.actGrad = activFunc["grad"]
		layer.meta = c.NeurFn.Activation
		layerOut := c.Size
		// initialize weights to random values; the extra column holds bias weights
		var err error
		layer.weights, err = matrix.MakeRandMx(layerOut, layerIn+1, 0.0, 1.0)
		if err != nil {
			return nil, err
		}
		// initialize deltas to zero values
		layer.deltas = mat64.NewDense(layerOut, layerIn+1, nil)
	}
	return layer, nil
}
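
// A minimal usage sketch (illustrative values; the config fields Kind, Size
// and NeurFn.Activation are the ones consumed above, everything else about
// how the config is built is assumed):
//
//	var c *config.LayerConfig // populated elsewhere, e.g. parsed from a network manifest
//	// with c.Kind = "hidden", c.Size = 25 and c.NeurFn.Activation = "sigmoid",
//	// NewLayer(c, 4) returns a layer whose weights matrix is 25x5 (4 inputs + bias)
//	layer, err := NewLayer(c, 4)
//	if err != nil {
//		// invalid layer input, size, kind or activation function
//	}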

// ID returns the layer id
func (l Layer) ID() string {
	return l.id
}

// Kind returns the layer kind
func (l Layer) Kind() LayerKind {
	return l.kind
}

// Weights returns the layer's weights matrix
func (l *Layer) Weights() *mat64.Dense {
	return l.weights
}

// SetWeights sets the neural network layer weights.
// It fails with an error if the supplied weights matrix is nil, if its
// dimensions differ from those of the existing layer weights, or if the
// layer is an INPUT layer: INPUT layers have no weights matrix.
func (l *Layer) SetWeights(w *mat64.Dense) error {
	// INPUT layer has no weights
	if l.kind == INPUT {
		return fmt.Errorf("Can't set weights matrix of %s layer", l.kind)
	}
	// we can't set weights to nil
	if w == nil {
		return fmt.Errorf("Network weights can't be nil")
	}
	// weights dimensions must stay the same
	wr, wc := w.Dims()
	lr, lc := l.weights.Dims()
	if wr != lr || wc != lc {
		return fmt.Errorf("Dimension mismatch. Current: %d x %d Supplied: %d x %d",
			lr, lc, wr, wc)
	}
	l.weights = w
	// we must re-allocate deltas too
	l.deltas = mat64.NewDense(wr, wc, nil)
	return nil
}
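
// A minimal usage sketch (illustrative values; assumes a layer created with
// NewLayer(c, 4) and c.Size = 25, so its weights matrix is 25x5):
//
//	w := mat64.NewDense(25, 5, nil) // e.g. weights restored from a checkpoint
//	if err := layer.SetWeights(w); err != nil {
//		// nil weights, dimension mismatch, or INPUT layer
//	}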

// Deltas returns the layer's output deltas matrix.
// The deltas matrix is initialized to zeros and only becomes non-zero once
// the backpropagation algorithm has been run.
func (l *Layer) Deltas() *mat64.Dense {
	return l.deltas
}

// FwdOut calculates the forward output of the network layer for the given input.
// If the layer is an INPUT layer, it returns the matrix supplied as an argument.
func (l *Layer) FwdOut(inputMx mat64.Matrix) (mat64.Matrix, error) {
	// if input is nil, return an error
	if inputMx == nil {
		return nil, fmt.Errorf("Can't calculate output for nil input")
	}
	// if it's an INPUT layer, the output is the input itself
	if l.kind == INPUT {
		return inputMx, nil
	}
	// input column dimension + bias must match the weights column dimension
	inRows, inCols := inputMx.Dims()
	_, wCols := l.weights.Dims()
	if inCols+1 != wCols {
		return nil, fmt.Errorf("Dimension mismatch. Weights: %d, Input: %d", wCols, inCols)
	}
	// add a bias column to the input
	biasInMx := matrix.AddBias(inputMx)
	// calculate the activation function inputs
	out := new(mat64.Dense)
	out.Mul(biasInMx, l.weights.T())
	// activate layer neurons
	out.Apply(l.act, out)
	// softmax activation only exponentiates in l.act: normalize each row by
	// its sum so that every sample's outputs form a probability distribution
	if l.meta == "softmax" {
		rowSums := matrix.RowSums(out)
		for i := 0; i < inRows; i++ {
			rowVec := out.RowView(i)
			rowVec.ScaleVec(1/rowSums[i], rowVec)
			out.SetRow(i, rowVec.RawVector().Data)
		}
	}
	return out, nil
}
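
// Worked example of the dimensions above (illustrative sizes): a batch of 3
// samples with 4 features each arrives as a 3x4 input matrix. AddBias extends
// it to 3x5, multiplying by the transposed 25x5 weights matrix of a 25-neuron
// layer gives 3x25 pre-activations, and applying the activation function
// element-wise yields the 3x25 layer output.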

// ActFn returns the layer activation function
func (l Layer) ActFn() ActivFunc {
	return l.act
}

// ActGrad returns the gradient function of the layer activation
func (l Layer) ActGrad() ActivFunc {
	return l.actGrad
}