From 17ee4bf995589390eaa651ad86e29b27390c0574 Mon Sep 17 00:00:00 2001 From: Milan Curcic Date: Wed, 20 Feb 2019 16:50:49 -0500 Subject: [PATCH 1/7] place activation interface in activation module --- src/lib/mod_activation.f90 | 9 +++++++++ src/lib/mod_network.f90 | 11 ++--------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/lib/mod_activation.f90 b/src/lib/mod_activation.f90 index 54127217..083c6022 100644 --- a/src/lib/mod_activation.f90 +++ b/src/lib/mod_activation.f90 @@ -8,12 +8,21 @@ module mod_activation private + public :: activation_function public :: gaussian, gaussian_prime public :: relu, relu_prime public :: sigmoid, sigmoid_prime public :: step, step_prime public :: tanhf, tanh_prime + interface + pure function activation_function(x) + import :: rk + real(rk), intent(in) :: x(:) + real(rk) :: activation_function(size(x)) + end function activation_function + end interface + contains pure function gaussian(x) result(res) diff --git a/src/lib/mod_network.f90 b/src/lib/mod_network.f90 index dfc966df..b104c81c 100644 --- a/src/lib/mod_network.f90 +++ b/src/lib/mod_network.f90 @@ -1,6 +1,7 @@ module mod_network - use mod_activation, only: gaussian, gaussian_prime,& + use mod_activation, only: activation_function,& + gaussian, gaussian_prime,& relu, relu_prime,& sigmoid, sigmoid_prime,& step, step_prime,& @@ -46,14 +47,6 @@ module mod_network module procedure :: net_constructor endinterface network_type - interface - pure function activation_function(x) - import :: rk - real(rk), intent(in) :: x(:) - real(rk) :: activation_function(size(x)) - end function activation_function - end interface - contains type(network_type) function net_constructor(dims, activation) result(net) From 434c5e295a7441a1c5dbe5fdf4a9ed7ee03c6f8e Mon Sep 17 00:00:00 2001 From: Milan Curcic Date: Wed, 20 Feb 2019 16:51:27 -0500 Subject: [PATCH 2/7] add activation function component to layer type; not used just yet --- src/lib/mod_layer.f90 | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/lib/mod_layer.f90 b/src/lib/mod_layer.f90 index a5a4a19b..92386e0e 100644 --- a/src/lib/mod_layer.f90 +++ b/src/lib/mod_layer.f90 @@ -2,6 +2,7 @@ module mod_layer ! Defines the layer type and its methods. + use mod_activation, only: activation_function use mod_kinds, only: ik, rk use mod_random, only: randn @@ -15,6 +16,8 @@ module mod_layer real(rk), allocatable :: b(:) ! biases real(rk), allocatable :: w(:,:) ! weights real(rk), allocatable :: z(:) ! arg. to activation function + procedure(activation_function), pointer, nopass :: activation => null() + procedure(activation_function), pointer, nopass :: activation_prime => null() end type layer_type type :: array1d From 16c50ff199832277ca9b50e905142938fe970bc3 Mon Sep 17 00:00:00 2001 From: Milan Curcic Date: Wed, 20 Feb 2019 17:02:35 -0500 Subject: [PATCH 3/7] add set_activation method to layer class; not used just yet --- src/lib/mod_layer.f90 | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/src/lib/mod_layer.f90 b/src/lib/mod_layer.f90 index 92386e0e..06d9a568 100644 --- a/src/lib/mod_layer.f90 +++ b/src/lib/mod_layer.f90 @@ -2,7 +2,7 @@ module mod_layer ! Defines the layer type and its methods. - use mod_activation, only: activation_function + use mod_activation use mod_kinds, only: ik, rk use mod_random, only: randn @@ -18,6 +18,8 @@ module mod_layer real(rk), allocatable :: z(:) ! arg. 
to activation function procedure(activation_function), pointer, nopass :: activation => null() procedure(activation_function), pointer, nopass :: activation_prime => null() + contains + procedure, public, pass(self) :: set_activation end type layer_type type :: array1d @@ -113,4 +115,32 @@ subroutine dw_co_sum(dw) end do end subroutine dw_co_sum + pure subroutine set_activation(self, activation) + ! Sets the activation function. Input string must match one of + ! provided activation functions, otherwise it defaults to sigmoid. + ! If activation not present, defaults to sigmoid. + class(layer_type), intent(in out) :: self + character(len=*), intent(in) :: activation + select case(trim(activation)) + case('gaussian') + self % activation => gaussian + self % activation_prime => gaussian_prime + case('relu') + self % activation => relu + self % activation_prime => relu_prime + case('sigmoid') + self % activation => sigmoid + self % activation_prime => sigmoid_prime + case('step') + self % activation => step + self % activation_prime => step_prime + case('tanh') + self % activation => tanhf + self % activation_prime => tanh_prime + case default + self % activation => sigmoid + self % activation_prime => sigmoid_prime + end select + end subroutine set_activation + end module mod_layer From 69ec56390919903103bff777a10e0c4bf2f01f6e Mon Sep 17 00:00:00 2001 From: Milan Curcic Date: Wed, 20 Feb 2019 17:07:58 -0500 Subject: [PATCH 4/7] call set_activation for each layer --- src/lib/mod_network.f90 | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/lib/mod_network.f90 b/src/lib/mod_network.f90 index b104c81c..30e29f18 100644 --- a/src/lib/mod_network.f90 +++ b/src/lib/mod_network.f90 @@ -204,6 +204,8 @@ pure subroutine set_activation(self, activation) ! If activation not present, defaults to sigmoid. 
class(network_type), intent(in out) :: self character(len=*), intent(in) :: activation + integer :: n + select case(trim(activation)) case('gaussian') self % activation => gaussian @@ -224,6 +226,11 @@ pure subroutine set_activation(self, activation) self % activation => sigmoid self % activation_prime => sigmoid_prime end select + + do concurrent(n = 1:size(self % layers)) + call self % layers(n) % set_activation(activation) + end do + end subroutine set_activation subroutine sync(self, image) From 9b43a1b79d9a69b330e0d59eec7d7db780cf2379 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Wed, 24 Apr 2019 14:09:35 -0400 Subject: [PATCH 5/7] switch to per-layer activation functions; tests are TODO --- src/lib/mod_network.f90 | 47 +++++++---------------------------------- 1 file changed, 8 insertions(+), 39 deletions(-) diff --git a/src/lib/mod_network.f90 b/src/lib/mod_network.f90 index 30e29f18..4d3d7aa0 100644 --- a/src/lib/mod_network.f90 +++ b/src/lib/mod_network.f90 @@ -1,11 +1,5 @@ module mod_network - use mod_activation, only: activation_function,& - gaussian, gaussian_prime,& - relu, relu_prime,& - sigmoid, sigmoid_prime,& - step, step_prime,& - tanhf, tanh_prime use mod_kinds, only: ik, rk use mod_layer, only: array1d, array2d, db_init, dw_init,& db_co_sum, dw_co_sum, layer_type @@ -20,8 +14,6 @@ module mod_network type(layer_type), allocatable :: layers(:) integer, allocatable :: dims(:) - procedure(activation_function), pointer, nopass :: activation => null() - procedure(activation_function), pointer, nopass :: activation_prime => null() contains @@ -95,13 +87,13 @@ pure subroutine backprop(self, y, dw, db) call dw_init(dw, dims) n = size(dims) - db(n) % array = (layers(n) % a - y) * self % activation_prime(layers(n) % z) + db(n) % array = (layers(n) % a - y) * self % layers(n) % activation_prime(layers(n) % z) dw(n-1) % array = matmul(reshape(layers(n-1) % a, [dims(n-1), 1]),& reshape(db(n) % array, [1, dims(n)])) do n = size(dims) - 1, 2, -1 db(n) % array = matmul(layers(n) % w, db(n+1) % array)& - * self % activation_prime(layers(n) % z) + * self % layers(n) % activation_prime(layers(n) % z) dw(n-1) % array = matmul(reshape(layers(n-1) % a, [dims(n-1), 1]),& reshape(db(n) % array, [1, dims(n)])) end do @@ -120,7 +112,7 @@ pure subroutine fwdprop(self, x) layers(1) % a = x do n = 2, size(layers) layers(n) % z = matmul(transpose(layers(n-1) % w), layers(n-1) % a) + layers(n) % b - layers(n) % a = self % activation(layers(n) % z) + layers(n) % a = self % layers(n) % activation(layers(n) % z) end do end associate end subroutine fwdprop @@ -174,9 +166,9 @@ pure function output(self, x) result(a) real(rk), allocatable :: a(:) integer(ik) :: n associate(layers => self % layers) - a = self % activation(matmul(transpose(layers(1) % w), x) + layers(2) % b) + a = self % layers(2) % activation(matmul(transpose(layers(1) % w), x) + layers(2) % b) do n = 3, size(layers) - a = self % activation(matmul(transpose(layers(n-1) % w), a) + layers(n) % b) + a = self % layers(n) % activation(matmul(transpose(layers(n-1) % w), a) + layers(n) % b) end do end associate end function output @@ -199,38 +191,15 @@ subroutine save(self, filename) end subroutine save pure subroutine set_activation(self, activation) - ! Sets the activation functions. Input string must match one of - ! provided activation functions, otherwise it defaults to sigmoid. - ! If activation not present, defaults to sigmoid. + ! A thin wrapper around layer % set_activation(). + ! 
This method can be used to set an activation function + ! for all layers at once. class(network_type), intent(in out) :: self character(len=*), intent(in) :: activation integer :: n - - select case(trim(activation)) - case('gaussian') - self % activation => gaussian - self % activation_prime => gaussian_prime - case('relu') - self % activation => relu - self % activation_prime => relu_prime - case('sigmoid') - self % activation => sigmoid - self % activation_prime => sigmoid_prime - case('step') - self % activation => step - self % activation_prime => step_prime - case('tanh') - self % activation => tanhf - self % activation_prime => tanh_prime - case default - self % activation => sigmoid - self % activation_prime => sigmoid_prime - end select - do concurrent(n = 1:size(self % layers)) call self % layers(n) % set_activation(activation) end do - end subroutine set_activation subroutine sync(self, image) From 0f1946c5a3ca67017b5b783b62de671a11618690 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Fri, 3 May 2019 16:54:06 -0400 Subject: [PATCH 6/7] testing activation functions per-layer --- CMakeLists.txt | 2 +- src/tests/test_set_activation_function.f90 | 63 ++++++++++++++++++++++ 2 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 src/tests/test_set_activation_function.f90 diff --git a/CMakeLists.txt b/CMakeLists.txt index 8b639545..2a616136 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -72,7 +72,7 @@ string(REGEX REPLACE "^ | $" "" LIBS "${LIBS}") # tests enable_testing() -foreach(execid mnist network_save network_sync) +foreach(execid mnist network_save network_sync set_activation_function) add_executable(test_${execid} src/tests/test_${execid}.f90) target_link_libraries(test_${execid} neural ${LIBS}) add_test(test_${execid} bin/test_${execid}) diff --git a/src/tests/test_set_activation_function.f90 b/src/tests/test_set_activation_function.f90 new file mode 100644 index 00000000..479055f0 --- /dev/null +++ b/src/tests/test_set_activation_function.f90 @@ -0,0 +1,63 @@ +program test_set_activation_function + + ! This program will test whether per-network and per-layer + ! setting of activation functions works as expected. + ! First we create an array of random variables. + ! Then we set different activation functions to different + ! layers in the network. + ! Finally, we test whether each function produces same + ! values as the activation functions set in the layers. + + use mod_activation + use mod_network, only: network_type + use mod_random, only: randn + + implicit none + type(network_type) :: net + real, allocatable :: x(:) + integer :: n + logical, allocatable :: tests(:) + + tests = [logical ::] + + x = randn(100) + + ! the network will be created with + ! sigmoid activation functions for all layers + net = network_type([1, 1, 1, 1, 1]) + + do n = 1, size(net % layers) + tests = [tests, all(sigmoid(x) == net % layers(n) % activation(x))] + tests = [tests, all(sigmoid_prime(x) == net % layers(n) % activation_prime(x))] + end do + + ! 
now set the various functions for other layers
+  call net % layers(2) % set_activation('gaussian')
+  call net % layers(3) % set_activation('step')
+  call net % layers(4) % set_activation('tanh')
+  call net % layers(5) % set_activation('relu')
+
+  tests = [tests, all(sigmoid(x) == net % layers(1) % activation(x))]
+  tests = [tests, all(sigmoid_prime(x) == net % layers(1) % activation_prime(x))]
+
+  tests = [tests, all(gaussian(x) == net % layers(2) % activation(x))]
+  tests = [tests, all(gaussian_prime(x) == net % layers(2) % activation_prime(x))]
+
+  tests = [tests, all(step(x) == net % layers(3) % activation(x))]
+  tests = [tests, all(step_prime(x) == net % layers(3) % activation_prime(x))]
+
+  tests = [tests, all(tanhf(x) == net % layers(4) % activation(x))]
+  tests = [tests, all(tanh_prime(x) == net % layers(4) % activation_prime(x))]
+
+  tests = [tests, all(relu(x) == net % layers(5) % activation(x))]
+  tests = [tests, all(relu_prime(x) == net % layers(5) % activation_prime(x))]
+
+  print *, tests
+
+  if (all(tests)) then
+    print *, 'All tests passed.'
+  else
+    error stop 'some tests failed.'
+  end if
+
+end program test_set_activation_function

From ea475426b53b17ec8438989c6b07f1a28b528dac Mon Sep 17 00:00:00 2001
From: milancurcic
Date: Fri, 3 May 2019 16:54:25 -0400
Subject: [PATCH 7/7] example on setting activation for a single layer

---
 README.md | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 4055cde5..6fd3b5df 100644
--- a/README.md
+++ b/README.md
@@ -118,7 +118,8 @@ cmake .. -DCMAKE_BUILD_TYPE=debug
 
 ### Creating a network
 
-Creating a network with 3 layers (one hidden layer)
+Creating a network with 3 layers,
+one input, one hidden, and one output layer, with 3, 5, and 2 neurons each:
 
 ```fortran
 use mod_network, only: network_type
 type(network_type) :: net
 net = network_type([3, 5, 2])
 ```
 
@@ -127,8 +128,10 @@
+### Setting the activation function
+
 By default, the network will be initialized with the sigmoid activation
-function. You can specify a different activation function:
+function for all layers. You can specify a different activation function:
 
 ```fortran
 net = network_type([3, 5, 2], activation='tanh')
 ```
 
 or set it after the fact:
 
 ```fortran
 net = network_type([3, 5, 2])
 call net % set_activation('tanh')
 ```
 
@@ -141,6 +144,16 @@
+It's possible to set different activation functions for each layer.
+For example, this snippet will create a network with Gaussian
+activation functions for all layers except the output layer,
+and a ReLU function for the output layer:
+
+```fortran
+net = network_type([3, 5, 2], activation='gaussian')
+call net % layers(3) % set_activation('relu')
+```
+
 Available activation function options are: `gaussian`, `relu`, `sigmoid`,
 `step`, and `tanh`. See
 [mod_activation.f90](https://github.com/modern-fortran/neural-fortran/blob/master/src/lib/mod_activation.f90)
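
As a wrap-up, the sketch below pulls the pieces from this patch series together: build a small network, override the activation of a single layer, and run one forward pass. It is a minimal, illustrative program rather than part of the patches: it assumes `output` is available as a type-bound method of `network_type` (as the `output` function in `mod_network` suggests), that the default `real` kind matches the library's working precision `rk` (the same assumption the test program above makes), and the input values are arbitrary.

```fortran
program per_layer_activation_sketch
  ! Minimal sketch: a 3-5-2 network with tanh activations everywhere
  ! except the output layer, which is switched to sigmoid, followed by
  ! a single forward pass on an arbitrary input vector.
  use mod_network, only: network_type
  implicit none
  type(network_type) :: net
  real, allocatable :: y(:)

  ! All layers start with the tanh activation function.
  net = network_type([3, 5, 2], activation='tanh')

  ! Override the activation of the output layer only.
  call net % layers(3) % set_activation('sigmoid')

  ! Evaluate the (untrained) network on an arbitrary input.
  y = net % output([0.1, 0.2, 0.3])
  print *, y

end program per_layer_activation_sketch
```

Any of the names handled by the `select case` in `mod_layer` (`gaussian`, `relu`, `sigmoid`, `step`, `tanh`) could be used in place of `sigmoid` here; unrecognized names fall back to sigmoid, as the `case default` branch shows.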