diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8b639545..2a616136 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -72,7 +72,7 @@ string(REGEX REPLACE "^ | $" "" LIBS "${LIBS}")
 
 # tests
 enable_testing()
-foreach(execid mnist network_save network_sync)
+foreach(execid mnist network_save network_sync set_activation_function)
   add_executable(test_${execid} src/tests/test_${execid}.f90)
   target_link_libraries(test_${execid} neural ${LIBS})
   add_test(test_${execid} bin/test_${execid})
diff --git a/README.md b/README.md
index 4055cde5..6fd3b5df 100644
--- a/README.md
+++ b/README.md
@@ -118,7 +118,8 @@ cmake .. -DCMAKE_BUILD_TYPE=debug
 
 ### Creating a network
 
-Creating a network with 3 layers (one hidden layer)
+Creating a network with 3 layers,
+one input, one hidden, and one output layer,
 with 3, 5, and 2 neurons each:
 
 ```fortran
@@ -127,8 +128,10 @@ type(network_type) :: net
 net = network_type([3, 5, 2])
 ```
 
+### Setting the activation function
+
 By default, the network will be initialized with the sigmoid activation
-function. You can specify a different activation function:
+function for all layers. You can specify a different activation function:
 
 ```fortran
 net = network_type([3, 5, 2], activation='tanh')
@@ -141,6 +144,16 @@ net = network_type([3, 5, 2])
 call net % set_activation('tanh')
 ```
 
+It's possible to set a different activation function for each layer.
+For example, this snippet will create a network with Gaussian
+activation functions for all layers except the output layer,
+and a ReLU function for the output layer:
+
+```fortran
+net = network_type([3, 5, 2], activation='gaussian')
+call net % layers(3) % set_activation('relu')
+```
+
 Available activation function options are: `gaussian`, `relu`, `sigmoid`,
 `step`, and `tanh`.
 See [mod_activation.f90](https://github.com/modern-fortran/neural-fortran/blob/master/src/lib/mod_activation.f90)
diff --git a/src/lib/mod_activation.f90 b/src/lib/mod_activation.f90
index 54127217..083c6022 100644
--- a/src/lib/mod_activation.f90
+++ b/src/lib/mod_activation.f90
@@ -8,12 +8,21 @@ module mod_activation
 
   private
 
+  public :: activation_function
   public :: gaussian, gaussian_prime
   public :: relu, relu_prime
   public :: sigmoid, sigmoid_prime
   public :: step, step_prime
   public :: tanhf, tanh_prime
 
+  interface
+    pure function activation_function(x)
+      import :: rk
+      real(rk), intent(in) :: x(:)
+      real(rk) :: activation_function(size(x))
+    end function activation_function
+  end interface
+
 contains
 
   pure function gaussian(x) result(res)
diff --git a/src/lib/mod_layer.f90 b/src/lib/mod_layer.f90
index a5a4a19b..06d9a568 100644
--- a/src/lib/mod_layer.f90
+++ b/src/lib/mod_layer.f90
@@ -2,6 +2,7 @@ module mod_layer
 
   ! Defines the layer type and its methods.
 
+  use mod_activation
   use mod_kinds, only: ik, rk
   use mod_random, only: randn
 
@@ -15,6 +16,10 @@ module mod_layer
     real(rk), allocatable :: b(:) ! biases
     real(rk), allocatable :: w(:,:) ! weights
     real(rk), allocatable :: z(:) ! arg. to activation function
+    procedure(activation_function), pointer, nopass :: activation => null()
+    procedure(activation_function), pointer, nopass :: activation_prime => null()
+  contains
+    procedure, public, pass(self) :: set_activation
   end type layer_type
 
   type :: array1d
@@ -110,4 +115,32 @@ subroutine dw_co_sum(dw)
     end do
   end subroutine dw_co_sum
 
+  pure subroutine set_activation(self, activation)
+    ! Sets the activation function for this layer. The input string
+    ! must match one of the provided activation function names;
+    ! otherwise it defaults to sigmoid.
+    class(layer_type), intent(in out) :: self
+    character(len=*), intent(in) :: activation
+    select case(trim(activation))
+      case('gaussian')
+        self % activation => gaussian
+        self % activation_prime => gaussian_prime
+      case('relu')
+        self % activation => relu
+        self % activation_prime => relu_prime
+      case('sigmoid')
+        self % activation => sigmoid
+        self % activation_prime => sigmoid_prime
+      case('step')
+        self % activation => step
+        self % activation_prime => step_prime
+      case('tanh')
+        self % activation => tanhf
+        self % activation_prime => tanh_prime
+      case default
+        self % activation => sigmoid
+        self % activation_prime => sigmoid_prime
+    end select
+  end subroutine set_activation
+
 end module mod_layer
diff --git a/src/lib/mod_network.f90 b/src/lib/mod_network.f90
index dfc966df..4d3d7aa0 100644
--- a/src/lib/mod_network.f90
+++ b/src/lib/mod_network.f90
@@ -1,10 +1,5 @@
 module mod_network
 
-  use mod_activation, only: gaussian, gaussian_prime,&
-                            relu, relu_prime,&
-                            sigmoid, sigmoid_prime,&
-                            step, step_prime,&
-                            tanhf, tanh_prime
   use mod_kinds, only: ik, rk
   use mod_layer, only: array1d, array2d, db_init, dw_init,&
                        db_co_sum, dw_co_sum, layer_type
@@ -19,8 +14,6 @@ module mod_network
 
     type(layer_type), allocatable :: layers(:)
     integer, allocatable :: dims(:)
-    procedure(activation_function), pointer, nopass :: activation => null()
-    procedure(activation_function), pointer, nopass :: activation_prime => null()
 
   contains
 
@@ -46,14 +39,6 @@ module mod_network
     module procedure :: net_constructor
   endinterface network_type
 
-  interface
-    pure function activation_function(x)
-      import :: rk
-      real(rk), intent(in) :: x(:)
-      real(rk) :: activation_function(size(x))
-    end function activation_function
-  end interface
-
 contains
 
   type(network_type) function net_constructor(dims, activation) result(net)
@@ -102,13 +87,13 @@ pure subroutine backprop(self, y, dw, db)
     call dw_init(dw, dims)
 
     n = size(dims)
-    db(n) % array = (layers(n) % a - y) * self % activation_prime(layers(n) % z)
+    db(n) % array = (layers(n) % a - y) * self % layers(n) % activation_prime(layers(n) % z)
     dw(n-1) % array = matmul(reshape(layers(n-1) % a, [dims(n-1), 1]),&
                              reshape(db(n) % array, [1, dims(n)]))
 
     do n = size(dims) - 1, 2, -1
       db(n) % array = matmul(layers(n) % w, db(n+1) % array)&
-                    * self % activation_prime(layers(n) % z)
+                    * self % layers(n) % activation_prime(layers(n) % z)
       dw(n-1) % array = matmul(reshape(layers(n-1) % a, [dims(n-1), 1]),&
                                reshape(db(n) % array, [1, dims(n)]))
     end do
@@ -127,7 +112,7 @@ pure subroutine fwdprop(self, x)
       layers(1) % a = x
       do n = 2, size(layers)
         layers(n) % z = matmul(transpose(layers(n-1) % w), layers(n-1) % a) + layers(n) % b
-        layers(n) % a = self % activation(layers(n) % z)
+        layers(n) % a = self % layers(n) % activation(layers(n) % z)
       end do
     end associate
   end subroutine fwdprop
@@ -181,9 +166,9 @@ pure function output(self, x) result(a)
     real(rk), allocatable :: a(:)
     integer(ik) :: n
     associate(layers => self % layers)
-      a = self % activation(matmul(transpose(layers(1) % w), x) + layers(2) % b)
+      a = self % layers(2) % activation(matmul(transpose(layers(1) % w), x) + layers(2) % b)
       do n = 3, size(layers)
-        a = self % activation(matmul(transpose(layers(n-1) % w), a) + layers(n) % b)
+        a = self % layers(n) % activation(matmul(transpose(layers(n-1) % w), a) + layers(n) % b)
       end do
     end associate
   end function output
@@ -206,31 +191,15 @@ subroutine save(self, filename)
   end subroutine save
 
   pure subroutine set_activation(self, activation)
-    ! Sets the activation functions. Input string must match one of
-    ! provided activation functions, otherwise it defaults to sigmoid.
-    ! If activation not present, defaults to sigmoid.
+    ! A thin wrapper around layer % set_activation().
+    ! This method can be used to set an activation function
+    ! for all layers at once.
     class(network_type), intent(in out) :: self
     character(len=*), intent(in) :: activation
-    select case(trim(activation))
-      case('gaussian')
-        self % activation => gaussian
-        self % activation_prime => gaussian_prime
-      case('relu')
-        self % activation => relu
-        self % activation_prime => relu_prime
-      case('sigmoid')
-        self % activation => sigmoid
-        self % activation_prime => sigmoid_prime
-      case('step')
-        self % activation => step
-        self % activation_prime => step_prime
-      case('tanh')
-        self % activation => tanhf
-        self % activation_prime => tanh_prime
-      case default
-        self % activation => sigmoid
-        self % activation_prime => sigmoid_prime
-    end select
+    integer :: n
+    do concurrent(n = 1:size(self % layers))
+      call self % layers(n) % set_activation(activation)
+    end do
   end subroutine set_activation
 
   subroutine sync(self, image)
diff --git a/src/tests/test_set_activation_function.f90 b/src/tests/test_set_activation_function.f90
new file mode 100644
index 00000000..479055f0
--- /dev/null
+++ b/src/tests/test_set_activation_function.f90
@@ -0,0 +1,63 @@
+program test_set_activation_function
+
+  ! This program tests whether per-network and per-layer
+  ! setting of activation functions works as expected.
+  ! First we create an array of random variables.
+  ! Then we set different activation functions for different
+  ! layers in the network.
+  ! Finally, we test whether each function produces the same
+  ! values as the activation functions set in the layers.
+
+  use mod_activation
+  use mod_network, only: network_type
+  use mod_random, only: randn
+
+  implicit none
+  type(network_type) :: net
+  real, allocatable :: x(:)
+  integer :: n
+  logical, allocatable :: tests(:)
+
+  tests = [logical ::]
+
+  x = randn(100)
+
+  ! The network will be created with
+  ! sigmoid activation functions for all layers.
+  net = network_type([1, 1, 1, 1, 1])
+
+  do n = 1, size(net % layers)
+    tests = [tests, all(sigmoid(x) == net % layers(n) % activation(x))]
+    tests = [tests, all(sigmoid_prime(x) == net % layers(n) % activation_prime(x))]
+  end do
+
+  ! Now set different activation functions for the other layers.
+  call net % layers(2) % set_activation('gaussian')
+  call net % layers(3) % set_activation('step')
+  call net % layers(4) % set_activation('tanh')
+  call net % layers(5) % set_activation('relu')
+
+  tests = [tests, all(sigmoid(x) == net % layers(1) % activation(x))]
+  tests = [tests, all(sigmoid_prime(x) == net % layers(1) % activation_prime(x))]
+
+  tests = [tests, all(gaussian(x) == net % layers(2) % activation(x))]
+  tests = [tests, all(gaussian_prime(x) == net % layers(2) % activation_prime(x))]
+
+  tests = [tests, all(step(x) == net % layers(3) % activation(x))]
+  tests = [tests, all(step_prime(x) == net % layers(3) % activation_prime(x))]
+
+  tests = [tests, all(tanhf(x) == net % layers(4) % activation(x))]
+  tests = [tests, all(tanh_prime(x) == net % layers(4) % activation_prime(x))]
+
+  tests = [tests, all(relu(x) == net % layers(5) % activation(x))]
+  tests = [tests, all(relu_prime(x) == net % layers(5) % activation_prime(x))]
+
+  print *, tests
+
+  if (all(tests)) then
+    print *, 'All tests passed.'
+  else
+    error stop 'Some tests failed.'
+  end if
+
+end program test_set_activation_function
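
For reference, a minimal usage sketch of the per-layer API introduced by this patch (not part of the diff itself). It assumes the `network_type` constructor, the public `layers` component with its `set_activation` binding, the type-bound `output` method, and the `rk` kind parameter from `mod_kinds`, as they appear in the hunks above; the program name and input values are placeholders.

```fortran
program example_layer_activation
  ! Sketch: build a 3-5-2 network with tanh activations everywhere,
  ! switch the output layer to ReLU, and run a forward pass.
  use mod_kinds, only: rk
  use mod_network, only: network_type
  implicit none
  type(network_type) :: net
  real(rk), allocatable :: y(:)

  ! The constructor argument sets tanh for all layers at once.
  net = network_type([3, 5, 2], activation='tanh')

  ! Override only the output (third) layer.
  call net % layers(3) % set_activation('relu')

  ! Forward pass with an arbitrary 3-element input.
  y = net % output([0.1_rk, 0.2_rk, 0.3_rk])
  print *, y
end program example_layer_activation
```

The same pattern applies to any of the other activation names listed in the README hunk (`gaussian`, `relu`, `sigmoid`, `step`, `tanh`).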