2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -72,7 +72,7 @@ string(REGEX REPLACE "^ | $" "" LIBS "${LIBS}")

# tests
enable_testing()
foreach(execid mnist network_save network_sync)
foreach(execid mnist network_save network_sync set_activation_function)
add_executable(test_${execid} src/tests/test_${execid}.f90)
target_link_libraries(test_${execid} neural ${LIBS})
add_test(test_${execid} bin/test_${execid})
17 changes: 15 additions & 2 deletions README.md
@@ -118,7 +118,8 @@ cmake .. -DCMAKE_BUILD_TYPE=debug

### Creating a network

Creating a network with 3 layers (one hidden layer)
Creating a network with 3 layers,
one input, one hidden, and one output layer,
with 3, 5, and 2 neurons, respectively:

```fortran
@@ -127,8 +128,10 @@ type(network_type) :: net
net = network_type([3, 5, 2])
```

### Setting the activation function

By default, the network will be initialized with the sigmoid activation
function. You can specify a different activation function:
function for all layers. You can specify a different activation function:

```fortran
net = network_type([3, 5, 2], activation='tanh')
@@ -141,6 +144,16 @@ net = network_type([3, 5, 2])
call net % set_activation('tanh')
```

It's possible to set a different activation function for each layer.
For example, this snippet will create a network with a Gaussian
activation function for all layers except the output layer,
and a ReLU function for the output layer:

```fortran
net = network_type([3, 5, 2], activation='gaussian')
call net % layers(3) % set_activation('relu')
```

Available activation function options are: `gaussian`, `relu`, `sigmoid`,
`step`, and `tanh`.
See [mod_activation.f90](https://github.com/modern-fortran/neural-fortran/blob/master/src/lib/mod_activation.f90)
9 changes: 9 additions & 0 deletions src/lib/mod_activation.f90
@@ -8,12 +8,21 @@ module mod_activation

private

public :: activation_function
public :: gaussian, gaussian_prime
public :: relu, relu_prime
public :: sigmoid, sigmoid_prime
public :: step, step_prime
public :: tanhf, tanh_prime

interface
pure function activation_function(x)
import :: rk
real(rk), intent(in) :: x(:)
real(rk) :: activation_function(size(x))
end function activation_function
end interface

contains

pure function gaussian(x) result(res)
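The `activation_function` abstract interface added above is what the layer and network types use for their procedure pointer components. As a rough illustration (not part of the PR), a pointer declared with this interface can be aimed at any of the public functions exported by `mod_activation`; the program name is hypothetical, and the `rk` import from `mod_kinds` follows the convention used elsewhere in the library:

```fortran
program activation_pointer_sketch
  ! Minimal sketch: assign mod_activation functions to pointers
  ! that conform to the activation_function abstract interface.
  use mod_kinds, only: rk
  use mod_activation, only: activation_function, relu, relu_prime
  implicit none
  procedure(activation_function), pointer :: f => null(), fprime => null()
  real(rk) :: x(3)
  x = [-1._rk, 0.5_rk, 2._rk]
  f => relu
  fprime => relu_prime
  print *, f(x)       ! relu: zero for negative inputs, x otherwise
  print *, fprime(x)  ! its derivative: zero for negative inputs, one for positive
end program activation_pointer_sketch
```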
33 changes: 33 additions & 0 deletions src/lib/mod_layer.f90
@@ -2,6 +2,7 @@ module mod_layer

! Defines the layer type and its methods.

use mod_activation
use mod_kinds, only: ik, rk
use mod_random, only: randn

@@ -15,6 +16,10 @@ module mod_layer
real(rk), allocatable :: b(:) ! biases
real(rk), allocatable :: w(:,:) ! weights
real(rk), allocatable :: z(:) ! arg. to activation function
procedure(activation_function), pointer, nopass :: activation => null()
procedure(activation_function), pointer, nopass :: activation_prime => null()
contains
procedure, public, pass(self) :: set_activation
end type layer_type

type :: array1d
@@ -110,4 +115,32 @@ subroutine dw_co_sum(dw)
end do
end subroutine dw_co_sum

pure subroutine set_activation(self, activation)
! Sets the activation function. The input string must match one of
! the provided activation function names; otherwise it defaults to sigmoid.
class(layer_type), intent(in out) :: self
character(len=*), intent(in) :: activation
select case(trim(activation))
case('gaussian')
self % activation => gaussian
self % activation_prime => gaussian_prime
case('relu')
self % activation => relu
self % activation_prime => relu_prime
case('sigmoid')
self % activation => sigmoid
self % activation_prime => sigmoid_prime
case('step')
self % activation => step
self % activation_prime => step_prime
case('tanh')
self % activation => tanhf
self % activation_prime => tanh_prime
case default
self % activation => sigmoid
self % activation_prime => sigmoid_prime
end select
end subroutine set_activation

end module mod_layer
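Because `set_activation` is now a type-bound procedure on `layer_type`, individual layers of an existing network can be reconfigured directly. A minimal sketch (not part of the PR), using only the constructor shown in the README; note that an unrecognized name falls through to the `case default` branch and silently reverts to sigmoid:

```fortran
program per_layer_activation_sketch
  ! Minimal sketch of the new per-layer set_activation method.
  use mod_network, only: network_type
  implicit none
  type(network_type) :: net
  net = network_type([3, 5, 2])                  ! sigmoid everywhere by default
  call net % layers(2) % set_activation('relu')  ! override the hidden layer only
  ! An unrecognized name hits the case default branch and falls back to sigmoid.
  call net % layers(3) % set_activation('not-a-function')
end program per_layer_activation_sketch
```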
55 changes: 12 additions & 43 deletions src/lib/mod_network.f90
@@ -1,10 +1,5 @@
module mod_network

use mod_activation, only: gaussian, gaussian_prime,&
relu, relu_prime,&
sigmoid, sigmoid_prime,&
step, step_prime,&
tanhf, tanh_prime
use mod_kinds, only: ik, rk
use mod_layer, only: array1d, array2d, db_init, dw_init,&
db_co_sum, dw_co_sum, layer_type
@@ -19,8 +14,6 @@ module mod_network

type(layer_type), allocatable :: layers(:)
integer, allocatable :: dims(:)
procedure(activation_function), pointer, nopass :: activation => null()
procedure(activation_function), pointer, nopass :: activation_prime => null()

contains

@@ -46,14 +39,6 @@ module mod_network
module procedure :: net_constructor
endinterface network_type

interface
pure function activation_function(x)
import :: rk
real(rk), intent(in) :: x(:)
real(rk) :: activation_function(size(x))
end function activation_function
end interface

contains

type(network_type) function net_constructor(dims, activation) result(net)
@@ -102,13 +87,13 @@ pure subroutine backprop(self, y, dw, db)
call dw_init(dw, dims)

n = size(dims)
db(n) % array = (layers(n) % a - y) * self % activation_prime(layers(n) % z)
db(n) % array = (layers(n) % a - y) * self % layers(n) % activation_prime(layers(n) % z)
dw(n-1) % array = matmul(reshape(layers(n-1) % a, [dims(n-1), 1]),&
reshape(db(n) % array, [1, dims(n)]))

do n = size(dims) - 1, 2, -1
db(n) % array = matmul(layers(n) % w, db(n+1) % array)&
* self % activation_prime(layers(n) % z)
* self % layers(n) % activation_prime(layers(n) % z)
dw(n-1) % array = matmul(reshape(layers(n-1) % a, [dims(n-1), 1]),&
reshape(db(n) % array, [1, dims(n)]))
end do
@@ -127,7 +112,7 @@ pure subroutine fwdprop(self, x)
layers(1) % a = x
do n = 2, size(layers)
layers(n) % z = matmul(transpose(layers(n-1) % w), layers(n-1) % a) + layers(n) % b
layers(n) % a = self % activation(layers(n) % z)
layers(n) % a = self % layers(n) % activation(layers(n) % z)
end do
end associate
end subroutine fwdprop
@@ -181,9 +166,9 @@ pure function output(self, x) result(a)
real(rk), allocatable :: a(:)
integer(ik) :: n
associate(layers => self % layers)
a = self % activation(matmul(transpose(layers(1) % w), x) + layers(2) % b)
a = self % layers(2) % activation(matmul(transpose(layers(1) % w), x) + layers(2) % b)
do n = 3, size(layers)
a = self % activation(matmul(transpose(layers(n-1) % w), a) + layers(n) % b)
a = self % layers(n) % activation(matmul(transpose(layers(n-1) % w), a) + layers(n) % b)
end do
end associate
end function output
@@ -206,31 +191,15 @@ subroutine save(self, filename)
end subroutine save

pure subroutine set_activation(self, activation)
! Sets the activation functions. Input string must match one of
! provided activation functions, otherwise it defaults to sigmoid.
! If activation not present, defaults to sigmoid.
! A thin wrapper around layer % set_activation().
! This method can be used to set an activation function
! for all layers at once.
class(network_type), intent(in out) :: self
character(len=*), intent(in) :: activation
select case(trim(activation))
case('gaussian')
self % activation => gaussian
self % activation_prime => gaussian_prime
case('relu')
self % activation => relu
self % activation_prime => relu_prime
case('sigmoid')
self % activation => sigmoid
self % activation_prime => sigmoid_prime
case('step')
self % activation => step
self % activation_prime => step_prime
case('tanh')
self % activation => tanhf
self % activation_prime => tanh_prime
case default
self % activation => sigmoid
self % activation_prime => sigmoid_prime
end select
integer :: n
do concurrent(n = 1:size(self % layers))
call self % layers(n) % set_activation(activation)
end do
end subroutine set_activation

subroutine sync(self, image)
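After this change the network-level `set_activation` is just a loop over the layers, so it overwrites any per-layer choice made earlier. A rough sketch of the resulting ordering rule (not part of the PR), using only the constructor and methods shown above:

```fortran
program network_activation_order_sketch
  ! Minimal sketch: set the network-wide activation first,
  ! then apply per-layer overrides.
  use mod_network, only: network_type
  implicit none
  type(network_type) :: net
  net = network_type([3, 5, 2], activation='gaussian')  ! every layer gaussian
  call net % layers(3) % set_activation('relu')         ! output layer only
  ! A later network-level call would loop over all layers and
  ! replace the relu output layer as well:
  ! call net % set_activation('tanh')
end program network_activation_order_sketch
```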
63 changes: 63 additions & 0 deletions src/tests/test_set_activation_function.f90
@@ -0,0 +1,63 @@
program test_set_activation_function

! This program will test whether per-network and per-layer
! setting of activation functions works as expected.
! First we create an array of random variables.
! Then we assign different activation functions to different
! layers in the network.
! Finally, we test whether each function produces the same
! values as the activation functions set in the layers.

use mod_activation
use mod_network, only: network_type
use mod_random, only: randn

implicit none
type(network_type) :: net
real, allocatable :: x(:)
integer :: n
logical, allocatable :: tests(:)

tests = [logical ::]

x = randn(100)

! the network will be created with
! sigmoid activation functions for all layers
net = network_type([1, 1, 1, 1, 1])

do n = 1, size(net % layers)
tests = [tests, all(sigmoid(x) == net % layers(n) % activation(x))]
tests = [tests, all(sigmoid_prime(x) == net % layers(n) % activation_prime(x))]
end do

! now set the various functions for other layers
call net % layers(2) % set_activation('gaussian')
call net % layers(3) % set_activation('step')
call net % layers(4) % set_activation('tanh')
call net % layers(5) % set_activation('relu')

tests = [tests, all(sigmoid(x) == net % layers(1) % activation(x))]
tests = [tests, all(sigmoid_prime(x) == net % layers(1) % activation_prime(x))]

tests = [tests, all(gaussian(x) == net % layers(2) % activation(x))]
tests = [tests, all(gaussian_prime(x) == net % layers(2) % activation_prime(x))]

tests = [tests, all(step(x) == net % layers(3) % activation(x))]
tests = [tests, all(step_prime(x) == net % layers(3) % activation_prime(x))]

tests = [tests, all(tanhf(x) == net % layers(4) % activation(x))]
tests = [tests, all(tanh_prime(x) == net % layers(4) % activation_prime(x))]

tests = [tests, all(relu(x) == net % layers(5) % activation(x))]
tests = [tests, all(relu_prime(x) == net % layers(5) % activation_prime(x))]

print *, tests

if (all(tests)) then
print *, 'All tests passed.'
else
error stop 'some tests failed.'
end if

end program test_set_activation_function