# [DFA Preliminaries](./DFASlide.pdf)

In [10]:
# Logistic sigmoid 1 / (1 + e^(-x)). `exp.` broadcasts elementwise, so the function
# accepts scalars and arrays of any shape; a bare `exp(-x)` on a matrix is the
# deprecated vectorized form (and denotes the matrix exponential in newer Julia).
logis(x) = 1 ./ (1 .+ exp.(-x))
# Derivative of the logistic sigmoid, written through the sigmoid itself:
# s .* (1 - s) with s = logis(x). The sigmoid is evaluated only once.
dlogis(x) = (s = logis(x); s .* (1 .- s))



dlogis (generic function with 1 method)

In [11]:
# Input patterns. The literal lists one pattern per row; the transpose turns
# each pattern into a column, giving a 2x4 matrix.
inp = [1 1 ; 0 1 ; 1 0 ; 0 0]'

2×4 Array{Int64,2}:
 1  0  1  0
 1  1  0  0

In [12]:
# Target output for each column of inp (the XOR of its two entries), as a 1x4 row.
out = [0 1 1 0]

1×4 Array{Int64,2}:
 0  1  1  0

In [13]:
# Zero-initialised output buffer with the same shape as out; the forward pass
# further down writes the network output into it in place (y .= ...).
y=zeros(size(out))

1×4 Array{Float64,2}:
 0.0  0.0  0.0  0.0

In [14]:
n_hidden = 10; # Number of hidden units (used for both hidden layers)
num_iterations = 1000; # Number of learning steps
trials = 4000; # Number of independent runs (reassigned before the experiment loop further down)

In [15]:
# First-layer weights (input -> hidden layer 1), drawn with randn:
# n_hidden rows, one column per input row.
w1 = randn(n_hidden,size(inp,1))

10×2 Array{Float64,2}:
 -0.0137424   0.0793102
 -1.21523     1.3835   
 -1.00232    -0.26536  
 -1.53517     1.13683  
  1.98658    -0.633892 
  0.703696   -0.915932 
  2.12164    -0.691851 
 -0.425324    1.19837  
 -0.596488    0.959852 
 -1.93161    -0.490768 

In [16]:
# Second-layer weights (hidden layer 1 -> hidden layer 2), drawn with randn.
w2 = randn(n_hidden,n_hidden);

In [17]:
# Output-layer weights (hidden layer 2 -> output), drawn with randn:
# one row per output row, n_hidden columns.
w3 = randn(size(out,1),n_hidden)

1×10 Array{Float64,2}:
 0.27572  -0.798905  -0.110264  0.742285  …  0.287355  0.704697  1.25104

In [18]:
# Forward pass through the three layers with the untrained weights: each layer
# multiplies by its weight matrix and applies the logistic sigmoid. The value of
# the `let` block (the network output for all four patterns) is the cell result.
let act = inp
    for w in (w1, w2, w3)
        act = logis(w * act)
    end
    act
end

1×4 Array{Float64,2}:
 0.463856  0.405243  0.511414  0.43549

In [19]:
# Start the backpropagation network from the same initial weights as the DFA
# network. Copies are taken because plain assignment would only alias the DFA
# arrays, and the BP update further down mutates its weights in place (.-=),
# which would then also change whatever array the DFA names still refer to.
bp_w1=copy(w1)
bp_w2=copy(w2)
bp_w3=copy(w3)

1×10 Array{Float64,2}:
 0.27572  -0.798905  -0.110264  0.742285  …  0.287355  0.704697  1.25104

In [20]:
# Random feedback vectors (n_hidden x 1, drawn with rand) used by the DFA step
# below to send the output error directly to hidden layer 1 (B1) and hidden
# layer 2 (B2), in place of the transposed forward weights used by backprop.
B1=rand(n_hidden,1)
B2=rand(n_hidden,1)

10×1 Array{Float64,2}:
 0.935558  
 0.737461  
 0.880536  
 0.465581  
 0.54508   
 0.415001  
 0.899647  
 0.297021  
 0.451022  
 0.00329869

In [21]:
# Forward pass of the DFA network for all four patterns at once (one per column).
a1 = w1 * inp      # pre-activation of hidden layer 1
z1 = logis(a1)     # activation of hidden layer 1
a2 = w2 * z1       # pre-activation of hidden layer 2
z2 = logis(a2)     # activation of hidden layer 2
ay = w3 * z2       # pre-activation of the output layer
y .= logis(ay)     # network output, written in place into the preallocated y

1×4 Array{Float64,2}:
 0.463856  0.405243  0.511414  0.43549

In [22]:
# Output error: prediction minus target, one entry per pattern.
err=y-out

1×4 Array{Float64,2}:
 0.463856  -0.594757  -0.488586  0.43549

In [23]:
# DFA signal for hidden layer 1: the output error is projected through the
# feedback vector B1 (column .* row broadcasts to n_hidden x 4) and scaled by the
# sigmoid derivative at that layer's pre-activation.
d_a1 = (B1.*err) .* dlogis(a1)

10×4 Array{Float64,2}:
 0.0403464  -0.0517065  -0.0425411  0.0379198
 0.0333898  -0.0276407  -0.0249974  0.0315704
 0.0750384  -0.137889   -0.0905644  0.102752 
 0.0882721  -0.0866041  -0.0564141  0.0862052
 0.0547345  -0.0973745  -0.037464   0.0787044
 0.0785708  -0.0831821  -0.0741327  0.0745998
 0.0198132  -0.0362435  -0.0127977  0.0298424
 0.020952   -0.0221367  -0.0244128  0.0227588
 0.0981744  -0.104184   -0.0978908  0.0952468
 0.0343408  -0.138569   -0.0534299  0.107695 

In [24]:
# DFA signal for hidden layer 2: the output error is projected through the
# feedback vector B2 and scaled by the sigmoid derivative at that layer's
# pre-activation. Written with broadcasting (.*) to match the layer-1 signal and
# dfa!; for a column times a row this yields the same n_hidden x 4 matrix as the
# matrix product B2*err.
d_a2 = (B2 .* err) .* dlogis(a2)

10×4 Array{Float64,2}:
 0.108459     -0.13461      -0.114242    0.0986703  
 0.0852576    -0.0968743    -0.0837592   0.050092   
 0.0475307    -0.112345     -0.0772581   0.0957494  
 0.00591929   -0.00707256   -0.00786305  0.00584865 
 0.0606902    -0.0361952    -0.0658159   0.0409281  
 0.0476881    -0.0573742    -0.0506336   0.0430163  
 0.0503498    -0.0900813    -0.0624694   0.0756741  
 0.0254395    -0.0205611    -0.0346512   0.0209015  
 0.0393832    -0.0477032    -0.0493248   0.0440145  
 0.000173004  -0.000395432  -0.00022703  0.000311786

In [25]:
# Weight change for w1: negated product of the layer-1 signal with the layer
# input; the matrix product sums the contributions of the four patterns.
dw1 = -d_a1 * inp'

10×2 Array{Float64,2}:
  0.00219468    0.0113601 
 -0.00839245   -0.00574915
  0.015526      0.062851  
 -0.0318579    -0.00166797
 -0.0172706     0.04264   
 -0.0044381     0.00461132
 -0.00701551    0.0164304 
  0.00346084    0.00118474
 -0.000283543   0.00600959
  0.0190891     0.104228  

In [26]:
# Weight change for w2: negated product of the layer-2 signal with the
# activation of hidden layer 1 (the input to that layer).
dw2 = -d_a2 * z1'

10×10 Array{Float64,2}:
  0.0213589     0.0256469     0.0159405    …   0.0245977     0.00740025 
  0.0228769     0.0253653     0.0207628        0.0244603     0.015393   
  0.0243436     0.0338657     0.0111906        0.0327629     0.00068993 
  0.00159999    0.00132137    0.000956408      0.00149081    0.000274143
 -0.000306893  -0.00935912   -0.000413927     -0.00671561   -0.00333497 
  0.00883332    0.0101039     0.00651439   …   0.00983204    0.00280065 
  0.0140089     0.0211912     0.00697496       0.0197868     0.000172239
  0.00430723    0.000128909   0.00218918       0.00171751   -0.000330402
  0.00694566    0.00607386    0.00329042       0.00677375   -0.000859621
  7.30575e-5    0.000118452   3.86959e-5       0.000108623   8.89646e-6 

In [27]:
# Weight change for w3: negated product of the raw output error with the
# activation of hidden layer 2. No sigmoid-derivative factor is applied here.
dw3 = -err * z2'

1×10 Array{Float64,2}:
 0.106774  0.137502  0.0274758  …  0.0709085  0.0385583  0.0503939

In [28]:
# Apply the DFA weight changes (step size 1). `w + dw` allocates new arrays and
# rebinds the names, so arrays previously bound to w1/w2/w3 are not modified.
w1 = w1 + dw1
w2 = w2 + dw2
w3 = w3 + dw3

1×10 Array{Float64,2}:
 0.382494  -0.661403  -0.082788  0.749372  …  0.358264  0.743256  1.30143

In [29]:
# Forward pass of the backpropagation network for all four patterns at once.
bp_a1 = bp_w1*inp          # pre-activation of hidden layer 1
bp_z1 = logis(bp_a1)       # activation of hidden layer 1
bp_a2 = bp_w2 * bp_z1      # pre-activation of hidden layer 2
bp_z2 = logis(bp_a2)       # activation of hidden layer 2
bp_ay = bp_w3 * bp_z2      # pre-activation of the output layer
bp_y = logis(bp_ay)        # network output
bp_err = bp_y - out        # output error: prediction minus target

1×4 Array{Float64,2}:
 0.463856  -0.594757  -0.488586  0.43549

In [30]:
# Output-layer delta: output error scaled by the sigmoid derivative at the
# output pre-activation.
bp_d3 = (bp_err) .* dlogis(bp_ay)

1×4 Array{Float64,2}:
 0.115358  -0.143349  -0.122083  0.10706

In [31]:
# Hidden-layer-2 delta: the output delta propagated back through the transposed
# output weights, scaled by the sigmoid derivative at this layer's pre-activation.
bp_d2 = (bp_w3' * bp_d3) .* dlogis(bp_a2)

10×4 Array{Float64,2}:
  0.0079493   -0.00956158  -0.00841273   0.00714882
 -0.0229696    0.0252941    0.0226726   -0.0133406 
 -0.00148021   0.00339074   0.00241737  -0.00294762
  0.00234698  -0.00271774  -0.00313242   0.00229235
 -0.0362565    0.020956     0.0395046   -0.0241699 
  0.0364077   -0.0424513   -0.0388393    0.032464  
 -0.00374663   0.00649632   0.00467045  -0.00556641
  0.00612077  -0.00479438  -0.00837653   0.00497116
  0.0153032   -0.0179642   -0.0192568    0.0169064 
  0.0163173   -0.0361455   -0.0215142    0.0290693 

In [32]:
# Hidden-layer-1 delta: the layer-2 delta propagated back through the transposed
# second-layer weights, scaled by the sigmoid derivative at this layer's pre-activation.
bp_d1 = (bp_w2' * bp_d2) .* dlogis(bp_a1)

10×4 Array{Float64,2}:
 -0.0113463     0.0150518    0.012862     -0.0116155 
  0.00542559   -0.00269393  -0.00444425    0.00442898
  0.00117183   -0.0032103   -0.00259902    0.00668089
 -0.0154322     0.00634901   0.0115385    -0.0138948 
 -0.00762883    0.0153349    0.00699668   -0.017875  
 -0.00346226    0.00187734   0.00138402    0.00162124
  0.00954023   -0.00692292  -0.00650987    0.00932828
  0.000173172   0.00208529   0.000866066  -0.00305784
 -0.0156408     0.010318     0.0167327    -0.0124204 
 -0.00451475    0.00717038   0.00698325   -0.00630138

In [140]:
# Backpropagation weight update, in place with step size 1. Each gradient pairs a
# layer's delta with that layer's own input from the BP forward pass (inp, bp_z1,
# bp_z2). The DFA activations z1/z2 belong to the other network and only coincide
# with bp_z1/bp_z2 while both networks still hold identical weights.
# delta * input' is the same matrix as (input * delta')' and matches bp!.
bp_w1 .-= bp_d1 * inp'
bp_w2 .-= bp_d2 * bp_z1'
bp_w3 .-= bp_d3 * bp_z2'

1×10 Array{Float64,2}:
 -0.380241  0.449824  -0.0540714  …  -0.799494  -2.06867  -0.959109

In [None]:
# Summed absolute output error of the single DFA and BP steps worked through
# above. The original `name(ii,jj) = ...` form defines a two-argument *function*
# in Julia (it is not indexed assignment), which would later clash with the
# e_store / bp_e_store arrays created for the experiment; it also referred to an
# undefined `bp_error`. The per-iteration, per-trial values are stored inside the
# experiment loop further down, so plain scalars are computed here.
e_single = sum(abs, err)
bp_e_single = sum(abs, bp_err)

In [46]:
# Add worker processes only when none have been added yet (nprocs() == 1 means
# just the master process exists).
nprocs()==1 && addprocs()

# Logistic sigmoid, defined on every process. `exp.` broadcasts elementwise, so
# scalars and arrays of any shape are accepted; a bare `exp(-x)` on a matrix is the
# deprecated vectorized form (and denotes the matrix exponential in newer Julia).
@everywhere logis(x) = 1 ./ (1 .+ exp.(-x))
# Derivative of the logistic sigmoid, s .* (1 - s) with s = logis(x), on every process.
@everywhere dlogis(x) = logis(x) .* (1 .- logis(x))

# One direct-feedback-alignment training step on the whole batch, defined on
# every process.
#
# w1, w2, w3 : layer weight matrices; all three are updated in place (step size 1).
# B1, B2     : feedback vectors that carry the output error to hidden layers 1 and 2.
# inp, out   : input patterns (one per column) and their targets.
#
# Returns a Dict with the network output ("output") and the output error ("err"),
# both computed from the weights as they were before this step's update.
@everywhere function dfa!(w1,w2,w3,B1,B2,inp,out)
    # Forward pass.
    pre1 = w1 * inp
    act1 = logis(pre1)
    pre2 = w2 * act1
    act2 = logis(pre2)
    y = logis(w3 * act2)
    err = y - out

    # Hidden-layer signals: output error sent through the feedback vectors and
    # scaled by the local sigmoid derivative.
    delta1 = (B1 .* err) .* dlogis(pre1)
    delta2 = (B2 .* err) .* dlogis(pre2)

    # In-place updates; the output layer uses the raw error.
    w1 .-= delta1 * inp'
    w2 .-= delta2 * act1'
    w3 .-= err * act2'

    return Dict("output" => y, "err" => err)
end

# One backpropagation training step on the whole batch, defined on every process.
#
# bp_w1, bp_w2, bp_w3 : layer weight matrices; all three are updated in place
#                       (step size 1).
# inp, out            : input patterns (one per column) and their targets.
#
# Returns a Dict with the network output ("output") and the output error ("err"),
# both computed from the weights as they were before this step's update.
@everywhere function bp!(bp_w1,bp_w2,bp_w3,inp,out)
    # Forward pass.
    pre1 = bp_w1 * inp
    act1 = logis(pre1)
    pre2 = bp_w2 * act1
    act2 = logis(pre2)
    pre_out = bp_w3 * act2
    pred = logis(pre_out)
    resid = pred - out

    # Backward pass: all deltas are computed before any weight is modified, so
    # each one sees the pre-update weights.
    delta3 = resid .* dlogis(pre_out)
    delta2 = (bp_w3' * delta3) .* dlogis(pre2)
    delta1 = (bp_w2' * delta2) .* dlogis(pre1)

    # In-place updates: delta times the transposed input of each layer.
    bp_w1 .-= delta1 * inp'
    bp_w2 .-= delta2 * act1'
    bp_w3 .-= delta3 * act2'

    return Dict("output" => pred, "err" => resid)
end



In [47]:
# Experiment: train a DFA network and a backprop network on the same four
# patterns, for `trials` independent random initialisations, in parallel.
inp = [1 1 ; 0 1 ; 1 0 ; 0 0]'   # one input pattern per column
out = [0 1 1 0]                  # target for each column of inp
y=zeros(size(out))
n_hidden = 15; # Number of hidden units
num_iterations = 1000; # Number of learning steps
trials = 50; # Number of independent runs

# Result stores, created as SharedArrays so the parallel loop below can write
# into them from its iterations. The parametric constructor SharedArray{T}(dims)
# replaces the deprecated SharedArray(T, dims) form.
e_store = SharedArray{Float64}((num_iterations,trials))       # DFA summed |error| per step and trial
bp_e_store = SharedArray{Float64}((num_iterations,trials))    # BP summed |error| per step and trial
y_store = SharedArray{Float64}((size(out,2),num_iterations))     # DFA outputs per step (trial 1 only)
bp_y_store = SharedArray{Float64}((size(out,2),num_iterations))  # BP outputs per step (trial 1 only)

# @sync waits for all parallel iterations; @elapsed makes the wall time the cell result.
@elapsed @sync @parallel for jj = 1:trials
    # Fresh random weights and feedback vectors for every trial.
    w1 = randn(n_hidden,size(inp,1))
    w2 = randn(n_hidden,n_hidden)
    w3 = randn(size(out,1),n_hidden)

    B1=rand(n_hidden,1)
    B2=rand(n_hidden,1)

    # The BP network gets its own independent random initialisation.
    bp_w1 = randn(n_hidden,size(inp,1))
    bp_w2 = randn(n_hidden,n_hidden)
    bp_w3 = randn(size(out,1),n_hidden)

    for ii = 1:num_iterations
        dfa_res = dfa!(w1,w2,w3,B1,B2,inp,out)
        bp_res = bp!(bp_w1,bp_w2,bp_w3,inp,out)
        # Output trajectories are recorded for the first trial only; every jj
        # writes to its own column of the error stores.
        if jj == 1
            y_store[:,ii] = vec(dfa_res["output"])
            bp_y_store[:,ii] = vec(bp_res["output"])
        end
        # sum(abs, x) avoids the deprecated vectorized abs(x) and its temporary array.
        e_store[ii,jj] = sum(abs, dfa_res["err"])
        bp_e_store[ii,jj] = sum(abs, bp_res["err"])
    end
end

2.195718059

In [48]:
using Plots
# 2x2 grid for the four panels assembled below.
l = @layout [
    a b
    c d
]

gr()
# Output trajectories over the learning steps; y_store is transposed so that each
# of the four patterns becomes one series. Labels follow the column order of inp.
p1=plot(y_store',label=["1,1" "0,1" "1,0" "0,0"],title="DFA")
p2=plot(bp_y_store',label=["1,1" "0,1" "1,0" "0,0"],title="BP")
# Summed absolute error per learning step for the first trial (column 1).
p3=plot([e_store[:,1] bp_e_store[:,1]],label=["DFA" "BP"],title="First Trial")
# Error per learning step averaged across trials (mean over dimension 2).
p4=plot([mean(e_store,2) mean(bp_e_store,2)],label=["DFA" "BP"],title="Mean Error")
plot(p1,p2,p3,p4,layout=l)