In [2]:
using Distributions
using DiffBase
using ForwardDiff
using PyPlot
using Optim
# === Upgrading from ForwardDiff v0.1 to v0.2
# instead of ForwardDiff.GradientNumber and ForwardDiff.HessianNumber, 
# we will use ForwardDiff.Dual

convert(::Type{Float64}, x::ForwardDiff.Dual) = Float64(x.value)
function convert(::Array{Float64}, x::Array{ForwardDiff.Dual}) 
    y = zeros(size(x)); 
    for i in 1:prod(size(x)) 
        y[i] = convert(Float64, x[i]) 
    end
    return y
end

    write(Base.IO, ForwardDiff.Partials) at /Users/msyoon/.julia/v0.4/ForwardDiff/src/partials.jl:57
is ambiguous with: 
    write(Base.Base64.Base64EncodePipe, AbstractArray{UInt8, 1}) at base64.jl:89.
To fix, define 
    write(Base.Base64.Base64EncodePipe, ForwardDiff.Partials{N<:Any, UInt8})
before the new definition.


convert (generic function with 2 methods)

In [3]:
using MAT
ratdata = matread("bbparsed_S135.mat")

Dict{ASCIIString,Any} with 1 entry:
  "newdata" => 32346x6 Array{Float64,2}:…

In [4]:
size(ratdata["newdata"])

(32346,6)

In [5]:
## DATA
nRs = ratdata["newdata"][:,1]
nLs = ratdata["newdata"][:,2]

a = ratdata["newdata"][:,3] # inverse - correct(hit)
hit = zeros(length(a))
chooseR = zeros(length(a))

hit[a.==0] = 1
chooseR[((nRs.>nLs) & (hit.==1)) | ((nRs.<nLs) & (hit.==0))]=1


1

In [6]:
"""
Takes params
    beta1 = params[1]; 
    beta2 = params[2]; lapse = params[3];

Returns the log likelihood 
"""
function Loglike_vec(params::Vector, nR, nL, wr)    
    beta1 = params[1]; beta2 = params[2]; lapse = params[3];

    ntrials = length(nR)
    
    sigma_R = beta1*nR + beta2
    sigma_L = beta1*nL + beta2
    
    pright = zeros(typeof(lapse),ntrials)
    for i=1:length(ntrials)
        pright[i] = 1 - cdf(Normal(nR[i]-nL[i],sqrt(sigma_R[i]^2+sigma_L[i]^2)),0)
    end
    
    pright = (1-lapse/2).*pright + (lapse/2).*(1-pright)

    likelihoods = zeros(typeof(lapse),ntrials)
    likelihoods[wr.==1] = pright[wr.==1]
    likelihoods[wr.==0] = 1-pright[wr.==0]
    
    LL = sum(log(pright[wr.==1])) + sum(log(1-pright[wr.==0]))
    LL = -LL # for minimization
    
    return LL, likelihoods
end

Loglike_vec (generic function with 1 method)

In [7]:
### =============== testing 1 ================= ####

# Parameters
beta1 = 3; # m
beta2 = 1; # b
lapse = 0.1;

params = [beta1, beta2, lapse]

@time LL, lls = Loglike_vec(params,nRs,nLs,chooseR)

### =========================================== #### 

  0.180785 seconds (131.19 k allocations: 9.272 MB)


(48294.022990294405,[0.399784,0.95,0.95,0.95,0.95,0.95,0.95,0.95,0.95,0.95  …  0.95,0.95,0.95,0.95,0.95,0.95,0.95,0.95,0.95,0.05])

In [8]:
"""
Takes params
    beta1 = params[1]; 
    beta2 = params[2]; lapse = params[3];

Returns the log likelihood 
"""
function Loglike(params::Vector, nR, nL, wr)    
    beta1 = params[1]; beta2 = params[2]; lapse = params[3];

    ntrials = length(nR)
    
    sigma_R = beta1*nR + beta2
    sigma_L = beta1*nL + beta2
    
    pright_pre = 1 - cdf(Normal(nR-nL,sqrt(sigma_R^2+sigma_L^2)),0)
    pright = (1-lapse/2)*pright_pre + (lapse/2)*(1-pright_pre)

    if wr > 0
        return log(pright)
    else
        return log(1-pright)
    end
end

Loglike (generic function with 1 method)

In [9]:
"""
function single_trial(params::Vector, nR, nL, wr)   

Takes params
    beta1 = params[1]; 
    beta2 = params[2]; lapse = params[3];

nR   number of right flashes for each trial
nL   number of left flashes for each trial
wr   rat choice (1=right, 0=left) for each trial

Returns the log-likelihood / gradient / hessian matrix for given parameter
"""
function single_trial(params::Vector, nR, nL, wr, hess_mode=0::Int)
    function llikey(params::Vector)
        Loglike(params, nR, nL, wr)
    end
    
    if hess_mode > 0
        result =  DiffBase.HessianResult(params) 
        ForwardDiff.hessian!(result, llikey, params);
    else
        result =  DiffBase.GradientResult(params)
        ForwardDiff.gradient!(result, llikey, params);
    end

    LL     = DiffBase.value(result)
    LLgrad = DiffBase.gradient(result)
    
    if hess_mode > 0
        LLhessian = DiffBase.hessian(result)
    end
   
    if hess_mode > 0
        return LL, LLgrad, LLhessian
    else
        return LL, LLgrad
    end
end


single_trial (generic function with 2 methods)

In [10]:
"""
function SumLikey(params::Vector, nRs, nLs, chooseR)

Takes params
    beta1 = params[1]; 
    beta2 = params[2]; lapse = params[3];

nRs       vector of the number of right flashes for each trial
nLs       vector of the number of left flashes for each trial
chooseR   vector of the rat choice (1=right, 0=left) for each trial

Returns the log-likelihood / gradient / hessian matrix for given parameter
"""
function SumLikey_LL(params::Vector, nRs, nLs, chooseR)
    LL        = 0.
        
    ntrials = length(nRs)

    for i in 1:ntrials
        LLi = Loglike(params,nRs[i],nLs[i],chooseR[i])
        LL        = LL + LLi;
    end

    LL = -LL
    return LL
end

function SumLikey(params::Vector, nRs, nLs, chooseR)
    LL        = 0.
    LLgrad    = zeros(Float64,length(params))
    
    ntrials = length(nRs)

    for i in 1:ntrials
        LLi, LLgradi = single_trial(params,nRs[i],nLs[i],chooseR[i])
        LL        = LL + LLi;
        LLgrad    = LLgrad + LLgradi;
        
    end

    LL = -LL
    LLgrad = -LLgrad
    return LL, LLgrad
end


function SumLikey_hess(params::Vector, nRs, nLs, chooseR)
    LL        = 0.
    LLhess    = zeros(Float64,length(params),length(params))
    
    ntrials = length(nRs)
    for i in 1:ntrials
        LLi, LLgradi, LLhessi = single_trial(params,nRs[i],nLs[i],chooseR[i],1)
        LL        = LL + LLi;
        LLhess    = LLhess + LLhessi;
    end

    LL = -LL
    return LL, LLhess
end

SumLikey_hess (generic function with 1 method)

In [35]:
### =============== testing 2 ================= ####

# Parameters
beta1 = 0.77; # m
beta2 = 0.69; # b
lapse = 0;

params = [beta1, beta2, lapse]

# likelihood
LL = SumLikey_LL(params,nRs,nLs,chooseR)
println(LL)

# gradient 
LL, LLgrad= SumLikey(params,nRs,nLs,chooseR)
println(LLgrad)


18481.850819773303
[-26.601322930053698,-9.33572002821268,3.223046278152269]


(18481.850819773303,
3x3 Array{Float64,2}:
 -7090.53  -2245.25   -8958.05
 -2245.25  -1171.86   -3135.99
 -8958.05  -3135.99  -11800.2 )

In [22]:
function LL_f(params::Vector)
    return SumLikey_LL(params, nRs, nLs, chooseR)
end

function LL_g!(params::Vector, grads::Vector)
    LL, LLgrad = SumLikey(params, nRs, nLs, chooseR)
    for i=1:length(params)
        grads[i] = LLgrad[i]
    end
end

function LL_fg!(params::Vector, grads)
    LL, LLgrad = SumLikey(params, nRs, nLs, chooseR)
    for i=1:length(params)
        grads[i] = LLgrad[i]
    end
    return LL
end

function LL_h!(params::Vector, hess)
    LL, LLhess = SumLikey_hess(params, nRs, nLs, chooseR)
    for i=1:length(params)
        for j=1:length(params)
            hess[i,j] = LLhess[i,j]
        end
    end
end

d4 = DifferentiableFunction(LL_f,
                            LL_g!,
                            LL_fg!)

d = TwiceDifferentiableFunction(LL_f, LL_g!, LL_h!)

Optim.TwiceDifferentiableFunction(LL_f,LL_g!,fg!,LL_h!)

In [26]:
function my_line_search!(df, x, s, x_scratch, gr_scratch, lsr, alpha,
        mayterminate, c1::Real = 1e-4, rhohi::Real = 0.5, rholo::Real = 0.1, iterations::Integer = 1_000)
    initial_alpha = 0.5
    LineSearches.bt2!(df, x, s,x_scratch, gr_scratch, lsr, initial_alpha,
                      mayterminate, c1, rhohi, rholo, iterations)
end

my_line_search! (generic function with 5 methods)

In [40]:
# Parameters
beta1 = 3.37; # m
beta2 = 4.39; # b
lapse = 0.1;

params = [beta1, beta2, lapse]


l = [0.001, 0.001, 0.]
u = [15.,   15.,   1.]

tic()
# restrict the maximum number of iterations for each GradientDescent optimization to 10 
res = optimize(d4, params, l, u, Fminbox(); 
         optimizer = GradientDescent, linesearch = my_line_search!, optimizer_o = Optim.Options(g_tol = 1e-12,
                                                                        iterations = 200,
                                                                        show_every = true,
                                                                        store_trace = true,
                                                                        extended_trace = true,
                                                                        show_trace = true))
println(res)
toc()



Iter     Function value   Gradient norm 
     0     2.104337e+04     1.364900e+03
 * Current step size: 1.0
 * g(x): [248.27986457372566,86.36577972966835,1364.9001293668218]
 * x: [3.37,4.39,0.1]
     1     2.094768e+04     1.377226e+03
 * Current step size: 0.0005000000000000001
 * g(x): [270.39751139383964,93.995797377904,1377.226069488302]
 * x: [3.247543427526096,4.347191737246737,0.05617558715634231]
     2     2.088544e+04     1.403127e+03
 * Current step size: 0.0005000000000000001
 * g(x): [288.89420198199156,100.31843362676696,1403.127414207715]
 * x: [3.1143032096200427,4.300607919913788,0.04145422054002736]
     3     2.082611e+04     1.434677e+03
 * Current step size: 0.0005000000000000001
 * g(x): [308.6108284097965,107.0366329761135,1434.6770042665025]
 * x: [2.9721126790061203,4.250898274832241,0.033192941233426705]
     4     2.076386e+04     1.471108e+03
 * Current step size: 0.0005000000000000001
 * g(x): [330.8131258091477,114.59061279419173,1471.1083991741466]
 * x

848.583807564

In [42]:
res.minimizer


3-element Array{Float64,1}:
 0.773159   
 0.691998   
 2.00779e-10

In [None]:
# hessian
# LL, LLhess= SumLikey_hess(res.minimizer,nRs,nLs,chooseR)
### =========================================== #### 

