In [25]:
# import packages..
import ForwardDiff
using ForwardDiff
import PyPlot
using PyPlot
import Base.convert
import Optim
using Optim

# === Upgrading from ForwardDiff v0.1 to v0.2
# instead of ForwardDiff.GradientNumber and ForwardDiff.HessianNumber, 
# we will use ForwardDiff.Dual

# Strip the derivative part of a dual number and return its value as a Float64.
# NOTE(review): these are methods on Base.convert for a type owned by ForwardDiff
# (type piracy); they are kept because Fmatrix calls convert(Float64, sigma2).
convert(::Type{Float64}, x::ForwardDiff.Dual) = Float64(x.value)

# Element-wise version for arrays of dual numbers.
# The target has to be declared as ::Type{Array{Float64}} so that a call written as
# convert(Array{Float64}, a) dispatches here, and the element type has to be a
# bounded type parameter because an array of concrete duals is not a subtype of
# Array{ForwardDiff.Dual}.
function convert{T<:ForwardDiff.Dual}(::Type{Array{Float64}}, x::Array{T})
    y = zeros(size(x))
    for i in eachindex(x)
        y[i] = convert(Float64, x[i])
    end
    return y
end


convert (generic function with 656 methods)

To evaluate how well a particular set of parameter values $\theta$ fits the behavioral data, we compute the probability of observing the data given the model.

For each trial $i$, we will compute the likelihood of seeing the data under the model assuming that trials are independent. 

$P(D|\theta) = \prod_{i}P(d_i|t_{i,R},t_{i,L},\theta)$

$t_{i,R},t_{i,L}$ : the right and left click times on trial $i$

$d_i$ : the subject's decision on trial $i$

The best-fit parameter values are the parameters $\theta$ that maximize the likelihood (Maximum likelihood values)

To help maximize the likelihood (or log likelihood), we will compute the derivative $\partial P(d_i|t_{i,R},t_{i,L},\theta) / \partial\theta$ with respect to each of the parameters in the set $\theta$.

Once we have these gradients with respect to the 9 model parameters, we will use them in the optimization.

## Import data 

In [2]:
using MAT
# Read the behavioral data set from a MATLAB .mat file; the result is a Dict
# keyed by the variable names stored in the file.
ratdata = matread("testdata.mat")
# ratdata = matread("chrono_B069_rawdata.mat")

Dict{ByteString,Any} with 5 entries:
  "ratname"      => "B069"
  "daterange"    => 1x2 Array{Any,2}:…
  "avgdata"      => Dict{ASCIIString,Any}("is_probe"=>1x64537 Array{Bool,2}:…
  "rawdata"      => Dict{ASCIIString,Any}("is_probe"=>1x64537 Array{Any,2}:…
  "total_trials" => 64537.0

In [3]:
# Turn one stored click-time entry into a vector of click times.
# An array is flattened, a single number becomes a one-element vector, and
# anything else yields an empty Float64 vector.
# NOTE(review): presumably a trial with exactly one click is stored as a scalar
# rather than an array -- confirm against the data file.
_clickvec(entry::Array) = vec(entry)
_clickvec(entry::Real)  = Float64[entry]
_clickvec(entry)        = Float64[]

"""
    trialdata(ratdata, trial)

Extract the data of one trial from `ratdata["rawdata"]`.

Returns the tuple `(rvec, lvec, T, rat_choice)`:
- `rvec`: vector built from the `"rightbups"` entry of the trial
- `lvec`: vector built from the `"leftbups"` entry of the trial
- `T`: the `"T"` entry of the trial
- `rat_choice`: `1` if the `"pokedR"` entry is positive ("R"), otherwise `-1` ("L")
"""
function trialdata(ratdata, trial)
    raw = ratdata["rawdata"]

    rat_choice = raw["pokedR"][trial] > 0 ? 1 : -1

    rvec = _clickvec(raw["rightbups"][trial])
    lvec = _clickvec(raw["leftbups"][trial])

    return rvec, lvec, raw["T"][trial], rat_choice
end

RightClickTimes, LeftClickTimes, maxT, rat_choice = trialdata(ratdata, 1)

([0.0,0.001135000000000004,0.011165000000000001,0.01892,0.10638,0.20342,0.32964499999999997,0.33775999999999995],[0.0,0.19235,0.34360999999999997],0.35095899999999747,-1)

## bin_centers = make_bins(B, dx, binN)

In [4]:
"""
    make_bins(B, dx, binN)

Return the vector of bin centers for the accumulator grid.

The first and last points are the sticky bins. No bin edges are made: the edge
between two bins is always implicitly at the halfway point between their centers.
The center bin is always at x = 0, the spacing between the other inner bins is
`dx`, and the first and last centers are placed so that |B| lies exactly at the
midpoint between the sticky bin and its inner neighbour.

Playing nice with ForwardDiff means that the *number* of bins must be fixed in
advance, so this function only determines the bin locations. `binN` is the number
of bins with centers > 0 and should be computed by the caller as

    binN = ceil(B/dx)

The result has `2*binN + 1` elements, with the middle one at position zero.

Throws an `ArgumentError` if `binN < 1`.
"""
function make_bins(B, dx, binN)
    binN >= 1 || throw(ArgumentError("binN must be at least 1, got $binN"))

    # Multiplying and then dividing by B leaves the values at k*dx; presumably this
    # is done so that the vector takes on the number type of B.
    bins = collect(1.0:binN)*B
    bins = dx*bins/B

    if bins[end] == B
        bins[end] = B + dx
    else
        # Mirror the last inner center about B. With a single positive bin the
        # inner neighbour is the center bin at 0.
        inner = length(bins) > 1 ? bins[end-1] : zero(bins[end])
        bins[end] = 2*B - inner
    end

    bins = [-bins[end:-1:1]; 0; bins]
    return bins
end;

In [5]:
bins = make_bins(4.1,0.25,ceil(4.1/0.25))
bin_centers = bins
bins

35-element Array{Float64,1}:
 -4.2 
 -4.0 
 -3.75
 -3.5 
 -3.25
 -3.0 
 -2.75
 -2.5 
 -2.25
 -2.0 
 -1.75
 -1.5 
 -1.25
  ⋮   
  1.5 
  1.75
  2.0 
  2.25
  2.5 
  2.75
  3.0 
  3.25
  3.5 
  3.75
  4.0 
  4.2 

## Global Variables

In [6]:
# Global variables
# (read as globals inside Fmatrix and logProbRight)
const epsilon = 10.0^(-10);  # small offset added to click times before they are binned
const dx = 0.25;             # spacing between neighbouring inner bin centers
const dt = 0.02;             # length of one simulation timestep
const total_rate = 40;       # divisor of the per-click noise term in logProbRight

## Parameters

a : decision variable, memory accumulator

$$ da =
  \begin{cases}
    0       & \quad \text{if } |a| \geq B \\
    \sigma_a\,dW + (\delta_{t,t_R} \cdot \eta C(t) - \delta_{t,t_L} \cdot \eta C(t))\,dt + \lambda a\,dt  & \quad \text{otherwise}\\
  \end{cases}
$$



The impact of each click (C) is affected by sensory adaptation that depends on clicks from both right and left sides:

$$ 
\frac{\mathrm d C}{\mathrm d t} = \frac{1-C}{\tau_\phi} + (\phi-1)C(\delta_{t,t_R}+\delta_{t,t_L}) 
$$


sigma2_a ($\sigma_a^2$) : a diffusion constant, parameterizing noise in a.

sigma2_s ($\sigma_s^2$) : parameterizing noise when adding evidence from a right or left pulse. (incoming sensory evidence)

sigma2_i ($\sigma_i^2$) : initial condition for the dynamical equation at $t=0$

lam ($\lambda$) : consistent drift in the memory a ($\lambda<0$ : leaky or forgetful case, $\lambda>0$ : unstable or impulsive case)

B : decision bound

bias : the bias parameter determines the position of the threshold in a above which a Rightward decision is made

phi ($\phi$) : parameterizes sensory adaptation by defining the dynamics of C ($\phi>1$ : Facilitation, $\phi<1$ : Depression, $\phi=1$ : absence of sensory adaptation)

tau_phi ($\tau_\phi$) : time constant with which the click impact C recovers towards 1 between clicks

lapse : The lapse rate parameterizes the probability of making a random response.


In [7]:
# Initial parameter values, packed in the order in which logProbRight unpacks them.
sigma_a = 1; sigma_s = 0.1; sigma_i = 0.2; 
sigma_a_sbin = sigma_a  # remember we need this copy for Fmatrix
lam = -0.0005; B = 4.1; bias = 0.1; 
phi = 0.3; tau_phi = 0.1; lapse = 0.05*2;
params = [sigma_a, sigma_s, sigma_i, lam, B, bias, phi, tau_phi, lapse]   

# The first three entries, named the way Fmatrix reads params[1:3].
# NOTE(review): this overwrites the `lam` assigned above with params[2] (= sigma_s);
# confirm that the reassignment is intentional.
sigma = params[1];
lam   = params[2];
c     = params[3];

## F = Fmatrix([sigma, lambda, c], bin_centers)

In [8]:
"""
    Fmatrix(params::Vector, bin_centers)

Return the square matrix of one-timestep transition probabilities between the
accumulator bins. Column `j` holds the distribution over destination bins for
probability mass that starts in bin `j`; the first and last bins are sticky
(their columns are set to map onto themselves).

# Arguments
- `params`: only the first three entries are read, as `[sigma2, lambda, c]`
    - `sigma2`: variance of the Gaussian noise added during one step. It is used as
      given; it is *not* scaled by `dt` here.
    - `lambda`: drift rate of the accumulator, applied over one step of length `dt`
    - `c`: constant input rate, applied over one step of length `dt`
- `bin_centers`: vector of the centers of all bins. Edges are at the midpoints
  between centers, and the first and last bins are sticky.

Uses the globals `dt` (step length) and `dx` (spacing of the inner bin centers,
used to choose the noise resolution and to locate the neighbouring bins).

Written to play nice with ForwardDiff: the element type of the result follows
`sigma2`, while the size of the noise grid is computed from a plain `Float64`
copy of `sigma2`.
"""
function Fmatrix(params::Vector, bin_centers)
    sigma2 = params[1];
    lam   = params[2];
    c     = params[3];
    
    # Non-differentiated copy, used only to size the noise grid.
    sigma2_sbin = convert(Float64, sigma2)
  
    F = zeros(typeof(sigma2),length(bin_centers),length(bin_centers))    

    # Deterministic position of each bin center after one step.
    if lam == 0
        # Limit of the general expression below as lam -> 0, which is x + c*dt.
        # The terms that are linear in lam are kept so that the derivative with
        # respect to lam is still carried when lam is a dual number with value 0.
        mus = bin_centers*exp(lam*dt) + c*dt*(1 + lam*dt/2)
    else
        mus = (bin_centers + c/lam)*exp(lam*dt) - c/lam
    end

    # Grid of noise offsets covering +/- 5 standard deviations.
    n_sbins = max(70, ceil(10*sqrt(sigma2_sbin)/dx))
    
    swidth = 5*sqrt(sigma2_sbin)
    sbinsize = swidth/n_sbins;
    sbins    = collect(-swidth:sbinsize:swidth)

    # Gaussian weights on that grid, normalised to sum to one.
    ps       = exp(-sbins.^2/(2*sigma2))
    ps       = ps/sum(ps);

    base_sbins = sbins;
        
    for j in 2:length(bin_centers)
        # Noise grid shifted so that it is centered on mus[j].
        sbins = collect(0:(length(base_sbins)-1))*sbinsize
        sbins = sbins + mus[j]-swidth

        for k in 1:length(sbins)
            if sbins[k] < bin_centers[1]
                # Below the first center: all mass goes to the first (sticky) bin.
                F[1,j] = F[1,j] + ps[k]
            elseif bin_centers[end] <= sbins[k]
                # At or above the last center: all mass goes to the last (sticky) bin.
                F[end,j] = F[end,j] + ps[k]
            else
                # Find the two neighbouring centers lp (below) and hp (above).
                if (sbins[k] > bin_centers[1] && sbins[k] < bin_centers[2])
                    lp = 1; hp = 2;
                elseif (sbins[k] > bin_centers[end-1] && sbins[k] < bin_centers[end])
                    lp = length(bin_centers)-1; hp = length(bin_centers);
                else 
                    # Inner bins are spaced by dx, starting at bin_centers[2].
                    lp = floor(Int,((sbins[k]-bin_centers[2])/dx) + 2)
                    hp = lp+1
                end

                if lp < 1 
                    lp = 1; 
                end
                if hp < 1 
                    hp = 1;
                end

                if lp == hp
                    F[lp,j] = F[lp,j] + ps[k]
                else
                    # Split the mass linearly between the two neighbouring centers.
                    F[hp,j] = F[hp,j] + ps[k]*(sbins[k] - bin_centers[lp])/(bin_centers[hp] - bin_centers[lp])
                    F[lp,j] = F[lp,j] + ps[k]*(bin_centers[hp] - sbins[k])/(bin_centers[hp] - bin_centers[lp])
                end                   
            end
        end
    end
    # Make the first and last bins absorbing.
    F[:,1] = 0; F[:,end] = 0; F[1,1] = 1; F[end,end] = 1;
    return F
end

Fmatrix (generic function with 1 method)

In [9]:
F = Fmatrix([0.2, 0, 0.0],bin_centers) # Fi
F

35x35 Array{Float64,2}:
 1.0  0.41182      0.218907     0.0917964    …  0.0          0.0          0.0
 0.0  0.197142     0.172131     0.112014        0.0          0.0          0.0
 0.0  0.187228     0.217923     0.187228        0.0          0.0          0.0
 0.0  0.119939     0.187228     0.217923        0.0          0.0          0.0
 0.0  0.0571377    0.119939     0.187228        0.0          0.0          0.0
 0.0  0.0202246    0.0571377    0.119939     …  0.0          0.0          0.0
 0.0  0.00531176   0.0202246    0.0571377       0.0          0.0          0.0
 0.0  0.00103206   0.00531176   0.0202246       0.0          0.0          0.0
 0.0  0.000148294  0.00103206   0.00531176      0.0          0.0          0.0
 0.0  1.59548e-5   0.000148294  0.00103206      0.0          0.0          0.0
 0.0  1.11105e-6   1.59548e-5   0.000148294  …  0.0          0.0          0.0
 0.0  0.0          1.11105e-6   1.59548e-5      0.0          0.0          0.0
 0.0  0.0          0.0          1.11105e

In [10]:
F = Fmatrix(params,bin_centers)
F

35x35 Array{Float64,2}:
 1.0  0.461125    0.365007    0.274946    …  0.0         0.0         0.0
 0.0  0.0908452   0.0859592   0.0811359      0.0         0.0         0.0
 0.0  0.0945948   0.100835    0.0946575      0.0         0.0         0.0
 0.0  0.0888839   0.0945533   0.100892       0.0         0.0         0.0
 0.0  0.0737024   0.0889555   0.0945117      0.0         0.0         0.0
 0.0  0.0612142   0.0736883   0.0890271   …  0.0         0.0         0.0
 0.0  0.044833    0.0612781   0.0736743      0.0         0.0         0.0
 0.0  0.0329193   0.0448354   0.0613421      0.0         0.0         0.0
 0.0  0.0212918   0.0329616   0.0448377      0.0         0.0         0.0
 0.0  0.0138235   0.021298    0.0330038      0.0         0.0         0.0
 0.0  0.0078944   0.0138445   0.0213041   …  0.0         0.0         0.0
 0.0  0.00453257  0.0078985   0.0138655      0.0         0.0         0.0
 0.0  0.00228512  0.00454054  0.00790261     3.13941e-7  0.0         0.0
 ⋮                         

## logProbRight 
### (params::Vector, RightClickTimes::Vector, LeftClickTimes::Vector, Nsteps::Int)

* params = [sigma_a, sigma_s, sigma_i, lambda, B, bias, phi, tau_phi, lapse]
* RightClickTimes vector with elements indicating times of right clicks
* LeftClickTimes vector with elements indicating times of left clicks
* Nsteps number of timesteps to simulate 

a (column vector representing distribution of values of accumulator a)

a_trace (length(bin_centers)-by-Nsteps+1), a trace of the distribution of a as 
    a function of time
    
c_trace (row vector Nsteps+1 long, effective value of c as 
    a function of time after adaptation)


In [11]:
"""
    logProbRight(params::Vector, RightClickTimes::Vector, LeftClickTimes::Vector, Nsteps::Int)

Return the log of the model probability that the agent chose Right on one trial.

This is the version that uses the inter-click interval (`ici`) when updating the
adapted click magnitudes that feed `c_eff_net` / `c_eff_tot`; according to the
original note it follows the MATLAB code (which was using `dt` for `c_eff`).

# Arguments
- `params`: `[sigma_a, sigma_s, sigma_i, lambda, B, bias, phi, tau_phi, lapse]`
- `RightClickTimes`: vector with elements indicating times of right clicks
- `LeftClickTimes`: vector with elements indicating times of left clicks
- `Nsteps`: number of timesteps to simulate

Uses the globals `dt`, `dx`, `epsilon` and `total_rate`.

# Outline
1. Count the clicks of each side that fall into each timestep.
2. Build the bin centers from `B` and `dx`, and start with mass `1 - lapse` in the
   center bin and `lapse/2` in each of the two end bins.
3. Apply `Fmatrix([sigma_i, 0, 0.0], ...)` once, then one transition matrix per
   step: a fixed one for steps without clicks, and otherwise one whose noise and
   input depend on the adapted click magnitudes.
4. Sum the mass on the Right side of `bias` and return its log.

`sigma_a*dt` and `sigma_a*dt + sigma_s*c_eff_tot/total_rate` are passed to Fmatrix
as its first parameter, which Fmatrix treats as a variance.
"""

function logProbRight(params::Vector, RightClickTimes::Vector, LeftClickTimes::Vector, Nsteps::Int)
    sigma_a = params[1]; sigma_s = params[2]; sigma_i = params[3]; 
    lambda = params[4]; B = params[5]; bias = params[6]; 
    phi = params[7]; tau_phi = params[8]; lapse = params[9]
    
    # Per-timestep click counts. Empty inputs are replaced by an empty Float64
    # vector first (each guard sits on the other side's line, but both run before
    # the click times are used).
    LeftClicks  = zeros(Int, 1, Nsteps); if isempty(RightClickTimes) RightClickTimes = zeros(0) end;
    RightClicks = zeros(Int, 1, Nsteps); if isempty(LeftClickTimes ) LeftClickTimes  = zeros(0) end;
    # epsilon pushes a click at exactly t = 0 into timestep 1 instead of index 0.
    for i in ceil((LeftClickTimes+epsilon)/dt)  LeftClicks[Int(i)]  = LeftClicks[Int(i)] + 1 end
    for i in ceil((RightClickTimes+epsilon)/dt) RightClicks[Int(i)] = RightClicks[Int(i)] + 1 end
    
    # === Upgrading from ForwardDiff v0.1 to v0.2
    # instead of using convert we can use floor(Int, ForwardDiff.Dual) and
    # ceil(Int, ForwardDiff.Dual)

#     my_B = convert(Float64, B) # my_B won't be differentiated; ForwardDiff can't do ceil()
#     my_bias = convert(Float64, bias)  # my_bias won't be differentiated' FD can't do floor()
    # binN: number of bins with centers > 0; binBias: index derived from bias/dx,
    # offset so that bias = 0 maps to the center bin (index binN+1).
    binN = ceil(Int, B/dx)#Int(ceil(my_B/dx))  
    binBias = floor(Int, bias/dx) + binN+1  
    bin_centers = make_bins(B, dx, binN) 

#     a_trace = zeros(length(bin_centers), Nsteps+1); 
#     c_trace = zeros(1, Nsteps+1)
    
    # Initial distribution: 1-lapse in the center bin, lapse/2 in each end bin.
    # Multiplying by sigma_a*0.0 gives the array the number type of sigma_a.
    a0 = zeros(length(bin_centers),1)*sigma_a*0.0; # That weirdo inexact error thing
    a0[binN+1] = 1-lapse; a0[1] = lapse/2; a0[end] = lapse/2;
    
    # Adapted click magnitudes per side, and the number of clicks consumed so far.
    c_eff_r = 0
    c_eff_l = 0
    cnt_r = 0
    cnt_l = 0
    
    # Spread the initial distribution with variance sigma_i (no drift, no input).
    Fi = Fmatrix([sigma_i, 0, 0.0], bin_centers); 
    a = Fi*a0;
#     a_trace[:,1] = a;

    # Transition matrix reused for every timestep that contains no click.
    F0 = Fmatrix([sigma_a*dt, lambda, 0.0], bin_centers)
    # NOTE(review): the loop stops at Nsteps and reads the counts of timestep i-1,
    # so clicks counted in timestep Nsteps are never applied, while the
    # commented-out traces have Nsteps+1 columns -- confirm the intended range.
    for i in 2:Nsteps 
        c_eff_tot = 0
        c_eff_net = 0
        if (RightClicks[i-1]==0) & (LeftClicks[i-1]==0)
            c_eff_tot = 0
            c_eff_net = 0

            a = F0*a
        else
            for j in 1:RightClicks[i-1]
                # The very first right click of the trial is skipped here: it
                # updates nothing and adds nothing to the totals.
                if cnt_r != 0 || j != 1
                    ici = RightClickTimes[cnt_r+j]-RightClickTimes[cnt_r+j-1]                    
                    # Scale the previous magnitude by phi, then relax towards 1
                    # over the inter-click interval with time constant tau_phi.
                    c_eff_r = 1 + (c_eff_r*phi - 1)*exp(-ici/tau_phi)
                    c_eff_tot = c_eff_tot + c_eff_r
                    c_eff_net = c_eff_net + c_eff_r
                end
                if j == RightClicks[i-1]
                    cnt_r = cnt_r+j
                end
            end
            for j in 1:LeftClicks[i-1]
                # Same update for the left side; left clicks subtract from the net input.
                if cnt_l != 0 || j != 1
                    ici = LeftClickTimes[cnt_l+j]-LeftClickTimes[cnt_l+j-1]
                    c_eff_l = 1 + (c_eff_l*phi - 1)*exp(-ici/tau_phi)
                    c_eff_tot = c_eff_tot + c_eff_l
                    c_eff_net = c_eff_net - c_eff_l
                end
                if j == LeftClicks[i-1]
                    cnt_l = cnt_l+j
                end
            end
            # Noise for this step: accumulator noise plus click noise scaled by the
            # total adapted click magnitude; the net magnitude enters as input.
            net_sigma = sigma_a*dt + (sigma_s*c_eff_tot)/total_rate
            F = Fmatrix([net_sigma, lambda, c_eff_net/dt], bin_centers)
            a = F*a
        end
        
#         c_trace[i]   = convert(Float64, c_eff_tot)
#         a_trace[:,i] = convert(Array{Float64}, a)
    end;
#     plot(1:Nsteps+1,c_trace[:])    
#     imshow(a_trace, interpolation="none")
    # Mass counted as a Right choice: every bin from binBias+2 upward, plus
    # weighted shares of bins binBias and binBias+1 that depend on where bias
    # lies relative to bin_centers[binBias+1].
    pright = sum(a[binBias+2:end]) + 
    a[binBias]*((bin_centers[binBias+1] - bias)/dx/2) +
    a[binBias+1]*(0.5 + (bin_centers[binBias+1] - bias)/dx/2)
    
    return log(pright)
end

# Log likelihood of the observed choice on one trial.
# rat_choice > 0 means Right and rat_choice < 0 means Left; the Left probability is
# obtained as 1 - P(Right). A rat_choice of 0 is rejected before any model
# evaluation takes place.
function logLike(params::Vector, RightClickTimes::Vector, LeftClickTimes::Vector, Nsteps::Int, rat_choice::Int)
    rat_choice == 0 && error("Rat did what?? It was neither R nor L")

    log_pright = logProbRight(params, RightClickTimes, LeftClickTimes, Nsteps)
    return rat_choice > 0 ? log_pright : log(1 - exp(log_pright))
end

logLike (generic function with 1 method)

## single_trial
### (params::Vector, RightClickTimes::Vector, LeftClickTimes::Vector, Nsteps::Int, rat_choice::Int)

In [12]:
"""
    single_trial(params::Vector, RightClickTimes::Vector, LeftClickTimes::Vector, Nsteps::Int, rat_choice::Int)

Compute the log likelihood of one trial under the model (via `logLike`) together
with its gradient with respect to `params`, and return them as `(LL, LLgrad)`.

`params` is a vector whose elements, in order, are

    sigma_a    accumulator noise per unit time
    sigma_s    noise introduced with each click (gets scaled by click adaptation)
    sigma_i    noise of the initial accumulator value
    lambda     1/accumulator time constant. Positive means unstable, neg means stable
    B          sticky bound height
    bias       where the decision boundary lies
    phi        click adaptation/facilitation multiplication parameter
    tau_phi    time constant for recovery from click adaptation
    lapse      parameter of the lapse (random response) rate

NOTE(review): earlier notes describe the sigma parameters as standard deviations,
but logProbRight hands `sigma_a*dt` and `sigma_i` to Fmatrix as variances --
confirm which is meant.

`rat_choice` is an integer: positive for a Right choice, negative for Left.
"""
function single_trial(params::Vector, RightClickTimes::Vector, LeftClickTimes::Vector, Nsteps::Int, rat_choice::Int)
    # Objective in the parameters only; the trial data is captured from the arguments.
    trial_loglike(p::Vector) =
        logLike(p, RightClickTimes, LeftClickTimes, Nsteps, rat_choice)

    # ForwardDiff v0.2 style: gradient! fills a result object from which both the
    # value and the gradient are read (v0.1 used the AllResults argument instead).
    out = GradientResult(params)
    ForwardDiff.gradient!(out, trial_loglike, params)

    return ForwardDiff.value(out), ForwardDiff.gradient(out)
end

single_trial (generic function with 1 method)

In [26]:
### =============== testing 1 ================= ####
# Evaluate (and time) the log likelihood of the first trial at the initial parameters.

# Parameters
sigma_a = 1; sigma_s = 0.1; sigma_i = 0.2; 
# NOTE(review): Fmatrix as defined in this file does not read sigma_a_sbin --
# confirm whether this copy is still needed.
sigma_a_sbin = sigma_a  # remember we need this copy for Fmatrix
lam = -0.0005; B = 4.1; bias = 0.1; 
phi = 0.3; tau_phi = 0.1; lapse = 0.05*2;
params = [sigma_a, sigma_s, sigma_i, lam, B, bias, phi, tau_phi, lapse]   

RightClickTimes, LeftClickTimes, maxT, rat_choice = trialdata(ratdata, 1)

# Number of timesteps of length dt needed to cover maxT (rounded up).
Nsteps = Int(cld(maxT,dt))

@time logLike(params, RightClickTimes, LeftClickTimes, Nsteps, rat_choice)

### =========================================== #### 

  0.001764 seconds (1.48 k allocations: 1.280 MB)


-2.5352539404380363

In [27]:
### =============== testing 2 ================= ####
@time LL, LLgrad = single_trial(params, RightClickTimes, LeftClickTimes, Nsteps, rat_choice)
println(LL)
println(LLgrad)
### =========================================== ####

  0.005047 seconds (1.65 k allocations: 6.780 MB)
-2.535253940438007
[0.34088848042838044,0.08542855014353426,1.0003691248769693,0.14049989125836537,-2.657734320372509e-13,0.9822273525908097,-1.2967840231752559,10.17539451295827,5.899797264909414]


In [15]:
LL

-2.535253940438007

In [16]:
LLgrad

9-element Array{Float64,1}:
  0.340888   
  0.0854286  
  1.00037    
  0.1405     
 -2.65773e-13
  0.982227   
 -1.29678    
 10.1754     
  5.8998     

In [17]:
#Pkg.update()

# Maximize LL over parameter space
### Optimization with Optim.jl


In [18]:
# Pkg.add("Optim")

# import Optim
# using Optim

In [19]:
# Negative log likelihood summed over trials 1:ntrials (value only, no gradient).
# For every trial the click times, duration and choice are read with trialdata,
# the duration is converted to a number of timesteps of length dt (rounded up),
# and the per-trial log likelihood from logLike is accumulated.
# The sign is flipped on return so that the result can be minimised.
function SumLikey_LL(params::Vector, ratdata, ntrials::Int)
    # Start from a floating-point zero (as SumLikey does) so that the accumulator
    # does not change type from Int to Float64 on the first addition.
    LL = 0.0

    for i in 1:ntrials
        RightClickTimes, LeftClickTimes, maxT, rat_choice = trialdata(ratdata, i)
        Nsteps = Int(ceil(maxT/dt))

        LL += logLike(params, RightClickTimes, LeftClickTimes, Nsteps, rat_choice)
    end

    return -LL
end

# Negative log likelihood and its gradient, summed over trials 1:ntrials.
# Each trial is evaluated with single_trial; both sums are negated on return so
# that the pair can be used for minimisation.
function SumLikey(params::Vector, ratdata, ntrials::Int)
    total_ll   = 0.0
    total_grad = zeros(size(params))

    for trial in 1:ntrials
        # Progress report every 1000 trials.
        if trial % 1000 == 0
            println("     sum_ll_all_trials: running trial ", trial, "/", ntrials);
        end

        rtimes, ltimes, duration, choice = trialdata(ratdata, trial)
        nsteps = Int(ceil(duration/dt))

        ll, grad = single_trial(params, rtimes, ltimes, nsteps, choice)
        total_ll   += ll
        total_grad += grad
    end

    return -total_ll, -total_grad
end

SumLikey (generic function with 1 method)

In [28]:
@time SumLikey_LL(params, ratdata, 27) # sum of LL for trial (1-27)

  0.063710 seconds (53.47 k allocations: 47.177 MB, 13.32% gc time)


20.272306105945443

In [21]:
@profile SumLikey_LL(params, ratdata, 27) # sum of LL for trial (1-27)
Profile.print()
Profile.clear_malloc_data() 

40 task.jl; anonymous; line: 447
 40 ...4/IJulia/src/IJulia.jl; eventloop; line: 143
  40 ...rc/execute_request.jl; execute_request_0x535c5df2; line: 183
   40 loading.jl; include_string; line: 282
    1  ...a/lib/julia/sys.dylib; typeinf_ext; (unknown line)
     1 ...a/lib/julia/sys.dylib; typeinf; (unknown line)
      1 ...a/lib/julia/sys.dylib; typeinf_uncached; (unknown line)
       1 ...a/lib/julia/sys.dylib; abstract_eval; (unknown line)
        1 .../lib/julia/sys.dylib; abstract_eval_call; (unknown line)
         1 .../lib/julia/sys.dylib; abstract_call; (unknown line)
          1 ...lib/julia/sys.dylib; abstract_call_gf; (unknown line)
           1 ...lib/julia/sys.dylib; typeinf; (unknown line)
            1 ...lib/julia/sys.dylib; typeinf; (unknown line)
             1 ...ib/julia/sys.dylib; typeinf_uncached; (unknown line)
              1 ...ib/julia/sys.dylib; abstract_eval; (unknown line)
               1 ...b/julia/sys.dylib; abstract_eval_call; (unknown line)
          

In [29]:
@time SumLikey(params, ratdata, 27)

  0.277136 seconds (57.96 k allocations: 249.842 MB, 14.01% gc time)


(20.27230610594542,[-0.885392369843986,-0.24144883173087658,-3.5481816355968476,-0.3207850120167325,-7.862717342247302e-12,-0.24207108230608632,3.7760633551840628,-29.40274364292842,-29.753795098927267])

In [23]:
# Starting parameter values, in the order
# [sigma_a, sigma_s, sigma_i, lam, B, bias, phi, tau_phi, lapse].
sigma_a = 1; sigma_s = 0.1; sigma_i = 0.2
sigma_a_sbin = sigma_a
lam = -0.0005; B = 4.1; bias = 0.1
phi = 0.3; tau_phi = 0.1; lapse = 0.05*2
params = [sigma_a, sigma_s, sigma_i, lam, B, bias, phi, tau_phi, lapse]

# Number of trials included in the objective.
ntrials = 27

# Objective value only: summed negative log likelihood over the first ntrials trials.
LL_f(params::Vector) = SumLikey_LL(params, ratdata, ntrials)

# Gradient only: writes the gradient of the objective into grads.
function LL_g!(params::Vector, grads::Vector)
    _, LLgrad = SumLikey(params, ratdata, ntrials)
    for (i, g) in enumerate(LLgrad)
        grads[i] = g
    end
end

# Value and gradient together: fills grads and returns the objective value.
function LL_fg!(params::Vector, grads)
    LL, LLgrad = SumLikey(params, ratdata, ntrials)
    for (i, g) in enumerate(LLgrad)
        grads[i] = g
    end
    return LL
end

# Bundle the three callbacks for Optim.
d4 = DifferentiableFunction(LL_f, LL_g!, LL_fg!)

Optim.DifferentiableFunction(LL_f,LL_g!,LL_fg!)

In [None]:
# Parameters
# Starting point of the fit (note B = 6.1 here).
sigma_a = 1; sigma_s = 0.1; sigma_i = 0.2; 
sigma_a_sbin = sigma_a  # remember we need this copy for Fmatrix
lam = -0.0005; B = 6.1; bias = 0.1; 
phi = 0.3; tau_phi = 0.1; lapse = 0.05*2;
params = [sigma_a, sigma_s, sigma_i, lam, B, bias, phi, tau_phi, lapse]

# Lower (l) and upper (u) bounds, one entry per parameter, in the same order as params.
l = [0, 0, 0, -5, 5, -5, 0.01, 0.005, 0]
u = [200, 200, 30, +5, 25, +5, 1.2, 0.7, 1]

# Minimise the objective d4 (defined in an earlier cell) within the box [l, u],
# using Fminbox with GradientDescent as the optimizer and keeping/printing the trace.
res = optimize(d4, params, l, u, Fminbox(); 
         optimizer = GradientDescent, optimizer_o = OptimizationOptions(g_tol = 1e-12,
                                                                        iterations = 200,
                                                                        store_trace = true,
                                                                        show_trace = true))


Iter     Function value   Gradient norm 
     0     2.025301e+01     2.976706e+01
     1     1.652559e+01     1.260444e+00
     2     1.642090e+01     5.862863e-01
     3     1.637982e+01     7.307871e-01
     4     1.635463e+01     3.754367e-01
     5     1.629537e+01     1.003648e+00
     6     1.627173e+01     7.019019e-01
     7     1.624856e+01     5.730056e-01
     8     1.622227e+01     1.129285e+00
     9     1.617938e+01     7.758643e-01
    10     1.614641e+01     8.644536e-01
    11     1.611302e+01     1.040060e+00
    12     1.607326e+01     7.636983e-01
    13     1.602708e+01     1.233374e+00
    14     1.598473e+01     5.175395e-01
    15     1.587222e+01     4.762540e-01
    16     1.586623e+01     4.584652e-01
    17     1.585988e+01     3.678935e-01
    18     1.585338e+01     4.506842e-01
    19     1.584739e+01     2.637441e-01
    20     1.583898e+01     5.578129e-01
    21     1.583071e+01     1.858550e-01
    22     1.582590e+01     3.307266e-01
    23     1.582

In [None]:
"""
    main()

Load the raw data file of rat "B069", build the summed negative log likelihood
over all of its trials together with its gradient, and minimise it within fixed
parameter bounds using `Fminbox` with `GradientDescent`.

Returns the result of `optimize`.
"""
function main()
    ratname = "B069"

    # data import
    ratdata = matread(string("chrono_", ratname, "_rawdata.mat"))
    println("rawdata of ", ratname, " imported" )

    # number of trials
    ntrials = Int(ratdata["total_trials"])

    # Starting parameters, in the order
    # [sigma_a, sigma_s, sigma_i, lam, B, bias, phi, tau_phi, lapse]
    sigma_a = 1.; sigma_s = 0.1; sigma_i = 0.2
    lam = -0.0005; B = 6.1; bias = 0.1
    phi = 0.3; tau_phi = 0.1; lapse = 0.05*2
    params = [sigma_a, sigma_s, sigma_i, lam, B, bias, phi, tau_phi, lapse]

    # Lower and upper bounds, one entry per parameter.
    l = [0., 0., 0., -5., 5., -5., 0.01, 0.005, 0.]
    u = [200., 200., 30., 5., 25., 5., 1.2, 0.7, 1.]

    # Objective value only.
    LL_f(params::Vector) = SumLikey_LL(params, ratdata, ntrials)

    # Value and gradient together: fills grads and returns the objective value.
    function LL_fg!(params::Vector, grads)
        LL, LLgrad = SumLikey(params, ratdata, ntrials)
        for i in 1:length(params)
            grads[i] = LLgrad[i]
        end
        return LL
    end

    # Gradient only: same computation, with the value discarded.
    function LL_g!(params::Vector, grads::Vector)
        LL_fg!(params, grads)
        return nothing
    end

    d4 = DifferentiableFunction(LL_f, LL_g!, LL_fg!)

    res = optimize(d4, params, l, u, Fminbox();
             optimizer = GradientDescent,
             optimizer_o = OptimizationOptions(g_tol = 1e-12,
                                               iterations = 200,
                                               store_trace = true,
                                               show_trace = true))
    return res
end

In [None]:
main()

In [None]:
y = d4.f(params)

In [None]:
grads = Array(Float64, length(params))
grads
# LL_g!(params, grads)
d4.g!(params, grads)
grads

In [None]:
grads = Array(Float64, length(params))
d4.g!(params, grads)
grads

In [None]:
y = d4.fg!(params, grads)

In [None]:
optimize(d4, params, GradientDescent(),
               OptimizationOptions(g_tol = 1e-12,
                                   iterations = 200,
                                   store_trace = true,
                                   show_trace = true,
                                   extended_trace = true))