# PBupsModel with Algorithmic Differentiation in Julia

In [29]:
# import packages..
import ForwardDiff
using ForwardDiff
using DiffBase
using PyPlot
import Base.convert
import Optim
using Optim

# === Upgrading from ForwardDiff v0.1 to v0.2
# instead of ForwardDiff.GradientNumber and ForwardDiff.HessianNumber, 
# we will use ForwardDiff.Dual

# Collapse a ForwardDiff dual number to a plain Float64 by keeping only its
# `value` field; the derivative part carried by the dual is not used.
function convert(::Type{Float64}, x::ForwardDiff.Dual)
    return Float64(x.value)
end
# Convert an array of ForwardDiff dual numbers to a Float64 array of the same
# size by converting each element (derivative parts are not kept).
#
# The target is given as a *type* (`::Type{Array{Float64}}`), matching the
# scalar method above, so that `convert(Array{Float64}, duals)` dispatches
# here. The element type is a type parameter because `Array{Dual}` does not
# match arrays of a concrete `Dual{...}` element type.
function convert{D<:ForwardDiff.Dual}(::Type{Array{Float64}}, x::Array{D})
    y = zeros(size(x))
    for i in eachindex(x)
        y[i] = convert(Float64, x[i])
    end
    return y
end


    write(Base.IO, ForwardDiff.Partials) at /Users/msyoon/.julia/v0.4/ForwardDiff/src/partials.jl:57
is ambiguous with: 
    write(Base.Base64.Base64EncodePipe, AbstractArray{UInt8, 1}) at base64.jl:89.
To fix, define 
    write(Base.Base64.Base64EncodePipe, ForwardDiff.Partials{N<:Any, UInt8})
before the new definition.


convert (generic function with 647 methods)

In [20]:
# A pair of values ordered lexicographically: `x` is the primary key and `y`
# breaks ties between equal `x`. Declared as a subtype of `Number`.
immutable NumericPair{X,Y} <: Number
    x::X
    y::Y
end

# Strict "less than" for NumericPair: compare on `x` first, and fall back to
# `y` only when both `x` fields are equal.
function Base.isless(a::NumericPair, b::NumericPair)
    if a.x < b.x
        return true
    end
    return a.x == b.x && a.y < b.y
end

isless (generic function with 32 methods)

To evaluate how well a particular set of parameter values $\theta$ fits the behavioral data, we compute the probability of observing the data given the model.

For each trial $i$, we will compute the likelihood of seeing the data under the model assuming that trials are independent. 

$P(D|\theta) = \prod_{i}P(d_i|t_{i,R},t_{i,L},\theta)$

$t_{i,R},t_{i,L}$ : the right and left click times on trial $i$

$d_i$ : the subject's decision on trial $i$

The best-fit parameter values are the parameters $\theta$ that maximize the likelihood (Maximum likelihood values)

To help maximize the likelihood(or log likelihood), we will compute the derivative $\partial P(d_i|t_{i,R},t_{i,L},\theta) / \partial\theta$ for each of the parameters in the set $\theta$.

After we get these gradients of 9 model parameters, we will apply them for optimization.

## Import data 

In [1]:
using MAT
# ratdata = matread("testdata.mat")
ratdata2 = matread("chrono_B069_rawdata.mat")
# ratdata2 = matread("ai3space_nolsr_rawdata.mat")



Dict{ASCIIString,Any} with 5 entries:
  "ratname"      => "B069"
  "daterange"    => 1x2 Array{Any,2}:…
  "rawdata"      => Dict{ASCIIString,Any}("is_probe"=>1x64537 Array{Any,2}:…
  "avgdata"      => Dict{ASCIIString,Any}("is_probe"=>1x64537 Array{Bool,2}:…
  "total_trials" => 64537.0

In [2]:
ratdata = ratdata2

Dict{ASCIIString,Any} with 5 entries:
  "ratname"      => "B069"
  "daterange"    => 1x2 Array{Any,2}:…
  "rawdata"      => Dict{ASCIIString,Any}("is_probe"=>1x64537 Array{Any,2}:…
  "avgdata"      => Dict{ASCIIString,Any}("is_probe"=>1x64537 Array{Bool,2}:…
  "total_trials" => 64537.0

In [3]:
Float64(ratdata2["rawdata"]["leftbups"][3])



0.0

In [4]:
ratdata["rawdata"]["leftbups"][1]
# NOTE(review): when the cells are run top to bottom, `maxT` and `dt` have not
# been assigned yet at this point (they are set in later cells), which is why
# this line raises the UndefVarError shown below.
Nsteps = Int(cld(maxT,dt))


LoadError: UndefVarError: maxT not defined

In [5]:
# Pull one trial out of the `rawdata` dictionary.
#
# Returns the tuple (right click times, left click times, T, choice) where the
# click times are Float64 vectors, T is `rawdata["T"][trial]` (asserted to be
# a Float64) and choice is 1 when `rawdata["pokedR"][trial] > 0` ("R") and -1
# otherwise ("L").
function trialdata(rawdata, trial::Int)
    # A click train is stored either as an array or, when it holds a single
    # pulse, as a bare scalar; both are normalised to a Float64 vector.
    function clickvector(entry)
        if isa(entry, Array)
            return vec(entry)::Array{Float64,1}
        end
        return Float64[entry]
    end

    rat_choice = rawdata["pokedR"][trial] > 0 ? 1 : -1

    rvec = clickvector(rawdata["rightbups"][trial])
    lvec = clickvector(rawdata["leftbups"][trial])
    duration = rawdata["T"][trial]::Float64

    return rvec, lvec, duration, rat_choice
end

RightClickTimes, LeftClickTimes, maxT, rat_choice = trialdata(ratdata["rawdata"], 1)

([0.0,0.001135000000000004,0.011165000000000001,0.01892,0.10638,0.20342,0.32964499999999997,0.33775999999999995],[0.0,0.19235,0.34360999999999997],0.35095899999999747,-1)

In [6]:
@time RightClickTimes, LeftClickTimes, maxT, rat_choice = trialdata(ratdata["rawdata"], 1)

  0.000061 seconds (165 allocations: 42.448 KB)


([0.0,0.001135000000000004,0.011165000000000001,0.01892,0.10638,0.20342,0.32964499999999997,0.33775999999999995],[0.0,0.19235,0.34360999999999997],0.35095899999999747,-1)

In [7]:
RightClickTimes

8-element Array{Float64,1}:
 0.0     
 0.001135
 0.011165
 0.01892 
 0.10638 
 0.20342 
 0.329645
 0.33776 

## bin_centers = make_bins(B, dx, binN)

In [8]:
"""
function bin_centers = make_bins(B, dx, binN)

Makes a series of points that will indicate bin centers. The first and
last points will indicate sticky bins. No "bin edges" are made-- the edge
between two bins is always implicity at the halfway point between their
corresponding centers. The center bin is always at x=0; bin spacing
(except for last and first bins) is always dx; and the position
of the first and last bins is chosen so that |B| lies exactly at the
midpoint between 1st (sticky) and 2nd (first real) bins, as well as
exactly at the midpoint between last but one (last real) and last
(sticky) bins.

Playing nice with ForwardDiff means that the *number* of bins must be predetermined.
So this function will not actually set the number of bins; what it'll do is determine their
locations. To accomplish this separation, the function uses as a third parameter binN,
which should be equal to the number of bins with bin centers > 0, as follows: 
   binN = ceil(B/dx)
and then the total number of bins will be 2*binN+1, with the center one always corresponding
to position zero. Use non-differentiable types for B and dx for this to work.
"""
function make_bins{T}(bins::Vector{T}, B, dx::T, binN)
    cnt = 1
    for i=-binN:binN
        bins[cnt] = i*dx
        cnt = cnt+1
    end
    
    if binN*dx == B
        bins[end] = B + dx
        bins[1] = -B - dx
    else
        bins[end] = 2*B - (binN-1)*dx
        bins[1] = -2*B + (binN-1)*dx
    end
end;

In [9]:
binN = ceil(4.1/0.25)
bins = zeros(typeof(binN), Int(binN*2+1))
@time make_bins(bins,4.1,0.25,binN)
bins
bin_centers = bins
println(binN," ",length(bin_centers))

  0.039068 seconds (57.89 k allocations: 2.331 MB)
17.0 35


In [10]:
binN = ceil(8.1/0.25)
bins = zeros(typeof(binN), Int(binN*2+1))
@time make_bins(bins,8.1,0.25,binN)
bins
bin_centers = bins


  0.000008 seconds (5 allocations: 176 bytes)


67-element Array{Float64,1}:
 -8.2 
 -8.0 
 -7.75
 -7.5 
 -7.25
 -7.0 
 -6.75
 -6.5 
 -6.25
 -6.0 
 -5.75
 -5.5 
 -5.25
  ⋮   
  5.5 
  5.75
  6.0 
  6.25
  6.5 
  6.75
  7.0 
  7.25
  7.5 
  7.75
  8.0 
  8.2 

## Global Variables

In [11]:
# Global constants read by make_bins callers, Fmatrix and logProbRight.
# Small tolerance used in threshold comparisons (e.g. `abs(lam) < epsilon`).
const epsilon = 10.0^(-10);
# Spacing between neighbouring regular accumulator bin centers.
const dx = 0.25;
# Length of one simulation time step; click times are divided by it to find
# the step a click falls in.
const dt = 0.02;
# Divisor applied to the per-click noise term in logProbRight
# (`sigma_s*c_eff_tot/total_rate`).
# NOTE(review): the trailing "#5" looks like a previously used value -- confirm.
const total_rate = 40; #5

40

## Parameters

a : decision variable, memory accumulator

$$ da =
  \begin{cases}
    0       & \quad \text{if, } |a| \geq B \\
    \sigma_adW + (\delta_{t,t_R} \cdot \eta C(t) - \delta_{t,t_L} \cdot \eta C(t))dt + \lambda adt  & \quad \text{otherwise, }\\
  \end{cases}
$$



The impact of each click (C) is affected by sensory adaptation that depends on clicks from both right and left sides:

$$ 
\frac{\mathrm d C}{\mathrm d t} = \frac{1-C}{\tau_\phi} + (\phi-1)C(\delta_{t,t_R}+\delta_{t,t_L}) 
$$


sigma2_a ($\sigma_a^2$) : a diffusion constant, parameterizing noise in a.

sigma2_s ($\sigma_s^2$) : parameterizing noise when adding evidence from a right or left pulse. (incoming sensory evidence)

sigma2_i ($\sigma_i^2$) : initial condition for the dynamical equation at $t=0$

lam ($\lambda$) : consistent drift in the memory a ($\lambda<0$ : leaky or forgetful case, $\lambda>0$ : unstable or impulsive case)

B : decision bound

bias : the position of the decision threshold in a; values of a above this threshold lead to a Rightward decision

phi ($\phi$) : parameterizes sensory adaptation by defining the dynamics of C ($\phi>1$ : facilitation, $\phi<1$ : depression, $\phi=1$ : absence of sensory adaptation)

tau_phi ($\tau_\phi$) : time constant for recovery from click adaptation (sec)

lapse : The lapse rate parameterizes the probability of making a random response.


In [12]:
# Example parameter set, packed in the order logProbRight unpacks it.
sigma_a = 1; sigma_s = 0.1; sigma_i = 0.2; 
lam = -0.0005; B = 4.1; bias = 0.1; 
phi = 0.3; tau_phi = 0.1; lapse = 0.05*2;
params = [sigma_a, sigma_s, sigma_i, lam, B, bias, phi, tau_phi, lapse]   

# NOTE(review): these three lines read the first three entries of the
# 9-element vector above, so `lam` is overwritten with params[2] (sigma_s) and
# `c` becomes params[3] (sigma_i). They look like scratch values for trying
# Fmatrix's 3-element [sigma, lambda, c] layout -- confirm before relying on
# the global `lam` after this cell.
sigma = params[1];
lam   = params[2];
c     = params[3];

## F = Fmatrix([sigma, lambda, c], bin_centers)

In [13]:
"""
function F = Fmatrix([sigma, lambda, c], bin_centers)

Uses globals
    dt
    dx
    epsilon       (=10.0^-10)

Returns a square Markov matrix of transition probabilities. 
Plays nice with ForwardDiff-- that is why bin_centers is a global vector (so that the rem
operations that go into defining the bins, which ForwardDiff doesn't know how to deal with,
stay outside of this differentiable function)

sigma  should be in (accumulator units) per (second^(1/2))
lambda should be in s^-1
c      should be in accumulator units per second
bin_centers should be a vector of the centers of all the bins. Edges will be at midpoints
       between the centers, and the first and last bin will be sticky.

dx is not used inside Fmatrix, because bin_centers specifies all we need to know.
dt *is* used inside Fmatrix, to convert sigma, lambda, and c into timestep units
"""
function Fmatrix{T}(F::AbstractArray{T,2},params::Vector, bin_centers)
    sigma2 = params[1];
    lam   = params[2];
    c     = params[3];

    sigma2_sbin = convert(Float64, sigma2)

    if dx > epsilon && sigma2_sbin >= epsilon
        n_sbins = max(70, ceil(10*sqrt(sigma2_sbin)/dx))
    else
        n_sbins = 70
    end
        
    F[1,1] = 1;
    F[end,end] = 1;

    if sigma2_sbin <= 0
        sbin_length = 1;
        base_sbins = 0;
        ps = 0;
    else
        swidth = 5*sqrt(sigma2_sbin)
        sbinsize = swidth/n_sbins;#sbins[2] - sbins[1]
        base_sbins    = collect(-swidth:sbinsize:swidth)

        ps       = exp(-base_sbins.^2/(2*sigma2))
        ps       = ps/sum(ps);

        sbin_length = length(base_sbins)
    end    
    
    binN = length(bin_centers)

    mu = 0.
    for j in 2:binN-1
        if abs(lam) < epsilon 
            mu = bin_centers[j] + c*dt#(exp(lam*dt))
        else
            mu = (bin_centers[j] + c/lam)*exp(lam*dt) - c/lam
        end

        for k in 1:sbin_length
            sbin = mu + base_sbins[k]#(k-1)*sbinsize + mu - swidth

            if sbin <= bin_centers[1] #(bin_centers[1] + bin_centers[2])/2
                F[1,j] = F[1,j] + ps[k]
            elseif bin_centers[end] <= sbin#(bin_centers[end]+bin_centers[end-1])/2 <= sbins[k]
                F[end,j] = F[end,j] + ps[k]
            else # more condition
                if (sbin > bin_centers[1] && sbin < bin_centers[2])
                    lp = 1; hp = 2;
                elseif (sbin > bin_centers[end-1] && sbin < bin_centers[end])
                    lp = binN-1; hp = binN;
                else
                    lp = floor(Int,((sbin-bin_centers[2])/dx)) + 2#find(bin_centers .<= sbins[k])[end]
                    hp = ceil(Int,((sbin-bin_centers[2])/dx)) + 2#lp+1#Int(ceil((sbins[k]-bin_centers[2])/dx) + 1);
                end

                # if lp < 1
                #     lp = 1;
                # end
                # if hp > binN-1
                #     hp = binN-1;
                # end

                if lp == hp
                    F[lp,j] = F[lp,j] + ps[k]
                else
                    F[hp,j] = F[hp,j] + ps[k]*(sbin - bin_centers[lp])/(bin_centers[hp] - bin_centers[lp])
                    F[lp,j] = F[lp,j] + ps[k]*(bin_centers[hp] - sbin)/(bin_centers[hp] - bin_centers[lp])
                end
            end
        end
    end
    # F[:,1] = 0; F[:,end] = 0; F[1,1] = 1; F[end,end] = 1;
end

Fmatrix (generic function with 1 method)

In [14]:
F = zeros(typeof(0.2),length(bin_centers),length(bin_centers))
@time Fmatrix(F,[0.2, 0, 0.0],bin_centers) # Fi
F

  0.366498 seconds (191.80 k allocations: 6.324 MB)


67x67 Array{Float64,2}:
 1.0  0.41182      0.218907     0.0917964    …  0.0          0.0          0.0
 0.0  0.197142     0.172131     0.112014        0.0          0.0          0.0
 0.0  0.187228     0.217923     0.187228        0.0          0.0          0.0
 0.0  0.119939     0.187228     0.217923        0.0          0.0          0.0
 0.0  0.0571377    0.119939     0.187228        0.0          0.0          0.0
 0.0  0.0202246    0.0571377    0.119939     …  0.0          0.0          0.0
 0.0  0.00531176   0.0202246    0.0571377       0.0          0.0          0.0
 0.0  0.00103206   0.00531176   0.0202246       0.0          0.0          0.0
 0.0  0.000148294  0.00103206   0.00531176      0.0          0.0          0.0
 0.0  1.59548e-5   0.000148294  0.00103206      0.0          0.0          0.0
 0.0  1.11105e-6   1.59548e-5   0.000148294  …  0.0          0.0          0.0
 0.0  0.0          1.11105e-6   1.59548e-5      0.0          0.0          0.0
 0.0  0.0          0.0          1.11105e

In [15]:
F = zeros(typeof(params[1]),length(bin_centers),length(bin_centers))
@time Fmatrix(F,params,bin_centers)
F

  0.004142 seconds (78.30 k allocations: 1.202 MB)


67x67 Array{Float64,2}:
 1.0  0.464534    0.368213    0.27785    …  0.0         0.0         0.0
 0.0  0.0901428   0.0863713   0.0807973     0.0         0.0         0.0
 0.0  0.0952597   0.0999224   0.0957109     0.0         0.0         0.0
 0.0  0.0877379   0.0952181   0.0999795     0.0         0.0         0.0
 0.0  0.0739267   0.0878095   0.0951766     0.0         0.0         0.0
 0.0  0.0601908   0.0739127   0.0878812  …  0.0         0.0         0.0
 0.0  0.0447953   0.0602548   0.0738987     0.0         0.0         0.0
 0.0  0.0322435   0.0447976   0.0603187     0.0         0.0         0.0
 0.0  0.0211932   0.0322857   0.0448        0.0         0.0         0.0
 0.0  0.0134871   0.0211994   0.032328      0.0         0.0         0.0
 0.0  0.00782868  0.0135081   0.0212055  …  0.0         0.0         0.0
 0.0  0.00440511  0.00783279  0.0135292     0.0         0.0         0.0
 0.0  0.00225787  0.00441308  0.0078369     0.0         0.0         0.0
 ⋮                                      

## logProbRight 
### (params::Vector, RightClickTimes::Vector, LeftClickTimes::Vector, Nsteps::Int)

* params = [sigma_a, sigma_s, sigma_i, lambda, B, bias, phi, tau_phi, lapse]
* RightClickTimes vector with elements indicating times of right clicks
* LeftClickTimes vector with elements indicating times of left clicks
* Nsteps number of timesteps to simulate 

a (column vector representing distribution of values of accumulator a)

a_trace (length(bin_centers)-by-Nsteps+1), a trace of the distribution of a as 
    a function of time
    
c_trace (row vector Nsteps+1 long, effective value of c as 
    a function of time after adaptation)


In [16]:
"""
version with inter-click interval(ici) for c_eff_net / c_eff_tot (followed the matlab code) 
(which was using dt for c_eff)

function logProbRight(params::Vector, RightClickTimes::Vector, LeftClickTimes::Vector, Nsteps::Int)

    Nsteps            number of timesteps to simulate
    RightClickTimes   vector with elements indicating times of right clicks
    LeftClickTimes    vector with elements indicating times of left clicks

    a      (column vector representing distribution of values of accumulator a)

    a_trace (length(bin_centers)-by-Nsteps+1), a trace of the distribution of a as 
            a function of time
    c_trace (row vector Nsteps+1 long, effective value of c as 
            a function of time after adaptation)

Takes params
    sigma_a = params[1]; sigma_s = params[2]; sigma_i = params[3]; 
    lambda = params[4]; B = params[5]; bias = params[6]; 
    phi = params[7]; tau_phi = params[8]; lapse = params[9]

Returns the log of the probability that the agent chose Right. 
"""

function logProbRight(params::Vector, RightClickTimes::Vector, LeftClickTimes::Vector, Nsteps::Int)
    sigma_a = params[1]; sigma_s = params[2]; sigma_i = params[3];
    lambda = params[4]; B = params[5]; bias = params[6];
    phi = params[7]; tau_phi = params[8]; lapse = params[9]

    if isempty(RightClickTimes) RightClickTimes = zeros(0) end;
    if isempty(LeftClickTimes ) LeftClickTimes  = zeros(0) end;

    NClicks = zeros(Int, Nsteps);
    Lhere  = zeros(Int, length(LeftClickTimes));
    Rhere = zeros(Int, length(RightClickTimes));

    for i in 1:length(LeftClickTimes)
        Lhere[i] = ceil((LeftClickTimes[i]+epsilon)/dt)
    end
    for i in 1:length(RightClickTimes)
        Rhere[i] = ceil((RightClickTimes[i]+epsilon)/dt)
    end

    for i in Lhere
        NClicks[Int(i)] = NClicks[Int(i)]  + 1
    end
    for i in Rhere
        NClicks[Int(i)] = NClicks[Int(i)]  + 1
    end

    # === Upgrading from ForwardDiff v0.1 to v0.2
    # instead of using convert we can use floor(Int, ForwardDiff.Dual) and
    # ceil(Int, ForwardDiff.Dual)

    binN = ceil(Int, B/dx)#Int(ceil(my_B/dx))
    binBias = floor(Int, bias/dx) + binN+1
    binBias_hp = ceil(Int, bias/dx) + binN+1

    if binBias<1 binBias = 1; end
    if binBias>binN*2+1 binBias = binN*2+1; end

    if binBias_hp<1 binBias_hp = 1; end
    if binBias_hp>binN*2+1 binBias_hp = binN*2+1; end
    
    bin_centers = zeros(typeof(dx), binN*2+1)
    make_bins(bin_centers, B, dx, binN)

    a0 = zeros(typeof(sigma_a),length(bin_centers))
    a0[binN+1] = 1-lapse; a0[1] = lapse/2; a0[end] = lapse/2;

    temp_l = [NumericPair(LeftClickTimes[i],-1) for i=1:length(LeftClickTimes)]
    temp_r = [NumericPair(RightClickTimes[i],1) for i=1:length(RightClickTimes)]
    allbups = sort!([temp_l; temp_r])
    
    if phi == 1
      c_eff = 1.
    else
      c_eff = 0.
    end
    
    cnt = 0

    Fi = zeros(typeof(sigma_i),length(bin_centers),length(bin_centers))
    Fmatrix(Fi,[sigma_i, 0, 0.0], bin_centers)

    a = Fi*a0;

    F0 = zeros(typeof(sigma_a),length(bin_centers),length(bin_centers))
    Fmatrix(F0,[sigma_a*dt, lambda, 0.0], bin_centers)
    for i in 2:Nsteps
        c_eff_tot = 0.
        c_eff_net = 0.
        if NClicks[i-1]==0
            c_eff_tot = 0.
            c_eff_net = 0.
            a = F0*a
        else
            for j in 1:NClicks[i-1]
                if cnt != 0 || j != 1
                    ici = allbups[cnt+j].x - allbups[cnt+j-1].x
                    c_eff = 1 + (c_eff*phi - 1)*exp(-ici/tau_phi)
                    c_eff_tot = c_eff_tot + c_eff
                    c_eff_net = c_eff_net + c_eff*allbups[cnt+j].y
                elseif cnt==0 && j==1
                    ici = 0.
                    c_eff = 1 + (c_eff*phi - 1)*exp(-ici/tau_phi)# bug fixed.   <--- this depends on first 0. 0.
                    # c_eff = 1 #(when there is no first 0.  )
                    c_eff_tot = c_eff_tot + c_eff
                    c_eff_net = c_eff_net + c_eff*allbups[cnt+j].y
                end
                if j == NClicks[i-1]
                    cnt = cnt+j
                end
            end

            net_sigma = sigma_a*dt + (sigma_s*c_eff_tot)/total_rate
            F = zeros(typeof(net_sigma),length(bin_centers),length(bin_centers))
            Fmatrix(F,[net_sigma, lambda, c_eff_net/dt], bin_centers)
            a = F*a
        end
    end

    if binBias == binBias_hp
      pright = sum(a[binBias+1:end])+a[binBias]/2
    else
      pright = sum(a[binBias+2:end]) +
      a[binBias]*((bin_centers[binBias+1] - bias)/dx/2) +
      a[binBias+1]*(0.5 + (bin_centers[binBias+1] - bias)/dx/2)
    end

    if pright-1 < epsilon && pright > 1
        pright = 1
    end
    if pright < epsilon && pright > 0 
        pright = 0
    end

    
    return log(pright)
end


# Log-likelihood of the observed choice on one trial.
#
# `rat_choice > 0` means the rat chose Right and the result is the log
# probability of Right; `rat_choice < 0` means Left and the result is
# log(1 - P(Right)). A `rat_choice` of 0 raises an error.
function logLike(params::Vector, RightClickTimes::Vector, LeftClickTimes::Vector, Nsteps::Int, rat_choice::Int)
    # Reject an undefined choice before doing any work.
    rat_choice == 0 && error("Rat did what?? It was neither R nor L")

    logpR = logProbRight(params, RightClickTimes, LeftClickTimes, Nsteps)
    return rat_choice > 0 ? logpR : log(1 - exp(logpR))
end

logLike (generic function with 1 method)

## single_trial
### (params::Vector, RightClickTimes::Vector, LeftClickTimes::Vector, Nsteps::Int, rat_choice::Int)

In [22]:
""" 
function (LL, LLgrad) = 
    single_trial(params::Vector, RightClickTimes::Vector, LeftClickTimes::Vector, Nsteps::Int, rat_choice::Int)

Computes the log likelihood according to Bing's model, and returns log likelihood, gradient

params is a vector whose elements, in order, are
    sigma_a    square root of accumulator variance per unit time sqrt(click units^2 per second)
    sigma_s    standard deviation introduced with each click (will get scaled by click adaptation)
    sigma_i    square root of initial accumulator variance sqrt(click units^2)
    lambda     1/accumulator time constant (sec^-1). Positive means unstable, neg means stable
    B          sticky bound height (click units)
    bias       where the decision boundary lies (click units)
    phi        click adaptation/facilitation multiplication parameter
    tau_phi    time constant for recovery from click adaptation (sec)
    lapse      2*lapse fraction of trials are decided randomly

rat_choice     should be either "R" or "L"


RETURNS:


"""
# === Upgrading from ForwardDiff v0.1 -> v0.2 -> v0.3
# for Retrieving Lower-Order Results
#     # old way
#     answer, results = ForwardDiff.hessian(f, x, AllResults)
#     v = ForwardDiff.value(results)
#     g = ForwardDiff.gradient(results)
#     h = ForwardDiff.hessian(results) # == answer

#     # old v0.2 style
#     out = HessianResult(x)
#     ForwardDiff.hessian!(out, f, x)
#     v = ForwardDiff.value(out)
#     g = ForwardDiff.gradient(out)
#     h = ForwardDiff.hessian(out)

#     # current v0.3 style
#     using DiffBase
#     out = DiffBase.HessianResult(x)
#     ForwardDiff.hessian!(out, f, x)
#     v = DiffBase.value(out)
#     g = DiffBase.gradient(out)
#     h = DiffBase.hessian(out)


function single_trial(params::Vector, RightClickTimes::Vector, LeftClickTimes::Vector, Nsteps::Int, rat_choice::Int, hess_mode=0::Int)
    function llikey(params::Vector)
        logLike(params, RightClickTimes, LeftClickTimes, Nsteps, rat_choice)
    end

    if hess_mode > 0
        result =  DiffBase.HessianResult(params) 
        ForwardDiff.hessian!(result, llikey, params);
    else
        result =  DiffBase.GradientResult(params)
        ForwardDiff.gradient!(result, llikey, params);
    end

    LL     = DiffBase.value(result)
    LLgrad = DiffBase.gradient(result)
    
    if hess_mode > 0
        LLhessian = DiffBase.hessian(result)
    end
   
    if hess_mode > 0
        return LL, LLgrad, LLhessian
    else
        return LL, LLgrad
    end
end

single_trial (generic function with 2 methods)

In [23]:
### =============== testing 1 ================= ####

# Parameters
sigma_a = 1; sigma_s = 0.1; sigma_i = 0.2; 
lam = -0.5; B = 6.1; bias = 0.1; 
phi = 0.3; tau_phi = 0.1; lapse = 0.05*2;
params = [sigma_a, sigma_s, sigma_i, lam, B, bias, phi, tau_phi, lapse]   

RightClickTimes, LeftClickTimes, maxT, rat_choice = trialdata(ratdata["rawdata"], 1)
Nsteps = Int(cld(maxT,dt))

@time logLike(params, RightClickTimes, LeftClickTimes, Nsteps, rat_choice)

### =========================================== #### 

  0.067853 seconds (438.58 k allocations: 7.133 MB)


-2.3370676666063943

In [24]:
Nsteps

18

In [25]:
LeftClickTimes

3-element Array{Float64,1}:
 0.0    
 0.19235
 0.34361

In [33]:
# X = [3.6936, 2.1375, 0.7552, 1.0569, 10.3493, 0.2394, 1.1123, 0.6047, 0.1301];
X = [2.0277     1.1740    1.1142    -0.6808   5.3390    0.7396    1.5260    0.5899    0.1689]
#X = [1.0  0.1  0.2  -0.5  6.1  0.1  0.3  0.1  0.1];

RightClickTimes, LeftClickTimes, maxT, rat_choice = 
trialdata(ratdata["rawdata"], 1)
Nsteps = Int(cld(maxT,dt))

@time logLike(vec(X), RightClickTimes, LeftClickTimes, Nsteps, rat_choice)
params = vec(X)

  0.016094 seconds (369.90 k allocations: 5.812 MB, 19.94% gc time)


9-element Array{Float64,1}:
  2.0277
  1.174 
  1.1142
 -0.6808
  5.339 
  0.7396
  1.526 
  0.5899
  0.1689

In [34]:

# first call for compiling
@time LL, LLgrad = single_trial(params, RightClickTimes, LeftClickTimes, Nsteps, rat_choice)

  0.020784 seconds (327.99 k allocations: 28.171 MB, 14.83% gc time)


(-2.0692503809913823,[0.10146020263577106,0.031142826708387732,0.23150884407176742,0.20237251853355287,-0.0,0.4736473147030815,-2.726144808160129,1.7123865901824764,3.5608739897947106])

In [28]:
### =============== testing 2 ================= ####
@time LL, LLgrad, LLhess = single_trial(params, RightClickTimes, LeftClickTimes, Nsteps, rat_choice, 1)
println(LL)
println(LLgrad)
# println(LLhess)
imshow(log(abs(LLhess)), interpolation="none")
### =========================================== ####

LoadError: UndefVarError: DiffBase not defined

In [None]:
LL

In [None]:
LLgrad[5]==0

In [None]:
LLhess

In [None]:
#Pkg.update()

# Maximize LL over parameter space
### Optimization with Optim.jl


In [None]:
# Pkg.add("Optim")

# import Optim
# using Optim

In [38]:
# Negated sum of the per-trial log likelihoods over trials 1:ntrials of
# `ratdata["rawdata"]` (value only, no derivatives).
function SumLikey_LL(params::Vector, ratdata, ntrials::Int)
    total = 0.

    for trial in 1:ntrials
        clicksR, clicksL, T, choice = trialdata(ratdata["rawdata"], trial)
        nsteps = Int(ceil(T/dt))
        total += logLike(params, clicksR, clicksL, nsteps, choice)
    end

    return -total
end

# Negated sum of the per-trial log likelihoods over trials 1:ntrials, together
# with the negated sum of their gradients with respect to `params`.
# Prints a progress line every 1000 trials.
function SumLikey(params::Vector, ratdata, ntrials::Int)
    sumLL   = 0.
    sumGrad = zeros(Float64, length(params))

    for trial in 1:ntrials
        if trial % 1000 == 0
            println("     sum_ll_all_trials: running trial ", trial, "/", ntrials)
        end

        clicksR, clicksL, T, choice = trialdata(ratdata["rawdata"], trial)
        nsteps = Int(ceil(T/dt))

        ll, grad = single_trial(params, clicksR, clicksL, nsteps, choice)
        sumLL   += ll
        sumGrad  = sumGrad + grad
    end

    return -sumLL, -sumGrad
end


# Negated sum of the per-trial log likelihoods over trials 1:ntrials, together
# with the Hessian of that negated sum with respect to `params`.
#
# Both outputs describe the same quantity (the negative log likelihood), in
# line with SumLikey, which negates both the value and the gradient. The
# Hessian is therefore negated as well.
function SumLikey_hess(params::Vector, ratdata, ntrials::Int)
    LL     = 0.
    LLhess = zeros(Float64, length(params), length(params))

    for i in 1:ntrials
        RightClickTimes, LeftClickTimes, maxT, rat_choice = trialdata(ratdata["rawdata"], i)
        Nsteps = Int(ceil(maxT/dt))

        # hess_mode = 1 asks single_trial for the Hessian; the gradient it
        # also returns is not needed here.
        LLi, LLgradi, LLhessi = single_trial(params, RightClickTimes, LeftClickTimes, Nsteps, rat_choice, 1)
        LL     = LL + LLi
        LLhess = LLhess + LLhessi
    end

    return -LL, -LLhess
end





SumLikey_hess (generic function with 1 method)

In [27]:
# Same totals as SumLikey (negated summed log likelihood and negated summed
# gradient), plus an ntrials-by-length(params) matrix holding each trial's
# own (un-negated) gradient in its row. Prints progress every 1000 trials.
function SumLikey_test(params::Vector, ratdata, ntrials::Int)
    nparams = length(params)
    sumLL   = 0.
    sumGrad = zeros(Float64, nparams)
    perGrad = zeros(Float64, ntrials, nparams)

    for trial in 1:ntrials
        if trial % 1000 == 0
            println("     sum_ll_all_trials: running trial ", trial, "/", ntrials)
        end

        clicksR, clicksL, T, choice = trialdata(ratdata["rawdata"], trial)
        nsteps = Int(ceil(T/dt))

        ll, grad = single_trial(params, clicksR, clicksL, nsteps, choice)
        sumLL   += ll
        sumGrad  = sumGrad + grad

        perGrad[trial,:] = grad
    end

    return -sumLL, -sumGrad, perGrad
end



SumLikey_test (generic function with 1 method)

In [28]:
X = [1.0  0.1  0.2  -0.5  6.1  0.1  0.3  0.1  0.1];
# X = [2.0277     1.1740    1.1142    -0.6808   5.3390    0.7396    1.5260    0.5899    0.1689];
LL, LLgrad, LLgrads = SumLikey_test(vec(X), ratdata, 1)

LL

2.3370676666063748

In [None]:
# X = [1.0  0.1  0.2  -0.5  6.1  0.1  0.3  0.1  0.1];
X = [2.0277     1.1740    1.1142    -0.6808   5.3390    0.7396    1.5260    0.5899    0.1689];
LL, LLgrad, LLgrads = SumLikey_test(vec(X), ratdata, 9342)



In [None]:
idx_9p_jtom = [4 1 2 3 5 7 8 6 9]
println(-LLgrad[idx_9p_jtom])

In [None]:
idx_9p_jtom = [4 1 2 3 5 7 8 6 9];
-LLgrad[idx_9p_jtom]

In [29]:
# Per-trial diagnostics. Returns, in order:
#   - the negated sum of the per-trial log likelihoods,
#   - the vector of per-trial log likelihoods (not negated),
#   - the negated sum of the per-trial gradients,
#   - an ntrials-by-length(params) matrix of per-trial gradients (not negated).
# Prints progress every 1000 trials.
function values_test(params::Vector, ratdata, ntrials::Int)
    nparams = length(params)
    sumLL   = 0.
    perLL   = zeros(Float64, ntrials)
    sumGrad = zeros(Float64, nparams)
    perGrad = zeros(Float64, ntrials, nparams)

    for trial in 1:ntrials
        if trial % 1000 == 0
            println("     sum_ll_all_trials: running trial ", trial, "/", ntrials)
        end

        clicksR, clicksL, T, choice = trialdata(ratdata["rawdata"], trial)
        nsteps = Int(ceil(T/dt))

        ll, grad = single_trial(params, clicksR, clicksL, nsteps, choice)
        sumLL   += ll
        sumGrad  = sumGrad + grad

        perLL[trial] = ll
        perGrad[trial,:] = grad
    end

    return -sumLL, perLL, -sumGrad, perGrad
end

idx_9p_jtom = [4 1 2 3 5 7 8 6 9];


In [34]:
X = [1.0  0.1  0.2  -0.5  6.1  0.1  0.3  0.1  0.1];
LL, LLs, LLgrad, LLgrads = values_test(vec(X), ratdata, 3000)


     sum_ll_all_trials: running trial 1000/3000
     sum_ll_all_trials: running trial 2000/3000
     sum_ll_all_trials: running trial 3000/3000


(1775.2428041461042,[-2.33707,-0.0618188,-0.0528979,-2.84852,-0.0699918,-2.26443,-0.0512956,-0.0513216,-1.38283,-0.0577225  …  -0.0516494,-0.0513856,-2.76789,-0.454964,-0.0670277,-0.052299,-0.08573,-0.0803528,-0.061892,-2.8754],[-69.69235815137975,-16.674090441958178,-258.81900222646425,-10.419964376508453,-0.0,-134.8173131793887,312.2165802004942,-2672.3782337810135,-2501.6375128467803],
3000x9 Array{Float64,2}:
  0.375847      0.079608      0.916723     …  10.2667        4.63936 
 -0.0148792    -0.00351455   -0.0363425       -0.482546     -0.520128
 -0.0032281    -0.000749444  -0.0107699       -0.123299     -0.525377
  0.161308      0.0351833     0.809886         6.8455        8.47898 
 -0.0163281    -0.00392919   -0.0932055       -0.69094      -0.515278
  0.341942      0.0706047     1.13845      …   8.33189       4.23645 
 -1.3018e-5    -3.11553e-6   -2.32648e-5      -0.000360273  -0.526314
 -0.000123353  -2.65313e-5   -0.000208954     -0.00338755   -0.526299
  0.139741      0.03149

In [36]:
X_m = [-1.87  1.38    40.6    0.2       7   0.351  0.067   0.25   0.11]
idx_chg = [2,3,4,1,5,8,6,7,9];
LL, LLs, LLgrad, LLgrads = values_test(vec(X_m[idx_chg]), ratdata, 3000)

     sum_ll_all_trials: running trial 1000/3000
     sum_ll_all_trials: running trial 2000/3000
     sum_ll_all_trials: running trial 3000/3000


(1556.1926999579518,[-1.40052,-0.226774,-0.122543,-2.15979,-0.162377,-1.53157,-0.0682467,-0.0868616,-0.99925,-0.171958  …  -0.100019,-0.0807075,-1.80416,-0.441352,-0.128942,-0.148185,-0.158402,-0.234555,-0.132733,-1.87679],[-5.341186765092285,-1.8385211882517973,-11.546263650614423,2.8436261876711515,-0.0,-0.0,97.8430805583469,-1047.0994140787998,-431.3758480572178],
3000x9 Array{Float64,2}:
  0.0288724    0.00865735    0.0405301   …  -0.662526     4.60324    1.15578 
 -0.00855686  -0.00284973   -0.012017        0.0368794   -1.69031   -0.418795
 -0.0062268   -0.00205026   -0.0136494       0.137573    -1.3617    -0.488557
  0.0494535    0.0152037     0.185694       -0.558722     7.96629    3.74681 
 -0.00703888  -0.00244581   -0.03096         0.00304722  -1.09416   -0.462751
  0.0350891    0.0100739     0.0768755   …  -0.116364     1.86913    1.47495 
 -0.00216419  -0.000729293  -0.00186401      0.0300186   -0.324013  -0.522118
 -0.00478068  -0.00133116   -0.00375675      0.0565271   -0

In [31]:
exp(LLs[1:10])

10-element Array{Float64,1}:
 0.0966105
 0.940053 
 0.948477 
 0.0579302
 0.932401 
 0.10389  
 0.949998 
 0.949973 
 0.250868 
 0.943912 

In [None]:
# X = [2.0277     1.1740    1.1142    -0.6808   5.3390    0.7396    1.5260    0.5899    0.1689];
X = [-0.0078    0.4259   16.9251    7.9173   19.3745    0.8666    0.6998    0.4854    0.0475];
idx_chg = [2,3,4,1,5,8,6,7,9];

LL, LLs, LLgrad, LLgrads = values_test(vec(X[idx_chg]), ratdata, 9342)

println("LL : ", LL, ", LL_grad : ", -LLgrad[idx_9p_jtom])


In [41]:
# Sweep lambda (4th parameter) over a grid of candidate values and record the
# gradient of the summed log likelihood over the first 3000 trials at each one.
lam_cand = -0.5:0.12:0.5;
X_orig = [1.0  40.0  0.2 -0.5  7  0.1  0.3  0.1  0.1];

# One row per candidate lambda, one column per parameter.
GRs_j = zeros(length(lam_cand),9)
for i=1:length(lam_cand)
    # NOTE(review): vec() of the 1x9 matrix may share memory with X_orig, in
    # which case the assignment below also changes X_orig[4]. Only entry 4 is
    # written and it is reset on every pass -- confirm if X_orig is reused.
    X = vec(X_orig);
    X[4] = lam_cand[i]
    
    LL,LLgrad = SumLikey(X, ratdata, 3000);
    # SumLikey returns the negated gradient; negate it back and reorder the
    # entries with idx_9p_jtom (presumably a Julia-to-MATLAB parameter order
    # mapping -- confirm).
    GRs_j[i,:] = -LLgrad[idx_9p_jtom];
    println(-LLgrad[idx_9p_jtom])
end

     sum_ll_all_trials: running trial 1000/3000
     sum_ll_all_trials: running trial 2000/3000
     sum_ll_all_trials: running trial 3000/3000
[-0.8697321971645661 3.267542982007349 0.8813023894599701 5.232167132450805 0.0 -52.840647449394105 405.6977821036609 61.010338760419685 174.9258705435296]
     sum_ll_all_trials: running trial 1000/3000
     sum_ll_all_trials: running trial 2000/3000
     sum_ll_all_trials: running trial 3000/3000
[-0.8013559761211032 3.2804566078951525 0.8827473385814747 5.620278464280778 0.0 -52.840159601343466 407.7046807060147 60.587788226345005 175.6943005071867]
     sum_ll_all_trials: running trial 1000/3000
     sum_ll_all_trials: running trial 2000/3000
     sum_ll_all_trials: running trial 3000/3000
[-0.7316044024306101 3.289560039402798 0.883195190508405 6.0143370127031925 0.0 -52.78300097781783 409.4283490735966 60.14903492103904 176.2305325423956]
     sum_ll_all_trials: running trial 1000/3000
     sum_ll_all_trials: running trial 2000/3000
     

In [None]:
sum(-LLgrad[idx_9p_jtom])

In [None]:
LLs[1:20]
# LLs[21:40]

In [54]:
matwrite("GRs_j.mat",Dict([("GRs_j",GRs_j),]))

In [51]:
GRs_j

9x9 Array{Float64,2}:
 -0.869732  3.26754  0.881302  …  -52.8406  405.698  61.0103  174.926
 -0.801356  3.28046  0.882747     -52.8402  407.705  60.5878  175.694
 -0.731604  3.28956  0.883195     -52.783   409.428  60.149   176.231
 -0.661149  3.29489  0.882653     -52.687   410.785  59.694   176.534
 -0.591116  3.29657  0.881132     -52.5066  411.468  59.2237  176.608
 -0.504093  3.29315  0.878516  …  -52.2789  411.76   58.7405  176.424
 -0.429421  3.28582  0.874876     -51.8569  411.006  58.2453  175.999
 -0.354545  3.27463  0.870249     -51.5106  410.74   57.7384  175.335
 -0.282282  3.25937  0.864622     -51.1159  409.682  57.2209  174.43 

In [None]:
@time SumLikey_LL(params, ratdata, 30000) # time the summed LL with 30000 as the trial-count argument

In [None]:
# Time SumLikey (returns the summed LL and its gradient) with 27 as the trial-count argument.
@time SumLikey(params, ratdata, 27)

In [None]:
"""
    Likely_all_trials(LL, params, ratdata, ntrials)

Fill `LL[i]` with the value of `logLike` for trial `i`, for `i = 1:ntrials`.

Each trial is read from `ratdata["rawdata"]` with `trialdata`; the number of
time steps handed to `logLike` is `ceil(maxT/dt)`, where `dt` comes from the
enclosing (global) scope. `LL` is modified in place and nothing is returned.
"""
function Likely_all_trials(LL::AbstractVector, params::Vector, ratdata, ntrials::Int)
    for trial in 1:ntrials
        rclicks, lclicks, duration, choice = trialdata(ratdata["rawdata"], trial)
        nsteps = Int(ceil(duration/dt))
        LL[trial] = logLike(params, rclicks, lclicks, nsteps, choice)
    end
    return nothing
end

In [None]:
# Per-trial values from logLike for the first 27 trials, written into a
# preallocated vector; the last line displays the filled vector.
ntrials = 27
likely_all = zeros(ntrials)
@time Likely_all_trials(likely_all, params, ratdata, ntrials)
likely_all

In [None]:
# Parameters
# Starting values for the nine model parameters; `params` fixes their order.
sigma_a = 1; sigma_s = 0.1; sigma_i = 0.2; 
sigma_a_sbin = sigma_a  # remember we need this copy for Fmatrix
lam = -0.0005; B = 6.1; bias = 0.1; 
phi = 0.3; tau_phi = 0.1; lapse = 0.05*2;
params = [sigma_a, sigma_s, sigma_i, lam, B, bias, phi, tau_phi, lapse]

# Number of trials used by the LL_* callbacks (they read this global).
ntrials = 300

# Lower (l) and upper (u) bounds, one entry per parameter in the order of `params`.
l = [0,   0,    0, -5,  5, -5, 0.01, 0.005, 0]
u = [200, 200, 30, +5, 25, +5, 1.2,  0.7,   1]


In [None]:
# Objective callback: the value returned by SumLikey_LL at `params`, evaluated
# on the global `ratdata` with the global `ntrials`.
LL_f(params::Vector) = SumLikey_LL(params, ratdata, ntrials)

# Gradient callback: evaluates SumLikey at `params` (global `ratdata`, `ntrials`)
# and copies one gradient entry per parameter into `grads`. The objective value
# returned by the same call is not used. Returns nothing.
function LL_g!(params::Vector, grads::Vector)
    _, gradient = SumLikey(params, ratdata, ntrials)
    for k in eachindex(params)
        grads[k] = gradient[k]
    end
    return nothing
end

# Combined callback: one SumLikey evaluation yields both results. The gradient
# is copied into `grads` (one entry per parameter) and the objective value is
# returned.
function LL_fg!(params::Vector, grads)
    value, gradient = SumLikey(params, ratdata, ntrials)
    for k in eachindex(params)
        grads[k] = gradient[k]
    end
    return value
end

# Hessian callback: evaluates SumLikey_hess at `params` (global `ratdata`,
# `ntrials`) and copies the length(params) x length(params) block of the
# returned Hessian into `hess`. Returns nothing.
function LL_h!(params::Vector, hess)
    _, hessian = SumLikey_hess(params, ratdata, ntrials)
    n = length(params)
    for col in 1:n, row in 1:n
        hess[row, col] = hessian[row, col]
    end
    return nothing
end

# First-order objective for Optim: value, in-place gradient, and the combined
# value+gradient callback.
d4 = OnceDifferentiable(LL_f,
                            LL_g!,
                            LL_fg!)

# Second-order objective (value, gradient, Hessian). `TwiceDifferentiable` is the
# name that goes with `OnceDifferentiable`; `TwiceDifferentiableFunction` belongs
# to the older naming scheme and should not be mixed with it.
d = TwiceDifferentiable(LL_f, LL_g!, LL_h!)

In [None]:
# Parameters
sigma_a = 1; sigma_s = 0.1; sigma_i = 0.2; 
sigma_a_sbin = sigma_a  # remember we need this copy for Fmatrix
lam = -0.0005; B = 6.1; bias = 0.1; 
phi = 0.3; tau_phi = 0.1; lapse = 0.05*2;
params = [sigma_a, sigma_s, sigma_i, lam, B, bias, phi, tau_phi, lapse]

# NOTE(review): the next two lines overwrite `params` again; only the last
# assignment is the starting point actually used below.
params = [-0.0078, 0.4259, 16.9251, 7.9173, 19.3745, 0.8666, 0.6998, 0.4854, 0.0475];
params = [-0.6808,    2.0277,    1.1740, 1.1142,    5.3390,    1.5260,    0.5899,    0.7396,    0.1689]
# Bounds are defined here but are not passed to the optimize call in this cell.
l = [0, 0, 0, -5, 5, -5, 0.01, 0.005, 0]
u = [200, 200, 30, +5, 25, +5, 1.2, 0.7, 1]

# test with max_iteration = 10
# Time a GradientDescent run on d4 from `params`, capped at 10 iterations, with
# the trace both stored and printed.
tic()
options = Optim.Options(g_tol = 1e-12, iterations = 10, show_every = true, 
store_trace = true,  extended_trace = true,show_trace = true)
res = optimize(d4, params, GradientDescent(), options)
println(res)
toc()



In [None]:
# Parameters
sigma_a = 1; sigma_s = 0.1; sigma_i = 0.2; 
sigma_a_sbin = sigma_a  # remember we need this copy for Fmatrix
lam = -0.0005; B = 6.1; bias = 0.1; 
phi = 0.3; tau_phi = 0.1; lapse = 0.05*2;
params = [sigma_a, sigma_s, sigma_i, lam, B, bias, phi, tau_phi, lapse]

# Lower (l) and upper (u) bounds, one entry per parameter in the order of `params`.
l = [0, 0, 0, -5, 5, -5, 0.01, 0.005, 0]
u = [200, 200, 30, +5, 25, +5, 1.2, 0.7, 1]

# test with max_iteration = 10
# Time a box-constrained fit of d4: Fminbox with ConjugateGradient selected via
# `optimizer`; the Optim.Options in `optimizer_o` cap it at 10 iterations and
# store/print the trace.
tic()
res = optimize(d4, params, l, u, Fminbox(); 
         optimizer = ConjugateGradient, optimizer_o = Optim.Options(g_tol = 1e-12,
                                                                        iterations = 10,
                                                                        show_every = true,
                                                                        store_trace = true,
                                                                        extended_trace = true,
                                                                        show_trace = true))
println(res)
toc()


In [None]:
# Custom line search (passed as `linesearch = my_line_search!` in a later cell).
# Forwards every argument to LineSearches.bt2! except the step size: the incoming
# `alpha` is not used, and 0.5 is supplied as the initial step instead.
function my_line_search!(df, x, s, x_scratch, gr_scratch, lsr, alpha,
        mayterminate, c1::Real = 1e-4, rhohi::Real = 0.5, rholo::Real = 0.1, iterations::Integer = 1_000)
    return LineSearches.bt2!(df, x, s, x_scratch, gr_scratch, lsr, 0.5,
                             mayterminate, c1, rhohi, rholo, iterations)
end



In [None]:
# Parameters
sigma_a = 1; sigma_s = 0.1; sigma_i = 0.2; 
sigma_a_sbin = sigma_a  # remember we need this copy for Fmatrix
lam = -0.0005; B = 6.1; bias = 0.1; 
phi = 0.3; tau_phi = 0.1; lapse = 0.05*2;
params = [sigma_a, sigma_s, sigma_i, lam, B, bias, phi, tau_phi, lapse]

# Lower (l) and upper (u) bounds, one entry per parameter in the order of `params`.
l = [0, 0, 0, -5, 5, -5, 0.01, 0.005, 0]
u = [200, 200, 30, +5, 25, +5, 1.2, 0.7, 1]
# Time a box-constrained fit of d4: Fminbox with GradientDescent and the custom
# line search my_line_search!, capped at 20 iterations, trace stored and printed.
tic()
res = optimize(d4, params, l, u, Fminbox(), optimizer = GradientDescent, linesearch = my_line_search!, 
                    optimizer_o = Optim.Options(g_tol = 1e-12,
                                                                        iterations = 20,
                                                                        show_every = true,
                                                                        store_trace = true,
                                                                        extended_trace = true,
                                                                        show_trace = true))
println(res)
toc()



In [None]:
# Display the most recent optimization result.
res

In [None]:
res.minimizer  # field rename: what used to be res.minimum is now res.minimizer

In [None]:
# Look at the metadata stored with the first trace entry and show its "g(x)" item.
tt = getfield(res.trace[1],:metadata)
tt["g(x)"]

In [None]:
# Unpack the stored trace of `res` into plain arrays (one row per trace entry)
# and write them, together with the result summary fields, to res_test.mat.
history = res;
println(fieldnames(history.trace[1]))

Gs = zeros(length(history.trace), length(params))   # "g(x)" of each trace entry
Xs = zeros(size(Gs))                                # "x" of each trace entry
fs = zeros(size(Gs, 1))                             # `value` of each trace entry

for (i, state) in enumerate(history.trace)
    tt = state.metadata
    fs[i] = state.value
    Gs[i,:] = tt["g(x)"]
    Xs[i,:] = tt["x"]
end

matwrite("res_test.mat", Dict("f"           => history.minimum,
                              "x_converged" => history.x_converged,
                              "f_converged" => history.f_converged,
                              "g_converged" => history.g_converged,
                              "x_bf"        => history.minimizer,
                              "grad_trace"  => Gs,
                              "f_trace"     => fs,
                              "x_trace"     => Xs,
                              "myfval"      => history.minimum))

In [None]:
# Box-constrained GradientDescent on LL_f alone (no hand-written gradient is
# supplied), with autodiff=true passed to optimize.
# NOTE(review): this cell uses DifferentiableFunction / OptimizationOptions while
# other cells use OnceDifferentiable / Optim.Options; confirm which names the
# installed Optim accepts before running.
res = optimize(DifferentiableFunction(LL_f), params, l, u, autodiff=true, Fminbox(); 
optimizer = GradientDescent, optimizer_o = OptimizationOptions(g_tol = 1e-12,
                                                                        iterations = 10,
                                                                        show_every = true,
                                                                        store_trace = true,
                                                                        extended_trace = true,
                                                                        show_trace = true))



In [None]:
import LineSearches

In [None]:
# function LL_h!(params::Vector, grads::Vector)
# #     LL, LLgrad, LLhess = llikey(params)
#     LL, LLgrad = SumLikey(params, ratdata, ntrials)
#     for i=1:length(params)
#         grads[i] = LLgrad[i]
#     end
# end

# Unconstrained second-order fit: NewtonTrustRegion on `d` (value, gradient and
# Hessian callbacks) from `params`, up to 200 iterations, trace stored and printed.
results = Optim.optimize(d, params, NewtonTrustRegion(), Optim.Options(g_tol = 1e-12,
                                                                      iterations = 200,
                                                                      show_every = true,
                                                                      store_trace = true,
                                                                      extended_trace = true,
                                                                      show_trace = true))


# Earlier experiments, kept for reference:
# algo_mt = Newton(;linesearch = LineSearches.morethuente!)
# results_mt = Optim.optimize(d, params, method=algo_mt)

# options = OptimizationOptions(show_trace = true, iterations = 10)
# Optim.optimize(d4, params, AcceleratedGradientDescent(), options)

# Optim.optimize(DifferentiableFunction(LL_f, LL_g!), params, Newton(), OptimizationOptions(autodiff = true))

# results = Optim.optimize(d4, params, Newton())

In [None]:
# Objective value stored in the result `res`.
res.minimum

In [None]:
# `value` field of the last trace entry of `results`.
getfield(results.trace[end],:value)

In [None]:
# include("AutodiffModule.jl")
# import AutodiffModule
# using MAT
# using Optim

"""
    main()

Fit the nine model parameters to one rat's data and save the fit to a `.mat`
file.

Steps: read `chrono_<ratname>_rawdata.mat`, draw a random starting point,
run `Fminbox` + `GradientDescent` on `SumLikey_LL` inside the box `[l, u]`,
evaluate `Likely_all_trials` at the best-fit point, and write everything to
`julia_out_<ratname>.mat`.

Relies on `SumLikey_LL`, `SumLikey`, `Likely_all_trials`, `matread` and
`matwrite` being available in the calling session. Uses the same Optim names
as the other cells of this notebook (`OnceDifferentiable`, `Optim.Options`,
result fields `minimizer` / `minimum`).
"""
function main()

    # server > 0: rat name comes from standard input and data live on the bucket;
    # otherwise a fixed rat and the current directory are used.
    server = 0
    if server > 0
        # usage: $ echo $ratname | julia t3.jl
        # chomp drops the trailing newline if there is one, and nothing otherwise.
        ratname = chomp(readline(STDIN))
    else
        ratname = "B069"
    end

    # data import
    if server > 0
        mpath = "/mnt/bucket/people/amyoon/Data/PBupsModel_rawdata/"
    else
        mpath = "./"
    end
    ratdata = matread(*(mpath,"chrono_",ratname,"_rawdata.mat"))

    println("rawdata of ", ratname, " imported" )

    saveto_filename = *("julia_out_",ratname,".mat")

    # number of trials
    ntrials = Int(ratdata["total_trials"])

    # Parameters: random starting point, drawn so every entry lies inside [l, u]
    # except lam and bias, which are standard-normal draws.
    sigma_a = rand()*4.; sigma_s = rand()*4.; sigma_i = rand()*30.;
    lam = randn(); B = rand()*20.+5.; bias = randn();
    phi = rand()*1.19+0.01; tau_phi = 0.695*rand()+0.005; lapse = rand();

    params = [sigma_a, sigma_s, sigma_i, lam, B, bias, phi, tau_phi, lapse]

    # Lower / upper bounds, one entry per parameter in the order of `params`.
    l = [0.,   0.,   0., -5., 5., -5., 0.01, 0.005, 0.]
    u = [200., 200., 30., 5., 25., 5., 1.2,  0.7,   1.]

    # Objective value. Called unqualified, like SumLikey below: the
    # AutodiffModule import is commented out, so the qualified name would be
    # undefined.
    function LL_f(params::Vector)
        return SumLikey_LL(params, ratdata, ntrials)
    end

    # In-place gradient.
    function LL_g!(params::Vector, grads::Vector)
        LL, LLgrad = SumLikey(params, ratdata, ntrials)
        for i=1:length(params)
            grads[i] = LLgrad[i]
        end
    end

    # Value and in-place gradient from a single SumLikey evaluation.
    function LL_fg!(params::Vector, grads)
        LL, LLgrad = SumLikey(params, ratdata, ntrials)
        for i=1:length(params)
            grads[i] = LLgrad[i]
        end
        return LL
    end

    d4 = OnceDifferentiable(LL_f,
                            LL_g!,
                            LL_fg!)

    tic()
    history = optimize(d4, params, l, u, Fminbox();
             optimizer = GradientDescent, optimizer_o = Optim.Options(g_tol = 1e-12,
                                                                      x_tol = 1e-10,
                                                                      f_tol = 1e-6,
                                                                      iterations = 200,
                                                                      store_trace = true,
                                                                      ))
    fit_time = toc()

    # With the renamed result fields, `minimizer` is the best-fit parameter
    # vector and `minimum` is the objective value there.
    x_bf = history.minimizer
    f_bf = history.minimum
    println(x_bf)
    println(history)

    ## do a single functional evaluation at best fit parameters and save likely for each trial
    likely_all = zeros(typeof(sigma_i),ntrials)
    Likely_all_trials(likely_all, x_bf, ratdata, ntrials)

    matwrite(saveto_filename, Dict([("ratname",ratname),
                                    ("x_init",params),
                                    ("trials",ntrials),
                                    ("history",history),
                                    ("f",f_bf),
                                    ("x_converged",history.x_converged),
                                    ("f_converged",history.f_converged),
                                    ("g_converged",history.g_converged),
                                    ("fit_time",fit_time),
                                    ("x_bf",x_bf),
                                    ("myfval", f_bf),
                                    ("likely",likely_all)
                                    ]))
     # hessian?

end

In [None]:
# Run the full fit (with server = 0 inside main this uses rat "B069" from "./").
main()

In [None]:
# Scratch: range from 0 in steps of dx up to ceil(4.1/0.25)*dx.
0:dx:ceil(4.1/0.25)*dx

In [None]:
# Scratch: range in steps of dx between -floor(4.1/0.25)*dx and +floor(4.1/0.25)*dx,
# then materialized as an array for display.
aa = -floor(4.1/0.25)*dx:dx:floor(4.1/0.25)*dx
collect(aa)

In [None]:
# Scratch: two ways of stacking a negated copy of aa on top of aa; the first
# reverses aa before negating. Only the second line's value is displayed.
[-aa[end:-1:1]; aa]
[-aa; aa]

In [None]:
# List installed packages and their versions.
Pkg.status()

In [None]:
# Scratch check: sqrt of an integer zero.
sqrt(0)

In [None]:
using LineSearches

In [None]:


# d4 = DifferentiableFunction(LL_f,
#                             LL_g!,
#                             LL_fg!)

# d = TwiceDifferentiableFunction(LL_f, LL_g!, LL_h!)


# Newton's method with the More-Thuente line search from LineSearches, started
# at `params` and given the value / gradient / Hessian callbacks directly.
algo_mt = Newton(;linesearch = LineSearches.morethuente!)
results_mt = Optim.optimize(LL_f, LL_g!, LL_h!, params, method=algo_mt)


In [None]:
# Newton's method with the Hager-Zhang line search (the counterpart of algo_mt,
# which uses More-Thuente). NewtonTrustRegion is a trust-region method and takes
# no `linesearch` keyword, so the line-search variant is built on Newton.
algo_hz = Newton(;linesearch = LineSearches.hagerzhang!)
results_hz = Optim.optimize(LL_f, LL_g!, LL_h!, params, method=algo_hz)


In [None]:
# Gradient descent with the More-Thuente line search and no preconditioner
# (P = nothing, and precondprep is a function that does nothing).
algo_grad = GradientDescent(; linesearch = LineSearches.morethuente!,
                  P = nothing,
                  precondprep = (P, x) -> nothing)
# Run it from `params` with the value / gradient / Hessian callbacks.
results_gd = Optim.optimize(LL_f, LL_g!, LL_h!, params, method=algo_grad)


In [None]:
# Lower (l) and upper (u) bounds, one entry per parameter in the order of `params`.
l = [0, 0, 0, -5, 5, -5, 0.01, 0.005, 0]
u = [200, 200, 30, +5, 25, +5, 1.2, 0.7, 1]

# test with max_iteration = 10
# Box-constrained gradient descent with the More-Thuente line search.
# As in the other Fminbox cells, `optimizer` is given the optimizer *type*
# (Fminbox constructs the inner optimizer itself), and the line search is passed
# through the separate `linesearch` keyword rather than via a pre-built
# GradientDescent instance.
res = optimize(d4, params, l, u, Fminbox();
         optimizer = GradientDescent, linesearch = LineSearches.morethuente!,
         optimizer_o = Optim.Options(g_tol = 1e-12,
                                     iterations = 10,
                                     show_every = true,
                                     store_trace = true,
                                     extended_trace = true,
                                     show_trace = true))



In [None]:
# L-BFGS with m = 10, the Hager-Zhang line search, and no preconditioner.
algo_lbfgs = LBFGS(;m = 10,
        linesearch = LineSearches.hagerzhang!,
                  P = nothing,
                  precondprep = (P, x) -> nothing)
# NOTE(review): the result is stored in `results_gd`, the same name used for the
# gradient-descent run in an earlier cell, so it replaces that value.
results_gd = Optim.optimize(LL_f, LL_g!, LL_h!, params, method=algo_lbfgs)



In [None]:

# results_gd = Optim.optimize(LL_f, LL_g!, LL_h!, params, method=algo_grad)
# Box-constrained GradientDescent on d4 with mu0 = 0.2 and show_trace = true
# passed to the Fminbox call itself; the inner options are capped at 10 iterations.
res = optimize(d4, params, l, u, Fminbox(); 
optimizer = GradientDescent, show_trace = true, mu0 = 0.2, optimizer_o = Optim.Options(g_tol = 1e-12,
                                                                        iterations = 10,
                                                                        show_every = true,
                                                                        store_trace = true,
                                                                        extended_trace = true,
                                                                        show_trace = true))





In [None]:
# reduced model.


In [None]:
# Scratch: a two-element array of strings.
# NOTE(review): `names` is also the name of a Base function; confirm the shadowing is harmless here.
names = ["a","bd"]

In [None]:
# Scratch: first element of `names`.
names[1]

In [None]:
# Scratch: a comma-separated pair of strings evaluates to a tuple.
"a","b"

In [None]:
# Scratch: a random permutation of 1:10.
randperm(10)

In [None]:
# Scratch: entries 1-10 of "leftbups", picked in a random order.
ratdata["rawdata"]["leftbups"][randperm(10)]

In [None]:
# Number of trials used by the cells below (and by the LL_* callbacks, which read this global).
ntrials = 10 

In [None]:
# Display the "rawdata" entry of ratdata.
ratdata["rawdata"]

In [None]:
# Replace the four per-trial fields of ratdata["rawdata"] by their entries
# 1:ntrials taken in one common random order (`bid`). This overwrites ratdata in
# place; entries beyond ntrials are not kept.
bid = randperm(ntrials);
for field in ("leftbups", "rightbups", "T", "pokedR")
    ratdata["rawdata"][field] = ratdata["rawdata"][field][bid];
end


In [None]:
# Third entry of "rightbups".
ratdata["rawdata"]["rightbups"][3]

In [3]:
using MAT

# Smoke test for MAT: write two scalar variables, "var1" and "var2", to test.mat.
matwrite("test.mat",Dict(
        "var1" => 0,
        "var2" => 1
))

In [4]:
# Read test.mat back; the result is a Dict keyed by variable name.
matread("test.mat")

Dict{ByteString,Any} with 2 entries:
  "myvar1" => 0
  "myvar2" => 1