# Table of Contents
 <p><div class="lev1 toc-item"><a href="#Preparations" data-toc-modified-id="Preparations-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Preparations</a></div><div class="lev1 toc-item"><a href="#Checking-derivatives" data-toc-modified-id="Checking-derivatives-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Checking derivatives</a></div><div class="lev1 toc-item"><a href="#Optimization" data-toc-modified-id="Optimization-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Optimization</a></div>

# Preparations

In [1]:
import PyPlot; plt=PyPlot
using LaTeXStrings
plt.rc("figure", dpi=300.0)
# plt.rc("figure", figsize=(6,4))
plt.rc("figure", autolayout=true)
plt.rc("savefig", dpi=300.0)
plt.rc("text", usetex=true)
plt.rc("font", family="serif")
plt.rc("font", serif="Palatino")
using TimeSeries
using DataFrames
using GaussianProcesses
using GaussianProcesses: num_params, set_params!, get_params, grad_stack, grad_stack!, update_mll!, update_mll_and_dmll!
using Optim
;

In [2]:
station_data = readtable("data2015/725450.14990.processed.2015.2015.csv", 
    header=false,
    names=[:year, :month, :day, :hour, :min, :seconds, :temp])
station_data = station_data[!isnan(station_data[:temp]),:]    
station_ts = DateTime[DateTime(r[:year], r[:month], r[:day], r[:hour], r[:min], r[:seconds]) for r in eachrow(station_data)]
station_ts[1:10]

10-element Array{DateTime,1}:
 2015-01-01T00:52:00
 2015-01-01T01:52:00
 2015-01-01T02:52:00
 2015-01-01T03:52:00
 2015-01-01T04:52:00
 2015-01-01T05:52:00
 2015-01-01T06:52:00
 2015-01-01T07:52:00
 2015-01-01T08:52:00
 2015-01-01T09:52:00

In [3]:
k0 = Noise(0.0)
k1 = FixedPeriodic(log(0.5), log(√(10.0)), log(24.0))
k2 = RQIso(log(0.1*24), log(√10.0), 3.0) # short term
k3 = RQIso(log(3.0*24), log(√20.0), 3.0)
k4 = RQIso(log(10.0*24), log(√20.0), 3.0)
k5 = SE(log(200.0*24), log(√80.0))
_k=k0+k1+k2+k3+k4+k5
_k_nonoise=k1+k2+k3+k4+k5
_logNoise=k0.lσ
hyp=[-0.359207,0.36883,1.45171,2.0342,1.17682,2.88579,4.28659,1.49533,-0.16544,6.81425,1.3045,3.49925,7.34764,2.16028]
GaussianProcesses.set_params!(_k, hyp)

In [4]:
ms_per_hour = 1e3*3600
ts_vec = convert(Vector{Float64}, station_ts.-station_ts[1]) / ms_per_hour
ts_arr = reshape(ts_vec, (1,length(ts_vec)));

In [5]:
mean_func = MeanConst(mean(station_data[:temp]))
temp_vec = convert(Vector{Float64}, station_data[:temp])
;

In [6]:
gp_small = GP(ts_arr[1:100], temp_vec[1:100], mean_func, _k_nonoise, _logNoise);

In [7]:
size(temp_vec)

(11118,)

# Checking derivatives

In [8]:
using Calculus

In [25]:
function get_optim_target(gp::GP; noise::Bool=true, mean::Bool=true, kern::Bool=true)
    Kgrad_buffer = Array(Float64, gp.nobsv, gp.nobsv)
    function mll(hyp::Vector{Float64})
        try
            set_params!(gp, hyp; noise=noise, mean=mean, kern=kern)
            GaussianProcesses.update_mll!!(gp)
            return -gp.mLL
        catch err
             if !all(isfinite(hyp))
                println(err)
                return Inf
            elseif isa(err, ArgumentError)
                println(err)
                return Inf
            elseif isa(err, Base.LinAlg.PosDefException)
                println(err)
                return Inf
            else
                throw(err)
            end
        end        
    end

    function mll_and_dmll!(hyp::Vector{Float64}, grad::Vector{Float64})
        try
            set_params!(gp, hyp; noise=noise, mean=mean, kern=kern)
            update_mll_and_dmll!(gp, Kgrad_buffer; noise=noise, mean=mean, kern=kern)
            grad[:] = -gp.dmLL
            return -gp.mLL
        catch err
             if !all(isfinite(hyp))
                println(err)
                return Inf
            elseif isa(err, ArgumentError)
                println(err)
                return Inf
            elseif isa(err, Base.LinAlg.PosDefException)
                println(err)
                return Inf
            else
                throw(err)
            end
        end 
    end
    function dmll!(hyp::Vector{Float64}, grad::Vector{Float64})
        mll_and_dmll!(hyp::Vector{Float64}, grad::Vector{Float64})
    end

    func = DifferentiableFunction(mll, dmll!, mll_and_dmll!)
    return func
end



get_optim_target (generic function with 1 method)

In [26]:
function check_derivs(func::Optim.DifferentiableFunction, x::Vector{Float64}; kwargs...)
    g2 = Calculus.gradient(func.f)
    analytical_g = copy(x)
    f0 = func.fg!(x, analytical_g)
    numerical_g = g2(x)
    func.f(x)
    @assert isapprox(analytical_g, numerical_g, kwargs...)
    return analytical_g, numerical_g
end



check_derivs (generic function with 1 method)

In [27]:
anal, numer = check_derivs(get_optim_target(gp_small), get_params(gp_small))

collect(zip(anal, numer))

15-element Array{Tuple{Float64,Float64},1}:
 (41.4884,41.4884)        
 (0.118446,0.118446)      
 (-1.7043,-1.7043)        
 (0.888009,0.888009)      
 (29.1435,29.1435)        
 (-6.28409,-6.28409)      
 (0.895152,0.895152)      
 (0.0175197,0.0175197)    
 (-0.154911,-0.154911)    
 (-0.0078769,-0.00787681) 
 (0.000130137,0.000130212)
 (-0.0779734,-0.0779735)  
 (1.85672e-8,8.24904e-8)  
 (0.000244759,0.000244751)
 (-0.431315,-0.431315)    

In [28]:
get_optim_target(gp_small).fg!(get_params(gp_small), copy(get_params(gp_small)))

156.1577338187417

In [29]:
gp_small.dmLL

15-element Array{Float64,1}:
 -41.4884     
  -0.118446   
   1.7043     
  -0.888009   
 -29.1435     
   6.28409    
  -0.895152   
  -0.0175197  
   0.154911   
   0.0078769  
  -0.000130137
   0.0779734  
  -1.85672e-8 
  -0.000244759
   0.431315   

In [30]:
[[:mean, :kern]; GaussianProcesses.get_param_names(gp_small.k)]

15-element Array{Symbol,1}:
 :mean  
 :kern  
 :sk1_ll
 :sk1_lσ
 :sk2_ll
 :sk2_lσ
 :sk2_lα
 :sk3_ll
 :sk3_lσ
 :sk3_lα
 :sk4_ll
 :sk4_lσ
 :sk4_lα
 :sk5_ll
 :sk5_lσ

# Optimization

In [31]:
function GaussianProcesses.optimize!(gp::GP; noise::Bool=true, mean::Bool=true, kern::Bool=true,
                    method=ConjugateGradient(), kwargs...)
    func = get_optim_target(gp, noise=noise, mean=mean, kern=kern)
    init = get_params(gp;  noise=noise, mean=mean, kern=kern)  # Initial hyperparameter values
    results=optimize(func,init; method=method, kwargs...)                     # Run optimizer
    return results
end



In [33]:
gp_small.mLL

-156.1577338187417

In [38]:
@time opt_out=optimize!(gp_small, iterations=100);

New linesearch
bracketing: ia = 1, ib = 2, c = 0.2, phic = 416788.18233096955, dphic = 1.1801992405881917e7
linesearch: ia = 1, ib = 2, a = 0.0, b = 0.2, phi(a) = 265.8839973078502, phi(b) = 416788.18233096955
secant2: a = 0.0, b = 0.2, c = 0.000599223407235137
update: ia = 1, a = 0.0, ib = 2, b = 0.2, c = 0.000599223407235137, phic = 245.2242574640229, dphic = -32736.04420110632
secant2: iA = 3, iB = 2, ic = 3
secant2: second c = 0.007783678095908519
secant2: second c satisfied Wolfe conditions
New linesearch
Wolfe condition satisfied on point alpha = 0.003542846922698607
New linesearch
Wolfe condition satisfied on point alpha = 0.0036247632434167234
New linesearch
Wolfe condition satisfied on point alpha = 0.038458830512263634
New linesearch
Wolfe condition satisfied on point alpha = 0.007381116096415378
New linesearch
Wolfe condition satisfied on point alpha = 0.008711700355763661
New linesearch
Wolfe condition satisfied on point alpha = 0.009444380595889866
New linesearch
Wolfe con

In [39]:
opt_out

Results of Optimization Algorithm
 * Algorithm: Conjugate Gradient
 * Starting Point: [-0.9499134263443492,-6.924741767283399, ...]
 * Minimizer: [-0.9466255395379776,-4.166389238658941, ...]
 * Minimum: 1.082305e+02
 * Iterations: 100
 * Convergence: false
   * |x - x'| < 1.0e-32: false
   * |f(x) - f(x')| / |f(x)| < 1.0e-32: false
   * |g(x)| < 1.0e-08: false
   * Reached Maximum Number of Iterations: true
 * Objective Function Calls: 306
 * Gradient Calls: 206

In [40]:
begin;
    @time gp = GP(ts_arr[1:1000], temp_vec[1:1000], mean_func, _k_nonoise, _logNoise)
    @time opt_out=optimize!(gp, iterations=10, show_trace=true);
    print(opt_out)
    print("\n")
    println(opt_out.minimum)
    global opt_out_1000 = opt_out.minimum
end;

  0.211536 seconds (10.23 k allocations: 76.648 MB, 1.74% gc time)
Iter     Function value   Gradient norm 
     0     1.512206e+03     6.543916e+02
Base.LinAlg.PosDefException(3)
Base.LinAlg.PosDefException(401)
New linesearch
bracketing: ia = 1, ib = 2, c = 0.0020000000000000005, phic = 1795.670153067434, dphic = 1.7729432247979625e6
linesearch: ia = 1, ib = 2, a = 0.0, b = 0.0020000000000000005, phi(a) = 1512.2058745840225, phi(b) = 1795.670153067434
secant2: a = 0.0, b = 0.0020000000000000005, c = 0.0004418867739815495
secant2: first c satisfied Wolfe conditions
     1     1.313911e+03     5.194144e+02
New linesearch
Wolfe condition satisfied on point alpha = 0.001212821959310361
     2     1.252359e+03     8.671482e+02
New linesearch
Wolfe condition satisfied on point alpha = 0.00028106649105842527
     3     1.107255e+03     8.956162e+01
New linesearch
Wolfe condition satisfied on point alpha = 0.0003946570114909507
     4     1.104099e+03     2.561977e+01
New linesearch
Wolfe co

In [41]:
begin;
    @time gp = GP(ts_arr[1:2000], temp_vec[1:2000], mean_func, _k_nonoise, _logNoise)
    @time opt_out=optimize!(gp, iterations=10, show_trace=true, method=ConjugateGradient(eta=0.05));
    print(opt_out)
    print("\n")
    println(opt_out.minimum)
    global opt_out_2000 = opt_out.minimum
end;

  1.105694 seconds (22.23 k allocations: 305.904 MB, 33.77% gc time)
Iter     Function value   Gradient norm 
     0     3.015223e+03     1.268494e+03
New linesearch
bracketing: ia = 1, ib = 2, c = 0.2, phic = 2.947606187356958e12, dphic = 2.6042046940991615e10
linesearch: ia = 1, ib = 2, a = 0.0, b = 0.2, phi(a) = 3015.2229652737133, phi(b) = 2.947606187356958e12
secant2: a = 0.0, b = 0.2, c = 1.4135114440490361e-5
update: ia = 1, a = 0.0, ib = 2, b = 0.2, c = 1.4135114440490361e-5, phic = 2989.3469464258005, dphic = -1.8204130928647697e6
secant2: iA = 3, iB = 2, ic = 3
secant2: second c = 0.0012846149482150558
update: ia = 3, a = 1.4135114440490361e-5, ib = 2, b = 0.2, c = 0.0012846149482150558, phic = 8171.477850953999, dphic = 2.388651495301821e7
secant2 output: a = 1.4135114440490361e-5, b = 0.0012846149482150558
Linesearch: secant succeeded
linesearch: ia = 3, ib = 4, a = 1.4135114440490361e-5, b = 0.0012846149482150558, phi(a) = 2989.3469464258005, phi(b) = 8171.477850953999
sec

In [42]:
begin;
    @time gp = GP(ts_arr[1:4000], temp_vec[1:4000], mean_func, _k_nonoise, _logNoise)
    @time opt_out=optimize!(gp, iterations=10, show_trace=true, method=ConjugateGradient(eta=0.1));
    print(opt_out)
    print("\n")
    println(opt_out.minimum)
    global opt_out_4000 = opt_out.minimum
end;

  3.804290 seconds (46.24 k allocations: 1.194 GB, 16.39% gc time)
Iter     Function value   Gradient norm 
     0     6.675537e+03     2.157892e+03
Base.LinAlg.PosDefException(2)
Base.LinAlg.PosDefException(9)
New linesearch
bracketing: ia = 1, ib = 2, c = 0.0020000000000000005, phic = 4.150125018382583e7, dphic = 2.020471736855663e11
linesearch: ia = 1, ib = 2, a = 0.0, b = 0.0020000000000000005, phi(a) = 6675.536504402365, phi(b) = 4.150125018382583e7
secant2: a = 0.0, b = 0.0020000000000000005, c = 5.994437520104476e-8
update: ia = 1, a = 0.0, ib = 2, b = 0.0020000000000000005, c = 5.994437520104476e-8, phic = 6675.173510647184, dphic = -6.055042346919744e6
secant2: iA = 3, iB = 2, ic = 3
secant2: second c = 0.00038827609671842164
update: ia = 3, a = 5.994437520104476e-8, ib = 2, b = 0.0020000000000000005, c = 0.00038827609671842164, phic = 6894.1067174071795, dphic = 1.2879299111495094e7
secant2 output: a = 5.994437520104476e-8, b = 0.00038827609671842164
Linesearch: secant succee

In [43]:
begin;
    @time gp = GP(ts_arr[1:6000], temp_vec[1:6000], mean_func, _k_nonoise, _logNoise)
    @time opt_out=optimize!(gp, iterations=10, show_trace=true, method=ConjugateGradient(eta=0.1));
    print(opt_out)
    print("\n")
    println(opt_out.minimum)
    opt_out_6000 = opt_out.minimum
end;

  8.431103 seconds (70.24 k allocations: 2.684 GB, 8.17% gc time)
Iter     Function value   Gradient norm 
     0     9.849824e+03     3.372417e+03
Base.LinAlg.PosDefException(34)
Base.LinAlg.PosDefException(4395)
New linesearch
bracketing: ia = 1, ib = 2, c = 0.0020000000000000005, phic = 2.2853604651669308e10, dphic = 1.7361417558201794e14
linesearch: ia = 1, ib = 2, a = 0.0, b = 0.0020000000000000005, phi(a) = 9849.823864996542, phi(b) = 2.2853604651669308e10
secant2: a = 0.0, b = 0.0020000000000000005, c = 1.6577548249540753e-10
update: ia = 1, a = 0.0, ib = 2, b = 0.0020000000000000005, c = 1.6577548249540753e-10, phic = 9849.821479405566, dphic = -1.4390479742579395e7
secant2: iA = 3, iB = 2, ic = 3
secant2: second c = 0.0002869782238223787
update: ia = 3, a = 1.6577548249540753e-10, ib = 2, b = 0.0020000000000000005, c = 0.0002869782238223787, phic = 11414.337034809821, dphic = 4.895560495359543e7
secant2 output: a = 1.6577548249540753e-10, b = 0.0002869782238223787
Linesearch: 

In [47]:
print(get_params(gp_small))

[-0.946626,-4.23285,-0.993811,2.52342,21.7698,-2.40059,27.3302,1.56044,1.95804,4.34655,7.05903,0.315237,3.50061,7.05086,-14.4183]

In [71]:
begin;
    @time gp = GP(ts_arr, temp_vec, mean_func, _k_nonoise, _logNoise)
    @time opt_out=optimize!(gp, iterations=10, show_trace=true);
    print(opt_out)
    print("\n")
    println(opt_out.minimum)
    opt_out_full = opt_out.minimum
end;

 39.341052 seconds (131.65 k allocations: 9.213 GB, 2.03% gc time)
Iter     Function value   Gradient norm 
     0     1.722684e+04     5.988068e+03
Base.LinAlg.PosDefException(3)
Base.LinAlg.PosDefException(3653)
Base.LinAlg.PosDefException(995)
New linesearch
bracketing: ia = 1, ib = 2, c = 0.00020000000000000006, phic = 25740.946757278052, dphic = 2.3127088971710098e8
linesearch: ia = 1, ib = 2, a = 0.0, b = 0.00020000000000000006, phi(a) = 17226.83881388742, phi(b) = 25740.946757278052
secant2: a = 0.0, b = 0.00020000000000000006, c = 2.9140642551439924e-5
secant2: first c satisfied Wolfe conditions
     1     1.621035e+04     4.630967e+03
New linesearch
Wolfe condition satisfied on point alpha = 8.828188482618482e-5
     2     1.551076e+04     4.517686e+03
New linesearch
Wolfe condition satisfied on point alpha = 6.214371511399274e-5
     3     1.490163e+04     1.685278e+03
New linesearch
Wolfe condition satisfied on point alpha = 0.00014421015277953016
     4     1.468903e+04    