# Table of Contents
 <p><div class="lev1 toc-item"><a href="#Preparations" data-toc-modified-id="Preparations-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Preparations</a></div><div class="lev1 toc-item"><a href="#Checking-derivatives" data-toc-modified-id="Checking-derivatives-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Checking derivatives</a></div><div class="lev1 toc-item"><a href="#Optimization" data-toc-modified-id="Optimization-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Optimization</a></div><div class="lev2 toc-item"><a href="#first-400-points" data-toc-modified-id="first-400-points-3.1"><span class="toc-item-num">3.1&nbsp;&nbsp;</span>first 400 points</a></div><div class="lev2 toc-item"><a href="#increasing-size-of-data" data-toc-modified-id="increasing-size-of-data-3.2"><span class="toc-item-num">3.2&nbsp;&nbsp;</span>increasing size of data</a></div><div class="lev2 toc-item"><a href="#Full-data" data-toc-modified-id="Full-data-3.3"><span class="toc-item-num">3.3&nbsp;&nbsp;</span>Full data</a></div>

# Preparations

In [1]:
using TimeSeries
using DataFrames
using GaussianProcesses
using GaussianProcesses: num_params, set_params!, get_params, grad_stack
using GaussianProcesses: grad_stack!, update_mll!, update_mll_and_dmll!
using GaussianProcesses: optimize!, get_optim_target
using Optim
;

In [2]:
import PyPlot; plt=PyPlot
using LaTeXStrings
plt.rc("figure", dpi=300.0)
# plt.rc("figure", figsize=(6,4))
plt.rc("figure", autolayout=true)
plt.rc("savefig", dpi=300.0)
plt.rc("text", usetex=true)
plt.rc("font", family="serif")
plt.rc("font", serif="Palatino")
;

In [3]:
station_data = readtable("data2015/725450.14990.processed.2015.2015.csv", 
    header=false,
    names=[:year, :month, :day, :hour, :min, :seconds, :temp])
station_data = station_data[!isnan(station_data[:temp]),:]    
station_ts = DateTime[DateTime(r[:year], r[:month], r[:day], r[:hour], r[:min], r[:seconds]) for r in eachrow(station_data)]
station_ts[1:10]

10-element Array{DateTime,1}:
 2015-01-01T00:52:00
 2015-01-01T01:52:00
 2015-01-01T02:52:00
 2015-01-01T03:52:00
 2015-01-01T04:52:00
 2015-01-01T05:52:00
 2015-01-01T06:52:00
 2015-01-01T07:52:00
 2015-01-01T08:52:00
 2015-01-01T09:52:00

In [4]:
k0 = Noise(0.0)
k1 = FixedPeriodic(log(0.5), log(√(10.0)), log(24.0))
k2 = RQIso(log(0.1*24), log(√10.0), 3.0) # short term
k3 = RQIso(log(3.0*24), log(√20.0), 3.0)
k4 = RQIso(log(10.0*24), log(√20.0), 3.0)
k5 = SE(log(200.0*24), log(√80.0))
_k=k0+k1+k2+k3+k4+k5
_k_nonoise=k1+k2+k3+k4+k5
_logNoise=log(k0.σ2)/2.0
hyp=[-0.359207,0.36883,1.45171,2.0342,1.17682,2.88579,4.28659,1.49533,-0.16544,6.81425,1.3045,3.49925,7.34764,2.16028]
set_params!(_k, hyp)

In [5]:
ms_per_hour = 1e3*3600
ts_vec = convert(Vector{Float64}, station_ts.-station_ts[1]) / ms_per_hour
ts_arr = reshape(ts_vec, (1,length(ts_vec)));

In [6]:
mean_func = MeanConst(mean(station_data[:temp]))
temp_vec = convert(Vector{Float64}, station_data[:temp])
;

In [7]:
gp_small = GP(ts_arr[1:300], temp_vec[1:300], mean_func, _k_nonoise, _logNoise);

In [8]:
size(temp_vec)

(11118,)

# Checking derivatives

In [10]:
using Calculus

In [29]:
function check_derivs(func::Optim.DifferentiableFunction, x::Vector{Float64}; kwargs...)
    g2 = Calculus.gradient(func.f)
    analytical_g = copy(x)
    f0 = func.fg!(x, analytical_g)
    numerical_g = g2(x)
    func.f(x)
    for i in eachindex(numerical_g)
        @assert isapprox(analytical_g[i], numerical_g[i]; kwargs...)
    end
    return analytical_g, numerical_g
end



check_derivs (generic function with 1 method)

In [37]:
anal, numer = check_derivs(get_optim_target(gp_small), get_params(gp_small), 
                            rtol=1e-3, atol=1e-3)
collect(zip(anal, numer))

15-element Array{Tuple{Float64,Float64},1}:
 (137.926,137.926)      
 (0.189556,0.189556)    
 (-5.31039,-5.31039)    
 (2.14295,2.14295)      
 (52.9203,52.9203)      
 (-27.9177,-27.9177)    
 (1.76545,1.76545)      
 (3.6865,3.6865)        
 (-5.34278,-5.34278)    
 (-0.328318,-0.328318)  
 (0.0184481,0.0184481)  
 (-0.387984,-0.387985)  
 (1.99855e-6,2.04281e-6)
 (0.0348054,0.0348054)  
 (-2.08182,-2.08182)    

In [38]:
get_optim_target(gp_small).fg!(get_params(gp_small), copy(get_params(gp_small)))

443.57244271244116

In [39]:
gp_small.dmLL

15-element Array{Float64,1}:
 -137.926     
   -0.189556  
    5.31039   
   -2.14295   
  -52.9203    
   27.9177    
   -1.76545   
   -3.6865    
    5.34278   
    0.328318  
   -0.0184481 
    0.387984  
   -1.99855e-6
   -0.0348054 
    2.08182   

In [40]:
[[:mean, :kern]; GaussianProcesses.get_param_names(gp_small.k)]

15-element Array{Symbol,1}:
 :mean  
 :kern  
 :sk1_ll
 :sk1_lσ
 :sk2_ll
 :sk2_lσ
 :sk2_lα
 :sk3_ll
 :sk3_lσ
 :sk3_lα
 :sk4_ll
 :sk4_lσ
 :sk4_lα
 :sk5_ll
 :sk5_lσ

# Optimization

In [41]:
optoptions = OptimizationOptions(x_tol=1e-3, f_tol=0.1, iterations=10)

Optim.OptimizationOptions{Void}(0.001,0.1,1.0e-8,10,false,false,false,false,1,nothing)

## first 400 points

In [43]:
@time opt_out=optimize!(gp_small, x_tol=1e-3, f_tol=1e-4);

New linesearch
bracketing: ia = 1, ib = 2, c = 0.2, phic = 36933.028826314134, dphic = 832600.5743543695
linesearch: ia = 1, ib = 2, a = 0.0, b = 0.2, phi(a) = 283.192506385976, phi(b) = 36933.028826314134
secant2: a = 0.0, b = 0.2, c = 5.749133980817787e-5
update: ia = 1, a = 0.0, ib = 2, b = 0.2, c = 5.749133980817787e-5, phic = 283.17889120883297, dphic = -234.23586402806032
secant2: iA = 3, iB = 2, ic = 3
secant2: second c = 0.0026624547026988985
secant2: second c satisfied Wolfe conditions
New linesearch
Wolfe condition satisfied on point alpha = 0.03660635887923911
New linesearch
Wolfe condition satisfied on point alpha = 0.005518043484628428
New linesearch
Wolfe condition satisfied on point alpha = 0.06123339555450636
New linesearch
bracketing: ia = 1, ib = 2, c = 0.012246679110901272, phic = 286.3384748689016, dphic = 6468.7599328589295
linesearch: ia = 1, ib = 2, a = 0.0, b = 0.012246679110901272, phi(a) = 269.68817065429494, phi(b) = 286.3384748689016
secant2: a = 0.0, b = 0.

In [44]:
opt_out

Results of Optimization Algorithm
 * Algorithm: Conjugate Gradient
 * Starting Point: [-1.0076160023244338,9.714625030066108, ...]
 * Minimizer: [-1.613527401190906,9.542854100541549, ...]
 * Minimum: 2.597910e+02
 * Iterations: 23
 * Convergence: true
   * |x - x'| < 1.0e-03: false
   * |f(x) - f(x')| / |f(x)| < 1.0e-04: true
   * |g(x)| < 1.0e-08: false
   * Reached Maximum Number of Iterations: false
 * Objective Function Calls: 73
 * Gradient Calls: 50

## increasing size of data

In [45]:
begin;
    @time gp = GP(ts_arr[1:1000], temp_vec[1:1000], mean_func, _k_nonoise, _logNoise)
    set_params!(gp, get_params(gp_small))
    @time opt_out=optimize!(gp, iterations=10, show_trace=true, 
        method=ConjugateGradient(eta=0.05), x_tol=1e-3, f_tol=1e-4);
    print(opt_out)
    print("\n")
    println(opt_out.minimum)
    set_params!(gp_small, get_params(gp))
    global opt_out_1000 = opt_out.minimum
end;

  0.187088 seconds (11.12 k allocations: 30.883 MB, 5.28% gc time)
Iter     Function value   Gradient norm 
     0     9.306860e+02     2.839599e+01
New linesearch
bracketing: ia = 1, ib = 2, c = 0.2, phic = 3004.547524174712, dphic = 31253.50442665807
linesearch: ia = 1, ib = 2, a = 0.0, b = 0.2, phi(a) = 930.6859746947298, phi(b) = 3004.547524174712
secant2: a = 0.0, b = 0.2, c = 0.01204580605790209
update: ia = 1, a = 0.0, ib = 2, b = 0.2, c = 0.01204580605790209, phic = 931.8734762431313, dphic = 2230.625229772278
secant2: iA = 1, iB = 3, ic = 3
secant2 output: a = 0.0, b = 0.01204580605790209
Linesearch: secant succeeded
linesearch: ia = 1, ib = 3, a = 0.0, b = 0.01204580605790209, phi(a) = 930.6859746947298, phi(b) = 931.8734762431313
secant2: a = 0.0, b = 0.01204580605790209, c = 0.005699086617921417
secant2: first c satisfied Wolfe conditions
     1     9.249948e+02     3.615320e+01
New linesearch
Wolfe condition satisfied on point alpha = 0.0010863995250350394
     2     9.232

In [46]:
begin;
    @time gp = GP(ts_arr[1:2000], temp_vec[1:2000], mean_func, _k_nonoise, _logNoise)
    set_params!(gp, get_params(gp_small))
    @time opt_out=optimize!(gp, iterations=10, show_trace=true, 
            method=ConjugateGradient(eta=0.2), x_tol=1e-3, f_tol=1e-4);
    print(opt_out)
    print("\n")
    println(opt_out.minimum)
    set_params!(gp_small, get_params(gp))
    global opt_out_2000 = opt_out.minimum
end;

  0.909559 seconds (24.12 k allocations: 122.825 MB, 16.98% gc time)
Iter     Function value   Gradient norm 
     0     2.017539e+03     1.118135e+02
New linesearch
bracketing: ia = 1, ib = 2, c = 0.2, phic = 43369.19817026393, dphic = 223626.91240996215
linesearch: ia = 1, ib = 2, a = 0.0, b = 0.2, phi(a) = 2017.5386668189312, phi(b) = 43369.19817026393
secant2: a = 0.0, b = 0.2, c = 0.015926756280150188
update: ia = 1, a = 0.0, ib = 2, b = 0.2, c = 0.015926756280150188, phic = 3836.61187350376, dphic = 175736.09783045662
secant2: iA = 1, iB = 3, ic = 3
secant2 output: a = 0.0, b = 0.015926756280150188
Linesearch: secant succeeded
linesearch: ia = 1, ib = 3, a = 0.0, b = 0.015926756280150188, phi(a) = 2017.5386668189312, phi(b) = 3836.61187350376
secant2: a = 0.0, b = 0.015926756280150188, c = 0.0015796604890879636
update: ia = 1, a = 0.0, ib = 3, b = 0.015926756280150188, c = 0.0015796604890879636, phic = 2024.2867807916336, dphic = 26097.640844442558
secant2: iA = 1, iB = 4, ic = 4

In [47]:
begin;
    @time gp = GP(ts_arr[1:4000], temp_vec[1:4000], mean_func, _k_nonoise, _logNoise)
    set_params!(gp, get_params(gp_small))
    @time opt_out=optimize!(gp, iterations=10, show_trace=true, method=ConjugateGradient(eta=0.2), x_tol=1e-3, f_tol=1e-4);
    print(opt_out)
    print("\n")
    println(opt_out.minimum)
    set_params!(gp_small, get_params(gp))
    global opt_out_4000 = opt_out.minimum
end;

  3.770353 seconds (50.13 k allocations: 489.813 MB, 4.40% gc time)
Iter     Function value   Gradient norm 
     0     4.963429e+03     8.967731e+02
New linesearch
bracketing: ia = 1, ib = 2, c = 0.2, phic = 717660.5937702471, dphic = 3.58709225138843e6
linesearch: ia = 1, ib = 2, a = 0.0, b = 0.2, phi(a) = 4963.42937007623, phi(b) = 717660.5937702471
secant2: a = 0.0, b = 0.2, c = 0.07998148255035423
update: ia = 1, a = 0.0, ib = 2, b = 0.2, c = 0.07998148255035423, phic = 287143.0998034904, dphic = 3.5870922513884297e6
secant2: iA = 1, iB = 3, ic = 3
secant2 output: a = 0.0, b = 0.07998148255035423
Linesearch: secant succeeded
linesearch: ia = 1, ib = 3, a = 0.0, b = 0.07998148255035423, phi(a) = 4963.42937007623, phi(b) = 287143.0998034904
secant2: a = 0.0, b = 0.07998148255035423, c = 0.031985187754763084
update: ia = 1, a = 0.0, ib = 3, b = 0.07998148255035423, c = 0.031985187754763084, phic = 114975.96266722374, dphic = 3.587092240268135e6
secant2: iA = 1, iB = 4, ic = 4
secant2

In [48]:
begin;
    @time gp = GP(ts_arr[1:6000], temp_vec[1:6000], mean_func, _k_nonoise, _logNoise)
    set_params!(gp, get_params(gp_small))
    @time opt_out=optimize!(gp, iterations=10, show_trace=true, method=ConjugateGradient(eta=0.1));
    print(opt_out)
    print("\n")
    println(opt_out.minimum)
    set_params!(gp_small, get_params(gp))
    opt_out_6000 = opt_out.minimum
end;

  8.005551 seconds (76.13 k allocations: 1.075 GB, 2.51% gc time)
Iter     Function value   Gradient norm 
     0     6.805618e+03     2.141594e+02
New linesearch
bracketing: ia = 1, ib = 2, c = 0.2, phic = 8.925675741686933e8, dphic = 2.918278164599065e10
linesearch: ia = 1, ib = 2, a = 0.0, b = 0.2, phi(a) = 6805.617773107602, phi(b) = 8.925675741686933e8
secant2: a = 0.0, b = 0.2, c = 6.41437235802064e-7
update: ia = 1, a = 0.0, ib = 2, b = 0.2, c = 6.41437235802064e-7, phic = 6805.557850306429, dphic = -93244.25374607435
secant2: iA = 3, iB = 2, ic = 3
secant2: second c = 0.00017120628580529504
secant2: second c satisfied Wolfe conditions
     1     6.797748e+03     4.215161e+01
New linesearch
Wolfe condition satisfied on point alpha = 0.0006472374138577468
     2     6.795923e+03     5.772887e+01
New linesearch
Wolfe condition satisfied on point alpha = 0.0006054187691614957
     3     6.794126e+03     3.492812e+01
New linesearch
Wolfe condition satisfied on point alpha = 0.000314

was 800 seconds

## Full data

In [54]:
set_params!(gp_small, opt_out_4000)

In [55]:
begin;
    @time gp = GP(ts_arr, temp_vec, mean_func, _k_nonoise, _logNoise)
    set_params!(gp, get_params(gp_small))
    @time opt_out=optimize!(gp, iterations=30, show_trace=true,
        x_tol=1e-3, f_tol=1e-4);
    print(opt_out)
    print("\n")
    println(opt_out.minimum)
    set_params!(gp_small, get_params(gp))
    opt_out_full = opt_out.minimum
end;

 31.510904 seconds (142.65 k allocations: 3.688 GB, 0.97% gc time)
Iter     Function value   Gradient norm 
     0     1.249550e+04     4.712091e+02
Base.LinAlg.PosDefException(1547)
New linesearch
bracketing: ia = 1, ib = 2, c = 0.020000000000000004, phic = 3.9131729728708935e8, dphic = 1.2497784191997646e11
linesearch: ia = 1, ib = 2, a = 0.0, b = 0.020000000000000004, phi(a) = 12495.50470827693, phi(b) = 3.9131729728708935e8
secant2: a = 0.0, b = 0.020000000000000004, c = 6.753068795045158e-8
update: ia = 1, a = 0.0, ib = 2, b = 0.020000000000000004, c = 6.753068795045158e-8, phic = 12495.476220666578, dphic = -421700.36719074746
secant2: iA = 3, iB = 2, ic = 3
secant2: second c = 9.724788133321116e-5
secant2: second c satisfied Wolfe conditions
     1     1.247543e+04     1.600035e+02
New linesearch
Wolfe condition satisfied on point alpha = 0.0005731767233374078
     2     1.246047e+04     1.568763e+02
New linesearch
Wolfe condition satisfied on point alpha = 0.0001684614204311382

In [56]:
begin;
    @time gp = GP(ts_arr, temp_vec, mean_func, _k_nonoise, _logNoise)
    set_params!(gp, get_params(gp_small))
    @time opt_out=optimize!(gp, iterations=100, show_trace=true,
        x_tol=1e-3, f_tol=1e-4);
    print(opt_out)
    print("\n")
    println(opt_out.minimum)
    set_params!(gp_small, get_params(gp))
    opt_out_full = opt_out.minimum
end;

 28.818618 seconds (142.65 k allocations: 3.688 GB, 3.30% gc time)
Iter     Function value   Gradient norm 
     0     1.243039e+04     1.529817e+01
New linesearch
bracketing: ia = 1, ib = 2, c = 0.2, phic = 31019.655992772634, dphic = 123483.07986838587
linesearch: ia = 1, ib = 2, a = 0.0, b = 0.2, phi(a) = 12430.390318440306, phi(b) = 31019.655992772634
secant2: a = 0.0, b = 0.2, c = 0.001490346924196886
update: ia = 1, a = 0.0, ib = 2, b = 0.2, c = 0.001490346924196886, phic = 12432.16031720172, dphic = 3277.627768545235
secant2: iA = 1, iB = 3, ic = 3
secant2 output: a = 0.0, b = 0.001490346924196886
Linesearch: secant succeeded
linesearch: ia = 1, ib = 3, a = 0.0, b = 0.001490346924196886, phi(a) = 12430.390318440306, phi(b) = 12432.16031720172
secant2: a = 0.0, b = 0.001490346924196886, c = 0.000328598549118486
secant2: first c satisfied Wolfe conditions
     1     1.243024e+04     1.343813e+01
554.269538 seconds (808.35 k allocations: 1.866 GB, 0.11% gc time)
Results of Optimiza

In [27]:
begin;
    @time gp = GP(ts_arr, temp_vec, mean_func, _k_nonoise, _logNoise)
    set_params!(gp, get_params(gp_small))
    @time opt_out=optimize!(gp, iterations=10, show_trace=true);
    print(opt_out)
    print("\n")
    println(opt_out.minimum)
    set_params!(gp_small, get_params(gp))
    opt_out_full = opt_out.minimum
end;

 46.514229 seconds (131.57 k allocations: 9.213 GB, 3.58% gc time)
Iter     Function value   Gradient norm 
     0     1.242993e+04     8.431445e+01
New linesearch
bracketing: ia = 1, ib = 2, c = 0.2, phic = 33992.46691956253, dphic = 86447.84284636985
linesearch: ia = 1, ib = 2, a = 0.0, b = 0.2, phi(a) = 12429.932225979039, phi(b) = 33992.46691956253
secant2: a = 0.0, b = 0.2, c = 0.021965523389608048
update: ia = 1, a = 0.0, ib = 2, b = 0.2, c = 0.021965523389608048, phic = 51731.45597452744, dphic = 4.560500447216374e6
secant2: iA = 1, iB = 3, ic = 3
secant2 output: a = 0.0, b = 0.021965523389608048
Linesearch: secant succeeded
linesearch: ia = 1, ib = 3, a = 0.0, b = 0.021965523389608048, phi(a) = 12429.932225979039, phi(b) = 51731.45597452744
secant2: a = 0.0, b = 0.021965523389608048, c = 5.1251449009931694e-5
secant2: first c satisfied Wolfe conditions
     1     1.242949e+04     5.110313e+01
New linesearch
Wolfe condition satisfied on point alpha = 0.00011660233057945458
     