# Table of Contents
 <p><div class="lev1 toc-item"><a href="#Data-Import-and-Preprocessing" data-toc-modified-id="Data-Import-and-Preprocessing-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Data Import and Preprocessing</a></div><div class="lev2 toc-item"><a href="#Distances" data-toc-modified-id="Distances-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Distances</a></div><div class="lev1 toc-item"><a href="#Modifications-of-the-spatiotemporal-kernel" data-toc-modified-id="Modifications-of-the-spatiotemporal-kernel-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Modifications of the spatiotemporal kernel</a></div><div class="lev2 toc-item"><a href="#Allowing-for-mean-variation" data-toc-modified-id="Allowing-for-mean-variation-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Allowing for mean variation</a></div><div class="lev2 toc-item"><a href="#Separating-fast-component" data-toc-modified-id="Separating-fast-component-2.2"><span class="toc-item-num">2.2&nbsp;&nbsp;</span>Separating fast component</a></div><div class="lev2 toc-item"><a href="#Separating-each-component" data-toc-modified-id="Separating-each-component-2.3"><span class="toc-item-num">2.3&nbsp;&nbsp;</span>Separating each component</a></div><div class="lev2 toc-item"><a href="#Releasing-variances" data-toc-modified-id="Releasing-variances-2.4"><span class="toc-item-num">2.4&nbsp;&nbsp;</span>Releasing variances</a></div>

In [2]:
using TimeSeries
using DataFrames
using GaussianProcesses
using GaussianProcesses: Mean, Kernel, evaluate, metric, IsotropicData, VecF64
using GaussianProcesses: Stationary, KernelData, MatF64
import GaussianProcesses: optimize!, get_optim_target, cov, grad_slice!
import GaussianProcesses: num_params, set_params!, get_params, update_mll!, update_mll_and_dmll!
import GaussianProcesses: get_param_names, cov!, addcov!, multcov!
import Proj4
using Optim
using Distances
;

In [3]:
import PyPlot; plt=PyPlot
using LaTeXStrings
# Matplotlib defaults for this notebook: 300 dpi for both on-screen and saved
# figures, LaTeX text rendering, and a serif (Palatino) font.
for target in ("figure", "savefig")
    plt.rc(target, dpi=300.0)
end
# plt.rc("figure", figsize=(6,4))
plt.rc("text", usetex=true)
plt.rc("font", family="serif", serif="Palatino")
;

# Data Import and Preprocessing

In [4]:
include("src/preprocessing.jl")

test_data (generic function with 1 method)

In [7]:
include("src/TempModel.jl")



TempModel

In [8]:
# Load the station catalogue (read_isdList is presumably defined in
# src/preprocessing.jl, included above) and display its first five rows.
isdList=read_isdList()
isdList[1:5,:]

Unnamed: 0,USAF,WBAN,NAME,CTRY,STATE,ICAO,LAT,LON,ELEV,BEGIN,END,X_PRJ,Y_PRJ
1,10010,99999,JAN MAYEN(NOR-NAVY),NO,,ENJA,70.933,-8.667,9.0,1931,2015,4554500.0,6113440.0
2,10060,99999,EDGEOYA,NO,,,78.25,22.817,14.0,1973,2015,4049820.0,7556400.0
3,10070,99999,NY-ALESUND,SV,,,78.917,11.933,7.7,1973,2015,3867800.0,7265490.0
4,10080,99999,LONGYEAR,SV,,ENSB,78.246,15.466,26.8,1975,2015,3997050.0,7336690.0
5,10090,99999,KARL XII OYA,SV,,,80.65,25.0,5.0,1955,2015,3692590.0,7685450.0


In [9]:
# Restrict the catalogue to four stations, selected by their USAF identifiers,
# then display the resulting table.
isdSubset = let wanted = (725450, 725460, 725480, 725485)
    keep = [usaf in wanted for usaf in isdList[:USAF].values]
    isdList[keep, :]
end
isdSubset

Unnamed: 0,USAF,WBAN,NAME,CTRY,STATE,ICAO,LAT,LON,ELEV,BEGIN,END,X_PRJ,Y_PRJ
1,725450,14990,THE EASTERN IOWA AIRPORT,US,IA,KCID,41.883,-91.717,264.6,1973,2015,1647990.0,1044100.0
2,725460,14933,DES MOINES INTERNATIONAL AIRPORT,US,IA,KDSM,41.534,-93.653,291.7,1973,2015,1487230.0,1003790.0
3,725480,94910,WATERLOO MUNICIPAL AIRPORT,US,IA,KALO,42.554,-92.401,264.6,1960,2015,1590250.0,1117660.0
4,725485,14940,MASON CITY MUNICIPAL ARPT,US,IA,KMCW,43.154,-93.327,373.4,1973,2015,1514070.0,1183740.0


In [10]:
# Read the observations of the selected stations into a single table and show
# the first five rows. Later cells use the `station` column to index rows of
# isdSubset and `ts_hours` as the time coordinate (in the displayed rows,
# 00:52 on 2015-01-01 corresponds to ts_hours = 0.866667, i.e. hours).
hourly_cat=read_Stations(isdSubset)
hourly_cat[1:5,:]

Unnamed: 0,year,month,day,hour,min,seconds,temp,ts,station,ts_hours
1,2015,1,1,0,52,0,-7.8,2015-01-01T00:52:00,1,0.866667
2,2015,1,1,1,52,0,-8.3,2015-01-01T01:52:00,1,1.86667
3,2015,1,1,2,52,0,-8.3,2015-01-01T02:52:00,1,2.86667
4,2015,1,1,3,52,0,-9.4,2015-01-01T03:52:00,1,3.86667
5,2015,1,1,4,52,0,-9.4,2015-01-01T04:52:00,1,4.86667


## Distances

To get distances between stations, we can either use a function that computes distances on a sphere, or first project the coordinates onto a Euclidean plane and then compute ordinary Euclidean distances. I'll do it both ways to check that they're consistent (equal up to a multiplicative constant), and then use Euclidean distances for convenience.

In [11]:
# http://www.johndcook.com/blog/python_longitude_latitude/
"""
    distance_on_unit_sphere(lat1, long1, lat2, long2)

Return the great-circle (arc) distance, in radians, between two points on the
unit sphere. Each point is given as a latitude/longitude pair in degrees.

Multiply the result by the radius of the sphere (e.g. the radius of the Earth
in your favorite unit) to obtain a length.

The cosine of the central angle is clamped to `[-1, 1]` before `acos` is
applied. For identical or antipodal points, floating-point rounding can push
it slightly outside that interval, which would otherwise raise a `DomainError`.
"""
function distance_on_unit_sphere(lat1, long1, lat2, long2)
    # Convert latitude and longitude to spherical coordinates in radians.
    degrees_to_radians = π/180.0

    # phi = 90 - latitude (polar angle measured from the north pole)
    phi1 = (90.0 - lat1)*degrees_to_radians
    phi2 = (90.0 - lat2)*degrees_to_radians

    # theta = longitude
    theta1 = long1*degrees_to_radians
    theta2 = long2*degrees_to_radians

    # For two locations in spherical coordinates
    # (1, theta, phi) and (1, theta', phi'):
    #   cosine( arc length ) =
    #      sin phi sin phi' cos(theta-theta') + cos phi cos phi'
    cosangle = (sin(phi1)*sin(phi2)*cos(theta1 - theta2) +
           cos(phi1)*cos(phi2))

    # Rounding can yield e.g. 1.0000000000000002 for coincident points;
    # clamp so that acos stays within its domain.
    arc = acos(clamp(cosangle, -one(cosangle), one(cosangle)))

    # The caller scales by the sphere's radius to get a length.
    return arc
end

distance_on_unit_sphere (generic function with 1 method)

In [12]:
# Symmetric matrix of great-circle distances (radians on the unit sphere)
# between every pair of stations. Only the strict lower triangle is computed;
# each value is mirrored into the upper triangle and the diagonal stays zero.
numstations = nrow(isdSubset)
pairwiseSphere = zeros(numstations, numstations)
for i in 1:numstations, j in 1:(i-1)
    row_i = isdSubset[i,:]
    row_j = isdSubset[j,:]
    arc_ij = distance_on_unit_sphere(
        get(row_i[1,:LAT]), get(row_i[1,:LON]),
        get(row_j[1,:LAT]), get(row_j[1,:LON]),
    )
    pairwiseSphere[i,j] = arc_ij
    pairwiseSphere[j,i] = arc_ij
end
pairwiseSphere

4×4 Array{Float64,2}:
 0.0        0.0259496  0.0146736  0.0303475
 0.0259496  0.0        0.024088   0.0285853
 0.0146736  0.024088   0.0        0.0158124
 0.0303475  0.0285853  0.0158124  0.0      

In [13]:
# Pairwise Euclidean distances between stations in the projected
# (X_PRJ, Y_PRJ) plane. The two columns are converted to a matrix and
# transposed so that each column is one station (the result is 4×4).
pairwiseEuclid=pairwise(Euclidean(), Matrix(isdSubset[[:X_PRJ,:Y_PRJ]])')

4×4 Array{Float64,2}:
      0.0        165736.0        93510.4        1.93474e5
 165736.0             0.0            1.53559e5  1.81942e5
  93510.4             1.53559e5      0.0        1.00846e5
      1.93474e5       1.81942e5      1.00846e5  0.0      

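Ratio of the two distance matrices: close enough to a constant! The off-diagonal entries are all about $6.37 \times 10^6$, which is the Earth's radius in metres, as expected when projected distances in metres are divided by arc lengths on the unit sphere. The `NaN`s on the diagonal are just $0/0$.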

In [14]:
# Element-wise ratio of projected to spherical distances. The diagonal is
# 0/0 = NaN; the off-diagonal entries should all be close to one constant if
# the two distance computations agree.
pairwiseEuclid ./ pairwiseSphere

4×4 Array{Float64,2}:
 NaN            6.38684e6    6.37271e6    6.37527e6
   6.38684e6  NaN            6.37493e6    6.36489e6
   6.37271e6    6.37493e6  NaN            6.37765e6
   6.37527e6    6.36489e6    6.37765e6  NaN        

# Modifications of the spatiotemporal kernel

Start with the time-series kernel.

In [15]:
# Temporal kernel: a sum of six components. All are created with placeholder
# (zero) log-parameters, which set_params! below overwrites.
# k1 is periodic; its period parameter is initialised to log(24.0) and held
# fixed through fix(..., :lp).
k1 = fix(Periodic(0.0,0.0,log(24.0)), :lp)
k2 = RQIso(0.0,0.0,0.0)
k3 = SEIso(0.0,0.0)
k4 = RQIso(0.0,0.0,0.0)
k5 = RQIso(0.0,0.0,0.0)
k6 = SE(0.0,0.0)
k_time=k1+k2+k3+k4+k5+k6
# hyperparameters fitted in JuliaGP_timeseries_chunks.ipynb
# NOTE(review): hyp[1] is skipped below — presumably it is the noise parameter
# of that earlier fit rather than a kernel parameter. The remaining 15 values
# match the number of free kernel parameters (2 for k1 with its period fixed,
# 3 per RQIso, 2 per SEIso/SE). Confirm against the source notebook.
hyp=[-1.46229,-0.0777809,1.03854,1.45757,1.06292,-1.23699,-1.2353,-1.05117,3.10614,1.29327,2.84729,3.67464,0.537794,3.0094,7.70676,-5.30466]
set_params!(k_time, hyp[2:end])

## Allowing for mean variation

In [16]:
# Spatial squared-exponential kernels. The arguments are log-lengthscale then
# log-σ (the displayed Params of k_means are [log(10^4), log(10)]).
# Lengthscales are in the units of X_PRJ/Y_PRJ (presumably metres).
# k_spatial: spatial decay that multiplies the temporal kernel; initial
# lengthscale 2*10^5 and σ = 1.
k_spatial = SEIso(log(2*10^5), log(1.0))
# k_means: spatial kernel for the additive mean-variation term; lengthscale
# 10^4 and σ = 10.
k_means = SEIso(log(10^4), log(10.0))

Type: GaussianProcesses.SEIso, Params: [9.21034,2.30259]


In [17]:
# Model 1: (temporal kernel on input dimension 1) × (spatial kernel on input
# dimensions 2 and 3), plus an additive spatial kernel for mean variation.
# k_time and k_means are wrapped in fix(...) without parameter names, so they
# expose no free parameters (see the displayed `Params: Float64[]`); only the
# two parameters of k_spatial remain to be optimised.
k_spatiotemporal_1 = fix(Masked(k_time, [1])) * Masked(k_spatial, [2,3]) + 
    fix(Masked(k_means, [2,3]))

Type: GaussianProcesses.SumKernel
  Type: GaussianProcesses.ProdKernel
    Type: GaussianProcesses.FixedKern, Params: Float64[]
    Type: GaussianProcesses.Masked{GaussianProcesses.SEIso}, Params: [12.2061,0.0]
  Type: GaussianProcesses.FixedKern, Params: Float64[]


In [18]:
# Fit the free hyperparameters of k_spatiotemporal_1 jointly over consecutive
# time windows ("chunks") of the hourly data. Each chunk becomes its own GP
# object, but all chunks are given the same kernel object.
begin
    global opt_out
    k_spatiotemporal = k_spatiotemporal_1
    chunks=GP[]
    # ts_hours is in hours, so each window spans 24*10 hours = 10 days.
    chunk_width=24*10
    tstart=0.0
    tend=tstart+chunk_width
    # Running total of observations assigned to chunks (not read again in this cell).
    nobsv=0
    while tstart < get(maximum(hourly_cat[:ts_hours]))
        # Boolean mask of rows whose time lies in the half-open window [tstart, tend).
        in_chunk=(tstart .<= hourly_cat[:ts_hours].values) & (hourly_cat[:ts_hours].values .< tend)
        hourly_chunk = hourly_cat[in_chunk,:]
        nobsv_chunk = sum(in_chunk)
        nobsv += nobsv_chunk

        # Look up each observation's projected station coordinates; the
        # `station` column is used as a row index into isdSubset.
        chunk_X_PRJ = isdSubset[:X_PRJ].values[hourly_chunk[:station].values]
        chunk_Y_PRJ = isdSubset[:Y_PRJ].values[hourly_chunk[:station].values]
        # Design matrix with columns (time, x, y); it is transposed below so
        # that input dimension 1 is time and dimensions 2-3 are space, matching
        # the Masked(...) indices used when building the kernel.
        chunk_X = [hourly_chunk[:ts_hours].values chunk_X_PRJ chunk_Y_PRJ]

        y = hourly_chunk[:temp].values
        # Constant mean set to the chunk's average temperature; the last
        # argument (0.0) is the initial log-noise (it appears as the first
        # entry of the optimizer's starting point).
        # NOTE(review): a window containing no observations would give an
        # empty y here — confirm this cannot happen for the data used.
        chunk = GP(chunk_X', y, MeanConst(mean(y)), k_spatiotemporal, 0.0)
        push!(chunks, chunk)

        tstart=tend
        tend+=chunk_width
    end
    # Bundle the chunks; presumably TempModel.GPRealisations treats them as
    # independent realisations sharing hyperparameters — verify in src/TempModel.jl.
    reals = TempModel.GPRealisations(chunks)
    # Evaluate the objective and its gradient at the starting point and print
    # the gradient (one entry per optimised parameter). mean=false presumably
    # leaves the mean parameters out of the optimisation.
    update_mll_and_dmll!(reals, mean=false)
    println(reals.dmLL)
    @time opt_out=optimize!(reals, mean=false, show_trace=true, x_tol=1e-4, f_tol=1e-4)
end

[-26071.5,415.221,-3770.39]
Iter     Function value   Gradient norm 
     0     6.774166e+04     2.607146e+04
     1     6.208402e+04     2.694167e+04
     2     5.293036e+04     7.173942e+03
     3     5.196959e+04     4.711842e+03
     4     5.100534e+04     7.596923e+03
     5     5.018484e+04     3.455185e+03
     6     4.975538e+04     1.967081e+02
     7     4.975456e+04     9.122938e+01
1201.822098 seconds (10.54 M allocations: 181.106 GB, 3.89% gc time)


Results of Optimization Algorithm
 * Algorithm: Conjugate Gradient
 * Starting Point: [0.0,12.206072645530174,0.0]
 * Minimizer: [-1.5619405263410124,11.077887959926558, ...]
 * Minimum: 4.975456e+04
 * Iterations: 7
 * Convergence: true
   * |x - x'| < 1.0e-04: false
   * |f(x) - f(x')| / |f(x)| < 1.0e-04: true
   * |g(x)| < 1.0e-08: false
   * Reached Maximum Number of Iterations: false
 * Objective Function Calls: 35
 * Gradient Calls: 28

In [19]:
# Fitted parameter vector: entry 1 is the log-noise, followed by k_spatial's
# log-lengthscale and log-σ (compare with the summary printed below).
print(Optim.minimizer(opt_out))

[-1.56194,11.0779,0.0132451]

In [20]:
# Objective value at the optimum (the "Function value" of the last trace row;
# presumably the negative marginal log-likelihood summed over chunks).
print(Optim.minimum(opt_out))

49754.56185567802

In [21]:
# Summarise the fit of model 1. The kernel object is read directly, so this
# relies on optimize! having updated k_spatial in place (the printed values
# agree with exp of the minimizer entries). σ and l are the square roots of
# the stored σ2 and ℓ2 fields; l is in the units of X_PRJ/Y_PRJ.
print("\nk: Spatial kernel \n=================\n")
@printf("σ: %5.3f\n", √k_spatial.σ2)
@printf("l: %5.3f\n", √k_spatial.ℓ2)
print("\n=================\n")
# Observation noise standard deviation, from the fitted log-noise.
@printf("σy: %5.3f\n", exp(reals.logNoise))


k: Spatial kernel 
σ: 1.013
l: 64724.039

σy: 0.210


## Separating fast component

Now we'll remove the fastest component, $k_3$, and give it its own spatial kernel. The idea is that short-term variations (20 minute lengthscale) might be local (short-ranged), while long-term and periodic components are regional (long-ranged).

In [22]:
# Two independent spatial kernels with identical starting values
# (lengthscale 2*10^5, σ = 1): k_spatial_1 for the slow temporal components
# and k_spatial_2 for the fast one.
k_spatial_1 = SEIso(log(2*10^5), log(1.0))
k_spatial_2 = SEIso(log(2*10^5), log(1.0))

Type: GaussianProcesses.SEIso, Params: [12.2061,0.0]


In [23]:
# Temporal lengthscale of the fast component k3 (square root of the stored
# ℓ2 field), in hours like ts_hours.
√k3.ℓ2

0.29074752502539686

In [24]:
# Split the temporal kernel: everything except k3 is "slow", k3 alone is
# "fast". The sums reuse the existing kernel objects k1…k6 (with the
# hyperparameters set earlier) rather than creating new ones.
k_slow = k1+k2+k4+k5+k6
k_fast = k3

Type: GaussianProcesses.SEIso, Params: [-1.2353,-1.05117]


In [25]:
# Model 2: the slow and fast temporal parts each get their own spatial kernel
# (on input dimensions 2 and 3), plus the fixed mean-variation term. The
# temporal kernels and k_means are fixed, leaving the two parameters of
# k_spatial_1 and the two of k_spatial_2 free.
k_spatiotemporal_2 = fix(Masked(k_slow, [1])) * Masked(k_spatial_1, [2,3]) + 
                   fix(Masked(k_fast, [1])) * Masked(k_spatial_2, [2,3]) +
                   fix(Masked(k_means, [2,3]))

Type: GaussianProcesses.SumKernel
  Type: GaussianProcesses.ProdKernel
    Type: GaussianProcesses.FixedKern, Params: Float64[]
    Type: GaussianProcesses.Masked{GaussianProcesses.SEIso}, Params: [12.2061,0.0]
  Type: GaussianProcesses.ProdKernel
    Type: GaussianProcesses.FixedKern, Params: Float64[]
    Type: GaussianProcesses.Masked{GaussianProcesses.SEIso}, Params: [12.2061,0.0]
  Type: GaussianProcesses.FixedKern, Params: Float64[]


In [26]:
# Fit the free hyperparameters of k_spatiotemporal_2 over 10-day chunks.
# Same procedure as for model 1; only the kernel differs.
begin
    global opt_out
    k_spatiotemporal = k_spatiotemporal_2
    chunks=GP[]
    # ts_hours is in hours, so each window spans 24*10 hours = 10 days.
    chunk_width=24*10
    tstart=0.0
    tend=tstart+chunk_width
    # Running total of observations assigned to chunks (not read again in this cell).
    nobsv=0
    while tstart < get(maximum(hourly_cat[:ts_hours]))
        # Boolean mask of rows whose time lies in the half-open window [tstart, tend).
        in_chunk=(tstart .<= hourly_cat[:ts_hours].values) & (hourly_cat[:ts_hours].values .< tend)
        hourly_chunk = hourly_cat[in_chunk,:]
        nobsv_chunk = sum(in_chunk)
        nobsv += nobsv_chunk

        # Projected coordinates of each observation's station (`station` is
        # used as a row index into isdSubset).
        chunk_X_PRJ = isdSubset[:X_PRJ].values[hourly_chunk[:station].values]
        chunk_Y_PRJ = isdSubset[:Y_PRJ].values[hourly_chunk[:station].values]
        # Columns (time, x, y); transposed below so dimension 1 is time and
        # dimensions 2-3 are space, matching the Masked(...) indices.
        chunk_X = [hourly_chunk[:ts_hours].values chunk_X_PRJ chunk_Y_PRJ]

        y = hourly_chunk[:temp].values
        # Constant mean at the chunk average; 0.0 is the initial log-noise.
        # NOTE(review): an empty window would give an empty y — confirm this
        # cannot occur for the data used.
        chunk = GP(chunk_X', y, MeanConst(mean(y)), k_spatiotemporal, 0.0)
        push!(chunks, chunk)

        tstart=tend
        tend+=chunk_width
    end
    # Presumably treats the chunks as independent realisations with shared
    # hyperparameters — verify in src/TempModel.jl.
    reals = TempModel.GPRealisations(chunks)
    # Print the gradient at the starting point (one entry per optimised
    # parameter), then run the optimiser.
    update_mll_and_dmll!(reals, mean=false)
    println(reals.dmLL)
    @time opt_out=optimize!(reals, mean=false, show_trace=true, x_tol=1e-4, f_tol=1e-4)
end

[-26071.5,319.382,-1712.56,95.8386,-2057.83]
Iter     Function value   Gradient norm 
     0     6.774166e+04     2.607146e+04
     1     6.028650e+04     2.292979e+04
     2     5.300570e+04     5.360503e+03
     3     5.234450e+04     3.249772e+03
     4     5.145671e+04     1.138163e+03
     5     5.135977e+04     7.249001e+02
     6     5.127454e+04     1.649730e+03
     7     5.083380e+04     2.665671e+03
     8     4.949440e+04     3.688826e+03
     9     4.892992e+04     4.563910e+03
    10     4.828598e+04     1.555393e+03
    11     4.820275e+04     4.455338e+02
    12     4.818555e+04     2.349826e+02
    13     4.818349e+04     5.240228e+01
1680.050347 seconds (13.88 M allocations: 327.438 GB, 4.63% gc time)


Results of Optimization Algorithm
 * Algorithm: Conjugate Gradient
 * Starting Point: [0.0,12.206072645530174, ...]
 * Minimizer: [-1.648952624627339,11.667312481610702, ...]
 * Minimum: 4.818349e+04
 * Iterations: 13
 * Convergence: true
   * |x - x'| < 1.0e-04: false
   * |f(x) - f(x')| / |f(x)| < 1.0e-04: true
   * |g(x)| < 1.0e-08: false
   * Reached Maximum Number of Iterations: false
 * Objective Function Calls: 51
 * Gradient Calls: 38

In [27]:
# Fitted parameter vector: log-noise, then (log-lengthscale, log-σ) of
# k_spatial_1 followed by those of k_spatial_2 (compare with the summary below).
print(Optim.minimizer(opt_out))

[-1.64895,11.6673,-0.0617534,9.10855,0.142355]

In [28]:
# Objective value at the optimum, to compare with the 49754.56 reached by model 1.
print(Optim.minimum(opt_out))

48183.48806790547

In [29]:
# Summarise the fit of model 2 by reading the (in-place updated) spatial
# kernels. σ and l are square roots of the stored σ2 and ℓ2 fields; l is in
# the units of X_PRJ/Y_PRJ.
print("\nk: Spatial kernel (slow component) \n=================\n")
@printf("σ: %5.3f\n", √k_spatial_1.σ2)
@printf("l: %5.3f\n", √k_spatial_1.ℓ2)
print("\nk: Spatial kernel (fast component) \n=================\n")
@printf("σ: %5.3f\n", √k_spatial_2.σ2)
@printf("l: %5.3f\n", √k_spatial_2.ℓ2)
print("\n=================\n")
# Observation noise standard deviation, from the fitted log-noise.
@printf("σy: %5.3f\n", exp(reals.logNoise))


k: Spatial kernel (slow component) 
σ: 0.940
l: 116694.243

k: Spatial kernel (fast component) 
σ: 1.153
l: 9032.208

σy: 0.192


Interesting! The optimization did exactly what we thought it might do: it picked a long lengthscale (about 117 km) for the slow component, and a short lengthscale (about 9 km) for the fast component.

## Separating each component

Let's just go ahead and give each and every component of the temporal kernel its own spatial decay. The $\sigma$'s fitted above were pretty close to 1, so it seems reasonable to fix all the $\sigma$'s of the spatial kernels to 1. That gives us 6 kernel parameters + 1 noise parameter to fit.

I also change the initial value of the log-noise parameter (from 0.0 to -1.0) to fix some numerical issues.

In [30]:
# One spatial kernel per temporal component, all starting from the same
# values (lengthscale 2*10^5, σ = 1).
ksp1 = SEIso(log(2*10^5), log(1.0))
ksp2 = SEIso(log(2*10^5), log(1.0))
ksp3 = SEIso(log(2*10^5), log(1.0))
ksp4 = SEIso(log(2*10^5), log(1.0))
ksp5 = SEIso(log(2*10^5), log(1.0))
ksp6 = SEIso(log(2*10^5), log(1.0))
# Model 3: each temporal component k_i (fixed, input dimension 1) is
# multiplied by its own spatial kernel ksp_i (input dimensions 2 and 3).
# fix(..., :lσ) holds each spatial log-σ at its initial value log(1.0), so
# only the six spatial log-lengthscales are free (each displayed FixedKern
# shows a single parameter, 12.2061). k_means is fully fixed as before.
k_spatiotemporal_3 = fix(Masked(k1, [1])) * fix(Masked(ksp1, [2,3]), :lσ) +
                     fix(Masked(k2, [1])) * fix(Masked(ksp2, [2,3]), :lσ) +
                     fix(Masked(k3, [1])) * fix(Masked(ksp3, [2,3]), :lσ) +
                     fix(Masked(k4, [1])) * fix(Masked(ksp4, [2,3]), :lσ) +
                     fix(Masked(k5, [1])) * fix(Masked(ksp5, [2,3]), :lσ) +
                     fix(Masked(k6, [1])) * fix(Masked(ksp6, [2,3]), :lσ) +
                     fix(Masked(k_means, [2,3]))

Type: GaussianProcesses.SumKernel
  Type: GaussianProcesses.ProdKernel
    Type: GaussianProcesses.FixedKern, Params: Float64[]
    Type: GaussianProcesses.FixedKern, Params: [12.2061]
  Type: GaussianProcesses.ProdKernel
    Type: GaussianProcesses.FixedKern, Params: Float64[]
    Type: GaussianProcesses.FixedKern, Params: [12.2061]
  Type: GaussianProcesses.ProdKernel
    Type: GaussianProcesses.FixedKern, Params: Float64[]
    Type: GaussianProcesses.FixedKern, Params: [12.2061]
  Type: GaussianProcesses.ProdKernel
    Type: GaussianProcesses.FixedKern, Params: Float64[]
    Type: GaussianProcesses.FixedKern, Params: [12.2061]
  Type: GaussianProcesses.ProdKernel
    Type: GaussianProcesses.FixedKern, Params: Float64[]
    Type: GaussianProcesses.FixedKern, Params: [12.2061]
  Type: GaussianProcesses.ProdKernel
    Type: GaussianProcesses.FixedKern, Params: Float64[]
    Type: GaussianProcesses.FixedKern, Params: [12.2061]
  Type: GaussianProcesses.FixedKern, Params: Float64[]


In [31]:
# Sanity check: evaluate the kernel at zero separation in time and space,
# i.e. the prior variance at a single point. With every spatial σ equal to 1,
# this is the sum of the temporal variances plus k_means' variance (10^2).
cov(k_spatiotemporal_3, [0.0,0.0,0.0], [0.0,0.0,0.0])

132.69870506723657

In [32]:
# Fit the six spatial lengthscales of k_spatiotemporal_3 (plus the noise) over
# 10-day chunks. Compared with the earlier fits, the initial log-noise is
# -1.0 instead of 0.0 and the tolerances are tightened to 1e-5.
begin
    global opt_out
    k_spatiotemporal = k_spatiotemporal_3
    chunks=GP[]
    # ts_hours is in hours, so each window spans 24*10 hours = 10 days.
    chunk_width=24*10
    tstart=0.0
    tend=tstart+chunk_width
    # Running total of observations assigned to chunks (not read again in this cell).
    nobsv=0
    while tstart < get(maximum(hourly_cat[:ts_hours]))
        # Boolean mask of rows whose time lies in the half-open window [tstart, tend).
        in_chunk=(tstart .<= hourly_cat[:ts_hours].values) & (hourly_cat[:ts_hours].values .< tend)
        hourly_chunk = hourly_cat[in_chunk,:]
        nobsv_chunk = sum(in_chunk)
        nobsv += nobsv_chunk

        # Projected coordinates of each observation's station (`station` is
        # used as a row index into isdSubset).
        chunk_X_PRJ = isdSubset[:X_PRJ].values[hourly_chunk[:station].values]
        chunk_Y_PRJ = isdSubset[:Y_PRJ].values[hourly_chunk[:station].values]
        # Columns (time, x, y); transposed below so dimension 1 is time and
        # dimensions 2-3 are space, matching the Masked(...) indices.
        chunk_X = [hourly_chunk[:ts_hours].values chunk_X_PRJ chunk_Y_PRJ]

        y = hourly_chunk[:temp].values
        # Constant mean at the chunk average; -1.0 is the initial log-noise
        # (first entry of the optimizer's starting point).
        # NOTE(review): an empty window would give an empty y — confirm this
        # cannot occur for the data used.
        chunk = GP(chunk_X', y, MeanConst(mean(y)), k_spatiotemporal, -1.0)
        push!(chunks, chunk)

        tstart=tend
        tend+=chunk_width
    end
    # Presumably treats the chunks as independent realisations with shared
    # hyperparameters — verify in src/TempModel.jl.
    reals = TempModel.GPRealisations(chunks)
    # Print the gradient at the starting point (one entry per optimised
    # parameter), then run the optimiser.
    update_mll_and_dmll!(reals, mean=false)
    println(reals.dmLL)
    @time opt_out=optimize!(reals, mean=false, show_trace=true, x_tol=1e-5, f_tol=1e-5)
end

[5932.37,64.7881,-11224.1,-1399.72,243.684,57.078,1.68727e-6]
Iter     Function value   Gradient norm 
     0     5.557020e+04     1.122412e+04
     1     5.350688e+04     1.022201e+04
     2     5.112013e+04     1.697401e+03
     3     5.078535e+04     1.239815e+03
     4     4.891522e+04     1.534870e+03
     5     4.882333e+04     2.564050e+02
     6     4.865305e+04     1.217179e+02
     7     4.865143e+04     1.028406e+02
     8     4.863625e+04     5.996108e+01
     9     4.862396e+04     4.603747e+02
    10     4.861669e+04     1.377515e+02
    11     4.861335e+04     1.584461e+02
    12     4.861029e+04     3.319758e+01
    13     4.860942e+04     3.882107e+01
    14     4.860873e+04     7.955644e+01
    15     4.860622e+04     5.221113e+01
    16     4.860282e+04     8.391893e+01
    17     4.859245e+04     2.151750e+02
    18     4.858967e+04     4.080071e+01
    19     4.858917e+04     9.432550e+00
    20     4.858900e+04     3.205165e+00
1910.657345 seconds (19.43 M allocat

Results of Optimization Algorithm
 * Algorithm: Conjugate Gradient
 * Starting Point: [-1.0,12.206072645530174, ...]
 * Minimizer: [-1.6037476146199583,15.42589782578613, ...]
 * Minimum: 4.858900e+04
 * Iterations: 20
 * Convergence: true
   * |x - x'| < 1.0e-05: false
   * |f(x) - f(x')| / |f(x)| < 1.0e-05: true
   * |g(x)| < 1.0e-08: false
   * Reached Maximum Number of Iterations: false
 * Objective Function Calls: 71
 * Gradient Calls: 50

In [33]:
# Fitted parameter vector: log-noise, then the log-lengthscales of ksp1…ksp6
# in order (compare with the "spatial decay" values printed below).
print(Optim.minimizer(opt_out))

[-1.60375,15.4259,9.86868,9.12749,16.4496,15.0163,12.2061]

In [34]:
# Objective value at the optimum, for comparison with the previous models.
print(Optim.minimum(opt_out))

48589.00158150999

In [35]:
# Summary of model 3. For each temporal component, print its (fixed)
# hyperparameters followed by the fitted spatial lengthscale of the matching
# ksp kernel. σ and l are square roots of the stored σ2 and ℓ2 fields.
# Temporal lengthscales are in hours, divided by 24 where reported in days;
# spatial lengthscales are in the units of X_PRJ/Y_PRJ.
# The spatial σ lines are commented out because σ is held fixed in this model.

# k1 was wrapped by fix(...), so the underlying kernel is reached through `.kern`.
print("k₁: Periodic \n=================\n")
@printf("σ: %5.3f\n", √k1.kern.σ2)
@printf("l: %5.3f\n", √k1.kern.ℓ2)
@printf("p: %5.0f hours\n", k1.kern.p)
print("> spatial decay:\n")
@printf("l: %5.3f\n", √ksp1.ℓ2)
print("\nk₂: RQIso \n=================\n")
@printf("σ: %5.3f\n", √ k2.σ2)
@printf("l: %5.3f hours\n", √ k2.ℓ2)
@printf("α: %5.3f\n", k2.α)
print("> spatial decay:\n")
# @printf("σ: %5.3f\n", √ksp2.σ2)
@printf("l: %5.3f\n", √ksp2.ℓ2)
print("\nk₃: SEIso \n=================\n")
@printf("σ: %5.3f\n", √k3.σ2)
@printf("l: %5.3f hours\n", √k3.ℓ2)
print("> spatial decay:\n")
# @printf("σ: %5.3f\n", √ksp3.σ2)
@printf("l: %5.3f\n", √ksp3.ℓ2)
print("\nk₄: RQIso \n=================\n")
@printf("σ: %5.3f\n", √k4.σ2)
@printf("l: %5.3f days\n", √k4.ℓ2 / 24)
@printf("α: %5.3f\n",  k4.α)
print("> spatial decay:\n")
# @printf("σ: %5.3f\n", √ksp4.σ2)
@printf("l: %5.3f\n", √ksp4.ℓ2)
print("\nk₅: RQIso \n=================\n")
@printf("σ: %5.3f\n", √k5.σ2)
@printf("l: %5.3f days\n", √k5.ℓ2 / 24)
@printf("α: %5.3f\n",  k5.α)
print("> spatial decay:\n")
# @printf("σ: %5.3f\n", √ksp5.σ2)
@printf("l: %5.3f\n", √ksp5.ℓ2)
print("\nk₆ SEIso \n=================\n")
@printf("σ: %5.3f\n", √k6.σ2)
@printf("l: %5.3f days\n", √k6.ℓ2 / 24)
print("> spatial decay:\n")
# @printf("σ: %5.3f\n", √ksp6.σ2)
@printf("l: %5.3f\n", √ksp6.ℓ2)
print("\n=================\n")
# Observation noise standard deviation: the first minimizer entry is the log-noise.
@printf("σy: %5.3f\n", exp(Optim.minimizer(opt_out)[1]))

k₁: Periodic 
σ: 2.825
l: 0.925
p:    24 hours
> spatial decay:
l: 5004749.031

k₂: RQIso 
σ: 2.895
l: 4.296 hours
α: 0.290
> spatial decay:
l: 19315.738

k₃: SEIso 
σ: 0.350
l: 0.291 hours
> spatial decay:
l: 9204.873

k₄: RQIso 
σ: 3.645
l: 0.931 days
α: 17.241
> spatial decay:
l: 13931194.158

k₅: RQIso 
σ: 1.712
l: 1.643 days
α: 20.275
> spatial decay:
l: 3322875.947

k₆ SEIso 
σ: 0.005
l: 92.639 days
> spatial decay:
l: 200000.036

σy: 0.201


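This is extremely promising! We are indeed seeing that slower components of the temporal kernel also decay spatially over longer lengthscales. (One caveat: the spatial lengthscale of $k_6$ stayed at its initial value of $2 \times 10^5$. That component has a negligible variance, $\sigma = 0.005$, so the data carry essentially no information about its spatial decay.)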

## Releasing variances

In [36]:
# Model 4: same structure as model 3, but the spatial kernels are no longer
# wrapped in fix(..., :lσ), so both the lengthscale and σ of every ksp_i are
# free (12 kernel parameters plus the noise).
# NOTE(review): ksp1…ksp6 are the same objects used in k_spatiotemporal_3,
# which the previous fit appears to update in place. The recorded output and
# optimizer starting point nevertheless show the initial values [12.2061, 0.0]
# — confirm whether the ksp kernels must be re-created before this cell when
# running the notebook top to bottom.
k_spatiotemporal_4 = fix(Masked(k1, [1])) * Masked(ksp1, [2,3]) +
                     fix(Masked(k2, [1])) * Masked(ksp2, [2,3]) +
                     fix(Masked(k3, [1])) * Masked(ksp3, [2,3]) +
                     fix(Masked(k4, [1])) * Masked(ksp4, [2,3]) +
                     fix(Masked(k5, [1])) * Masked(ksp5, [2,3]) +
                     fix(Masked(k6, [1])) * Masked(ksp6, [2,3]) +
                     fix(Masked(k_means, [2,3]))

Type: GaussianProcesses.SumKernel
  Type: GaussianProcesses.ProdKernel
    Type: GaussianProcesses.FixedKern, Params: Float64[]
    Type: GaussianProcesses.Masked{GaussianProcesses.SEIso}, Params: [12.2061,0.0]
  Type: GaussianProcesses.ProdKernel
    Type: GaussianProcesses.FixedKern, Params: Float64[]
    Type: GaussianProcesses.Masked{GaussianProcesses.SEIso}, Params: [12.2061,0.0]
  Type: GaussianProcesses.ProdKernel
    Type: GaussianProcesses.FixedKern, Params: Float64[]
    Type: GaussianProcesses.Masked{GaussianProcesses.SEIso}, Params: [12.2061,0.0]
  Type: GaussianProcesses.ProdKernel
    Type: GaussianProcesses.FixedKern, Params: Float64[]
    Type: GaussianProcesses.Masked{GaussianProcesses.SEIso}, Params: [12.2061,0.0]
  Type: GaussianProcesses.ProdKernel
    Type: GaussianProcesses.FixedKern, Params: Float64[]
    Type: GaussianProcesses.Masked{GaussianProcesses.SEIso}, Params: [12.2061,0.0]
  Type: GaussianProcesses.ProdKernel
    Type: GaussianProcesses.FixedKern, Param

In [37]:
# Fit the spatial lengthscales and variances of k_spatiotemporal_4 (plus the
# noise) over 10-day chunks, with the same settings as the model-3 fit
# (initial log-noise -1.0, tolerances 1e-5).
begin
    global opt_out
    k_spatiotemporal = k_spatiotemporal_4
    chunks=GP[]
    # ts_hours is in hours, so each window spans 24*10 hours = 10 days.
    chunk_width=24*10
    tstart=0.0
    tend=tstart+chunk_width
    # Running total of observations assigned to chunks (not read again in this cell).
    nobsv=0
    while tstart < get(maximum(hourly_cat[:ts_hours]))
        # Boolean mask of rows whose time lies in the half-open window [tstart, tend).
        in_chunk=(tstart .<= hourly_cat[:ts_hours].values) & (hourly_cat[:ts_hours].values .< tend)
        hourly_chunk = hourly_cat[in_chunk,:]
        nobsv_chunk = sum(in_chunk)
        nobsv += nobsv_chunk

        # Projected coordinates of each observation's station (`station` is
        # used as a row index into isdSubset).
        chunk_X_PRJ = isdSubset[:X_PRJ].values[hourly_chunk[:station].values]
        chunk_Y_PRJ = isdSubset[:Y_PRJ].values[hourly_chunk[:station].values]
        # Columns (time, x, y); transposed below so dimension 1 is time and
        # dimensions 2-3 are space, matching the Masked(...) indices.
        chunk_X = [hourly_chunk[:ts_hours].values chunk_X_PRJ chunk_Y_PRJ]

        y = hourly_chunk[:temp].values
        # Constant mean at the chunk average; -1.0 is the initial log-noise
        # (first entry of the optimizer's starting point).
        # NOTE(review): an empty window would give an empty y — confirm this
        # cannot occur for the data used.
        chunk = GP(chunk_X', y, MeanConst(mean(y)), k_spatiotemporal, -1.0)
        push!(chunks, chunk)

        tstart=tend
        tend+=chunk_width
    end
    # Presumably treats the chunks as independent realisations with shared
    # hyperparameters — verify in src/TempModel.jl.
    reals = TempModel.GPRealisations(chunks)
    # Print the gradient at the starting point (one entry per optimised
    # parameter), then run the optimiser.
    update_mll_and_dmll!(reals, mean=false)
    println(reals.dmLL)
    @time opt_out=optimize!(reals, mean=false, show_trace=true, x_tol=1e-5, f_tol=1e-5)
end

[5932.37,64.7881,-130.213,-11224.1,5786.12,-1399.72,-1611.98,243.684,-224.299,57.078,-42.1138,1.68727e-6,-2.93528e-5]
Iter     Function value   Gradient norm 
     0     5.557020e+04     1.122412e+04
     1     5.488597e+04     1.329534e+04
     2     5.131610e+04     5.205859e+03
     3     5.078003e+04     1.600448e+03
     4     5.049983e+04     1.837527e+03
     5     5.038848e+04     1.336321e+03
     6     5.019158e+04     1.264262e+03
     7     4.916541e+04     4.728498e+03
     8     4.801690e+04     1.895439e+03
     9     4.774955e+04     2.329232e+03
    10     4.734335e+04     1.286051e+03
    11     4.723742e+04     3.406529e+02
    12     4.721982e+04     4.250593e+02
    13     4.719816e+04     3.361377e+02
    14     4.715590e+04     3.073294e+02
    15     4.713855e+04     1.360608e+02
    16     4.713776e+04     6.110578e+01
    17     4.713000e+04     3.353194e+02
    18     4.711864e+04     2.521104e+02
    19     4.711389e+04     3.211497e+02
    20     4.710969e+

Results of Optimization Algorithm
 * Algorithm: Conjugate Gradient
 * Starting Point: [-1.0,12.206072645530174, ...]
 * Minimizer: [-1.6502890060237414,14.239844714240363, ...]
 * Minimum: 4.708213e+04
 * Iterations: 27
 * Convergence: true
   * |x - x'| < 1.0e-05: false
   * |f(x) - f(x')| / |f(x)| < 1.0e-05: true
   * |g(x)| < 1.0e-08: false
   * Reached Maximum Number of Iterations: false
 * Objective Function Calls: 93
 * Gradient Calls: 65

In [38]:
# Fitted parameter vector: log-noise, then (log-lengthscale, log-σ) pairs for
# ksp1…ksp6 in order (compare with the "spatial decay" values printed below).
print(Optim.minimizer(opt_out))

[-1.65029,14.2398,0.111707,11.5002,-0.0791469,8.76624,0.126258,14.4041,0.147028,13.0326,-0.635492,12.2061,-8.08864e-7]

In [39]:
# Objective value at the optimum, for comparison with the previous models.
print(Optim.minimum(opt_out))

47082.12648618504

In [42]:
# Summary of model 4. For each temporal component, print its (fixed)
# hyperparameters followed by the fitted σ and lengthscale of the matching
# spatial kernel ksp_i. σ and l are square roots of the stored σ2 and ℓ2
# fields. Temporal lengthscales are in hours, divided by 24 where reported in
# days; spatial lengthscales are in the units of X_PRJ/Y_PRJ.

# k1 was wrapped by fix(...), so the underlying kernel is reached through `.kern`.
print("k₁: Periodic \n=================\n")
@printf("σ: %5.3f\n", √k1.kern.σ2)
@printf("l: %5.3f\n", √k1.kern.ℓ2)
@printf("p: %5.0f hours\n", k1.kern.p)
print("> spatial decay:\n")
@printf("σ: %5.3f\n", √ksp1.σ2)
@printf("l: %5.3f\n", √ksp1.ℓ2)
print("\nk₂: RQIso \n=================\n")
@printf("σ: %5.3f\n", √ k2.σ2)
@printf("l: %5.3f hours\n", √ k2.ℓ2)
@printf("α: %5.3f\n", k2.α)
print("> spatial decay:\n")
@printf("σ: %5.3f\n", √ksp2.σ2)
@printf("l: %5.3f\n", √ksp2.ℓ2)
print("\nk₃: SEIso \n=================\n")
@printf("σ: %5.3f\n", √k3.σ2)
@printf("l: %5.3f hours\n", √k3.ℓ2)
print("> spatial decay:\n")
@printf("σ: %5.3f\n", √ksp3.σ2)
@printf("l: %5.3f\n", √ksp3.ℓ2)
print("\nk₄: RQIso \n=================\n")
@printf("σ: %5.3f\n", √k4.σ2)
@printf("l: %5.3f days\n", √k4.ℓ2 / 24)
@printf("α: %5.3f\n",  k4.α)
print("> spatial decay:\n")
@printf("σ: %5.3f\n", √ksp4.σ2)
@printf("l: %5.3f\n", √ksp4.ℓ2)
print("\nk₅: RQIso \n=================\n")
@printf("σ: %5.3f\n", √k5.σ2)
@printf("l: %5.3f days\n", √k5.ℓ2 / 24)
@printf("α: %5.3f\n",  k5.α)
print("> spatial decay:\n")
@printf("σ: %5.3f\n", √ksp5.σ2)
@printf("l: %5.3f\n", √ksp5.ℓ2)
print("\nk₆ SEIso \n=================\n")
@printf("σ: %5.3f\n", √k6.σ2)
@printf("l: %5.3f days\n", √k6.ℓ2 / 24)
print("> spatial decay:\n")
@printf("σ: %5.3f\n", √ksp6.σ2)
@printf("l: %5.3f\n", √ksp6.ℓ2)
print("\n=================\n")
# Observation noise standard deviation: the first minimizer entry is the log-noise.
@printf("σy: %5.3f\n", exp(Optim.minimizer(opt_out)[1]))

k₁: Periodic 
σ: 2.825
l: 0.925
p:    24 hours
> spatial decay:
σ: 1.118
l: 1528572.290

k₂: RQIso 
σ: 2.895
l: 4.296 hours
α: 0.290
> spatial decay:
σ: 0.924
l: 98740.086

k₃: SEIso 
σ: 0.350
l: 0.291 hours
> spatial decay:
σ: 1.135
l: 6413.993

k₄: RQIso 
σ: 3.645
l: 0.931 days
α: 17.241
> spatial decay:
σ: 1.158
l: 1801403.018

k₅: RQIso 
σ: 1.712
l: 1.643 days
α: 20.275
> spatial decay:
σ: 0.530
l: 457074.599

k₆ SEIso 
σ: 0.005
l: 92.639 days
> spatial decay:
σ: 1.000
l: 200000.011

σy: 0.192
