$$
    \newcommand{\genericdel}[3]{%
      \left#1#3\right#2
    }
    \newcommand{\del}[1]{\genericdel(){#1}}
    \newcommand{\sbr}[1]{\genericdel[]{#1}}
    \newcommand{\cbr}[1]{\genericdel\{\}{#1}}
    \newcommand{\abs}[1]{\genericdel||{#1}}
    \DeclareMathOperator*{\argmin}{arg\,min}
    \DeclareMathOperator*{\argmax}{arg\,max}
    \DeclareMathOperator{\Pr}{\mathbb{P}}
    \DeclareMathOperator{\E}{\mathbb{E}}
    \DeclareMathOperator{\Ind}{\mathbb{I}}
    \DeclareMathOperator{\var}{var}
    \DeclareMathOperator{\cov}{cov}
    \DeclareMathOperator{\invchi}{\mathrm{Inv-\chi}^2}
    \newcommand{\effect}{\mathrm{eff}}
    \newcommand{\xtilde}{\widetilde{X}}
    \DeclareMathOperator{\normal}{\mathcal{N}}
    \DeclareMathOperator{\unif}{Uniform}
    \DeclareMathOperator{\GP}{\mathcal{GP}}
    \newcommand{\Tn}{\mathrm{T}_{n}}
    \newcommand{\Tx}{\mathrm{T}_{x}}
    \newcommand{\station}[1]{\mathrm{station}\sbr{#1}}
    \newcommand{\xvec}{\mathbf{x}}
    \newcommand{\indep}{\perp}
    \newcommand{\iid}{iid}
    \newcommand{\trans}{^{\intercal}}
    \newcommand{\sigmaf}{\sigma_{\mathrm{GP}}}
    \newcommand{\sigman}{\sigma_{\epsilon}}
$$

In this notebook, we implement the much simpler model:
\begin{align}
    k_{st}(\xvec,\xvec',t,t') &= k_{time}(t,t') \cdot k_{space}(\xvec, \xvec') + k_{mean}(\xvec, \xvec')\,,\\
    k_{space}(\xvec, \xvec') &= \sigmaf^2 \exp\del{-\frac{\del{\xvec-\xvec'}\trans\del{\xvec-\xvec'}}{2\ell_x^2}}\,,\\
    k_{time}(t, t') &= \exp\del{-\frac{\del{t-t'}^2}{2\ell_t^2}}\,,\\
    k_{mean}(\xvec, \xvec') &= \sigma_\mu^2 \exp\del{-\frac{\del{\xvec-\xvec'}\trans\del{\xvec-\xvec'}}{2\ell_\mu^2}}\,,\\
\end{align}

\begin{align}
    T_i &= f(\xvec_i, t_i) + \epsilon_i\\
    f(\xvec_i, t_i) &\sim \GP\del{0, k_{st}(\xvec,\xvec',t,t')}\\
    \epsilon_i &\overset{\iid}{\sim} \normal\del{0,\sigman^2}\\
\end{align}

We then add a diurnal component.

In [63]:
using TimeSeries
using DataFrames
using GaussianProcesses
using GaussianProcesses: Mean, Kernel, evaluate, metric, IsotropicData, VecF64
using GaussianProcesses: Stationary, KernelData, MatF64
import GaussianProcesses: optimize!, get_optim_target, cov, grad_slice!
import GaussianProcesses: num_params, set_params!, get_params, update_mll!, update_mll_and_dmll!
import GaussianProcesses: get_param_names, cov!, addcov!, multcov!
import Proj4
using Optim
using Distances
;

In [64]:
import PyPlot; plt=PyPlot
using LaTeXStrings
# Global matplotlib defaults: 300-dpi, 6x4 figures (on screen and when saved),
# with text rendered through LaTeX in the Palatino serif face.
plt.rc("figure", dpi=300.0, figsize=(6,4))
plt.rc("savefig", dpi=300.0)
plt.rc("text", usetex=true)
plt.rc("font", family="serif", serif="Palatino")
;

# Data Import and Preprocessing

In [65]:
# Load the helper code in iowa.jl, then build the `iowa` dataset that the rest
# of the notebook indexes with :isdSubset and :hourly_data.
# NOTE(review): neither `prepare_iowa_data` nor `data_dir` is defined in the
# visible cells — presumably both come from iowa.jl; confirm.
include("iowa.jl")
iowa = prepare_iowa_data(data_dir);



## Distances

To get distances between stations, we can either use a function that computes distances on a sphere, or we can first project the coordinates onto a Euclidean plane and then compute ordinary Euclidean distances. I'll do it both ways to check that they're consistent (equal up to a multiplicative constant), and then use Euclidean distances for convenience.

In [66]:
# Symmetric matrix of unit-sphere distances between every pair of stations.
# Only the strict lower triangle (j < i) is computed; each value is mirrored
# into the upper triangle and the diagonal keeps its initial zeros.
numstations = nrow(iowa[:isdSubset])
pairwiseSphere = zeros(numstations, numstations)
for i in 2:numstations
    # Coordinates of station i do not depend on j, so read them once per row.
    lat_i = iowa[:isdSubset][i, :LAT]
    lon_i = iowa[:isdSubset][i, :LON]
    for j in 1:(i - 1)
        lat_j = iowa[:isdSubset][j, :LAT]
        lon_j = iowa[:isdSubset][j, :LON]
        dist_ij = TempModel.distance_on_unit_sphere(lat_i, lon_i, lat_j, lon_j)
        pairwiseSphere[i, j] = dist_ij
        pairwiseSphere[j, i] = dist_ij
    end
end
pairwiseSphere

4×4 Array{Float64,2}:
 0.0        0.0259496  0.0146736  0.0303475
 0.0259496  0.0        0.024088   0.0285853
 0.0146736  0.024088   0.0        0.0158124
 0.0303475  0.0285853  0.0158124  0.0      

In [67]:
# Pairwise Euclidean distances between stations in the projected
# (X_PRJ, Y_PRJ) plane. The coordinate matrix is transposed so that each
# station becomes a column, giving one row/column of the result per station.
pairwiseEuclid=pairwise(Euclidean(), Matrix(iowa[:isdSubset][[:X_PRJ,:Y_PRJ]])')

4×4 Array{Float64,2}:
      0.0        165736.0        93510.4        1.93474e5
 165736.0             0.0            1.53559e5  1.81942e5
  93510.4             1.53559e5      0.0        1.00846e5
      1.93474e5       1.81942e5      1.00846e5  0.0      

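Ratio of the two distance matrices: close enough to a constant! (The constant, about 6.37e6, is consistent with the Earth's radius in metres, which is what we expect when dividing projected distances by distances on a unit sphere.)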

In [68]:
# Element-wise ratio of projected to spherical distances. Both matrices have a
# zero diagonal, so the diagonal of the ratio is 0/0 = NaN; only the
# off-diagonal entries are meaningful.
pairwiseEuclid ./ pairwiseSphere

4×4 Array{Float64,2}:
 NaN            6.38684e6    6.37271e6    6.37527e6
   6.38684e6  NaN            6.37493e6    6.36489e6
   6.37271e6    6.37493e6  NaN            6.37765e6
   6.37527e6    6.36489e6    6.37765e6  NaN        

# Product of SE kernels

In [69]:
# Starting values for the kernels. SEIso is constructed from
# (log length-scale, log signal standard deviation).
# Temporal kernel: both log-parameters start at 0.
k_time = SEIso(0.0,0.0)
# Spatial kernel: initial length-scale 2*10^5, signal std exp(log(1.0)) = 1.
# NOTE(review): `fix(..., :lσ)` presumably holds the log signal std constant
# during optimization, so the product's variance is carried by k_time alone —
# confirm against the definition of `fix`.
k_spatial = fix(SEIso(log(2*10^5), log(1.0)), :lσ)
# Kernel for the station means: length-scale 10^4, signal std 10.
k_means = SEIso(log(10^4), log(10.0))

# Full kernel: k_time * k_spatial + k_means.
# NOTE(review): `Masked(k, dims)` presumably restricts a kernel to the listed
# input dimensions (1 looks like time, 2-3 like the projected coordinates), and
# `fix` with no parameter name presumably freezes every parameter of k_means —
# confirm both.
k_spatiotemporal = Masked(k_time, [1]) * Masked(k_spatial, [2,3]) + 
    fix(Masked(k_means, [2,3]))
;

Type: GaussianProcesses.SEIso, Params: [9.21034, 2.30259]


In [71]:
# Fit the kernel hyperparameters using the :Optim method and time the run.
# The result is indexed in later cells with :hyp and :mll.
# NOTE(review): the second argument (0.0) is presumably the initial log noise
# standard deviation — confirm in optim_kernel.
@time opt_out = optim_kernel(k_spatiotemporal, 0.0, iowa[:isdSubset], iowa[:hourly_data], :Optim);

Base.LinAlg.PosDefException(2)
Base.LinAlg.PosDefException(3)
361.621533 seconds (10.34 M allocations: 40.515 GiB, 3.21% gc time)


In [75]:
# Hyperparameter vector returned by the :Optim run. The report cell applies
# exp() to its first entry, so the values are presumably on the log scale.
print(opt_out[:hyp])

[-0.822313, 0.996795, 1.31718, 12.0805]

In [74]:
# Value stored under :mll by the :Optim run
# (presumably the marginal log-likelihood at the optimum — confirm).
opt_out[:mll]

-55614.520020422955

In [80]:
# Report the :Optim fit. The kernels expose σ2 and ℓ2 fields, which are
# square-rooted here before printing.
print("\nk: Temporal kernel \n=================\n")
@printf("σ: %5.3f\n", sqrt(k_time.σ2))
@printf("l: %5.3f hours\n", sqrt(k_time.ℓ2))
print("\nk: Spatial kernel \n=================\n")
@printf("σ: %5.3f\n", sqrt(k_spatial.kern.σ2))
# Divide by 1000 to express the spatial length-scale in the km shown in the label.
@printf("l: %5.3f km\n", sqrt(k_spatial.kern.ℓ2) / 1000)
print("\n=================\n")
# The first hyperparameter is exponentiated to give σy.
@printf("σy: %5.3f\n", exp(opt_out[:hyp][1]))


k: Temporal kernel 
σ: 3.733
l: 2.710 hours

k: Spatial kernel 
σ: 1.000
l: 176.390 km

σy: 0.439


## NLopt

Here we repeat the fit using an alternative optimizer.
The `:NLopt` method uses the L-BFGS algorithm implemented in [NLopt](https://nlopt.readthedocs.io/en/latest/), while the `:Optim` method uses the conjugate gradient method of the Julia [Optim package](https://github.com/JuliaNLSolvers/Optim.jl).

In [81]:
# Rebuild the kernels for the NLopt run. SEIso is constructed from
# (log length-scale, log signal standard deviation).
k_time = SEIso(0.0,0.0)
# Note the spatial length-scale starts at 10^5 here, versus 2*10^5 in the
# :Optim run above, so the two optimizers begin from different points.
# NOTE(review): `fix(..., :lσ)` presumably holds the log signal std constant
# during optimization — confirm against the definition of `fix`.
k_spatial = fix(SEIso(log(10^5), log(1.0)), :lσ)
# Kernel for the station means: length-scale 10^4, signal std 10.
k_means = SEIso(log(10^4), log(10.0))

# Full kernel: k_time * k_spatial + k_means.
# NOTE(review): `Masked(k, dims)` presumably restricts a kernel to the listed
# input dimensions, and `fix` with no parameter name presumably freezes every
# parameter of k_means — confirm both.
k_spatiotemporal = Masked(k_time, [1]) * Masked(k_spatial, [2,3]) + 
    fix(Masked(k_means, [2,3]))
;

In [82]:
# Same fit as before, but using the :NLopt method; timed for comparison.
# The result is indexed in later cells with :hyp and :mll.
@time nlopt_out = optim_kernel(k_spatiotemporal, 0.0, iowa[:isdSubset], iowa[:hourly_data], :NLopt);

582.971651 seconds (11.50 M allocations: 45.151 GiB, 3.04% gc time)


In [83]:
# Hyperparameter vector returned by the :NLopt run, for comparison with the
# :Optim result.
print(nlopt_out[:hyp])

[-0.822316, 0.996798, 1.31717, 12.0804]

In [86]:
# Value stored under :mll by the :NLopt run
# (presumably the marginal log-likelihood at the optimum — confirm).
nlopt_out[:mll]

-55614.52001889782

In [85]:
# Report the :NLopt fit. The kernels expose σ2 and ℓ2 fields, which are
# square-rooted here before printing.
print("\nk: Temporal kernel \n=================\n")
@printf("σ: %5.3f\n", sqrt(k_time.σ2))
@printf("l: %5.3f hours\n", sqrt(k_time.ℓ2))
print("\nk: Spatial kernel \n=================\n")
@printf("σ: %5.3f\n", sqrt(k_spatial.kern.σ2))
# Divide by 1000 to express the spatial length-scale in the km shown in the label.
@printf("l: %5.3f km\n", sqrt(k_spatial.kern.ℓ2) / 1000)
print("\n=================\n")
# The first hyperparameter is exponentiated to give σy.
@printf("σy: %5.3f\n", exp(nlopt_out[:hyp][1]))


k: Temporal kernel 
σ: 3.733
l: 2.710 hours

k: Spatial kernel 
σ: 1.000
l: 176.388 km

σy: 0.439
