In [6]:
push!(LOAD_PATH,"/media/roels/_disk2/git/netatmo")
using Interact, Plots, Dates, DataFrames, Netatmo, LinearAlgebra, IterativeSolvers

#  Quality control using Gaussian process regression. 

In [2]:
# Observation window: starts at `dtg`, spans `period`, sampled every 10 minutes.
dtg       = DateTime(2018, 5, 10, 0)
period    = Hour(24)
timerange = dtg:Minute(10):(dtg + period)

# Latitude/longitude box passed to the reader, in steps of 0.01.
latrange = 59.9:0.01:60
lonrange = 10.7:0.01:10.8

# Read the Netatmo data for the window and box above; the trailing `;` hides the cell output.
df = Netatmo.read(timerange; latrange=latrange, lonrange=lonrange);

In [3]:
# Split the observations into one group per value of the `:id` column.
groupbyid = groupby(df, :id);

# Temporal Gaussian process regression  

We use Gaussian process regression to compute the pressure anomaly as the difference between the observed surface pressure and a smoothed surface pressure signal. 
The squared-exponential kernel is 

$$K_{se}(t_1,t_2) = \exp \left( - \frac{ |t_1-t_2|^2 }{2 l^2} \right) $$

The Ornstein–Uhlenbeck kernel is 

$$K_{ou}(t_1,t_2) = \exp \left( - \frac{|t_1-t_2| }{l} \right) $$


## Impact of length scales

Try lengthscale = 24 in the $K_{se}$ kernel and see the impact that the assumed $\sigma_o$ has on the anomaly at the end of the time window, i.e. $\sigma_o$ is more than just a regularization parameter. 
Check station 371 on 20180510.

In [1]:
# Interactive comparison of conjugate gradients (`cg`) against the direct solve (`\`)
# when smoothing the pressure series of one station group with a temporal kernel.
lengthscales = 1:1:24                # candidate length scales (multiplied by 60*60 below)
sigmaos      = 0.0001:0.0001:0.001   # candidate σ_o values; σ_o² is added to the kernel diagonal
indices      = 1:length(groupbyid)   # one index per group in `groupbyid`

mp = @manipulate for index in indices, sigmao in sigmaos, lt in slider(lengthscales; label="lengthscale")
    s1       = groupbyid[index]
    times    = s1.time_utc    # passed to unix2datetime below, i.e. treated as Unix timestamps
    pressure = s1.pressure
    l        = 60 * 60 * lt   # length scale in seconds, presuming lt is given in hours

    # Kernels as defined in the notebook text. Only Kse is used to build K;
    # Kou is kept so that it can be swapped in on the next line.
    Kou(t1, t2) = exp(-abs(t1 - t2) / l)           # Ornstein–Uhlenbeck
    Kse(t1, t2) = exp(-1/2 * (t1 - t2)^2 / l^2)    # squared-exponential

    K   = [Kse(t1, t2) for t1 in times, t2 in times]
    KpI = K + sigmao^2 * I    # K + σ_o² I

    # Solve (K + σ_o² I) q = pressure with both solvers and time each of them.
    timecg  = @elapsed q, cglog = cg(KpI, pressure, log=true)
    timenor = @elapsed q2 = KpI \ pressure

    # Smoothed pressure evaluated at the observation times.
    pshat  = K * q
    pshat2 = K * q2

    datetimes = unix2datetime.(times)
    scatter(datetimes, pressure, marker=:o, label="Ps")
    plot!(datetimes, pshat, label="cg")
    plot!(datetimes, pshat2, label="nor")

    # The title reports the CG convergence log and how much faster CG was than `\`.
    plot!(title = "CG $cglog speedupfactor=$(round(timenor/timecg,digits=2))")
    plot!(legend=:bottomleft)
end

UndefVarError: UndefVarError: groupbyid not defined

In [208]:
latlons = unique(df[[:lon,:lat]]); # don't use scatter. Perhaps lea