In [1]:
import Pkg
Pkg.add("Flux")
Pkg.add("NCDatasets")
Pkg.add("TSVD")
Pkg.add("Statistics")
Pkg.add("Compat")
Pkg.add("LinearAlgebra")
Pkg.add("Glob")
Pkg.add("CSV")
Pkg.add("DataFrames")
Pkg.add("Distributions")
Pkg.add("ProgressMeter")
Pkg.add("PyPlot")
Pkg.add("Random")
Pkg.add("SpecialFunctions")
Pkg.add("BSON")
using Turing
using Flux
using Flux: train!
using TSVD
using Statistics
using LinearAlgebra
using Compat
using Glob
using NCDatasets
using CSV
using DataFrames
using Distributions: Categorical, Dirichlet
using ProgressMeter
using PyPlot
using Random
using SpecialFunctions: loggamma
using BSON: @save

[32m[1m    Updating[22m[39m registry at `~/.julia/registries/General.toml`
[32m[1m   Resolving[22m[39m package versions...
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.8/Project.toml`
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.8/Manifest.toml`
[32m[1m   Resolving[22m[39m package versions...
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.8/Project.toml`
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.8/Manifest.toml`
[32m[1m   Resolving[22m[39m package versions...
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.8/Project.toml`
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.8/Manifest.toml`
[32m[1m   Resolving[22m[39m package versions...
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.8/Project.toml`
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.8/Manifest.toml`
[32m[1m   Resolving[22m[39m package versions...
[32m[1m  No Changes[22m[39m to `~/.ju

In [2]:
obs_file ="../data/observed_speeds/greenland_vel_mosaic250_v1_g9000m.nc"
d_obs = NCDataset(obs_file)
v_obs = d_obs["velsurf_mag"][:]
v_obs = nomissing(v_obs, 0.0);
idx = findall(v_obs .> 0)
Obs = v_obs[idx];

n_grid_points = size(idx)[1];

## Load the training data

In [3]:
training_files = sort(glob("../tests/training_data/*.nc"))

nf = length(training_files)
d = NCDataset(training_files[1], "r")
v = d["velsurf_mag"]
nx, ny, nt = size(v)

Data = zeros(n_grid_points, nf * nt)
ids = zeros(Int64, nf)
@showprogress for (k, training_file) in enumerate(training_files)
    m_id = match(r"id_(.+?)_", training_file)
    ids[k] = parse(Int, m_id[1])
    d = NCDataset(training_file, "r")
    v = d["velsurf_mag"][:]
    v = nomissing(v, 0.0)
    Data[:, k] = v[idx]
end

[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:03[39m


## Read training samples

In [4]:
X_df = DataFrame(CSV.File("../data/samples/velocity_calibration_samples_50.csv"))
X_df = X_df[ [x in ids for x in X_df[!, :id]] ,:]
X = transpose(Matrix(X_df[!, 2:9]))
X_mean = mean(X, dims=2);
X_std = std(X, dims=2);
X_scaled = (X .- X_mean) ./ X_std;
X_train = X_scaled;
n_parameters, n_samples = size(X);

## Data preprocessing

Log10-transform the training data and set -Inf to 0

In [5]:
F = log10.(Data)
F = replace!(F, -Inf=>0)
dirichlet_dist = Dirichlet(n_samples, 1)

area = ones(n_grid_points);
area = area ./ sum(area);

# Number of eigenglaciers
q = 50;

## Function to get Eigenglaciers using SVD

In [6]:
function get_eigenglaciers(omegas, F, q)
    
    F_mean = sum(F .* omegas, dims=2);
    F_bar = F .- F_mean;

    Z = diagm(sqrt.(omegas[1, :] * n_grid_points))
    U, S, V = tsvd(Z * transpose(F_bar), q);
    lamda = S.^2 / n_grid_points
    V_hat = V * diagm(sqrt.(lamda));
    
    return V_hat, F_bar, F_mean
end;

## Set up the Neural Network

In [7]:
n_hidden = 128

struct NNModel
    chain::Chain
    V_hat::AbstractArray
    F_mean::AbstractArray
end

function (m::NNModel)(x, add_mean=false)
    if add_mean
        return V_hat * m.chain(x) .+ F_mean
    else
        return V_hat * m.chain(x)
    end

end

# Call @functor to allow for training.
Flux.@functor NNModel

chain = Chain(
    Dense(n_parameters, n_hidden),
    LayerNorm(n_hidden),
    Dropout(0.0),
    Dense(n_hidden, n_hidden),
    LayerNorm(n_hidden),
    Dropout(0.5),
    Dense(n_hidden, n_hidden),
    LayerNorm(n_hidden),
    Dropout(0.5),
    Dense(n_hidden, n_hidden),
    LayerNorm(n_hidden),
    Dropout(0.3),
    Dense(n_hidden, q, bias=false),
    );

In [8]:
no_models = 1
n_epochs = 101
opt = Adam(0.1, (0.9, 0.8));

## Loss function

In [9]:
loss(y_pred, y, o) = sum(sum(abs.((y_pred - y)).^2 .* area, dims=1) .* o);

In [14]:
model_index = 1
omegas = transpose(rand(dirichlet_dist, 1))
omegas_0 = omegas ./ size(omegas)[1];
    
V_hat, F_bar, F_mean = get_eigenglaciers(omegas, F, q);


## We're ready for training

In [15]:
models = []
for model_index in 1:no_models
    println("Training surrogate model ", model_index)
    Random.seed!(model_index)
    omegas = transpose(rand(dirichlet_dist, 1))
    omegas_0 = omegas ./ size(omegas)[1];
    
    V_hat, F_bar, F_mean = get_eigenglaciers(omegas, F, q);
    
    train_loader = Flux.DataLoader((X_train, F_bar, omegas), batchsize = 128, shuffle = true)
    model = NNModel(chain, V_hat, F_mean);
    ps = Flux.params(model);
    opt_state = Flux.setup(opt, model);
    
    println("  epoch, train_loss, test_loss")
    @showprogress for epoch in 1:n_epochs
        for (x, y, o) in train_loader

          # Calculate the gradient of the objective
          # with respect to the parameters within the model:
          grads = Flux.gradient(model) do m
              y_pred = m(x)
              loss(y_pred, y, o)
          end

          # Update the parameters so as to reduce the objective,
          # according the chosen optimisation rule:
          Flux.update!(opt_state, model, grads[1])
        end
        F_pred = model(X_scaled)
        train_loss = loss(F_pred, F_bar, omegas)
        test_loss = loss(F_pred, F_bar, omegas_0)
        if epoch % 5 == 0
            println("  ", epoch, " ", train_loss, " ", test_loss)
        end
    end
    @save "emulator_$model_index.bson" model
    push!(models, model)
end

Training surrogate model 1
  epoch, train_loss, test_loss


[32mProgress:   5%|██                                       |  ETA: 0:04:12[39m

  5 0.09013035254169333 0.09013035254169333


[32mProgress:  10%|████                                     |  ETA: 0:02:29[39m

  10 0.0609165497103593 0.0609165497103593


[32mProgress:  15%|██████▏                                  |  ETA: 0:01:53[39m

  15 0.05372222577752815 0.05372222577752815


[32mProgress:  20%|████████▏                                |  ETA: 0:01:34[39m

  20 0.05298454923924772 0.05298454923924772


[32mProgress:  25%|██████████▏                              |  ETA: 0:01:20[39m

  25 0.05685400074721315 0.05685400074721315


[32mProgress:  30%|████████████▏                            |  ETA: 0:01:11[39m

  30 0.05149964453077624 0.05149964453077624


[32mProgress:  35%|██████████████▎                          |  ETA: 0:01:03[39m

  35 0.05375838452666935 0.05375838452666935


[32mProgress:  40%|████████████████▎                        |  ETA: 0:00:56[39m

  40 0.049757399418643586 0.049757399418643586


[32mProgress:  45%|██████████████████▎                      |  ETA: 0:00:50[39m

  45 0.04942102599950743 0.04942102599950743


[32mProgress:  50%|████████████████████▎                    |  ETA: 0:00:44[39m

  50 0.051849056505940586 0.051849056505940586


[32mProgress:  54%|██████████████████████▍                  |  ETA: 0:00:39[39m

  55 0.04891371216966721 0.04891371216966721


[32mProgress:  59%|████████████████████████▍                |  ETA: 0:00:34[39m

  60 0.04837168538164376 0.04837168538164376


[32mProgress:  64%|██████████████████████████▍              |  ETA: 0:00:30[39m

  65 0.05002558726626114 0.05002558726626114


[32mProgress:  69%|████████████████████████████▍            |  ETA: 0:00:25[39m

  70 0.04736029373968405 0.04736029373968405


[32mProgress:  74%|██████████████████████████████▌          |  ETA: 0:00:21[39m

  75 0.04853064114674704 0.04853064114674704


[32mProgress:  79%|████████████████████████████████▌        |  ETA: 0:00:17[39m

  80 0.046892538644599954 0.046892538644599954


[32mProgress:  84%|██████████████████████████████████▌      |  ETA: 0:00:13[39m

  85 0.04763205439322923 0.04763205439322923


[32mProgress:  89%|████████████████████████████████████▌    |  ETA: 0:00:09[39m

  90 0.04610598511101663 0.04610598511101663


[32mProgress:  94%|██████████████████████████████████████▋  |  ETA: 0:00:05[39m

  95 0.04644347092302507 0.04644347092302507


[32mProgress:  99%|████████████████████████████████████████▋|  ETA: 0:00:01[39m

  100 0.04607863046492772 0.04607863046492772


[32mProgress: 100%|█████████████████████████████████████████| Time: 0:01:18[39m


In [12]:
n_glaciers = 10
p = ones(n_samples)
p = p / sum(p)
# This does sampling with replacement, need to figure out how to do
# sampling without replacement
P = Categorical(p)
glaciers = rand(P, n_glaciers);

## Now calcuate some metrics to assess the surrogate committee

In [13]:
F_train = F
maes = []
for m in glaciers
    for (model_index, model) in enumerate(models)
        X_val = X_train[:, m]
        Y_val = F_train[:, m]
        Y_pred = model(X_val, true)
        mae = Flux.mae(10 .^ mean(Y_pred, dims=2), 10 .^ mean(Y_val, dims=2))
        push!(maes, mae)
    end
end
mae = mean(maes)
print("MAE: ", mae)

LoadError: MethodError: no method matching zero(::Type{Any})
[0mClosest candidates are:
[0m  zero(::Type{Union{Missing, T}}) where T at missing.jl:105
[0m  zero([91m::Union{Type{P}, P}[39m) where P<:Dates.Period at /opt/local/share/julia/stdlib/v1.8/Dates/src/periods.jl:53
[0m  zero([91m::StatsBase.Histogram{T, N, E}[39m) where {T, N, E} at ~/.julia/packages/StatsBase/XgjIN/src/hist.jl:562
[0m  ...