In [1]:
import sys
# Prepend the sibling "../pismemulator/" directory to the module search path.
# NOTE(review): presumably this is what makes the bare `nnemulator` and `utils`
# imports further down resolve; the path is relative, so it depends on the
# notebook's working directory -- confirm.
sys.path.insert(0, "../pismemulator/")


import numpy as np
import os
from scipy.stats import dirichlet

import torch
import pytorch_lightning as pl
from pytorch_lightning.callbacks import (
    ModelCheckpoint,
)
from pytorch_lightning.loggers import TensorBoardLogger
from nnemulator import NNEmulator, DNNEmulator, PISMDataset, PISMDataModule
from utils import plot_eigenglaciers, plot_validation


from collections import OrderedDict
from glob import glob
import numpy as np
import pandas as pd
from os.path import join
import re
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import xarray as xr

import torch
import torch.nn as nn
from torch import Tensor
from torchmetrics.utilities.checks import _check_same_shape
from torchmetrics import Metric
import pytorch_lightning as pl
from torch.optim.lr_scheduler import ExponentialLR, ReduceLROnPlateau
from torch.utils.data import DataLoader, TensorDataset

from pismemulator.metrics import AbsoluteError, absolute_error


In [3]:
# --- Run configuration (everything a reader might want to tune) ---

# Upper bound on training epochs; handed to both pl.Trainer instances below.
max_epochs = 2

# Output directory; the data-loading cell creates it together with an
# "emulator" subdirectory.
emulator_dir = "test_dnn"

# Index of the model being trained; also used as the NumPy seed.
model_index = 0

# 1.0 selects the "*_all_loader" loaders of the data module; any other value
# is forwarded to PISMDataModule as `train_size`.
train_size = 1.0

# Forwarded to PISMDataModule as `num_workers`.
num_workers = 4

# Hyper-parameters passed unchanged to NNEmulator and DNNEmulator.
hparams = dict(
    n_hidden=128,
    n_hidden_1=128,
    n_hidden_2=128,
    n_hidden_3=128,
    n_hidden_4=128,
    n_layers=5,
    learning_rate=0.01,
)

In [4]:
    # Load the PISM ensemble: simulation outputs from `data_dir`, the matching
    # parameter samples, and the observed-speed target file.
    dataset = PISMDataset(
        data_dir="../data/speeds_v2",
        samples_file="../data/samples/velocity_calibration_samples_100.csv",
        target_file="../data/observed_speeds/greenland_obs_g1800m.nc",
        thinning_factor=5,
    )

    # Pull out the pieces the emulators need.
    X = dataset.X
    F = dataset.Y
    area = dataset.normed_area
    n_grid_points = dataset.n_grid_points
    n_parameters = dataset.n_parameters
    n_samples = dataset.n_samples

    # Seed the RNGs. np.random.seed comes last on purpose: it re-seeds NumPy's
    # global generator with `model_index` after pl.seed_everything(0), so the
    # Dirichlet draw below (which is given no explicit random_state) varies
    # with the model index.
    torch.manual_seed(0)
    pl.seed_everything(0)
    np.random.seed(model_index)

    # Make sure both the output directory and its "emulator" subdirectory
    # exist. The previous version created the subdirectory only when
    # `emulator_dir` itself was missing, so a pre-existing `emulator_dir`
    # without the subdirectory was left incomplete.
    os.makedirs(os.path.join(emulator_dir, "emulator"), exist_ok=True)

    print(f"Training model {model_index}")
    # One draw from a flat Dirichlet over the samples, transposed so the
    # weights run along the first axis, then cast to X's dtype/device.
    omegas = torch.Tensor(dirichlet.rvs(np.ones(n_samples))).T
    omegas = omegas.type_as(X)
    # Uniform reference weights: every entry equals 1 / len(omegas).
    omegas_0 = torch.ones_like(omegas) / len(omegas)

    # With train_size == 1.0 the data module is built with its own default
    # `train_size`; otherwise the requested fraction is passed through.
    if train_size == 1.0:
        data_loader = PISMDataModule(X, F, omegas, omegas_0, num_workers=num_workers)
    else:
        data_loader = PISMDataModule(X, F, omegas, omegas_0, train_size=train_size, num_workers=num_workers)

    data_loader.prepare_data()
    data_loader.setup(stage="fit")

    # Quantities exposed by the data module after setup; the emulators below
    # take n_eigenglaciers, V_hat and F_mean as constructor arguments.
    n_eigenglaciers = data_loader.n_eigenglaciers
    V_hat = data_loader.V_hat
    F_mean = data_loader.F_mean
    F_train = data_loader.F_bar

The following simulations are missing:
   [337, 595, 539, 542]
  ... adjusting priors
  Loading data sets...


996it [00:29, 34.10it/s]
  response = np.log10(response)
Global seed set to 0


Training model 0
Generating eigenglaciers
...using the first 99 eigen values


In [5]:
    # Choose the loader pair: the "*_all_loader" variants when train_size is
    # exactly 1.0, the regular train/val loaders otherwise. The conditional
    # expression evaluates only the selected tuple, so the other pair of
    # attributes is never touched.
    train_loader, val_loader = (
        (data_loader.train_all_loader, data_loader.val_all_loader)
        if train_size == 1.0
        else (data_loader.train_loader, data_loader.val_loader)
    )

In [33]:
    # Trainer for the NNEmulator run: deterministic mode, no validation
    # sanity check before training, at most `max_epochs` epochs.
    trainer_e = pl.Trainer(deterministic=True, num_sanity_val_steps=0, max_epochs=max_epochs)

    # Emulator built from the dataset / data-module quantities and `hparams`.
    e = NNEmulator(n_parameters, n_eigenglaciers, V_hat, F_mean, area, hparams)

    # Fit using the loaders selected in the previous cell.
    trainer_e.fit(e, train_loader, val_loader)

GPU available: True, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

   | Name      | Type          | Params
---------------------------------------------
0  | l_1       | Linear        | 1.2 K 
1  | norm_1    | LayerNorm     | 256   
2  | dropout_1 | Dropout       | 0     
3  | l_2       | Linear        | 16.5 K
4  | norm_2    | LayerNorm     | 256   
5  | dropout_2 | Dropout       | 0     
6  | l_3       | Linear        | 16.5 K
7  | norm_3    | LayerNorm     | 256   
8  | dropout_3 | Dropout       | 0     
9  | l_4       | Linear        | 16.5 K
10 | norm_4    | LayerNorm     | 256   
11 | dropout_4 | Dropout       | 0     
12 | l_5       | Linear        | 12.9 K
13 | train_ae  | AbsoluteError | 0     
14 | test_ae   | AbsoluteError | 0     
---------------------------------------------
64.6 K    Trainable params
2.1 M     Non-trainable params
2.1 M     Total params
8.506     Total estimated model params size (MB)


Adjusting learning rate of group 0 to 1.0000e-02.
                                                                      

Global seed set to 0


Epoch 0:  44%|████▍     | 7/16 [00:00<00:00, 17.21it/s, loss=0.0401, v_num=10]Adjusting learning rate of group 0 to 9.9750e-03.
Epoch 0:  56%|█████▋    | 9/16 [00:00<00:00, 17.69it/s, loss=0.0355, v_num=10]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/8 [00:00<?, ?it/s][A
Validating:  12%|█▎        | 1/8 [00:00<00:02,  3.15it/s][A
Epoch 0:  81%|████████▏ | 13/16 [00:01<00:00, 13.74it/s, loss=0.0355, v_num=10]
Epoch 0: 100%|██████████| 16/16 [00:01<00:00, 13.14it/s, loss=0.0355, v_num=10, train_loss=0.0389, test_loss=0.0389]
Epoch 1:  44%|████▍     | 7/16 [00:00<00:00, 18.11it/s, loss=0.0227, v_num=10, train_loss=0.0389, test_loss=0.0389]   Adjusting learning rate of group 0 to 9.9501e-03.
Epoch 1:  50%|█████     | 8/16 [00:00<00:00, 15.26it/s, loss=0.0214, v_num=10, train_loss=0.0389, test_loss=0.0389]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/8 [00:00<?, ?it/s][A
Epoch 1:  75%|███████▌  | 12/16 [00:01<00:00, 12.62it/s, loss=0.0214, v_num=1

Exception ignored in: <function _releaseLock at 0x7f6b4ca24700>
Traceback (most recent call last):
  File "/home/andy/miniconda3/envs/pytorch/lib/python3.8/logging/__init__.py", line 227, in _releaseLock
    def _releaseLock():
KeyboardInterrupt: 


In [6]:
    # Trainer for the DNNEmulator run: deterministic mode, one validation
    # sanity-check step before training, at most `max_epochs` epochs.
    # NOTE(review): the output recorded for this cell stops during the
    # validation sanity check with "RuntimeError: Predictions and targets are
    # expected to have the same shape". Presumably this is raised from the
    # emulator's validation step / metric update in `nnemulator` -- confirm
    # there; setting num_sanity_val_steps=0 would only defer the error.
    trainer_de = pl.Trainer(deterministic=True, num_sanity_val_steps=1, max_epochs=max_epochs)

    # Same constructor arguments as the NNEmulator above.
    de = DNNEmulator(n_parameters, n_eigenglaciers, V_hat, F_mean, area, hparams)

    # Fit using the loaders selected earlier.
    trainer_de.fit(de, train_loader, val_loader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name          | Type          | Params
------------------------------------------------
0 | l_first       | Linear        | 1.2 K 
1 | norm_first    | LayerNorm     | 256   
2 | dropout_first | Dropout       | 0     
3 | dnn           | ModuleList    | 50.3 K
4 | l_last        | Linear        | 12.8 K
5 | train_ae      | AbsoluteError | 0     
6 | test_ae       | AbsoluteError | 0     
------------------------------------------------
64.5 K    Trainable params
2.0 M     Non-trainable params
2.1 M     Total params
8.424     Total estimated model params size (MB)


Adjusting learning rate of group 0 to 1.0000e-02.


Validation sanity check: 0it [00:00, ?it/s]

RuntimeError: Predictions and targets are expected to have the same shape