In [None]:
import logging
logging.captureWarnings(False)

import deepsensor.torch
from deepsensor.data import DataProcessor, TaskLoader
from deepsensor.model import ConvNP
from deepsensor.train import Trainer

import xarray as xr
import matplotlib.pyplot as plt
import cartopy.crs as ccrs

import pandas as pd
import numpy as np
from tqdm import tqdm
from tqdm import notebook
from deepsensor.train import set_gpu_default_device


In [None]:
def compute_val_rmse(model, val_tasks, n_tasks=50):
    """Estimate the validation RMSE of ``model`` on a random subset of tasks.

    A subset of ``val_tasks`` is drawn without replacement using NumPy's
    global random state. For each sampled task, the model mean and the task's
    first target set (``task["Y_t"][0]``) are passed through
    ``data_processor.map_array(..., unnorm=True)`` before the error is taken.

    Relies on the notebook-level globals ``task_loader`` and
    ``data_processor``.

    Args:
        model: Object exposing ``mean(task)``.
        val_tasks: Sequence of tasks to sample from.
        n_tasks: Maximum number of tasks to evaluate. Clamped to
            ``len(val_tasks)`` so short validation sets no longer raise.

    Returns:
        Root-mean-square error over all target values of the sampled tasks.

    Raises:
        ValueError: If ``val_tasks`` is empty or ``n_tasks`` is less than 1.
    """
    if len(val_tasks) == 0:
        raise ValueError("val_tasks is empty; cannot compute a validation RMSE")
    if n_tasks < 1:
        raise ValueError("n_tasks must be at least 1")

    target_var_ID = task_loader.target_var_IDs[0][0]  # assume 1st target set and 1D

    # Sample indices rather than the task objects themselves, so NumPy never
    # has to coerce a list of tasks into an array.
    n_eval = min(n_tasks, len(val_tasks))
    chosen = np.random.choice(len(val_tasks), n_eval, replace=False)

    errors = []
    for idx in chosen:
        task = val_tasks[idx]
        mean = data_processor.map_array(model.mean(task), target_var_ID, unnorm=True)
        true = data_processor.map_array(task["Y_t"][0], target_var_ID, unnorm=True)
        # Flatten so concatenation works whatever the dimensionality of the error.
        errors.append(np.ravel(np.abs(mean - true)))
    return np.sqrt(np.mean(np.concatenate(errors) ** 2))
def gen_tasks(dates, progress=True):
    """Build one task per date using the notebook-level ``task_loader``.

    Every task is requested with ``context_sampling=["all"]`` and
    ``target_sampling="all"``.

    Args:
        dates: Iterable of dates to generate tasks for.
        progress: When False, the tqdm notebook progress bar is disabled.

    Returns:
        List of tasks, in the same order as ``dates``.
    """
    date_iter = notebook.tqdm(dates, disable=not progress)
    return [
        task_loader(when, context_sampling=["all"], target_sampling="all")
        for when in date_iter
    ]


In [None]:
# Yearly GLSEA3 NetCDF files, listed in chronological order.
dat14 = '/nfs/turbo/seas-dannes/SST-sensor-placement-input/GLSEA3_NETCDF/GLSEA3_2014.nc'
dat15 = '/nfs/turbo/seas-dannes/SST-sensor-placement-input/GLSEA3_NETCDF/GLSEA3_2015.nc'
dat16 = '/nfs/turbo/seas-dannes/SST-sensor-placement-input/GLSEA3_NETCDF/GLSEA3_2016.nc'
yearly_files = [dat14, dat15, dat16]

# Open the three files as one dataset, concatenated along `time` in the
# order given above; lat/lon chunk sizes are left to xarray ('auto').
dat = xr.open_mfdataset(
    yearly_files,
    combine='nested',
    concat_dim='time',
    chunks={'lat': 'auto', 'lon': 'auto'},
)

In [None]:
# Wherever `sst` is NaN, replace the values of the dataset with -0.009.
# NOTE(review): the meaning of the -0.009 fill value is not documented here — confirm.
sst_is_valid = ~np.isnan(dat.sst)
mdat = dat.where(sst_is_valid, -0.009)

# Day-of-year climatology over all loaded years, and the anomalies against it.
by_day_of_year = mdat.groupby('time.dayofyear')
climatology = by_day_of_year.mean('time')
anomalies = by_day_of_year - climatology

# Process the anomalies with DeepSensor's DataProcessor, using lat/lon as
# the two spatial coordinates.
data_processor = DataProcessor(x1_name="lat", x2_name="lon")
anom_ds = data_processor(anomalies)

# The same processed anomaly field serves as both context and target.
task_loader = TaskLoader(context=anom_ds, target=anom_ds)

In [None]:
# Training tasks: every 5th day of 2015, using all context and target points.
train_tasks = []
for date in pd.date_range('2015-01-02T12:00:00.000000000', '2015-12-31T12:00:00.000000000')[::5]:
    task = task_loader(date, context_sampling="all", target_sampling="all")
    train_tasks.append(task)

# Validation tasks: every day of 2016, using all context and target points.
# The previously drawn `N_context` was never used (sampling is "all"), so the
# dead random draw has been removed.
val_tasks = []
for date in pd.date_range('2016-01-01T12:00:00.000000000', '2016-12-31T12:00:00.000000000'):
    task = task_loader(date, context_sampling="all", target_sampling="all")
    val_tasks.append(task)

# Select the GPU as the default device before the model is constructed.
set_gpu_default_device()
model = ConvNP(data_processor, task_loader)

In [None]:
# Per-epoch mean training loss and validation RMSE.
losses = []
val_rmses = []

# Daily timestamps covering the training and validation periods.
train_range = pd.date_range('2015-01-02T12:00:00.000000000', '2015-12-31T12:00:00.000000000')
val_range = pd.date_range('2016-01-01T12:00:00.000000000', '2016-12-31T12:00:00.000000000')

val_rmse_best = np.inf
trainer = Trainer(model, lr=5e-5)
for epoch in range(10):
    # `train_range` is a DatetimeIndex, so `train_range[1]` is the *second day*,
    # not the end of the period. Slice the index directly to get every 5th
    # day of the whole training range.
    train_tasks = gen_tasks(train_range[::5], progress=False)

    batch_losses = trainer(train_tasks)
    losses.append(np.mean(batch_losses))
    val_rmses.append(compute_val_rmse(model, val_tasks))

    # Track the best validation RMSE seen so far.
    if val_rmses[-1] < val_rmse_best:
        val_rmse_best = val_rmses[-1]
        

The cell below raised "OutOfMemoryError: CUDA out of memory. Tried to allocate 5.25 GiB. GPU" when run with 1 GPU, and tried to allocate 15.75 GiB when run with 4 GPUs.


In [None]:
# Build a single task for 2016-07-19 with "all" context sampling and a fixed seed.
test_task = task_loader(
    "2016-07-19T12:00:00.000000000",
    ["all"],
    seed_override=42,
)

# Draw 3 autoregressive samples, predicting at the coordinates of `anomalies`.
# NOTE(review): this is the call reported to run out of GPU memory; predicting
# on the full grid with ar_sample=True is presumably the cause — confirm.
pred = model.predict(
    test_task,
    X_t=anomalies,
    n_samples=3,
    ar_sample=True,
    ar_subsample_factor=10,
)