In [1]:
import torch
from torch.nn import functional as F
from torch import nn
from pytorch_lightning.core.lightning import LightningModule
import pytorch_lightning as pl

import torch.optim as optim
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

from src.models import *
from src.dataloader import *
from src.utils import *

import xarray as xr
import numpy as np
import matplotlib.pyplot as plt

import pickle
import json

## Train, validation, and test datasets

In [2]:
# Root directory for all input data; the dataset paths in `args` are built by
# string concatenation onto this value, so the trailing slash is required.
data_dir = '/home/jupyter/data/'

In [3]:
# Keyword arguments passed to `TiggeMRMSDataset` for the training split.
# The validation and test cells further down start from this dict.
# All paths are built by concatenation onto `data_dir`, which must end in '/'.
args = {
    'tigge_dir': data_dir + 'tigge/32km/',
    'tigge_vars': [
        'total_precipitation_ens10',
        'total_column_water_ens10',
        '2m_temperature',
        'convective_available_potential_energy',
        'convective_inhibition',
    ],
    'mrms_dir': data_dir + 'mrms/4km/RadarOnly_QPE_06H/',
    'rq_fn': data_dir + 'mrms/4km/RadarQuality.nc',
    # Constant fields are currently disabled:
    # 'const_fn': data_dir + 'tigge/32km/constants.nc',
    # 'const_vars': ['orog', 'lsm'],
    'data_period': ('2018-01', '2019-12'),
    'val_days': 1,
    'split': 'train',
    # 'pure_sr_ratio': 8,
    'tp_log': 0.01,
    'scale': True,
    'ensemble_mode': 'stack_by_variable',
    'pad_tigge': 15,
    'pad_tigge_channel': True,
    'idx_stride': 2,
}

# Output directory handed to `save_images` for every split. It is derived from
# `data_dir` so the root path is written in one place only; the resulting
# value is unchanged ('/home/jupyter/data/data_patches/').
save_dir = data_dir + 'data_patches/'

In [4]:
# Build the training dataset from the keyword arguments collected in `args`
# (defined above with 'split': 'train'). `TiggeMRMSDataset` is not imported by
# name here, so it presumably arrives through one of the `src.*` star imports.
ds_train = TiggeMRMSDataset(**args)

setting nans in convective_inhibition to 0


    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


Loading data


In [5]:
# Number of samples in the training dataset. A bare expression is used so the
# cell shows it as its output, matching the `len(ds_valid)` and `len(ds_test)`
# cells further down.
len(ds_train)

6748004


In [None]:
# Hand the training dataset to the project helper `save_images`, together with
# the output directory and the split tag 'train'.
# NOTE(review): `save_images` is not defined in this notebook (presumably from
# a `src.*` star import); what it writes and in which layout should be
# confirmed there.
save_images(ds_train, save_dir, 'train')

In [6]:
#pickle.dump(ds_train, open(data_dir + f"saved_datasets/traindataset_{dataset_name}.pkl", "wb"))
#pickle.dump(args, open(data_dir + f"saved_datasets/traindataset_{dataset_name}_args.pkl", "wb"))

In [7]:
# Build the validation arguments as a new dict instead of aliasing `args`.
# With `val_args = args`, the assignments below would also rewrite `args`
# (split -> 'valid', extra 'maxs'/'mins' keys), so re-running the training
# cell afterwards would silently build a different dataset.
val_args = {
    **args,
    # Reuse the min/max values exposed by the training dataset.
    # NOTE(review): presumably so both splits share the same scaling -- confirm
    # in TiggeMRMSDataset.
    'maxs': ds_train.maxs,
    'mins': ds_train.mins,
    'split': 'valid',
}

ds_valid = TiggeMRMSDataset(**val_args)

setting nans in convective_inhibition to 0


    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


Loading data


In [None]:
# Number of samples in the validation dataset (shown as the cell output).
len(ds_valid)

In [None]:
# Hand the validation dataset to the project helper `save_images`, using the
# same output directory as the other splits and the split tag 'valid'.
save_images(ds_valid, save_dir, 'valid')

In [8]:
#pickle.dump(ds_valid, open(data_dir + f"saved_datasets/validdataset_{dataset_name}.pkl", "wb"))
#pickle.dump(val_args, open(data_dir + f"saved_datasets/validdataset_{dataset_name}_args.pkl", "wb"))

In [9]:
# Build the test arguments from a filtered copy of `args` instead of aliasing
# it. With `test_args = args`, the `pop` calls removed 'val_days' and 'split'
# from the shared dict, so a second run of this cell raised KeyError and the
# train/valid cells could no longer be re-run with their intended arguments.
test_args = {
    key: value
    for key, value in args.items()
    if key not in ('val_days', 'split')  # not passed for the test dataset
}
test_args.update(
    # Reuse the min/max values exposed by the training dataset.
    maxs=ds_train.maxs,
    mins=ds_train.mins,
    first_days=5,
    data_period=('2020-01', '2020-12'),
)
# NOTE(review): `dataset_name` is not assigned anywhere in the visible
# notebook; unless a `src.*` star import provides it, this line raises
# NameError on a fresh kernel. `test_dataset_name` is only used by the
# commented-out pickle cell below.
test_dataset_name = dataset_name + f"_first_days_{test_args['first_days']}"

ds_test = TiggeMRMSDataset(**test_args)

setting nans in convective_inhibition to 0


    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


Loading data


In [None]:
# Number of samples in the test dataset (shown as the cell output).
len(ds_test)

In [None]:
# Hand the test dataset to the project helper `save_images`, using the same
# output directory as the other splits and the split tag 'test'.
save_images(ds_test, save_dir, 'test')

In [10]:
#pickle.dump(ds_test, open(data_dir + f"saved_datasets/testdataset_{test_dataset_name}.pkl", "wb"))
#pickle.dump(test_args, open(data_dir + f"saved_datasets/testdataset_{test_dataset_name}_args.pkl", "wb"))