# transform the target variables from CSV to netCDF for use with the GCM outputs (using `xr.align`)

In [83]:
%matplotlib inline

### os 
import os 
import sys

### datetimes 
from datetime import datetime, timedelta

### scipy 
import numpy as np 
import pandas as pd
import xarray as xr

### plotting 
from matplotlib import pyplot as plt


In [84]:
import pathlib

# Reference directories: the current user's home folder (used below to build
# the CSV path) and the working directory at the time this cell runs.
# CWD is not referenced again in the cells shown in this notebook.
HOME = pathlib.Path.home()
CWD = pathlib.Path.cwd() 

In [85]:
# Root folder (under the user's home directory) that holds the per-variable,
# per-region target CSV files.
csv_paths = HOME / 'research/Smart_Ideas/outputs/targets/NZ_regions/NZ_6_regions'

In [86]:
# Target variable; used as the first sub-folder name when globbing for the CSV files.
var_name = 'RAIN'

In [87]:
# Number of quantile categories; interpolated into the CSV file-name pattern below.
num_quantiles = 3

In [88]:
# Codes of the six NZ regions (one sub-folder per region under `csv_paths`).
# The last entry used to be a second 'WSI', which left 'ESI' out of the list.
list_regions = ['NNI','WNI','ENI','NSI','WSI','ESI']

In [89]:
# Region processed by this notebook run: the first entry of `list_regions`.
region_name = list_regions[0]

In [90]:
# Collect the target CSV file(s) for the selected variable / region / number of
# quantiles. The matches are sorted so that `lfiles[0]` is the same file on
# every run (glob order is otherwise filesystem dependent), and an empty match
# fails here with an explicit message instead of an IndexError in the next cell.
lfiles = sorted(
    csv_paths.joinpath(f"{var_name}/{region_name}").glob(
        f"TS_NZ_region_*_{num_quantiles}_quantiles_*_regionmask.csv"
    )
)
if not lfiles:
    raise FileNotFoundError(
        f"no target CSV found for {var_name} / {region_name} "
        f"({num_quantiles} quantiles) under {csv_paths}"
    )

In [91]:
# Read the first matching CSV: the first column becomes the index and is parsed as dates.
data = pd.read_csv(lfiles[0], index_col=0, parse_dates=True)

In [92]:
# Convert the DataFrame to an xarray object (the index becomes the coordinate)
# so it can be aligned and merged with the GCM dataset further down.
# NOTE: `data` is rebound here, so this cell should only be run once after the
# read_csv cell; re-running it alone would call to_xarray() on the converted object.
data = data.to_xarray()

In [93]:
# data['anomalies'].plot()

### now read some GCM outputs

In [94]:
# Folder holding the processed ECMWF SST files. The default is the original
# machine-specific mount point; set the GCM_SST_DIR environment variable to
# point the notebook at another location without editing this cell.
gcm_path = pathlib.Path(
    os.environ.get('GCM_SST_DIR', '/media/nicolasf/END19101/data/GCMs/processed/CDS/ECMWF/SST')
)

In [95]:
# List the SST seasonal-anomaly netCDF files found directly under `gcm_path`.
# NOTE: this rebinds `lfiles`, which previously held the target CSV paths.
lfiles = list(gcm_path.glob("CDS_ECMWF_SST_*_seasonal_anomalies_1981_2010_clim.nc"))
# Fail early with a readable message; an empty list would otherwise surface
# as an IndexError when the first file is opened.
if not lfiles:
    raise FileNotFoundError(f"no SST seasonal anomaly files found in {gcm_path}")

In [96]:
# Sort the paths in place so the files are opened / concatenated in a
# deterministic order (presumably chronological given the file naming — confirm).
lfiles.sort()

In [97]:
# Open the first file on its own (e.g. to inspect its structure).
# NOTE(review): in the cells shown here `dset` is not read again before being
# rebound by the merge further down, and this dataset is never explicitly closed.
dset = xr.open_dataset(lfiles[0])

In [98]:
# Open all files as one dataset, concatenated along 'init_time' in the order
# of `lfiles` (combine='nested'); parallel=True asks xarray to open the files
# through dask. The displayed repr below shows one chunk per file.
dset_sst = xr.open_mfdataset(lfiles, concat_dim='init_time', combine='nested', parallel=True)

In [99]:
# Rename the concatenation dimension to 'time' (the name used for the alignment below).
# NOTE: `dset_sst` is rebound, so re-running this cell alone will not find 'init_time' anymore.
dset_sst = dset_sst.rename({'init_time':'time'})

In [100]:
# Roll every time stamp forward to the end of its month (MonthEnd(0) leaves
# dates already on a month end unchanged) — presumably so the stamps match
# those of the CSV targets; confirm against the CSV index.
dset_sst['time'] = dset_sst['time'].to_index() + pd.offsets.MonthEnd(0)

### align the datasets

In [103]:
# Keep only the index labels present in both objects (join='inner');
# copy=False lets xarray avoid copying data when no reindexing is needed.
# NOTE(review): this assumes the CSV index shares the 'time' dimension name — confirm.
dset_sst, data = xr.align(dset_sst, data, join='inner', copy=False)

In [104]:
# Display the aligned SST dataset (dask-backed, see the chunk summary in the output).
dset_sst

Unnamed: 0,Array,Chunk
Bytes,12.20 GB,26.06 MB
Shape,"(468, 4, 25, 181, 360)","(1, 4, 25, 181, 360)"
Count,1872 Tasks,468 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 12.20 GB 26.06 MB Shape (468, 4, 25, 181, 360) (1, 4, 25, 181, 360) Count 1872 Tasks 468 Chunks Type float32 numpy.ndarray",4  468  360  181  25,

Unnamed: 0,Array,Chunk
Bytes,12.20 GB,26.06 MB
Shape,"(468, 4, 25, 181, 360)","(1, 4, 25, 181, 360)"
Count,1872 Tasks,468 Chunks
Type,float32,numpy.ndarray


In [105]:
# Display the aligned target dataset.
data

In [107]:
# Combine the SST fields and the target variables into a single dataset.
# This rebinds `dset`, replacing the single-file dataset opened earlier.
dset = xr.merge([dset_sst, data])

In [108]:
# Display the merged dataset.
dset

Unnamed: 0,Array,Chunk
Bytes,12.20 GB,26.06 MB
Shape,"(468, 4, 25, 181, 360)","(1, 4, 25, 181, 360)"
Count,1872 Tasks,468 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 12.20 GB 26.06 MB Shape (468, 4, 25, 181, 360) (1, 4, 25, 181, 360) Count 1872 Tasks 468 Chunks Type float32 numpy.ndarray",4  468  360  181  25,

Unnamed: 0,Array,Chunk
Bytes,12.20 GB,26.06 MB
Shape,"(468, 4, 25, 181, 360)","(1, 4, 25, 181, 360)"
Count,1872 Tasks,468 Chunks
Type,float32,numpy.ndarray


In [109]:
# Keep only the SST field and the 'cat_3' target variable.
# NOTE(review): 'cat_3' presumably corresponds to num_quantiles = 3 set above;
# it is hard-coded here, so changing num_quantiles will not update it — confirm.
dset = dset[['sst','cat_3']]

In [112]:
# Select step == 3 and average over the 'member' dimension. Nothing is
# assigned: the cell only displays the result, which stays dask-backed
# (see the chunk summary in the output).
dset.sel(step=3).mean('member')

Unnamed: 0,Array,Chunk
Bytes,121.98 MB,260.64 kB
Shape,"(468, 181, 360)","(1, 181, 360)"
Count,3276 Tasks,468 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 121.98 MB 260.64 kB Shape (468, 181, 360) (1, 181, 360) Count 3276 Tasks 468 Chunks Type float32 numpy.ndarray",360  181  468,

Unnamed: 0,Array,Chunk
Bytes,121.98 MB,260.64 kB
Shape,"(468, 181, 360)","(1, 181, 360)"
Count,3276 Tasks,468 Chunks
Type,float32,numpy.ndarray


In [113]:
# Shell check of the notebook's working directory (the `CWD` variable defined
# near the top holds the working directory as a pathlib.Path).
!pwd

/home/nicolasf/research/Smart_Ideas/code/targets/VCSN
