In [1]:
import matplotlib.pyplot as plt
import xarray as xr

# Step 1. Read variables

In [62]:
def read_ATM():
    """
    6-hourly ATM variables 
    
    no corrections needed on these ones
    """
    ds = xr.open_mfdataset('hus/*.nc', combine='by_coords')
    hus = ds.hus
    
    ds = xr.open_mfdataset('ta/*.nc', combine='by_coords')
    ta = ds.ta
    
    assert(len(ta) == 125561)
    assert(len(hus) == 125561)
    
    return hus,ta

In [55]:
hus, ta = read_ATM()

  use_cftime=use_cftime,
  use_cftime=use_cftime,


In [56]:
lat, lon = hus.lat, hus.lon

In [65]:
#ds_tos = xr.open_mfdataset('tos/fv1_grid/*.6h.nc', combine='by_coords')

def read_tos(lat, lon):
    """
    Read SST from ocean model POP
    
    TOS has been interpolated to the ATM grid, and has been interpolated in time as well (daily to 6-hourly). 
    Some coordinates have been messed up as a result, need to align with the ATM variables before merging. 
    
    Also: because it is POP output, the first of January at the beginning of the simulation is missing. 
    This will be fixed (interpolated) later when we process that particular month. Doing it here would require
    a large overhead (xr.concat is very slow on big datasets...)
    """
    ds = xr.open_dataset('tos/fv1_grid/tos_Oday_CESM2_ssp126_r11i1p1f1_gn_20150102-21010101.6h.nc')
    tos = ds.tos
    
    del tos['lat']
    del tos['lon']
    
    tos = tos.rename({'y':'lat', 'x':'lon'})
    
    tos['lat'] = lat
    tos['lon'] = lon
    
    assert(len(tos) == 125561 - 4) # POP output, first day missing
    return tos

In [66]:
tos = read_tos(lat, lon)

## 1 January missing in POP variables, correct this

In [8]:
tos2 = tos[[0,1,2,3]].copy()

In [9]:
tos2.time

In [10]:
x = [y.replace(day=1) for y in tos2.time.data]
x_da = xr.DataArray(x, coords=[x,], dims='time')
#x_da

In [11]:
tos2['time'] = x_da

## Dask

In [None]:
import dask.array as da

In [None]:

shape = (1000, 4000)


chunk_shape = (1000, 1000)
ones = da.ones(shape, chunks=chunk_shape)

ones = da.ones(shape)

In [None]:
sum_of_ones = ones.sum()
sum_of_ones.visualize()

# Step 2 - convert to monthly

In [16]:
pwd

'/glade/work/lvank/for_Melchior/b.e21.BSSP126cmip6.f09_g17.CMIP6-SSP1-2.6.102'

In [75]:
def make_month(mon_str):
       
    var_list = [x.sel(time=slice(mon_str,mon_str)) for x in [hus,tos,ta]]
    
    ds_out = xr.merge(var_list)
    return ds_out

In [76]:
mon_str = '2015-02'

In [77]:
ds = make_month(mon_str)

In [81]:
ds.attrs

{}

In [84]:
import datetime

In [87]:
import os
print(__file__)

NameError: name '__file__' is not defined

In [82]:
ds.attrs['description'] = "6-hourly output for forcing RACMO2 RCM"
ds.attrs['author'] = "Leo van Kampenhout"
ds.attrs['creation date'] = datetime.datetime.now()
ds.attrs['source script'] = ""

In [83]:
ds

In [79]:
ds.to_netcdf(f'monthly/{mon_str}.nc')