In [None]:
import sys
from datetime import datetime, timedelta
from glob import glob

import cftime
import numpy as np
import xarray as xr

from future_wind_copy import combine_hemispheres

In [4]:
def cut_europe_and_interpolate(ds_rsds,ds_rsdsdiff,ds_tas):
    """Cut the three datasets to the European window and put tas on the rsds time axis.

    Each dataset is passed through ``combine_hemispheres`` with the window
    lat 20..75, lon 330..50. ``tas`` is then linearly interpolated in time
    onto the time coordinate of the cut rsds dataset, and any value that is
    NaN after interpolation is replaced by the value of the following time
    step (the last time step is left untouched).

    Args:
        ds_rsds (Dataset): dataset holding ``rsds``; supplies the target time axis
        ds_rsdsdiff (Dataset): dataset holding ``rsdsdiff``
        ds_tas (Dataset): dataset holding ``tas``

    Returns:
        tuple: ``(ds_rsds_europe, ds_rsdsdiff_europe, ds_tas_europe)``.
        ``ds_tas_europe['tas']`` lives on the rsds time axis. Other variables
        of the tas dataset that depended on its old ``time`` dimension are
        dropped, because they no longer describe the new axis.
    """
    europe = dict(minlat=20., maxlat=75., minlon=330., maxlon=50.)
    ds_rsds_europe = combine_hemispheres(ds_rsds, **europe)
    ds_rsdsdiff_europe = combine_hemispheres(ds_rsdsdiff, **europe)
    ds_tas_europe = combine_hemispheres(ds_tas, **europe)

    # Interpolate tas to match rsds time
    target_time = ds_rsds_europe['time']
    tas_interp = ds_tas_europe['tas'].interp(time=target_time, method="linear")

    # Replace NaNs with the (pre-fill) value of the next time step. Shifting
    # by name along "time" does this for all steps at once and does not rely
    # on "time" being the leading dimension; the final step has no successor,
    # so a NaN there stays NaN.
    tas_filled = tas_interp.where(tas_interp.notnull(), tas_interp.shift(time=-1))
    tas_filled.attrs = tas_interp.attrs

    # Assigning a DataArray into an existing Dataset re-aligns it to the
    # Dataset's own indexes, which would put the interpolated values back on
    # the old tas time axis. Drop the old time dimension first so that the
    # new variable brings its own time coordinate with it.
    ds_tas_europe = ds_tas_europe.drop_dims('time').assign(tas=tas_filled)

    return ds_rsds_europe, ds_rsdsdiff_europe, ds_tas_europe

In [6]:
def make_data_set(du,rsds,rsdsdiff,tas):
    """Assemble the xarray Dataset that is written to netCDF.

    All three fields are stored as float32 on dimensions (time, lat, lon);
    latitude, longitude and time are taken from ``rsds``.

    Args:
        du (Dataset): sample dataset providing the global attributes
            ``experiment_id``, ``source_id`` and ``variant_label``
        rsds (DataArray): rsds field, units W/m2; also supplies lat, lon, time
        rsdsdiff (DataArray): rsdsdiff field, units W/m2
        tas (DataArray): air temperature field, units K

    Returns:
        xarray Dataset: Dataset with variables rsds, rsdsdiff and tas
    """
    lat = xr.DataArray(
        data=rsds.lat.values.astype('float32'),
        dims=["lat"],
        coords=dict(
            lat=(["lat"], rsds.lat.values)
        ),
        attrs=dict(
            long_name="latitude",
            units="degrees_north",
            axis="Y"
        ),
    )
    lon = xr.DataArray(
        data=rsds.lon.values.astype('float32'),
        dims="lon",
        coords=dict(
            lon=(["lon"], rsds.lon.values)
        ),
        attrs=dict(
            long_name="longitude",
            units="degrees_east",
            axis="X"
        ),
    )

    field_dims = ["time","lat","lon"]
    ds = xr.Dataset(
        data_vars=dict(
            rsds = (
                field_dims,rsds.values.astype('float32'),
                dict(long_name = "rsds",
                     units = "W/m2")),
            rsdsdiff = (
                field_dims,rsdsdiff.values.astype('float32'),
                dict(long_name = "rsdsdiff",
                     units = "W/m2",
                     vert_units = "W/m2")),
            tas = (
                field_dims,tas.values.astype('float32'),
                # tas is stored in K, i.e. a temperature; the former
                # "surface air density" label did not match the units.
                dict(long_name = "near-surface air temperature",
                     units = "K",
                     height = "surface")),
            ),
        coords=dict(
            lon=lon,
            lat=lat,
            time=rsds.time
            ),
        attrs=dict(
            data_source = "Processed data from CMIP6 runs",
            experiment = du.experiment_id,
            source = du.source_id,
            variant_label = du.variant_label,
            # Timestamp of when the output Dataset was assembled
            data_written = datetime.now().strftime("%d/%m/%Y %H:%M")
            )
    )
    return ds

In [None]:


def make_path_to_file(root_url,model,experiment,variant,table,var,grid,version,dates):
    """Build the full path (or URL) of one CMIP6-style data file.

    The directory is ``root_url/experiment/variant/table/var/grid/version``
    and the file name is
    ``var_table_model_experiment_variant_grid_dates.nc``.

    Args:
        root_url (str): root directory or URL; trailing slashes are ignored
        model (str): model name, used in the file name only
        experiment (str): experiment identifier
        variant (str): variant label
        table (str): table / frequency identifier
        var (str): variable name
        grid (str): grid label
        version (str): dataset version directory
        dates (str): date-range string placed at the end of the file name

    Returns:
        str: directory and file name joined with "/"
    """
    # A root given with a trailing "/" would otherwise yield "//" in the result.
    root = root_url.rstrip("/")
    directory = "/".join((root,experiment,variant,table,var,grid,version))
    filename = "_".join((var,table,model,experiment,variant,grid,dates)) + ".nc"
    return "/".join((directory,filename))


def _select_month(ds, year, month):
    """Return the time steps of ``ds`` stamped inside month ``month`` of ``year``."""
    stamps = ds["time"]
    in_month = (stamps.dt.year == year) & (stamps.dt.month == month)
    return ds.isel(time=np.flatnonzero(in_month.values))


def main():
    """Write one netCDF file per month with rsds, rsdsdiff and tas over Europe.

    Reads the experiment name from ``sys.argv[1]`` and the variant label from
    ``sys.argv[2]``. For every year in the experiment's range the yearly rsds,
    rsdsdiff and tas files are opened, cut and interpolated with
    ``cut_europe_and_interpolate`` and written month by month through
    ``make_data_set``. If monthly output files already exist in the working
    directory, processing resumes after the last time step of the newest one.

    Raises:
        SystemExit: if fewer than two command-line arguments are given.
    """
    model = "CanESM5"
    # NOTE(review): this root already ends in historical/r1i1p2f1 and
    # make_path_to_file appends experiment and variant again - confirm that
    # this matches the directory layout on disk.
    root_url = "/groups/FutureWind/SFCRAD/CanESM5/historical/r1i1p2f1/"

    grid = "gn"
    version = "v20190429"
    table = "3hr"

    calendar = 'noLeap'
    if len(sys.argv) < 3:
        raise SystemExit("usage: <script> EXPERIMENT VARIANT")
    experiment = sys.argv[1]
    if (experiment == "historical"):
        year = 1980; last_year = 2014
    else:
        year = 2050; last_year = 2070
    variant = sys.argv[2]
    print("Retrieve data for",\
        "\n model:  ",model,"\n experiment:",experiment,\
        "\n variant:",variant)

    # Output files are named per month; the glob used for resuming matches
    # exactly that naming.
    # NOTE(review): the original output name came from a helper that is not
    # defined in this notebook - adjust template and glob if another naming
    # is expected downstream.
    out_template = "rsds_rsdsdiff_tas_{:04d}-{:02d}.nc"
    old_files = sorted(glob("rsds_rsdsdiff_tas_????-??.nc"))

    first_month = 1
    if not old_files:
        print("No previous files")
    else:
        # Resume after the last time step already written
        with xr.open_dataset(old_files[-1],decode_times=True,use_cftime=True) as ff:
            last_stamp = ff["time"].values[-1]
        next_date = last_stamp + timedelta(hours=6)
        print("Next date:",next_date)
        year = next_date.year
        first_month = next_date.month

    print("year",year)
    last_date = cftime.datetime(last_year+1,1,1,0,calendar=calendar)
    print("Processing up to",last_date)

    while (year <= last_year):

        # What is the date string in the files, 1 year each
        # NOTE(review): the end stamp uses hour 18 - confirm it matches the
        # names of the 3-hourly input files.
        start_date = cftime.datetime(year,1,1,0,calendar=calendar)
        end_date = cftime.datetime(year,12,31,18,calendar=calendar)
        dates = "-".join((start_date.strftime("%Y%m%d%H%M"),end_date.strftime("%Y%m%d%H%M")))
        print("Yearly file dates",dates)

        paths = [
            make_path_to_file(root_url,model,experiment,variant,table,var,grid,version,dates)
            for var in ("rsds","rsdsdiff","tas")
        ]
        for path_to_file in paths:
            print("file open:",path_to_file)

        # Context managers make sure the yearly input files are closed again
        with xr.open_dataset(paths[0],decode_times=True,use_cftime=True) as ds_rsds, \
             xr.open_dataset(paths[1],decode_times=True,use_cftime=True) as ds_rsdsdiff, \
             xr.open_dataset(paths[2],decode_times=True,use_cftime=True) as ds_tas:

            # Cut and interpolate once per year, then split into months
            rsds_eu, rsdsdiff_eu, tas_eu = cut_europe_and_interpolate(
                ds_rsds,ds_rsdsdiff,ds_tas)

            # Only the months of the year whose files are open; a resumed run
            # starts at first_month instead of January.
            for month in range(first_month,13):
                print(year,month)
                rsds_month = _select_month(rsds_eu,year,month)["rsds"]
                rsdsdiff_month = _select_month(rsdsdiff_eu,year,month)["rsdsdiff"]
                tas_month = _select_month(tas_eu,year,month)["tas"]
                if rsds_month.sizes["time"] == 0:
                    print("No data for",year,month)
                    continue

                # ds_rsds supplies the global attributes read by make_data_set
                ds = make_data_set(ds_rsds,rsds_month,rsdsdiff_month,tas_month)
                filename = out_template.format(year,month)
                ds.to_netcdf(filename,mode="w",engine="netcdf4",
                            unlimited_dims=['time'])
                print(filename," written to disk")

        year = year + 1
        first_month = 1

# Run the processing only when executed as a script; main() reads the
# experiment name and variant label from sys.argv[1] and sys.argv[2].
if __name__ == "__main__":
    main()