In [1]:
import xarray as xr
import glob
from dask.diagnostics import ProgressBar
import numpy as np
import os

In [2]:
def reformat_time_dim(da):
    """
    Return a copy of an xarray object whose time coordinate is rebuilt as an
    array of numpy datetime64 values.

    Every time stamp is reconstructed from its year, month, day, hour,
    minute and second components, read through the ``time.<field>``
    accessors. The data values themselves are not modified.

    Inputs:
    ----------------------------------------------------------------------
    da: xarray DataArray (any object with a ``time`` coordinate that
        supports ``da['time.year']`` etc., ``copy`` and ``assign_coords``)

    Outputs:
    ----------------------------------------------------------------------
    da_reformatted: Copy of da with identical data; only the time
                    coordinate differs (numpy datetime64, second precision)
    """
    # Extract the numerical values of year, month, day, hour, minute, & second
    fields = ('year', 'month', 'day', 'hour', 'minute', 'second')
    components = [da[f'time.{field}'].data for field in fields]

    # Build one ISO-8601 string per time step; zip walks the six component
    # arrays in lock-step, so no index bookkeeping is needed.
    dt_string_list = [f'{y:04d}-{m:02d}-{d:02d}T{H:02d}:{M:02d}:{S:02d}'
                      for y, m, d, H, M, S in zip(*components)]

    # An explicit dtype keeps the result a datetime64 array even when the
    # time axis is empty (a bare np.array([]) would be float64).
    dt_array = np.array(dt_string_list, dtype='datetime64[s]')

    da_reformatted = da.copy()
    da_reformatted = da_reformatted.assign_coords(dict(time=dt_array))

    return da_reformatted

In [3]:
# Label-based selection window passed to .sel(): lat 34 to 38, lon 125 to 130.
# NOTE(review): presumably degrees N / degrees E over South Korea; confirm
# against the grid conventions of the input files.
korea_slice = {
    'lat': slice(34, 38),
    'lon': slice(125, 130),
}

In [4]:
# Model names to process; each name is substituted into the input directory
# and file-name patterns used by the cells that loop over this list.
# Alternative batch, kept for reference:
# model_list =  ['ACCESS-ESM1-5', 'CanESM5', 'IPSL-CM6A-LR', 'MIROC6', 'MRI-ESM2-0']
model_list = [
    'ACCESS-CM2',
    'BCC-CSM2-MR',
    'NorESM2-LM',
]

In [21]:
# model_list = ['HadGEM3-GC31-LL', 'CNRM-CM6-1']

In [6]:
# NOTE(review): this cell only displays `historical_files`; the name is
# assigned solely inside loops in cells further down the notebook, so this
# fails on a fresh kernel run top to bottom. TODO: move below the defining
# cell or delete.
historical_files

['/home/disk/tc/pangulo/CMIP6/ACCESS-CM2/historical/compiled_tasmax_day_ACCESS-CM2_historical_r1i1p1f1_gn_18500101-20141231.nc',
 '/home/disk/tc/pangulo/CMIP6/ACCESS-CM2/historical/compiled_tasmax_day_ACCESS-CM2_historical_r2i1p1f1_gn_18500101-20141231.nc',
 '/home/disk/tc/pangulo/CMIP6/ACCESS-CM2/historical/compiled_tasmax_day_ACCESS-CM2_historical_r3i1p1f1_gn_18500101-20141231.nc']

In [8]:
# Root directories for the compiled CMIP6 input and the regional-mean output.
CMIP6_ROOT = '/home/disk/tc/pangulo/CMIP6'
OUTPUT_DIR = '/home/disk/p/pangulo/CATER-Project/HeatWave_Statistics/Data'


def _open_korea_tmax(model_name, experiment):
    """
    Lazily open realization-1 daily tasmax for one model/experiment and
    reduce it to an unweighted lat/lon mean over `korea_slice`.

    The result is converted to a 'noleap' calendar and its time coordinate
    is rebuilt as numpy datetime64 via `reformat_time_dim`.

    Parameters
    ----------
    model_name : str
        Model directory / file-name component (e.g. 'ACCESS-CM2').
    experiment : str
        Experiment directory / file-name component (e.g. 'historical').

    Returns
    -------
    (files, tmax) : (list of str, xarray.DataArray)
        The matched input paths and the regional-mean tasmax series.

    Raises
    ------
    FileNotFoundError
        If no input file matches the pattern.
    """
    # 'r1i*' (not 'r1*') so that only realization 1 matches; the looser
    # pattern would also pick up r10..r19 variant labels such as r10i1p1f1.
    pattern = (f'{CMIP6_ROOT}/{model_name}/{experiment}/'
               f'compiled_tasmax_day_{model_name}_{experiment}_r1i*.nc')
    # Sorted so the file order (and hence the run) is deterministic.
    files = sorted(glob.glob(pattern))
    if not files:
        raise FileNotFoundError(f'No compiled tasmax files match {pattern}')

    tmax = (xr.open_mfdataset(files, chunks=dict(time=50))
            .sel(korea_slice)
            .tasmax.squeeze()
            .mean(('lat', 'lon')))
    tmax = tmax.convert_calendar('noleap', align_on='year', use_cftime=True)
    return files, reformat_time_dim(tmax)


def _save_netcdf(da, file_name):
    """Write `da` to `file_name`, deleting any existing file at that path first."""
    if os.path.isfile(file_name):
        os.remove(file_name)
    da.to_netcdf(file_name, mode='w')


for model_name in model_list:
    print(model_name)

    # Historical + SSP2-4.5 are concatenated into one extended series.
    print('Historical')
    historical_files, historical_ds = _open_korea_tmax(model_name, 'historical')
    print('SSP')
    ssp_files, ssp_ds = _open_korea_tmax(model_name, 'ssp245')
    with ProgressBar():
        print('Merging & Saving SSP')
        ssp_extended_ds = xr.concat((historical_ds, ssp_ds), dim='time').sortby('time')
        file_name = f'{OUTPUT_DIR}/{model_name}_ssp_extended_koreaTmax.nc'
        _save_netcdf(ssp_extended_ds, file_name)

    print('Hist-Nat')
    histnat_files, histnat_ds = _open_korea_tmax(model_name, 'hist-nat')
    with ProgressBar():
        print('Saving histnat')
        file_name = f'{OUTPUT_DIR}/{model_name}_hist-nat_koreaTmax.nc'
        _save_netcdf(histnat_ds, file_name)

    print('Hist-GHG')
    histGHG_files, histGHG_ds = _open_korea_tmax(model_name, 'hist-GHG')
    with ProgressBar():
        print('Saving histGHG')
        file_name = f'{OUTPUT_DIR}/{model_name}_hist-GHG_koreaTmax.nc'
        _save_netcdf(histGHG_ds, file_name)

ACCESS-CM2
Historical


  da_reformatted = da_reformatted.assign_coords(dict(time=dt_array))
  da_reformatted = da_reformatted.assign_coords(dict(time=dt_array))


SSP


  da_reformatted = da_reformatted.assign_coords(dict(time=dt_array))
  da_reformatted = da_reformatted.assign_coords(dict(time=dt_array))


Merging & Saving SSP
[########################################] | 100% Completed |  3min  1.8s
Hist-Nat
Saving histnat
[                                        ] | 0% Completed |  0.0s

  da_reformatted = da_reformatted.assign_coords(dict(time=dt_array))
  da_reformatted = da_reformatted.assign_coords(dict(time=dt_array))


[########################################] | 100% Completed | 50.5s
Hist-GHG
Saving histGHG
[                                        ] | 0% Completed |  0.0s

  da_reformatted = da_reformatted.assign_coords(dict(time=dt_array))
  da_reformatted = da_reformatted.assign_coords(dict(time=dt_array))


[########################################] | 100% Completed | 50.7s
BCC-CSM2-MR
Historical


  da_reformatted = da_reformatted.assign_coords(dict(time=dt_array))
  da_reformatted = da_reformatted.assign_coords(dict(time=dt_array))


SSP


  da_reformatted = da_reformatted.assign_coords(dict(time=dt_array))
  da_reformatted = da_reformatted.assign_coords(dict(time=dt_array))


Merging & Saving SSP
[########################################] | 100% Completed |  7min 27.0s
Hist-Nat


  da_reformatted = da_reformatted.assign_coords(dict(time=dt_array))
  da_reformatted = da_reformatted.assign_coords(dict(time=dt_array))


Saving histnat
[########################################] | 100% Completed |  5min  9.9s
Hist-GHG


  da_reformatted = da_reformatted.assign_coords(dict(time=dt_array))
  da_reformatted = da_reformatted.assign_coords(dict(time=dt_array))


Saving histGHG
[########################################] | 100% Completed |  5min 11.7s
NorESM2-LM
Historical


  da_reformatted = da_reformatted.assign_coords(dict(time=dt_array))
  da_reformatted = da_reformatted.assign_coords(dict(time=dt_array))


SSP


  da_reformatted = da_reformatted.assign_coords(dict(time=dt_array))
  da_reformatted = da_reformatted.assign_coords(dict(time=dt_array))


Merging & Saving SSP
[########################################] | 100% Completed |  1min 59.9s
Hist-Nat


  da_reformatted = da_reformatted.assign_coords(dict(time=dt_array))
  da_reformatted = da_reformatted.assign_coords(dict(time=dt_array))


Saving histnat
[########################################] | 100% Completed |  1min 32.6s
Hist-GHG


  da_reformatted = da_reformatted.assign_coords(dict(time=dt_array))
  da_reformatted = da_reformatted.assign_coords(dict(time=dt_array))


Saving histGHG
[########################################] | 100% Completed |  1min 31.4s


In [17]:
##
## Code to check if compiled tasmax data exists
##
# Report, per model, every experiment for which no compiled tasmax file
# is found on disk.
# NOTE(review): the first entry of model_list is skipped by the [1:] slice;
# confirm that this is intended.
for model_name in model_list[1:]:
    # Gather the matches for all four experiments first ...
    historical_files = glob.glob(f'/home/disk/tc/pangulo/CMIP6/{model_name}/historical/compiled_tasmax_day_{model_name}_historical*.nc')
    ssp_files = glob.glob(f'/home/disk/tc/pangulo/CMIP6/{model_name}/ssp245/compiled_tasmax_day_{model_name}_ssp245*.nc')
    histnat_files = glob.glob(f'/home/disk/tc/pangulo/CMIP6/{model_name}/hist-nat/compiled_tasmax_day_{model_name}_hist-nat*.nc')
    histGHG_files = glob.glob(f'/home/disk/tc/pangulo/CMIP6/{model_name}/hist-GHG/compiled_tasmax_day_{model_name}_hist-GHG*.nc')

    # ... then emit one message for each experiment that had no match,
    # in the order historical, SSP, hist-nat, hist-GHG.
    availability = (
        (historical_files, f'Missing Historical {model_name}'),
        (ssp_files, f'Missing SSP {model_name}'),
        (histnat_files, f'Missing histnat {model_name}'),
        (histGHG_files, f'Missing histghg {model_name}'),
    )
    for matched_paths, missing_message in availability:
        if not matched_paths:
            print(missing_message)


Missing SSP CNRM-CM6-1
Missing Historical GFDL-CM4
Missing SSP GFDL-CM4
Missing histnat GFDL-CM4
Missing histghg GFDL-CM4
Missing Historical HadGEM3-GC31-LL
Missing SSP HadGEM3-GC31-LL
Missing histnat HadGEM3-GC31-LL
Missing histghg HadGEM3-GC31-LL
