## Prepare CMIP6 Data for ML Models

In [80]:
# import packages

import numpy as np
import xarray as xr
import pandas as pd

# plotting
try:
    import matplotlib.pyplot as plt
    import cartopy.crs as ccrs
except Exception as err:
    # Plotting is optional, so any import failure is reported and the notebook
    # carries on. Unlike a bare `except:`, this lets KeyboardInterrupt and
    # SystemExit propagate, and the message shows the actual reason.
    print('not installed:', err)

In [81]:
# Directory holding the CMIP6 (GFDL-ESM4) input files at 5° resolution
datadir = '/home/steidani/hackathon/2021_ai_climate/data/cmip6/gfdl-esm4/'
pathway = 'ssp370' # CHANGE HERE: scenario tag that is inserted into the input and output file names

In [82]:
# Open all daily temperature (`tas`) and precipitation (`pr`) files of the
# selected pathway; files matching each glob are combined by their coordinates.
file_stem = f'{datadir}gfdl-esm4_r1i1p1f1_w5e5_{pathway}'
t2m = xr.open_mfdataset(f'{file_stem}_tas_global_daily_*.nc', combine='by_coords').tas
precip = xr.open_mfdataset(f'{file_stem}_pr_global_daily_*.nc', combine='by_coords').pr

In [83]:
# units:

# t2m in K
# precip in kg m-2 s-1 (flux)

# ERA5 stores precipitation in m (sum over one day), so convert the flux to that unit:
#   1 kg/m2/s = 86400 mm/day = 86.4 m/day
# NOTE(review): re-running this cell rescales `precip` again; run it once after loading.
precip = precip * 86.4

In [84]:
# Reduce the daily fields to one mean and one standard deviation per month of each year.
def _monthly_stat(daily, stat):
    """Apply the reduction named `stat` ('mean' or 'std') per month and drop all-NaN time steps."""
    per_month = daily.resample(time='1M', keep_attrs=True)
    reduced = getattr(per_month, stat)(keep_attrs=True)
    return reduced.dropna('time', how='all')


t2m_mean = _monthly_stat(t2m, 'mean')
t2m_std = _monthly_stat(t2m, 'std')
precip_mean = _monthly_stat(precip, 'mean')
precip_std = _monthly_stat(precip, 'std')

In [85]:
# function to get the lat lon boundary of the MODIS tiles (h,v)

def get_lat_lon(h,v):
    """Return the lon/lat bounding box of the MODIS tile (h, v).

    The bounds are looked up in the table ``sn_bound_10deg.txt``, which is
    read from the current working directory on every call. The first five
    lines of that file are skipped; the next line must hold the column names
    ``iv``, ``ih``, ``lon_min``, ``lon_max``, ``lat_min`` and ``lat_max``.

    Parameters
    ----------
    h, v : int or anything accepted by ``int()``
        Horizontal and vertical tile index.

    Returns
    -------
    tuple
        ``(lonmin, latmin, lonmax, latmax)`` of the first matching table row.

    Raises
    ------
    IndexError
        If the table contains no row for the requested tile.
    """
    from pandas import read_csv

    # sep=r'\s+' is the supported spelling of the deprecated
    # delim_whitespace=True and parses the table identically.
    grid = read_csv('sn_bound_10deg.txt', skiprows=5, sep=r'\s+')

    tile = grid.loc[(grid.ih == int(h)) & (grid.iv == int(v))]
    if tile.empty:
        # Same exception type as the out-of-bounds .iloc[0] raised before,
        # but with a message that names the offending tile.
        raise IndexError(f'MODIS tile h={h}, v={v} not found in sn_bound_10deg.txt')

    lonmin = tile.lon_min.iloc[0]
    latmin = tile.lat_min.iloc[0]
    lonmax = tile.lon_max.iloc[0]
    latmax = tile.lat_max.iloc[0]

    return lonmin, latmin, lonmax, latmax

In [86]:
# Unique MODIS tiles (combination of h and v) listed in the fire-count metadata, ordered by h then v.
fire_counts = pd.read_csv('/home/steidani/hackathon/2021_ai_climate/data/MCD64A1/fire_counts_from_mcd64a1_meta.csv')
hs_vs = fire_counts[["h", 'v']].drop_duplicates().sort_values(['h', 'v'])

# One flat list per output column; every tile appends one entry per monthly time step.
vs, hs = [], []
mean_t2m, std_t2m = [], []
mean_precip, std_precip = [], []
years, months = [], []

# The monthly time axis is the same for every tile, so extract year/month only once.
step_years = t2m_mean.time['time.year'].values
step_months = t2m_mean.time['time.month'].values


def _tile_average(field, lat_range, lon_range, **reduce_kwargs):
    """Average `field` over the grid cells selected by the lat/lon slices and return the values."""
    inside = field.sel(lat=lat_range, lon=lon_range)
    return inside.mean(("lon", "lat"), **reduce_kwargs).values


# Walk over the unique tiles and collect the temporal evolution of temperature and precipitation.
for _, tile in hs_vs.iterrows():
    h, v = tile['h'], tile['v']

    # tile boundary in lat lon
    lonmin, latmin, lonmax, latmax = get_lat_lon(h, v)
    lat_range, lon_range = slice(latmin, latmax), slice(lonmin, lonmax)

    # spatial average of the monthly mean and monthly std fields inside the tile
    tile_mean_t2m = _tile_average(t2m_mean, lat_range, lon_range, skipna=True)
    tile_std_t2m = _tile_average(t2m_std, lat_range, lon_range)
    tile_mean_precip = _tile_average(precip_mean, lat_range, lon_range, skipna=True)
    tile_std_precip = _tile_average(precip_std, lat_range, lon_range)

    # repeat the tile indices so that every monthly value carries its (h, v) label
    n_steps = len(tile_mean_t2m)
    vs.extend(np.repeat(v, n_steps))
    hs.extend(np.repeat(h, n_steps))
    years.extend(step_years)
    months.extend(step_months)
    mean_t2m.extend(tile_mean_t2m)
    std_t2m.extend(tile_std_t2m)
    mean_precip.extend(tile_mean_precip)
    std_precip.extend(tile_std_precip)

  x = np.divide(x1, x2, out)
  x = np.divide(x1, x2, out)
  x = np.divide(x1, x2, out)
  x = np.divide(x1, x2, out)
  x = np.divide(x1, x2, out)
  x = np.divide(x1, x2, out)
  x = np.divide(x1, x2, out)
  x = np.divide(x1, x2, out)


In [87]:
# quick check: print the number of collected samples and compare the lengths of the column lists
# NOTE(review): hs, std_t2m and std_precip are not part of this comparison.
print(len(months))
len(months) == len(years) == len(vs) == len(mean_t2m) == len(mean_precip)

64320


True

In [88]:
# data to dataframe: one row per (tile, time step), ordered by v and then h
column_names = ['v','h','year','month','t2m_mean','t2m_std', 'precip_mean', 'precip_std']
records = list(zip(vs,hs,years,months,mean_t2m,std_t2m, mean_precip, std_precip))
# list.sort is stable, so rows of the same tile keep the order in which they were collected
records.sort(key=lambda rec: (rec[0], rec[1]))
# drop rows with missing values; presumably tiles at the edge that are too small (unconfirmed)
input_csv = pd.DataFrame(records, columns=column_names).dropna()
input_csv

Unnamed: 0,v,h,year,month,t2m_mean,t2m_std,precip_mean,precip_std
0,2,9,2051,1,259.454834,5.971936,0.001270,0.002139
1,2,9,2051,2,266.466919,5.932872,0.001880,0.002858
2,2,9,2051,3,264.654724,4.026393,0.002620,0.003891
3,2,9,2051,4,266.299377,4.163392,0.000721,0.001195
4,2,9,2051,5,273.952820,2.477456,0.000812,0.001568
...,...,...,...,...,...,...,...,...
64315,14,14,2100,8,275.371552,2.234290,0.003379,0.003949
64316,14,14,2100,9,276.401947,1.704601,0.003373,0.003530
64317,14,14,2100,10,278.187225,1.333710,0.002549,0.003164
64318,14,14,2100,11,279.342834,1.133068,0.003176,0.004388


In [89]:
# Write the table, rounded to 4 decimals and without the index column, to a pathway-specific CSV.
output_csv_path = f'/home/steidani/hackathon/2021_ai_climate/data/cmip6_{pathway}.csv'
input_csv.round(4).to_csv(output_csv_path, index=False)