In [3]:
%config Completer.use_jedi = False

In [2]:
import os
import glob
import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
import matplotlib
import tensorflow as tf

In [7]:
dx = xr.open_dataset('data/near_surface_air_temperature/historical/MPI-ESM1-2-HR/mpi-esm1-2-hr_r1i1p1f1_w5e5_historical_tas_global_daily_1850_1850.nc')

In [12]:
dx.tas.loc["1850-01-01":"1850-01-02"]  # demonstrate slicing using timestamp

In [158]:
# TODO: solve differenct calendar mismatch
# https://climate-cms.org/2019/11/12/Calendars-and-monthly-data.html

batch_size = 32
time_len = 12  # how long each training sample should be, in months or days
models = ['GFDL-ESM4','IPSL-CM6A-LR','MPI-ESM1-2-HR']  # models for temp, prec, LAI

def gen_data_card():  
    model = np.random.choice(np.array(models))  # which of 3 models to choose from
    
    # MONTHLY PICK
    start_year = np.random.randint(1850,2014+1)
    start_month = np.random.randint(1,12+1)
    end_year = start_year + ((start_month+time_len-1) // 12)
    end_month = (start_month+time_len) % 12
    if end_month == 0:
        end_month = 12
    print(model,start_year, start_month, end_year, end_month)
    
    temp = xr.open_mfdataset('data/near_surface_air_temperature/historical/{}/*.nc'.format(model))
    temp = temp.tas.loc["{}-{}-16".format(start_year, start_month):"{}-{}-16".format(end_year, end_month)]  
    
    prec = xr.open_mfdataset('data/precipitation_flux/historical/{}/*.nc'.format(model))
    prec = prec.pr.loc["{}-{}-16".format(start_year, start_month):"{}-{}-16".format(end_year, end_month)]  
    
    lai = xr.open_mfdataset('data/leaf_area_index/historical/{}/*.nc'.format(model))
    try:  # TODO: feburary is 15th, try-catch still doesn't work sometimes?
        lai = lai.lai.loc["{}-{}-16".format(start_year, start_month):"{}-{}-16".format(end_year, end_month)]
        print("16,16")
    except ValueError:
        try:
            lai = lai.lai.loc["{}-{}-16".format(start_year, start_month):"{}-{}-15".format(end_year, end_month)]
            print('16,15')
        except ValueError:
            try:
                lai = lai.lai.loc["{}-{}-15".format(start_year, start_month):"{}-{}-16".format(end_year, end_month)]
                print('15,16')
            except ValueError:
                lai = lai.lai.loc["{}-{}-15".format(start_year, start_month):"{}-{}-15".format(end_year, end_month)]
                print('15,15')
                
    # TODO: currently select randomly, but averaging or using only one is also an option
    npp_files = glob.glob('data/net_primary_production_on_land/historical/**', recursive=True) 
    print(npp_files)
    npp = xr.open_mfdataset(np.random.choice(np.array(npp_files)))
    try:  # TODO: feburary is 15th, try-catch still doesn't work sometimes?
        npp = npp.npp.loc["{}-{}-16".format(start_year, start_month):"{}-{}-16".format(end_year, end_month)]
        print("16,16")
    except ValueError:
        try:
            npp = npp.npp.loc["{}-{}-16".format(start_year, start_month):"{}-{}-15".format(end_year, end_month)]
            print('16,15')
        except ValueError:
            try:
                npp = npp.npp.loc["{}-{}-15".format(start_year, start_month):"{}-{}-16".format(end_year, end_month)]
                print('15,16')
            except ValueError:
                npp = npp.npp.loc["{}-{}-15".format(start_year, start_month):"{}-{}-15".format(end_year, end_month)]
                print('15,15')
                
    inputs = xr.concat((temp,prec), dim='lat')  # two maps next to each other
    outputs = xr.concat((lai,npp), dim='lat')
    
    yield(inputs, outputs)
    
    
    
    # DAILY PICK
#     start_time = np.random.choice(np.arange(0, (2014-1850+1)*365-time_slice, ))
#     start_time = np.random.randint(0, (2014-1850+1)*365-time_slice)  # choose a random slice start point
# #     file_i_start = (start_time-365) // (365*10)  # figure out which file should be opened
# #     file_i_end = (start_time+time_len-365) // (365*10)  # eventually, two or multiple should be opened
#     temp = xr.open_mfdataset('data/near_surface_air_temperature/historical/{}/*.nc'.format(model))  # load all in one xarray
#     temp = temp.tas[start_time : start_time+time_len]  # select the right slice
#     prec = xr.open_mfdataset('data/precipitation_flux/historical/{}/*.nc'.format(model))
#     prec = prec.pr[start_time : start_time+time_len]
#     lai = xr.open_mfdataset('data/leaf_area_index/historical/{}/*.nc'.format(model))
#     lai = lai.lai[start_time//31 : ]  # TODO: daily slice and monthly slice don't match (17.03-25.04 /= 01.03-01.05)
    
    # LOAD SEPARATE DATA
#     files = sorted(glob.glob('./data/near_surface_air_temperature/historical/{}/*'.format(model)))  # all files in dir
#     years_i = np.random.randint(0, len(files))  # which years data should be used?
#     temp = xr.open_dataset(files[years_i]).tas  # choose a random dataset
#     day = np.random.randint(0, temp.shape[0]-time_len)  # choose a day where slice start # TODO: doesn't work with 1850
#     temp = temp[day:day+time_len]  # select a slice
    
#     # precipitation
#     files = glob.glob('./data/precipitation_flux/historical/{}/*'.format(model))  # all files in dir
#     temp = xr.open_dataset(np.random.choice(files)).tas  # choose a random dataset
#     day = np.random.randint(0, temp.shape[0]-time_slice)  # choose a day where slice start # TODO: doesn't work with 1850
#     temp = temp[day:day+time_slice]  # select a slice

# #     lai = 
# #     npp = 
#     inputs = xr.concat((temp,prec), dim='lat')

In [160]:
data_gen = gen_data_card()
din, dout = next(data_gen)

GFDL-ESM4 1880 4 1881 4


ValueError: no ISO-8601 or cftime-string-like match for string: 1880-4-15

In [118]:
lai = xr.open_mfdataset('data/leaf_area_index/historical/GFDL-ESM4/lai_Lmon_GFDL-ESM4_historical_r1i1p1f1_185001-201412.nc')

In [127]:
lai.time.loc['1949-08-16']  # das hier funzt? but in generator try-catch doesn't work somehow