In [1]:
import os
import time
import numpy as np
import h5py
import pandas as pd
import conusfun as cfun
import downscale as down
from datetime import datetime
import xarray as xr
import dask.array as da

import cartopy
import cartopy.feature as cf
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import cartopy.io.shapereader as shpreader

import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.gridspec as gridspec

In [2]:
outdir  = os.path.join('..', 'data', 'tmpa_conus_data')
start_time = time.time()

In [3]:
# relevant quantities:
# TODO: get them from conusfun
thresh = cfun.pixelkwargs['thresh']
maxmiss = cfun.pixelkwargs['maxmiss']
TR = cfun.Tr
domain = 'conus'
outname = "evd_conus_map_{}.hdf5".format(domain)
# land_sea_mask = os.path.join(cfun.tmpa_dir, 'TRMM_TMPA_LandSeaMask.2.nc4')

In [4]:
# TODO: get them from conusfun
if domain == 'conus':
    nb = 50.0
    sb = 22.0
    eb = -60.0
    wb = -130.0
    tmpa_hdf_file = os.path.join(cfun.tmpa_dir, 'data_tmpa_daily.hdf5')
elif domain == 'world':
    nb = 50.0
    sb = -50.0
    eb = 180.0
    wb = -180.0
    tmpa_hdf_file = os.path.join(cfun.tmpa_dir, 'data_tmpa_world_daily.hdf5')
else:
    print('main_evd_maps ERROR:: must specify a valid domain!')

print(f'File: {tmpa_hdf_file}')

File: ../data/tmpa_conus_data/data_tmpa_daily.hdf5


In [5]:
# read dask array with all daily precipitation data
f = h5py.File(tmpa_hdf_file, "r")
print(list(f.keys()))

tmpalat = f['lat'][:]
tmpalon = f['lon'][:]
nlat = np.size(tmpalat)
nlon = np.size(tmpalon)
dates_int = f['dates'][:]
# hours_int = f['hours'][:]
dset = f['prcp']
# print('dataset shape = {}'.format(dset.shape))
x = da.from_array(dset, chunks=(6, 6, 300))
# UTC time
dates = [datetime.strptime(str(integd), '%Y%m%d') for integd in dates_int]
xconus = xr.DataArray(x,
        coords={'lon':tmpalon, 'lat':tmpalat, 'time':dates},
        dims=('lon', 'lat', 'time'))
xconus = xconus.where(xconus >= -0.001)
### end reading prcp dataset ###

['dates', 'lat', 'lon', 'prcp']


In [13]:
years_all  = df['YEAR']
years      = np.unique(years_all)
nyears     = np.size(years)
maxima     = np.zeros(nyears)

print(years, nyears)

[2018] 1


In [12]:
# for each grid cell do the following:
ntr = np.size(TR)
Fi = 1 - 1 / TR
qmev = np.zeros((nlon, nlat, ntr))
qgev = np.zeros((nlon, nlat, ntr))
for ii, clon in enumerate(tmpalon):
    print(ii)
    for jj, clat in enumerate(tmpalat):
        xpixel = xconus.sel(lat=clat, lon=clon).dropna(dim='time', how='any').load()
        ts = xpixel.values
        years = xpixel.time.dt.year.values
        df = pd.DataFrame({'PRCP': ts, 'YEAR': years})
        # df = down.remove_missing_years(df, maxmiss)[0]
        
        Ny, Cy, Wy = down.mev_fit(df, thresh=thresh)
        x0 = 9.0 * np.mean(Cy)
        qmev[ii, jj, :] = down.mev_quant(Fi, x0, Ny, Cy, Wy, thresh=thresh)[0]
        # fit GEV and compute quantiles
        XIemp, Fiemp, TRemp = down.tab_rain_max(df)
        
        csi, psi, mu = down.gev_fit_lmom(XIemp) # origianl: csi, psi, mu = down.gev_fit_lmom(XIemp)  # fit to annual maxima
        
        qgev[ii, jj, :] = down.gev_quant(Fi, csi, psi, mu)

0


ZeroDivisionError: division by zero

In [70]:
outdir_data  = os.path.join('..', 'output', 'pixel_stats')
outname2 = "tmpa_mev_global_quants.hdf5"

In [71]:
with h5py.File(os.path.join(outdir_data, outname2), "w") as f:
    f.create_dataset("qmev", data = qmev, dtype='f')
    f.create_dataset("qgev", data = qgev, dtype='f')
    # f.create_dataset("num_complete_years", data = num_complete_years, dtype='int32')
    f.create_dataset("Tr", data = TR,  dtype='int32')
    f.create_dataset("lat", data = tmpalat,  dtype='f')
    f.create_dataset("lon", data =tmpalon,  dtype='f')
    # f.create_dataset("nmax_miss", data =nmax_miss,  dtype='f')
    f.create_dataset("thresh", data =thresh,  dtype='f')
    # f.create_dataset("min_n_complete_years", data =min_n_years,  dtype='int32')