In [1]:

import geopandas as gp
import numpy as np
import itertools
import pickle
import pandas as pd

import rasterio
from rasterio.features import shapes

# Create a vectorized raster of the CONUS

## Create the grid_xy array

In [2]:
# Column (x) and row (y) indices of every pixel on the 881-row by 1121-column grid.
grid_x, grid_y = np.meshgrid(np.arange(0, 1121), np.arange(881))
# Pack both indices into a single integer id of the form 1XXXXYYYY
# (x and y each occupy four zero-padded decimal digits after the leading 1).
grid_xy = 10**8 + 10**4 * grid_x + grid_y


## Convert the raster to a shapefile, substituting grid_xy for the pixel values

In [4]:

# Polygonize the grid: every pixel carries a unique grid_xy id, so each pixel
# becomes its own polygon labelled with that id.
# The GeoTIFF is opened only for its affine transform; its pixel values are
# not used (the previous read of band 1 was immediately overwritten).
with rasterio.Env():
    with rasterio.open('/storage/home/hcoda1/6/njadidoleslam3/p-rbras6-0/projects/stochsm/data/nws_precip_1day_20210921_conus.tif') as src:
        # rasterio.features.shapes does not accept int64 input; the largest
        # id (111200880) fits comfortably in int32.
        image = grid_xy.astype(np.int32)
        # Label each polygon with the value shapes() reports for it instead
        # of relying on the enumeration order matching grid_xy.ravel().
        # The list is built inside the with-block so the lazy generator is
        # fully consumed while the dataset and GDAL environment are open.
        geoms = [
            {'properties': {'raster_val': int(v)}, 'geometry': s}
            for s, v in shapes(image, transform=src.transform)
        ]

gpd_polygonized_raster = gp.GeoDataFrame.from_features(geoms)
# Rename by column name rather than by position so the result does not depend
# on the column order produced by from_features.
gpd_polygonized_raster = gpd_polygonized_raster.rename(columns={'raster_val': 'grid_xy'})
gpd_polygonized_raster['grid_xy'] = gpd_polygonized_raster['grid_xy'].astype(np.int64)
gpd_polygonized_raster.crs = "+proj=stere +lat_0=90 +lat_ts=60 +lon_0=-105 +x_0=0 +y_0=0 +a=6371200 +b=6371200 +units=m +no_defs"
gpd_polygonized_raster.to_file('/storage/coda1/p-rbras6/0/njadidoleslam3/projects/stochsm/data/gis_files/stage4_grid.shp', driver='ESRI Shapefile')


In [None]:
def get_mit(year):
    """Load one year's inter-event statistics and attach them to the grid polygons.

    Reads the module-level ``gpd_polygonized_raster``, which must already be
    defined with a ``grid_xy`` column.

    Parameters
    ----------
    year : int or str
        Year substituted into the pickle file name.

    Returns
    -------
    geopandas.GeoDataFrame
        The polygons whose ``grid_xy`` has a record with non-missing ``cv``
        and ``min_mit``, with columns ``min_mit``, ``cv`` and ``mean_it``
        (``mean_it`` cast to float16).
    """
    fn_in = '/storage/coda1/p-rbras6/0/njadidoleslam3/projects/stochsm/stage4_analysis/events/{year}.pickle'.format(year = year)
    # NOTE: pickle.load can execute arbitrary code; only read trusted files.
    with open(fn_in, 'rb') as handle:
        _mit = pickle.load(handle)

    # The first element is skipped; the remainder is laid out as consecutive
    # 4-value records (grid_xy, min_mit, cv, mean_it).
    mit_array = pd.DataFrame(np.array(_mit[1:]).reshape(-1, 4))
    mit_array.columns = ['grid_xy', 'min_mit', 'cv', 'mean_it']

    # .copy() makes the filtered frame independent, so the column assignment
    # below writes to it directly instead of to a slice of mit_array.
    mit_valid = mit_array.loc[mit_array['cv'].notna()].copy()
    mit_valid['grid_xy'] = mit_valid['grid_xy'].astype(np.int64)

    # Match records to polygons by pixel id. The previous index-based join
    # paired row i of the polygons with row i of the pickle, which is only
    # correct when both happen to be stored in the same order.
    data = gpd_polygonized_raster.merge(mit_valid, on='grid_xy', how='inner')
    data_valid = data.loc[data['min_mit'].notna()].copy()
    data_valid['mean_it'] = data_valid['mean_it'].astype(np.float16)
    return data_valid

In [None]:
# To add a new cell, type '# %%'
# To add a new markdown cell, type '# %% [markdown]'
# %%
import pandas as pd
import itertools
import matplotlib.pyplot as plt
from matplotlib import cm
from datetime import datetime
import numpy as np
import os
import geopandas

# Render one 'mean interarrival time' map per year and save it as a PNG.
fn_temp = 'mean_it_{year}.png'
# The colormap is the same for every year, so build it once.
cm1 = plt.cm.get_cmap('Reds',10)
for year in range(2000, 2021):
    fig = None
    try:
        data = get_mit(year)
        fig, ax = plt.subplots(figsize=(10, 8))

        a = data.plot(ax=ax, column='mean_it', cmap=cm1, vmin=0, vmax=200)
        ax.set_axis_off()
        ax.invert_yaxis()
        ax.set_title(str(year), fontsize=25)

        # Horizontal colorbar in its own axes along the bottom of the figure.
        cax = fig.add_axes([0.1, 0, 0.8, 0.04])
        fig.colorbar(a.collections[0], cax=cax, orientation='horizontal')
        cax.set_xlabel('Mean Interarrival Time', fontsize=18)
        cax.tick_params(labelsize=16)
        cax.locator_params(nbins=11)

        fn_out = os.path.join('/storage/home/hcoda1/6/njadidoleslam3/p-rbras6-0/projects/stochsm/figures/st4/mean_it',  fn_temp.format(year = year))
        fig.savefig(fn_out, dpi=300, bbox_inches='tight')
    except Exception as err:
        # Best effort: a year that cannot be loaded or drawn is reported and
        # skipped. Unlike a bare `except`, this lets KeyboardInterrupt through
        # and makes the reason for a missing figure visible.
        print('Skipping {year}: {err!r}'.format(year=year, err=err))
    finally:
        # Release the figure even when plotting or saving failed.
        if fig is not None:
            plt.close(fig)
    

# %%





# Test the result of the new algorithm

In [30]:
import sys
import netCDF4
import pickle
import numpy as np
import itertools
import pandas as pd

#######################

def extract_events1(p_data):
    """Split one pixel's precipitation series into events separated by dry spells.

    Relies on two module-level names that must be defined before the call:
    ``mit_list`` (candidate minimum inter-event times, in time steps) and
    ``dt_vec`` (one label per time step, indexed by position).

    Parameters
    ----------
    p_data : numpy.ndarray
        1-D precipitation series. Values in ``[0, 65535)`` count as valid for
        the no-data check; exact zeros are treated as dry steps.

    Returns
    -------
    tuple
        ``(min_mit, summary)``. ``summary`` is a list of four sequences: the
        ``dt_vec`` entries selected for the events, the event durations, the
        dry-period lengths, and each event's total divided by its duration.
        When ``p_data`` is empty or holds no valid value, ``min_mit`` is NaN
        and the four sequences are empty, so the result is always a 2-tuple
        that callers can concatenate with other tuples.
    """

    def storm_def():
        def check_mits(mit):
            # Dry runs at least `mit` steps long, converted to positive lengths.
            idx = (bin_events  <=-mit)*(bin_events<0)
            dry_periods = bin_events[idx]*-1
            # [candidate, std/mean of the run lengths, integer mean], with the
            # first run left out of the statistics.
            return np.array([mit, np.std(dry_periods[1:]) / np.mean(dry_periods[1:]),
                                np.int32(np.mean(dry_periods[1:]))])

        v = (p_data==0)*1                    # 1 on dry steps, 0 on wet steps
        n = v==0                             # wet-step mask
        a = ~n                               # dry-step mask
        c = np.cumsum(a)                     # running count of dry steps
        d = np.diff(np.append([0.], c[n]))   # dry steps between consecutive wet steps
        v[n] = -d                            # each wet step cancels the dry run before it
        dry_vec = np.cumsum(v)               # current dry-run length, back to 0 on wet steps
        # +1 on dry steps, -(run length) on the wet step that ends a dry run,
        # 0 on a wet step that follows another wet step.
        bin_events = np.append([1], np.diff(dry_vec))

        mit_dry = []
        for mit in mit_list:
            _mit = check_mits(mit)
            mit_dry.append(_mit)
            # Stop once the std/mean ratio drops below 1, but always evaluate
            # at least two candidates.
            if ((_mit[1]<1.0) & (len(mit_dry)>1)):
                break
        mit_dry = np.array(mit_dry)
        min_mit = calc_min_mit(mit_dry)

        # Re-select the dry runs using the chosen minimum inter-event time.
        idx = (bin_events  <= -min_mit) * (bin_events < 0)
        dry_periods = bin_events[idx]*-1
        # idx[1:] shifts the positions by one, i.e. to the step just before
        # each run-ending wet step.
        date_idx = np.where(idx[1:])[0]
        event_durations = date_idx[1:] -dry_periods[1:] - date_idx[0:-1]
        event_indices = date_idx[0:-1] + event_durations
        n_events = len(event_durations)

        p_totals = [0]*n_events
        for i in range(n_events):
            p_totals[i] = np.sum(p_data[date_idx[i]:event_indices[i]+1])

        summary = [dt_vec[date_idx[1:]], event_durations, dry_periods[0:-1], p_totals/event_durations]
        return min_mit, summary

    #######################

    def calc_min_mit(p_events):
        # Positions where column 1 (the std/mean ratio) crosses 1 between
        # consecutive candidates.
        idx, = np.where((np.diff(np.sign(p_events[:,1]-1)) != 0)*1==1)
        if len(idx)==0:
            # No crossing: use the candidate whose ratio is closest to 1.
            a = np.abs(p_events[:,1]-1)
            idx, = np.where(a == a.min())
            return p_events[:,0][idx][0]
        # Otherwise use the midpoint of the two candidates around the first crossing.
        return (p_events[:,0][idx][0]+p_events[:,0][idx+1][0])/2.0

    #######################

    # No usable samples: return the same (min_mit, summary) shape as the normal
    # path. The previous 3-element NaN array could not be concatenated with the
    # caller's (gid, year) tuple, and an empty series divided by zero.
    if len(p_data) == 0 or not np.any((p_data>=0) & (p_data<65535)):
        print('No data 2')
        return np.nan, [np.array([]), np.array([]), np.array([]), np.array([])]

    return storm_def()

######### INPUTS ##########

year = 2009

######### Global Variables #########

## Formatting variables
fn_fmt = '/storage/coda1/p-rbras6/0/njadidoleslam3/precipitation/stage4/{year}_stage4_hourly.nc'
out_pickle_fmt = '/storage/coda1/p-rbras6/0/njadidoleslam3/projects/stochsm/stage4_analysis/events/{year}.pickle'

start_dt = '{year}-1-1'.format(year = year)
end_dt = '{year}-1-1'.format(year = year+1)
fn_nc_in = fn_fmt.format(year=year)
fn_out_pickle = out_pickle_fmt.format(year=year)

# technical variables
set_min_mit = 3
# Candidate minimum inter-event times: set_min_mit, then every 6 steps up to 12*24.
mit_list = list(range(set_min_mit, 12*24, 6))

# Hourly stamps from start_dt up to, but not including, end_dt. The end stamp
# falls exactly on the hourly grid, so dropping the last element is equivalent
# to closed='left' without depending on that keyword (removed in pandas 2.0).
dt_vec = pd.date_range(start=start_dt, end=end_dt, freq='60min')[:-1]
# Only the month number of each stamp is kept.
dt_vec = dt_vec.month

grid_y_list = list(range(881))
grid_x_list = list(range(1121))

# Read the dataset one grid row at a time; the context manager closes the
# netCDF file when the loop finishes or fails.
# NOTE: writing results to fn_out_pickle is disabled in this test cell, so
# `lol` only ever holds the records of the most recent row.
with netCDF4.Dataset(fn_nc_in) as f:
    for grid_y in grid_y_list:
        # All time steps for every x position of this row; `.data` takes the
        # array underneath the masked array returned by netCDF4.
        data = np.array(f.variables['p01m'][:, grid_y , :].data)
        lol = []
        for grid_x in grid_x_list:
            # Pixel id of the form 1XXXXYYYY.
            gid = int('1{gid_x}{gid_y}'.format(gid_x = str(grid_x).zfill(4), gid_y = str(grid_y).zfill(4)))
            events = extract_events1(data[:,grid_x])
            lol.append((gid,year,) + events)


  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [2]:
# Scratch check of the date-string template; displays '2001-1-1'.
'{year}-1-1'.format(year=2000 + 1)

'2001-1-1'

In [1]:
import pickle
# Read every object that was pickled back-to-back into the file, stopping
# when the end of the file is reached.
data_test = []
with open('/storage/coda1/p-rbras6/0/njadidoleslam3/projects/stochsm/stage4_analysis/events/2019.pickle', 'rb') as handle:
    while True:
        try:
            chunk = pickle.load(handle)
        except EOFError:
            break
        data_test.append(chunk)

In [2]:
import numpy as np

In [3]:
# One (gid, mean) row per record: element [0] is the pixel id and element
# [3][2] is the sequence that is averaged.
summary = np.array([
    (int(record[0]), np.mean(record[3][2]))
    for chunk in data_test
    for record in chunk
])


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [9]:
import pandas as pd
import itertools
import matplotlib.pyplot as plt
from matplotlib import cm
from datetime import datetime
import numpy as np
import os
import geopandas
# Column 0 of `summary` holds the pixel id, column 1 the mean value.
mit_array = pd.DataFrame({'grid_xy':summary[:,0], 'mean_it':summary[:,1]})
mit_array['grid_xy'] = mit_array['grid_xy'].astype(np.int64)

# Attach the values to the polygons by pixel id, keeping every polygon.
# The previous index-based join paired rows by position, which is only
# correct when both frames are stored in exactly the same order.
data = gpd_polygonized_raster.merge(mit_array, on='grid_xy', how='left')
data['mean_it'] = data['mean_it'].astype(np.float16)

In [10]:

# Draw the 2019 'mean interarrival time' map and save it as a PNG.
fn_temp = 'mean_it_{year}.png'
year = 2019
cm1 = plt.cm.get_cmap('Reds',10)

fig, ax = plt.subplots(figsize=(10, 8))
a = data.plot(ax=ax, column='mean_it', cmap=cm1, vmin=0, vmax=200)
ax.set_axis_off()
ax.invert_yaxis()
ax.set_title(str(year), fontsize=25)

# Horizontal colorbar in its own axes along the bottom of the figure.
cax = fig.add_axes([0.1, 0, 0.8, 0.04])
fig.colorbar(a.collections[0], cax=cax, orientation='horizontal')
cax.set_xlabel('Mean Interarrival Time', fontsize=18)
cax.tick_params(labelsize=16)
cax.locator_params(nbins=11)

fn_out = os.path.join('/storage/home/hcoda1/6/njadidoleslam3/p-rbras6-0/projects/stochsm/trash',  fn_temp.format(year = year))
fig.savefig(fn_out, dpi=300, bbox_inches='tight')
plt.close(fig)

In [1]:
import numpy as np
# Years 2000 through 2020, shuffled in place. seed() is called without an
# argument, so the order is not repeatable from one run to the next.
year_list = np.arange(start=2000, stop=2021)
np.random.seed()
np.random.shuffle(year_list)


In [2]:
year_list

array([2020, 2013, 2008, 2017, 2014, 2019, 2015, 2016, 2001, 2003, 2010,
       2006, 2011, 2009, 2018, 2012, 2005, 2000, 2002, 2007, 2004])

In [2]:
year_list

array([2003, 2005, 2012, 2001, 2018, 2010, 2011, 2013, 2004, 2020, 2019,
       2009, 2006, 2014, 2000, 2017, 2007, 2008, 2015, 2002, 2016])