In [1]:
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib import cm
import dask

In [2]:
import object_detection as obj

In [3]:
def addcols(df, dfsm=None):
    """Add derived time-zone, local-time and rounded-statistic columns to ``df``.

    The frame is modified in place and also returned, so existing callers that
    use either the argument or the return value keep working.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``time``, ``longitude``, ``mean``, ``area``
        and ``max_prec``.
    dfsm : any, optional
        Unused; kept only so existing two-argument calls remain valid.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with these columns added:

        * ``time_zone``  - ``round(longitude / 15)`` (hour offset),
        * ``local_time`` - ``time`` shifted by ``time_zone`` hours,
        * ``mean_ob``, ``area_ob``, ``max_ob`` - ``mean``, ``area`` and
          ``max_prec`` coerced to numeric and rounded to 2 decimals,
        * ``radius``     - ``sqrt(area_ob / pi)``, i.e. the radius of a circle
          with the same area.
    """
    longitude = pd.to_numeric(df['longitude'], errors='coerce')
    df['time_zone'] = np.round(longitude / 15)

    # pd.to_timedelta handles float hours and maps NaN offsets to NaT;
    # casting floats with .astype('timedelta64[h]') is rejected by pandas >= 2.
    df['local_time'] = df['time'] + pd.to_timedelta(df['time_zone'], unit='h')

    for source_col, rounded_col in (('mean', 'mean_ob'),
                                    ('area', 'area_ob'),
                                    ('max_prec', 'max_ob')):
        df[rounded_col] = np.round(pd.to_numeric(df[source_col], errors='coerce'), 2)

    df['radius'] = np.sqrt(df['area_ob'] / np.pi)
    return df

In [4]:
# Open the hourly IMERG dataset of every year (2015-2020), one file per year
(imerg2015, imerg2016, imerg2017,
 imerg2018, imerg2019, imerg2020) = [
    xr.open_dataset(nc_path)
    for nc_path in (
        '../../IMERG/IMERG_hourly_2015.nc',
        '../../IMERG/IMERG_hourly_2016.nc',
        '../../IMERG/IMERG_hourly_2017.nc',
        '../../IMERG/IMERG_hourly_2018.nc',
        '../../IMERG/IMERG_hourly_2019.nc',
        '../../IMERG/IMERG_hourly_2020.nc',
    )
]

In [5]:
# Disable a few warnings:
import warnings
warnings.filterwarnings('ignore', category=UserWarning, append=True)
warnings.filterwarnings('ignore', category=RuntimeWarning, append=True)
warnings.filterwarnings('ignore', category=FutureWarning, append=True)
warnings.filterwarnings('ignore',category=pd.io.pytables.PerformanceWarning)

In [6]:
# Open the AB mask and interpolate it onto the lat/lon grid of the 2015 IMERG file
onlyab = xr.open_dataset('../onlyab5km.nc')
imerg_grid = {'lat': imerg2015.lat.values, 'lon': imerg2015.lon.values}
maskAB = onlyab.interp(imerg_grid)

In [7]:
# Load the grid-cell areas, wrap longitudes into [-180, 180) and sort by longitude
grid_area = xr.open_dataset('../gridarea_dom03r10.nc')
wrapped_lon = (grid_area.coords['lon'] + 180) % 360 - 180
grid_area = grid_area.assign_coords(lon=wrapped_lon).sortby('lon')

In [8]:
# Bring the cell areas onto the lat/lon grid of the 2015 IMERG file
imerg_grid = {'lat': imerg2015.lat.values, 'lon': imerg2015.lon.values}
grid_area = grid_area.interp(imerg_grid)

In [9]:
# Lon/lat window of the full analysis domain
xi, xf, yi, yf = -80, -50, -21, 6
domain = dict(lon=slice(xi, xf), lat=slice(yi, yf))

# Cell areas divided by 1e6 (presumably m^2 -> km^2; confirm against the grid file)
grid_np = grid_area.sel(**domain).cell_area.values / 1e6

# Coordinate vectors of the same window, taken from the 2015 IMERG file
imerg_domain = imerg2015.sel(**domain)
lon = imerg_domain.coords['lon'].values
lat = imerg_domain.coords['lat'].values

In [10]:
## Hourly precipitation: grid points where maskAB.Band1 is not > 0 are set to -1,
## then the (-80..-50, -21..6) window is extracted as a NumPy array
(array_2015, array_2016, array_2017,
 array_2018, array_2019, array_2020) = [
    np.squeeze(
        year_ds.precip.where(maskAB.Band1 > 0, -1)
        .sel(lon=slice(-80, -50), lat=slice(-21, 6))
        .values
    )
    for year_ds in (imerg2015, imerg2016, imerg2017,
                    imerg2018, imerg2019, imerg2020)
]

In [11]:
## Only with hourly precipitation: run the object detection once per year
((labels_2015, df_2015), (labels_2016, df_2016), (labels_2017, df_2017),
 (labels_2018, df_2018), (labels_2019, df_2019), (labels_2020, df_2020)) = [
    obj.precip_objects(precip, grid_np, lon1=lon, area_limit=2500,
                       lat1=lat, rr_limit=1, timeds=pd.Series(year_ds.time))
    for precip, year_ds in (
        (array_2015, imerg2015),
        (array_2016, imerg2016),
        (array_2017, imerg2017),
        (array_2018, imerg2018),
        (array_2019, imerg2019),
        (array_2020, imerg2020),
    )
]


In [18]:
# Inspect which columns obj.precip_objects returned for the 2015 table
df_2015.columns

Index(['time', 'idx', 'area', 'mean', 'tot_prec', 'max_prec', 'y', 'x',
       'longitude', 'latitude'],
      dtype='object')

In [19]:
### Only hourly IMERG: add the derived columns to every yearly table
(df_2015_imerg, df_2016_imerg, df_2017_imerg,
 df_2018_imerg, df_2019_imerg, df_2020_imerg) = [
    addcols(yearly_df, "")
    for yearly_df in (df_2015, df_2016, df_2017, df_2018, df_2019, df_2020)
]

In [20]:
# Persist each yearly table as a pickle file
for frame_to_save, pkl_path in (
    (df_2015_imerg, 'pkl_files/df_2015_imergmax.pkl'),
    (df_2016_imerg, 'pkl_files/df_2016_imergmax.pkl'),
    (df_2017_imerg, 'pkl_files/df_2017_imergmax.pkl'),
    (df_2018_imerg, 'pkl_files/df_2018_imergmax.pkl'),
    (df_2019_imerg, 'pkl_files/df_2019_imergmax.pkl'),
    (df_2020_imerg, 'pkl_files/df_2020_imergmax.pkl'),
):
    frame_to_save.to_pickle(pkl_path)

In [29]:
## *************** SESA region  ***************
xi, xf, yi, yf = -64, -50, -26, -15
del grid_np, lon, lat  # drop the previous region's arrays before rebuilding them

sesa_window = dict(lon=slice(xi, xf), lat=slice(yi, yf))

# Cell areas divided by 1e6 (presumably m^2 -> km^2; confirm against the grid file)
grid_np = grid_area.sel(**sesa_window).cell_area.values / 1e6

# Coordinate vectors of the SESA window, taken from the 2015 IMERG file
imerg_sesa = imerg2015.sel(**sesa_window)
lon = imerg_sesa.coords['lon'].values
lat = imerg_sesa.coords['lat'].values


In [30]:
def select_array_values(datarray, xi=-64, xf=-50, yi=-26, yf=-15):
    """Extract a lon/lat window from ``datarray`` as a NumPy array.

    Time steps in which every selected grid point equals 0 are replaced
    entirely with 1 before the values are returned.
    NOTE(review): presumably this keeps ``obj.precip_objects`` from receiving
    an all-zero frame - confirm why the substitution is needed.

    The defaults are the SESA window. They are written as literals so they no
    longer depend on whatever the globals ``xi``/``xf``/``yi``/``yf`` held when
    this cell was executed.

    Parameters
    ----------
    datarray : xarray.DataArray
        Field with ``lat`` and ``lon`` dimensions.
    xi, xf : float
        Longitude bounds of the window, passed to ``slice(xi, xf)``.
    yi, yf : float
        Latitude bounds of the window, passed to ``slice(yi, yf)``.

    Returns
    -------
    numpy.ndarray
        Values of the selected window with size-1 dimensions squeezed out.
    """
    window = datarray.sel(lon=slice(xi, xf), lat=slice(yi, yf)).squeeze()
    # True where the whole lat/lon field is exactly zero
    all_zeros = (window == 0).all(dim=('lat', 'lon'))
    # Keep values where the field is not all-zero; otherwise fill with 1
    return window.where(~all_zeros, 1).values

In [31]:
# SESA-window precipitation values for 2015
array_2015_sesa = select_array_values(imerg2015.precip)

In [32]:
# SESA-window precipitation values for the remaining years
(array_2016_sesa, array_2017_sesa, array_2018_sesa,
 array_2019_sesa, array_2020_sesa) = [
    select_array_values(year_ds.precip)
    for year_ds in (imerg2016, imerg2017, imerg2018, imerg2019, imerg2020)
]


In [29]:
# NOTE(review): `smap2015` is not defined in any cell visible in this notebook;
# this cell looks like a leftover from another session and fails on a fresh
# kernel - confirm whether it should be removed or the dataset loaded above.
# Selects the `sm` variable in the current xi/xf/yi/yf window and moves the
# third axis to the front.
arraySM_2015_sesa = np.squeeze(smap2015.sm.sel(lon=slice(xi,xf),lat=slice(yi,yf)).values).transpose(2, 0, 1)

In [30]:
# Shape check of the transposed array
print(np.shape(arraySM_2015_sesa))

(468, 110, 140)


In [34]:
## SESA, only with hourly precipitation: run the object detection once per year
((labels_2015_sesa, df_2015_sesa), (labels_2016_sesa, df_2016_sesa),
 (labels_2017_sesa, df_2017_sesa), (labels_2018_sesa, df_2018_sesa),
 (labels_2019_sesa, df_2019_sesa), (labels_2020_sesa, df_2020_sesa)) = [
    obj.precip_objects(precip, grid_np, lon1=lon, area_limit=2500,
                       lat1=lat, rr_limit=1, timeds=pd.Series(year_ds.time))
    for precip, year_ds in (
        (array_2015_sesa, imerg2015),
        (array_2016_sesa, imerg2016),
        (array_2017_sesa, imerg2017),
        (array_2018_sesa, imerg2018),
        (array_2019_sesa, imerg2019),
        (array_2020_sesa, imerg2020),
    )
]


In [36]:
### Only hourly IMERG: add the derived columns to every yearly SESA table
(df_2015_sesa_imerg, df_2016_sesa_imerg, df_2017_sesa_imerg,
 df_2018_sesa_imerg, df_2019_sesa_imerg, df_2020_sesa_imerg) = [
    addcols(yearly_df, "")
    for yearly_df in (df_2015_sesa, df_2016_sesa, df_2017_sesa,
                      df_2018_sesa, df_2019_sesa, df_2020_sesa)
]

In [37]:
# Persist each yearly SESA table as a pickle file
for frame_to_save, pkl_path in (
    (df_2015_sesa_imerg, 'pkl_files/df_2015_sesa_imergmax.pkl'),
    (df_2016_sesa_imerg, 'pkl_files/df_2016_sesa_imergmax.pkl'),
    (df_2017_sesa_imerg, 'pkl_files/df_2017_sesa_imergmax.pkl'),
    (df_2018_sesa_imerg, 'pkl_files/df_2018_sesa_imergmax.pkl'),
    (df_2019_sesa_imerg, 'pkl_files/df_2019_sesa_imergmax.pkl'),
    (df_2020_sesa_imerg, 'pkl_files/df_2020_sesa_imergmax.pkl'),
):
    frame_to_save.to_pickle(pkl_path)

### Saving masks

In [13]:
def to_ds_mask(labels, ds, xi=-80, xf=-50, yi=-21, yf=6):
    """Wrap a label array and its binary mask in an ``xarray.Dataset``.

    Parameters
    ----------
    labels : array-like
        Label values indexed as (time, lat, lon); 0 is treated as background.
    ds : xarray.Dataset
        Supplies the ``time`` coordinate and the ``lat``/``lon`` coordinate
        values of the window.
    xi, xf : float
        Longitude bounds used to pick the ``lon`` coordinate from ``ds``.
    yi, yf : float
        Latitude bounds used to pick the ``lat`` coordinate from ``ds``.

    Returns
    -------
    xarray.Dataset
        ``mask_ocs`` (1 where ``labels`` is non-zero, 0 elsewhere) and
        ``labels_ocs`` (the labels themselves) on (time, lat, lon).
    """
    labels = np.asarray(labels)
    # One vectorised pass instead of building a Python list per time step
    mask_label = np.where(labels != 0, 1, labels)

    lat_values = ds.sel(lat=slice(yi, yf)).lat.values
    lon_values = ds.sel(lon=slice(xi, xf)).lon.values

    ds_ocs = xr.Dataset(
        {
            "mask_ocs": (["time", "lat", "lon"], mask_label),
            "labels_ocs": (["time", "lat", "lon"], labels),
        },
        coords={
            "time": ds.time,
            "lat": (["lat"], lat_values),
            "lon": (["lon"], lon_values),
        },
    )
    return ds_ocs

In [17]:
# Back to the full-domain window; to_ds_mask is called with its default bounds
xi, xf, yi, yf = -80, -50, -21, 6
(ds_ocs_2015, ds_ocs_2016, ds_ocs_2017,
 ds_ocs_2018, ds_ocs_2019, ds_ocs_2020) = [
    to_ds_mask(year_labels, year_ds)
    for year_labels, year_ds in (
        (labels_2015, imerg2015),
        (labels_2016, imerg2016),
        (labels_2017, imerg2017),
        (labels_2018, imerg2018),
        (labels_2019, imerg2019),
        (labels_2020, imerg2020),
    )
]

In [18]:
# Write each yearly mask dataset to NetCDF
for mask_ds, nc_path in (
    (ds_ocs_2015, 'imerg_masks/ds_ocs_hIMERG2015_AB_sizeT2500i1.nc'),
    (ds_ocs_2016, 'imerg_masks/ds_ocs_hIMERG2016_AB_sizeT2500i1.nc'),
    (ds_ocs_2017, 'imerg_masks/ds_ocs_hIMERG2017_AB_sizeT2500i1.nc'),
    (ds_ocs_2018, 'imerg_masks/ds_ocs_hIMERG2018_AB_sizeT2500i1.nc'),
    (ds_ocs_2019, 'imerg_masks/ds_ocs_hIMERG2019_AB_sizeT2500i1.nc'),
    (ds_ocs_2020, 'imerg_masks/ds_ocs_hIMERG2020_AB_sizeT2500i1.nc'),
):
    mask_ds.to_netcdf(nc_path)

In [40]:
## *************** SESA region  ***************
xi, xf, yi, yf = -64, -50, -26, -15

# Build the SESA mask datasets, passing the window bounds explicitly
(ds_ocs_sesa_2015, ds_ocs_sesa_2016, ds_ocs_sesa_2017,
 ds_ocs_sesa_2018, ds_ocs_sesa_2019, ds_ocs_sesa_2020) = [
    to_ds_mask(year_labels, year_ds, xi=xi, xf=xf, yi=yi, yf=yf)
    for year_labels, year_ds in (
        (labels_2015_sesa, imerg2015),
        (labels_2016_sesa, imerg2016),
        (labels_2017_sesa, imerg2017),
        (labels_2018_sesa, imerg2018),
        (labels_2019_sesa, imerg2019),
        (labels_2020_sesa, imerg2020),
    )
]

In [41]:
# Write each yearly SESA mask dataset to NetCDF
for mask_ds, nc_path in (
    (ds_ocs_sesa_2015, 'imerg_masks/ds_ocs_hIMERG2015_sesa_sizeT2500i1.nc'),
    (ds_ocs_sesa_2016, 'imerg_masks/ds_ocs_hIMERG2016_sesa_sizeT2500i1.nc'),
    (ds_ocs_sesa_2017, 'imerg_masks/ds_ocs_hIMERG2017_sesa_sizeT2500i1.nc'),
    (ds_ocs_sesa_2018, 'imerg_masks/ds_ocs_hIMERG2018_sesa_sizeT2500i1.nc'),
    (ds_ocs_sesa_2019, 'imerg_masks/ds_ocs_hIMERG2019_sesa_sizeT2500i1.nc'),
    (ds_ocs_sesa_2020, 'imerg_masks/ds_ocs_hIMERG2020_sesa_sizeT2500i1.nc'),
):
    mask_ds.to_netcdf(nc_path)