In [1]:
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib import cm
import dask

In [2]:
from open_experiment import control_deaccu,control_land, fixedSM_land, fixedSM_deaccu

In [3]:
import object_detection as obj

In [4]:
# Disable a few warnings:
import warnings
warnings.filterwarnings('ignore', category=UserWarning, append=True)
warnings.filterwarnings('ignore', category=RuntimeWarning, append=True)
warnings.filterwarnings('ignore', category=FutureWarning, append=True)
warnings.filterwarnings('ignore',category=pd.io.pytables.PerformanceWarning)

In [5]:
# Directory holding the post-processed experiment files.
path = '/scratch/wcq7pz/exp_levante_post/'

## Topography: wrap longitudes into [-180, 180) and sort along lon.
topo5km = xr.open_dataset(path + 'topography_dom03_5km.nc')
topo5km = topo5km.assign_coords(lon=(topo5km.coords['lon'] + 180) % 360 - 180)
topo5km = topo5km.sortby('lon')

## Land fraction: same longitude convention as the topography.
frland = xr.open_dataset(path + 'fr_land_dom03_5km.nc')
frland = frland.assign_coords(lon=(frland.coords['lon'] + 180) % 360 - 180)
frland = frland.sortby('lon')

## READ AMAZON MASK and interpolate it onto the topography lat/lon points.
onlyab = xr.open_dataset('onlyab5km.nc')
maskAB = onlyab.interp(lat=topo5km.lat, lon=topo5km.lon)

In [6]:
# Re-opens the land-fraction file and wraps longitudes into [-180, 180).
# NOTE(review): this repeats the load done in the previous cell; one of the
# two can probably be dropped.
frland = xr.open_dataset(path + 'fr_land_dom03_5km.nc')
frland = frland.assign_coords(lon=(frland.coords['lon'] + 180) % 360 - 180)
frland = frland.sortby('lon')

In [7]:
# Re-opens the topography file and wraps longitudes into [-180, 180).
# NOTE(review): the same load already happens in cell In [5]; one of the two
# can probably be dropped.
topo5km = xr.open_dataset(path + 'topography_dom03_5km.nc')
topo5km = topo5km.assign_coords(lon=(topo5km.coords['lon'] + 180) % 360 - 180)
topo5km = topo5km.sortby('lon')

## objects and datasets 

In [8]:
# Grid-cell areas, with longitudes wrapped into [-180, 180) and sorted so
# that negative-longitude slices can be selected later.
grid_area = xr.open_dataset('gridarea_dom3p5.nc')
grid_area = grid_area.assign_coords(lon=(grid_area.coords['lon'] + 180) % 360 - 180)
grid_area = grid_area.sortby('lon')

In [9]:
## *************** Limits for Amazon region ***************
# Bounding box: longitudes xi..xf, latitudes yi..yf.
xi, xf = -80, -50
yi, yf = -21, 6
#del(grid_np,lon,lat)

# Cell areas inside the box, divided by 1e6
# (presumably m^2 -> km^2; confirm the units of cell_area).
grid_np = grid_area.sel(lon=slice(xi, xf), lat=slice(yi, yf)).cell_area.values / 1e6
lon = control_deaccu['lon'].sel(lon=slice(xi, xf)).values
lat = control_deaccu['lat'].sel(lat=slice(yi, yf)).values

#precipitation
# Points where the mask's Band1 is not > 0 are set to -1 before cropping.
array_control = (
    control_deaccu.tot_prec
    .where(maskAB.Band1 > 0, -1)
    .sel(lon=slice(xi, xf), lat=slice(yi, yf))
    .values
    .squeeze()
)
array_fixedSM = (
    fixedSM_deaccu.tot_prec
    .where(maskAB.Band1 > 0, -1)
    .sel(lon=slice(xi, xf), lat=slice(yi, yf))
    .values
    .squeeze()
)

#soil moisture
# w_so summed over the first five depth levels, masked and cropped like the
# precipitation above.
arraySM_control = (
    control_land.isel(depth=slice(0, 5)).sum('depth').w_so
    .where(maskAB.Band1 > 0, -1)
    .sel(lon=slice(xi, xf), lat=slice(yi, yf))
    .values
    .squeeze()
)
arraySM_fixedSM = (
    fixedSM_land.isel(depth=slice(0, 5)).sum('depth').w_so
    .where(maskAB.Band1 > 0, -1)
    .sel(lon=slice(xi, xf), lat=slice(yi, yf))
    .values
    .squeeze()
)

In [10]:
# Detect precipitation objects in both experiments over the Amazon box.
# Returns a label array and a per-object table for each experiment.
# NOTE(review): exact meaning of area_limit / rr_limit is defined in
# object_detection.precip_objects; confirm units there.
labels_control5k, df_control5k = obj.precip_objects(
    array_control,
    grid_np,
    lon1=lon,
    lat1=lat,
    area_limit=2500,
    rr_limit=1,
    timeds=pd.Series(control_deaccu.time),
)
labels_fixedSM5k, df_fixedSM5k = obj.precip_objects(
    array_fixedSM,
    grid_np,
    lon1=lon,
    lat1=lat,
    area_limit=2500,
    rr_limit=1,
    timeds=pd.Series(fixedSM_deaccu.time),
)

In [11]:
# Soil-moisture statistics for the Amazon box, called with the same
# thresholds as the precipitation-object detection.
df_SM_control5k = obj.SM_stats(
    array_control,
    arraySM_control,
    grid_np,
    lon1=lon,
    lat1=lat,
    area_limit=2500,
    rr_limit=1,
    timeds=pd.Series(control_deaccu.time),
)
df_SM_fixedSM5k = obj.SM_stats(
    array_fixedSM,
    arraySM_fixedSM,
    grid_np,
    lon1=lon,
    lat1=lat,
    area_limit=2500,
    rr_limit=1,
    timeds=pd.Series(fixedSM_deaccu.time),
)

In [12]:
def add_cols(df,dfsm):
    """Add derived columns to a precipitation-object table.

    The frame ``df`` is modified in place and also returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Object table with the columns ``longitude``, ``time``, ``mean`` and
        ``area``. Non-numeric entries in the numeric columns become NaN.
    dfsm : pandas.DataFrame
        Soil-moisture statistics with the columns ``mean``, ``max`` and
        ``std``; values are matched to ``df`` by index.

    Returns
    -------
    pandas.DataFrame
        ``df`` with the added columns ``time_zone``, ``local_time``,
        ``mean_ob``, ``area_ob``, ``radius``, ``mean_SM``, ``max_SM`` and
        ``std_SM``.
    """
    # Hour offset from the longitude: one zone per 15 degrees.
    df['time_zone'] = np.round(pd.to_numeric(df['longitude'], errors='coerce') / 15)
    # to_timedelta takes float hours directly and maps NaN offsets to NaT,
    # unlike a cast to the hour-resolution 'timedelta64[h]' dtype.
    df['local_time'] = df['time'] + pd.to_timedelta(df['time_zone'], unit='h')

    df['mean_ob'] = np.round(pd.to_numeric(df['mean'], errors='coerce'), 2)
    df['area_ob'] = np.round(pd.to_numeric(df['area'], errors='coerce'), 2)
    # Radius of the circle that has the same area as the object.
    df['radius'] = np.sqrt(df['area_ob'] / np.pi)

    # Soil-moisture statistics, taken from the companion table.
    df['mean_SM'] = pd.to_numeric(dfsm['mean'], errors='coerce')
    df['max_SM'] = pd.to_numeric(dfsm['max'], errors='coerce')
    df['std_SM'] = pd.to_numeric(dfsm['std'], errors='coerce')
    return df

In [13]:
# Attach the derived columns and soil-moisture statistics to each Amazon
# object table.
df_control5k = add_cols(df_control5k, df_SM_control5k)
df_fixedSM5k = add_cols(df_fixedSM5k, df_SM_fixedSM5k)

# Persist the completed tables as pickles in the working directory.
df_control5k.to_pickle('df_ob_control5k_Amazon.pkl')
df_fixedSM5k.to_pickle('df_ob_fixedSM5k_Amazon.pkl')

In [14]:
## *************** Limits for SESA region  ***************
# Bounding box: longitudes xi..xf, latitudes yi..yf.
xi, xf = -64, -50
yi, yf = -26, -15

# Drop the Amazon-sized arrays before building the SESA ones.
del grid_np, lon, lat
grid_np = grid_area.sel(lon=slice(xi, xf), lat=slice(yi, yf)).cell_area.values / 1e6
lon = control_deaccu['lon'].sel(lon=slice(xi, xf)).values
lat = control_deaccu['lat'].sel(lat=slice(yi, yf)).values

#Precipitation
del array_control, array_fixedSM
array_sesa = control_deaccu.tot_prec.sel(lon=slice(xi, xf), lat=slice(yi, yf))
# True for every time step whose whole lat/lon field is exactly 0 ...
all_zeros = (array_sesa == 0).all(dim=('lat', 'lon'))
# ... and for those time steps every grid point is replaced by 1.
array_sesa = array_sesa.where(~all_zeros, 1)
array_control = array_sesa.values

# Same treatment for the fixed-soil-moisture experiment.
array_fsesa = fixedSM_deaccu.tot_prec.sel(lon=slice(xi, xf), lat=slice(yi, yf))
all_fzeros = (array_fsesa == 0).all(dim=('lat', 'lon'))
array_fsesa = array_fsesa.where(~all_fzeros, 1)
array_fixedSM = array_fsesa.values

#Soil moisture
# w_so summed over the first five depth levels; points where fr_land is not
# > 0 are set to -1 before cropping to the box.
del arraySM_control, arraySM_fixedSM
arraySM_control = (
    control_land.isel(depth=slice(0, 5)).sum('depth').w_so
    .where(frland.fr_land > 0, -1)
    .sel(lon=slice(xi, xf), lat=slice(yi, yf))
    .values
    .squeeze()
)
arraySM_fixedSM = (
    fixedSM_land.isel(depth=slice(0, 5)).sum('depth').w_so
    .where(frland.fr_land > 0, -1)
    .sel(lon=slice(xi, xf), lat=slice(yi, yf))
    .values
    .squeeze()
)

In [15]:
# Detect precipitation objects in both experiments over the SESA box,
# with the same thresholds as used for the Amazon box.
labels_control5k_sesa, df_control5k_sesa = obj.precip_objects(
    array_control,
    grid_np,
    lon1=lon,
    lat1=lat,
    area_limit=2500,
    rr_limit=1,
    timeds=pd.Series(control_deaccu.time),
)
labels_fixedSM5k_sesa, df_fixedSM5k_sesa = obj.precip_objects(
    array_fixedSM,
    grid_np,
    lon1=lon,
    lat1=lat,
    area_limit=2500,
    rr_limit=1,
    timeds=pd.Series(fixedSM_deaccu.time),
)

In [16]:
# Soil-moisture statistics for the SESA box, called with the same
# thresholds as the precipitation-object detection.
df_SM_control5k_sesa = obj.SM_stats(
    array_control,
    arraySM_control,
    grid_np,
    lon1=lon,
    lat1=lat,
    area_limit=2500,
    rr_limit=1,
    timeds=pd.Series(control_deaccu.time),
)
df_SM_fixedSM5k_sesa = obj.SM_stats(
    array_fixedSM,
    arraySM_fixedSM,
    grid_np,
    lon1=lon,
    lat1=lat,
    area_limit=2500,
    rr_limit=1,
    timeds=pd.Series(fixedSM_deaccu.time),
)

In [23]:
### ********* SESA **********
# Attach the derived columns and soil-moisture statistics to each SESA
# object table.
df_control5k_sesa = add_cols(df_control5k_sesa, df_SM_control5k_sesa)
df_fixedSM5k_sesa = add_cols(df_fixedSM5k_sesa, df_SM_fixedSM5k_sesa)

# Persist the completed tables as pickles in the working directory.
df_control5k_sesa.to_pickle('df_ob_control5k_SESA.pkl')
df_fixedSM5k_sesa.to_pickle('df_ob_fixedSM5k_SESA.pkl')

In [18]:
#df_SM_control5k

In [19]:
# create masks for OCS
# One mask per entry along the first axis of the label arrays:
# every non-zero label becomes 1, zeros are kept.
mask_labels_5k_c = [np.where(frame != 0, 1, frame) for frame in labels_control5k]
mask_labels_5k_fSM = [np.where(frame != 0, 1, frame) for frame in labels_fixedSM5k]

In [20]:
# create masks for OCS ****SESA
# One mask per entry along the first axis of the SESA label arrays:
# every non-zero label becomes 1, zeros are kept.
# Each list iterates over its own SESA label array, so its length no longer
# depends on the Amazon label arrays.
mask_labels_5k_c_sesa = [np.where(frame != 0, 1, frame) for frame in labels_control5k_sesa]
mask_labels_5k_fSM_sesa = [np.where(frame != 0, 1, frame) for frame in labels_fixedSM5k_sesa]

In [21]:
# create datasets for OCS mask and labels
# The Amazon bounds are written out literally here because xi/xf/yi/yf
# hold the SESA bounds by the time this cell runs.
ds_ob_control = xr.Dataset(
    data_vars={
        "mask_obs": (("time", "lat", "lon"), mask_labels_5k_c),
        "labels_ocs": (("time", "lat", "lon"), labels_control5k),
    },
    coords={
        "time": control_deaccu.time,
        "lat": ("lat", control_deaccu.sel(lat=slice(-21, 6)).lat.values),
        "lon": ("lon", control_deaccu.sel(lon=slice(-80, -50)).lon.values),
    },
)
ds_ob_fixedSM = xr.Dataset(
    data_vars={
        "mask_obs": (("time", "lat", "lon"), mask_labels_5k_fSM),
        "labels_ocs": (("time", "lat", "lon"), labels_fixedSM5k),
    },
    coords={
        "time": fixedSM_deaccu.time,
        "lat": ("lat", fixedSM_deaccu.sel(lat=slice(-21, 6)).lat.values),
        "lon": ("lon", fixedSM_deaccu.sel(lon=slice(-80, -50)).lon.values),
    },
)

# Write both datasets to NetCDF in the working directory.
ds_ob_control.to_netcdf('ds_ob_control5k_Amazon.nc')
ds_ob_fixedSM.to_netcdf('ds_ob_fixedSM5k_Amazon.nc')

In [22]:
# create datasets for OCS mask and labels ************** SESA ***************
# Coordinates come from the current xi/xf/yi/yf, which must still hold the
# SESA bounds when this cell runs.
ds_ob_control_sesa = xr.Dataset(
    data_vars={
        "mask_obs": (("time", "lat", "lon"), mask_labels_5k_c_sesa),
        "labels_ocs": (("time", "lat", "lon"), labels_control5k_sesa),
    },
    coords={
        "time": control_deaccu.time,
        "lat": ("lat", control_deaccu.sel(lat=slice(yi, yf)).lat.values),
        "lon": ("lon", control_deaccu.sel(lon=slice(xi, xf)).lon.values),
    },
)
ds_ob_fixedSM_sesa = xr.Dataset(
    data_vars={
        "mask_obs": (("time", "lat", "lon"), mask_labels_5k_fSM_sesa),
        "labels_ocs": (("time", "lat", "lon"), labels_fixedSM5k_sesa),
    },
    coords={
        "time": fixedSM_deaccu.time,
        "lat": ("lat", fixedSM_deaccu.sel(lat=slice(yi, yf)).lat.values),
        "lon": ("lon", fixedSM_deaccu.sel(lon=slice(xi, xf)).lon.values),
    },
)

# Write both datasets to NetCDF in the working directory.
ds_ob_control_sesa.to_netcdf('ds_ob_control5k_SESA.nc')
ds_ob_fixedSM_sesa.to_netcdf('ds_ob_fixedSM5k_SESA.nc')