In [1]:
import xarray as xr
import pandas as pd
import numpy as np

In [2]:
import tobac

In [3]:
# Silence a few noisy warning categories for the rest of the notebook.
import warnings

# append=True places these filters at the end of the filter list, so filters
# registered earlier keep precedence over them.
for _category in (UserWarning, RuntimeWarning, FutureWarning):
    warnings.filterwarnings('ignore', category=_category, append=True)

# Registered without append, i.e. inserted at the front of the filter list.
warnings.filterwarnings('ignore', category=pd.io.pytables.PerformanceWarning)

In [4]:
def update_lon(ds,longitude='lon'):
    """Wrap a longitude coordinate into [-180, 180) and sort the data along it.

    Parameters
    ----------
    ds : xarray.Dataset or xarray.DataArray
        Object that carries the longitude coordinate.
    longitude : str, default 'lon'
        Name of the longitude coordinate to wrap and to sort by.

    Returns
    -------
    Same type as ``ds``
        A new object with the wrapped longitudes in ascending order. The input
        object is left untouched.
    """
    wrapped = (ds.coords[longitude] + 180) % 360 - 180
    # Sort by the coordinate that was actually wrapped. Sorting by ``ds.lon``
    # unconditionally would ignore (or fail for) any other coordinate name.
    return ds.assign_coords({longitude: wrapped}).sortby(longitude)

In [5]:
# Open the IMERG files, one per year (hourly data according to the file names).
# Further down the notebook reads the `precip` variable and the time/lat/lon
# coordinates of these datasets.
imerg2015 = xr.open_dataset('../../IMERG/IMERG_hourly_2015.nc')
imerg2016 = xr.open_dataset('../../IMERG/IMERG_hourly_2016.nc')
imerg2017 = xr.open_dataset('../../IMERG/IMERG_hourly_2017.nc')
imerg2018 = xr.open_dataset('../../IMERG/IMERG_hourly_2018.nc')
imerg2019 = xr.open_dataset('../../IMERG/IMERG_hourly_2019.nc')
imerg2020 = xr.open_dataset('../../IMERG/IMERG_hourly_2020.nc')

#### Define helper functions

In [6]:
def to_dataset(array,xarr,time=True,varname='mask'):
    """Wrap ``array`` in an ``xr.Dataset`` that borrows its coordinates from ``xarr``.

    Parameters
    ----------
    array : array-like
        Values for the new variable, laid out as (time, lat, lon) when ``time``
        is True and as (lat, lon) otherwise.
    xarr : xarray object
        Source of the ``lat``/``lon`` (and, when ``time`` is True, ``time``)
        coordinate values.
    time : bool, default True
        Whether the data has a leading time dimension.
    varname : str, default 'mask'
        Name of the variable inside the returned dataset.

    Returns
    -------
    xr.Dataset
        Dataset holding the single variable ``varname``.
    """
    dims = ["lat", "lon"]
    coords = {"lat": (["lat"], xarr.lat.values), "lon": (["lon"], xarr.lon.values)}

    # Deliberately an equality test: only values equal to True select the
    # time-aware layout, exactly as before.
    if time == True:
        dims = ["time"] + dims
        coords = {"time": (["time"], xarr.time.values), **coords}

    return xr.Dataset({varname: (dims, array)}, coords=coords)

In [7]:
def sel_by_lifetime(Track,hours=3,operator='>'):
    """Keep the rows of the cells whose number of time steps passes a threshold.

    The lifetime of a cell is measured as the count of non-null ``time_cell``
    entries it has in ``Track`` (one entry per hour for hourly data).

    Parameters
    ----------
    Track : pandas.DataFrame
        Tracking table with at least the columns ``cell`` and ``time_cell``.
    hours : int, default 3
        Threshold on the number of time steps per cell.
    operator : {'>', '<'}, default '>'
        Strict comparison applied between the step count and ``hours``.

    Returns
    -------
    pandas.DataFrame
        Rows of the selected cells with a fresh index; the previous index is
        kept as a column. Cells with an id <= 0 are always discarded
        (presumably the "untracked" marker of the tracker - confirm).

    Raises
    ------
    ValueError
        If ``operator`` is neither '<' nor '>'.
    """
    steps_per_cell = Track.groupby("cell")["time_cell"].count()

    if operator == '<':
        passing = steps_per_cell[steps_per_cell.values < hours]
    elif operator == '>':
        passing = steps_per_cell[steps_per_cell.values > hours]
    else:
        raise ValueError('Invalid operator. Choose either "<" or ">".')

    # The group keys are the cell ids; only strictly positive ids are kept.
    cell_ids = passing.index[passing.index > 0]
    return Track[Track["cell"].isin(cell_ids)].reset_index()

## Load objects

In [8]:
## Load the per-year feature tables (pandas DataFrames stored as pickles).
## NOTE(review): unpickling can execute arbitrary code - only load pickle
## files that come from a trusted source.

## *************** Amazon region  ***************
df_h2015 = pd.read_pickle('pkl_files/df_2015_imergmax.pkl')
df_h2016 = pd.read_pickle('pkl_files/df_2016_imergmax.pkl') 
df_h2017 = pd.read_pickle('pkl_files/df_2017_imergmax.pkl') 
df_h2018 = pd.read_pickle('pkl_files/df_2018_imergmax.pkl') 
df_h2019 = pd.read_pickle('pkl_files/df_2019_imergmax.pkl') 
df_h2020 = pd.read_pickle('pkl_files/df_2020_imergmax.pkl') 


## *************** SESA region  ***************
df_h2015_sesa = pd.read_pickle('pkl_files/df_2015_sesa_imergmax.pkl')
df_h2016_sesa = pd.read_pickle('pkl_files/df_2016_sesa_imergmax.pkl') 
df_h2017_sesa = pd.read_pickle('pkl_files/df_2017_sesa_imergmax.pkl') 
df_h2018_sesa = pd.read_pickle('pkl_files/df_2018_sesa_imergmax.pkl') 
df_h2019_sesa = pd.read_pickle('pkl_files/df_2019_sesa_imergmax.pkl') 
df_h2020_sesa = pd.read_pickle('pkl_files/df_2020_sesa_imergmax.pkl')

In [9]:
# Rename columns to prepare the feature tables for the TOBAC tracking step.

def _prepare_tobac_features(features):
    """Return ``features`` with a 1-based ``feature`` id and renamed position columns.

    The existing index is discarded, a running ``feature`` number starting at 1
    is inserted as the first column, and the columns ``y``/``x`` are renamed to
    ``hdim_1``/``hdim_2``.
    """
    numbered = features.reset_index(drop=True).reset_index().rename(columns={'index': 'feature'})
    numbered['feature'] += 1  # the running index starts at 0, the feature ids at 1
    return numbered.rename(columns={"y": "hdim_1", "x": "hdim_2"})


## *************** Amazon region  ***************
df_h2015 = _prepare_tobac_features(df_h2015)
df_h2016 = _prepare_tobac_features(df_h2016)
df_h2017 = _prepare_tobac_features(df_h2017)
df_h2018 = _prepare_tobac_features(df_h2018)
df_h2019 = _prepare_tobac_features(df_h2019)
df_h2020 = _prepare_tobac_features(df_h2020)

## *************** SESA region  ***************
df_h2015_sesa = _prepare_tobac_features(df_h2015_sesa)
df_h2016_sesa = _prepare_tobac_features(df_h2016_sesa)
df_h2017_sesa = _prepare_tobac_features(df_h2017_sesa)
df_h2018_sesa = _prepare_tobac_features(df_h2018_sesa)
df_h2019_sesa = _prepare_tobac_features(df_h2019_sesa)
df_h2020_sesa = _prepare_tobac_features(df_h2020_sesa)


In [10]:
# Add the "frame" column: rows sharing the same "time" value get the same
# 0-based group number (groups are numbered in sorted order of "time").
# NOTE(review): frames are numbered over the times present in each table only,
# so an hour without any feature leaves no gap in "frame" - confirm that this
# is what the linking step expects.
for _features in (
    df_h2015, df_h2016, df_h2017, df_h2018, df_h2019, df_h2020,
    df_h2015_sesa, df_h2016_sesa, df_h2017_sesa,
    df_h2018_sesa, df_h2019_sesa, df_h2020_sesa,
):
    _features['frame'] = _features.groupby('time').ngroup()

## Tracking

In [11]:
# Grid spacing of the input data (in metres) and time step (in seconds); both
# are passed to tobac.linking_trackpy below.
# NOTE(review): the value is 9999 m while the original note says 10 km -
# confirm whether 10000 was intended.
dxy = 9999 #(10km)
dt = 3600 # 3600 s = 1 h; the original note here read "200" (presumably an older value)

In [12]:
# Keyword arguments forwarded to the linking step (tobac.linking_trackpy).
parameters_linking = {
    'method_linking': 'predict',
    'adaptive_stop': 0.2,
    'adaptive_step': 0.95,
    'subnetwork_size': 100,
    'memory': 0,
    'time_cell_min': 5*60,
    'v_max': 10,
}

In [13]:
# Link the Amazon-region features of each year into tracks. Each call receives
# the feature table, that year's IMERG `precip` field, the time step `dt`, the
# grid spacing `dxy` and the linking keywords. The resulting tables are read
# further down for their `cell` and `time_cell` columns.
# The trailing ";" only suppresses the cell output.
Track_h2015 = tobac.linking_trackpy(df_h2015,imerg2015.precip,dt=dt,dxy=dxy,**parameters_linking);
Track_h2016 = tobac.linking_trackpy(df_h2016,imerg2016.precip,dt=dt,dxy=dxy,**parameters_linking);
Track_h2017 = tobac.linking_trackpy(df_h2017,imerg2017.precip,dt=dt,dxy=dxy,**parameters_linking);
Track_h2018 = tobac.linking_trackpy(df_h2018,imerg2018.precip,dt=dt,dxy=dxy,**parameters_linking);
Track_h2019 = tobac.linking_trackpy(df_h2019,imerg2019.precip,dt=dt,dxy=dxy,**parameters_linking);
Track_h2020 = tobac.linking_trackpy(df_h2020,imerg2020.precip,dt=dt,dxy=dxy,**parameters_linking);

Frame 2207: 26 trajectories present.


In [14]:
# Same linking step for the SESA-region feature tables; the IMERG `precip`
# field of the matching year and the same dt/dxy/linking keywords are used.
Track_h2015_sesa = tobac.linking_trackpy(df_h2015_sesa,imerg2015.precip,dt=dt,dxy=dxy,**parameters_linking);
Track_h2016_sesa = tobac.linking_trackpy(df_h2016_sesa,imerg2016.precip,dt=dt,dxy=dxy,**parameters_linking);
Track_h2017_sesa = tobac.linking_trackpy(df_h2017_sesa,imerg2017.precip,dt=dt,dxy=dxy,**parameters_linking);
Track_h2018_sesa = tobac.linking_trackpy(df_h2018_sesa,imerg2018.precip,dt=dt,dxy=dxy,**parameters_linking);
Track_h2019_sesa = tobac.linking_trackpy(df_h2019_sesa,imerg2019.precip,dt=dt,dxy=dxy,**parameters_linking);
Track_h2020_sesa = tobac.linking_trackpy(df_h2020_sesa,imerg2020.precip,dt=dt,dxy=dxy,**parameters_linking);

Frame 1183: 1 trajectories present.


## Lifetime

In [15]:
## Keep only the cells tracked for more than 3 hourly time steps
## (the default arguments of sel_by_lifetime).

## *************** Amazon region  ***************
(sel_cells_h2015, sel_cells_h2016, sel_cells_h2017,
 sel_cells_h2018, sel_cells_h2019, sel_cells_h2020) = map(
    sel_by_lifetime,
    (Track_h2015, Track_h2016, Track_h2017,
     Track_h2018, Track_h2019, Track_h2020),
)

## *************** SESA region  ***************
(sel_cells_h2015_sesa, sel_cells_h2016_sesa, sel_cells_h2017_sesa,
 sel_cells_h2018_sesa, sel_cells_h2019_sesa, sel_cells_h2020_sesa) = map(
    sel_by_lifetime,
    (Track_h2015_sesa, Track_h2016_sesa, Track_h2017_sesa,
     Track_h2018_sesa, Track_h2019_sesa, Track_h2020_sesa),
)

#### Redefine masks based on lifetime

In [16]:
### Load the yearly object-label masks of the SESA region; their `labels_ocs`
### variable is the one read by subset_mask.
ds_ocs_sesa_2015 = xr.open_dataset('imerg_masks/ds_ocs_hIMERG2015_sesa_sizeT2500i1.nc')
ds_ocs_sesa_2016 = xr.open_dataset('imerg_masks/ds_ocs_hIMERG2016_sesa_sizeT2500i1.nc')
ds_ocs_sesa_2017 = xr.open_dataset('imerg_masks/ds_ocs_hIMERG2017_sesa_sizeT2500i1.nc')
ds_ocs_sesa_2018 = xr.open_dataset('imerg_masks/ds_ocs_hIMERG2018_sesa_sizeT2500i1.nc')
ds_ocs_sesa_2019 = xr.open_dataset('imerg_masks/ds_ocs_hIMERG2019_sesa_sizeT2500i1.nc')
ds_ocs_sesa_2020 = xr.open_dataset('imerg_masks/ds_ocs_hIMERG2020_sesa_sizeT2500i1.nc')

In [17]:
# Load the yearly object-label masks of the Amazon region ("AB" files); their
# `labels_ocs` variable is the one read by subset_mask.
ds_ocs_2015 = xr.open_dataset('imerg_masks/ds_ocs_hIMERG2015_AB_sizeT2500i1.nc')
ds_ocs_2016 = xr.open_dataset('imerg_masks/ds_ocs_hIMERG2016_AB_sizeT2500i1.nc')
ds_ocs_2017 = xr.open_dataset('imerg_masks/ds_ocs_hIMERG2017_AB_sizeT2500i1.nc')
ds_ocs_2018 = xr.open_dataset('imerg_masks/ds_ocs_hIMERG2018_AB_sizeT2500i1.nc')
ds_ocs_2019 = xr.open_dataset('imerg_masks/ds_ocs_hIMERG2019_AB_sizeT2500i1.nc')
ds_ocs_2020 = xr.open_dataset('imerg_masks/ds_ocs_hIMERG2020_AB_sizeT2500i1.nc')

In [18]:
# Numeric copy of the "idx" column (Amazon region); entries that cannot be
# parsed as numbers become NaN. subset_mask compares these ids with the labels.
for _cells in (sel_cells_h2015, sel_cells_h2016, sel_cells_h2017,
               sel_cells_h2018, sel_cells_h2019, sel_cells_h2020):
    _cells['idxn'] = pd.to_numeric(_cells['idx'], errors='coerce')

In [19]:
# Numeric copy of the "idx" column (SESA region); entries that cannot be
# parsed as numbers become NaN. subset_mask compares these ids with the labels.
for _cells in (sel_cells_h2015_sesa, sel_cells_h2016_sesa, sel_cells_h2017_sesa,
               sel_cells_h2018_sesa, sel_cells_h2019_sesa, sel_cells_h2020_sesa):
    _cells['idxn'] = pd.to_numeric(_cells['idx'], errors='coerce')

In [20]:
def subset_mask(df,ds_mask):
    """Keep only the labelled objects of ``ds_mask`` that are listed in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a ``time`` column and the numeric object ids in ``idxn``.
    ds_mask : xarray.Dataset
        Dataset whose ``labels_ocs`` variable holds the object labels on a
        (time, lat, lon) grid.

    Returns
    -------
    xarray.DataArray
        ``labels_ocs`` restricted to the time steps present in ``df``; grid
        points whose label is not among that time step's ``idxn`` values are
        masked out (NaN).
    """
    # Restrict the label field to the time steps that occur in the table.
    labels_in_time = ds_mask.sel(time=df['time'].unique())

    # One boolean slab per time step: True where the label belongs to an
    # object listed for that step.
    keep = []
    for step in labels_in_time.time.values:
        wanted_ids = df[df.time==step].idxn.values
        keep.append(np.isin(labels_in_time.sel(time=step)['labels_ocs'], wanted_ids))

    # Give the boolean stack the coordinates of the label field ...
    keep_ds = to_dataset(keep, labels_in_time)

    # ... and use it to blank every label that was not selected.
    return labels_in_time.labels_ocs.where(keep_ds.mask==True)

In [21]:
# Label masks reduced to the lifetime-selected cells, SESA region (one per year).
nMask_2015_sesa = subset_mask(sel_cells_h2015_sesa,ds_ocs_sesa_2015)
nMask_2016_sesa = subset_mask(sel_cells_h2016_sesa,ds_ocs_sesa_2016)
nMask_2017_sesa = subset_mask(sel_cells_h2017_sesa,ds_ocs_sesa_2017)
nMask_2018_sesa = subset_mask(sel_cells_h2018_sesa,ds_ocs_sesa_2018)
nMask_2019_sesa = subset_mask(sel_cells_h2019_sesa,ds_ocs_sesa_2019)
nMask_2020_sesa = subset_mask(sel_cells_h2020_sesa,ds_ocs_sesa_2020)

In [22]:
# Label masks reduced to the lifetime-selected cells, Amazon region (one per year).
nMask_2015 = subset_mask(sel_cells_h2015,ds_ocs_2015)
nMask_2016 = subset_mask(sel_cells_h2016,ds_ocs_2016)
nMask_2017 = subset_mask(sel_cells_h2017,ds_ocs_2017)
nMask_2018 = subset_mask(sel_cells_h2018,ds_ocs_2018)
nMask_2019 = subset_mask(sel_cells_h2019,ds_ocs_2019)
nMask_2020 = subset_mask(sel_cells_h2020,ds_ocs_2020)

In [23]:
# nMask_2015.to_netcdf('imerg_masks/nMask_AB_2015.nc'); 
# nMask_2016.to_netcdf('imerg_masks/nMask_AB_2016.nc')
# nMask_2017.to_netcdf('imerg_masks/nMask_AB_2017.nc'); 
# nMask_2018.to_netcdf('imerg_masks/nMask_AB_2018.nc')
# nMask_2019.to_netcdf('imerg_masks/nMask_AB_2019.nc'); 
# nMask_2020.to_netcdf('imerg_masks/nMask_AB_2020.nc')

In [24]:
# nMask_2015_sesa.to_netcdf('imerg_masks/nMask_sesa_2015.nc'); 
# nMask_2016_sesa.to_netcdf('imerg_masks/nMask_sesa_2016.nc')
# nMask_2017_sesa.to_netcdf('imerg_masks/nMask_sesa_2017.nc'); 
# nMask_2018_sesa.to_netcdf('imerg_masks/nMask_sesa_2018.nc')
# nMask_2019_sesa.to_netcdf('imerg_masks/nMask_sesa_2019.nc'); 
# nMask_2020_sesa.to_netcdf('imerg_masks/nMask_sesa_2020.nc')

#### Add stage information

In [25]:
def add_stage_convective_system(df_tracked):
    """Label every row of a tracked cell with its life-cycle stage.

    Rows are assumed to be in chronological order within each cell. With
    ``n`` the number of non-null ``time_cell`` entries of a cell:

    * the first ``w`` rows are labelled ``'t_i'`` (initial stage),
    * the last ``w`` rows are labelled ``'t_f'`` (final stage),
    * everything in between is labelled ``'t_m'`` (intermediate stage),

    where ``w`` is 2 for cells with fewer than 8 steps and 3 otherwise. Cells
    shorter than ``2 * w`` steps get no ``'t_m'`` rows.

    Parameters
    ----------
    df_tracked : pandas.DataFrame
        Tracking table with at least the columns ``cell`` and ``time_cell``.
        Any index is accepted; it is not used for the labelling.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df_tracked`` with a fresh 0..N-1 index and an extra
        ``stage`` column. The input table is not modified.
    """
    dfn = df_tracked.reset_index(drop=True)
    by_cell = dfn.groupby('cell')

    # Per-row view of the cell length and of the row's position inside its
    # cell, computed once for the whole table instead of once per row.
    n_steps = by_cell['time_cell'].transform('count').to_numpy()
    position = by_cell.cumcount().to_numpy()

    # Width of the initial/final stage: 2 steps for short cells, 3 otherwise.
    edge = np.where(n_steps - 1 < 7, 2, 3)
    # Number of intermediate steps; never negative for very short cells.
    n_mid = np.clip(n_steps - 2 * edge, 0, None)

    dfn['stage'] = np.where(
        position < edge, 't_i',
        np.where(position < edge + n_mid, 't_m', 't_f'),
    )
    return dfn

In [26]:
# Attach the life-cycle stage to every lifetime-selected table.

## *************** Amazon region  ***************
(df_stage_h2015, df_stage_h2016, df_stage_h2017,
 df_stage_h2018, df_stage_h2019, df_stage_h2020) = map(
    add_stage_convective_system,
    (sel_cells_h2015, sel_cells_h2016, sel_cells_h2017,
     sel_cells_h2018, sel_cells_h2019, sel_cells_h2020),
)

## *************** SESA region  ***************
(df_stage_h2015_sesa, df_stage_h2016_sesa, df_stage_h2017_sesa,
 df_stage_h2018_sesa, df_stage_h2019_sesa, df_stage_h2020_sesa) = map(
    add_stage_convective_system,
    (sel_cells_h2015_sesa, sel_cells_h2016_sesa, sel_cells_h2017_sesa,
     sel_cells_h2018_sesa, sel_cells_h2019_sesa, sel_cells_h2020_sesa),
)


### Add directions

In [20]:
import tools_for_evolution as tools

In [21]:
### Add the propagation direction of every cell and its categorical label

## *************** Amazon region  ***************
# tools.calculate_direction is applied to each cell's rows; the group index it
# leaves behind is dropped again.
(df1_stage_h2015, df1_stage_h2016, df1_stage_h2017,
 df1_stage_h2018, df1_stage_h2019, df1_stage_h2020) = (
    _staged.groupby('cell').apply(tools.calculate_direction).reset_index(drop=True)
    for _staged in (df_stage_h2015, df_stage_h2016, df_stage_h2017,
                    df_stage_h2018, df_stage_h2019, df_stage_h2020)
)

# Map every "direction" value to its category with tools.categorize_direction.
for _with_direction in (df1_stage_h2015, df1_stage_h2016, df1_stage_h2017,
                        df1_stage_h2018, df1_stage_h2019, df1_stage_h2020):
    _with_direction['categorized_direction'] = _with_direction['direction'].apply(
        tools.categorize_direction)


In [22]:
## *************** SESA region  ***************
# tools.calculate_direction is applied to each cell's rows; the group index it
# leaves behind is dropped again.
(df1_stage_h2015_sesa, df1_stage_h2016_sesa, df1_stage_h2017_sesa,
 df1_stage_h2018_sesa, df1_stage_h2019_sesa, df1_stage_h2020_sesa) = (
    _staged.groupby('cell').apply(tools.calculate_direction).reset_index(drop=True)
    for _staged in (df_stage_h2015_sesa, df_stage_h2016_sesa, df_stage_h2017_sesa,
                    df_stage_h2018_sesa, df_stage_h2019_sesa, df_stage_h2020_sesa)
)

# Map every "direction" value to its category with tools.categorize_direction.
for _with_direction in (df1_stage_h2015_sesa, df1_stage_h2016_sesa, df1_stage_h2017_sesa,
                        df1_stage_h2018_sesa, df1_stage_h2019_sesa, df1_stage_h2020_sesa):
    _with_direction['categorized_direction'] = _with_direction['direction'].apply(
        tools.categorize_direction)

### Add topography details related to OCs

In [23]:
import tools_for_tobac as toolst

In [24]:
# Grid-cell areas: wrap and sort the longitudes with the update_lon helper
# defined near the top of the notebook, then interpolate onto the IMERG
# lat/lon grid.
grid_area = update_lon(xr.open_dataset('../gridarea_dom03r10.nc'))
grid_area = grid_area.interp(lat = imerg2015.lat.values,lon = imerg2015.lon.values)

In [25]:
# NOTE(review): hard-coded absolute path - the notebook only runs where this
# directory exists; consider a configurable data directory.
path='/scratch/wcq7pz/exp_levante_post/'
## Open the topography file and wrap/sort its longitudes with the update_lon
## helper defined near the top of the notebook.
topo5km = update_lon(xr.open_dataset(path+'topography_dom03_5km.nc'))

In [26]:
# Interpolate the topography onto the IMERG lat/lon grid (taken from the 2020
# file; presumably identical for all years - confirm).
topo10km = topo5km.interp(lat = imerg2020.lat.values,lon = imerg2020.lon.values)

In [34]:
# Repeat the (time-independent) topography along the time axis of each IMERG year.
_topo_field = topo10km['topography_c']
(ds_topo_2015, ds_topo_2016, ds_topo_2017,
 ds_topo_2018, ds_topo_2019, ds_topo_2020) = (
    _topo_field.broadcast_like(_imerg['time'])
    for _imerg in (imerg2015, imerg2016, imerg2017,
                   imerg2018, imerg2019, imerg2020)
)

In [58]:
# Amazon region: keep only the topography time steps that also occur in the
# lifetime-filtered mask of the same year (all other steps are dropped).
ab_topo2015 =  ds_topo_2015.where(ds_topo_2015['time'].isin(nMask_2015.time), drop=True)
ab_topo2016 =  ds_topo_2016.where(ds_topo_2016['time'].isin(nMask_2016.time), drop=True)
ab_topo2017 =  ds_topo_2017.where(ds_topo_2017['time'].isin(nMask_2017.time), drop=True)
ab_topo2018 =  ds_topo_2018.where(ds_topo_2018['time'].isin(nMask_2018.time), drop=True)
ab_topo2019 =  ds_topo_2019.where(ds_topo_2019['time'].isin(nMask_2019.time), drop=True)
ab_topo2020 =  ds_topo_2020.where(ds_topo_2020['time'].isin(nMask_2020.time), drop=True)

In [35]:
## *************** SESA region  ***************
# Keep only the topography time steps that also occur in the lifetime-filtered
# SESA mask of the same year (all other steps are dropped).
sesa_topo_2015 =  ds_topo_2015.where(ds_topo_2015['time'].isin(nMask_2015_sesa.time), drop=True)
sesa_topo_2016 =  ds_topo_2016.where(ds_topo_2016['time'].isin(nMask_2016_sesa.time), drop=True)
sesa_topo_2017 =  ds_topo_2017.where(ds_topo_2017['time'].isin(nMask_2017_sesa.time), drop=True)
sesa_topo_2018 =  ds_topo_2018.where(ds_topo_2018['time'].isin(nMask_2018_sesa.time), drop=True)
sesa_topo_2019 =  ds_topo_2019.where(ds_topo_2019['time'].isin(nMask_2019_sesa.time), drop=True)
sesa_topo_2020 =  ds_topo_2020.where(ds_topo_2020['time'].isin(nMask_2020_sesa.time), drop=True)

In [59]:
### Extract the Amazon topography as plain numpy arrays, cropped to the
### lon/lat extent of each year's mask (np.squeeze drops length-1 axes).
## History note kept from the original cell: a `.where((maskAB.Band1>0),-1)`
## step after slicing was removed.
arrayTOPO_ab_2015 = np.squeeze(ab_topo2015.sel(
    lon=slice(nMask_2015.lon.min(),nMask_2015.lon.max()),lat=slice(nMask_2015.lat.min(),nMask_2015.lat.max())).values)
arrayTOPO_ab_2016 = np.squeeze(ab_topo2016.sel(
    lon=slice(nMask_2016.lon.min(),nMask_2016.lon.max()),lat=slice(nMask_2016.lat.min(),nMask_2016.lat.max())).values)
arrayTOPO_ab_2017 = np.squeeze(ab_topo2017.sel(
    lon=slice(nMask_2017.lon.min(),nMask_2017.lon.max()),lat=slice(nMask_2017.lat.min(),nMask_2017.lat.max())).values)
arrayTOPO_ab_2018 = np.squeeze(ab_topo2018.sel(
    lon=slice(nMask_2018.lon.min(),nMask_2018.lon.max()),lat=slice(nMask_2018.lat.min(),nMask_2018.lat.max())).values)
arrayTOPO_ab_2019 = np.squeeze(ab_topo2019.sel(
    lon=slice(nMask_2019.lon.min(),nMask_2019.lon.max()),lat=slice(nMask_2019.lat.min(),nMask_2019.lat.max())).values)
arrayTOPO_ab_2020 = np.squeeze(ab_topo2020.sel(
    lon=slice(nMask_2020.lon.min(),nMask_2020.lon.max()),lat=slice(nMask_2020.lat.min(),nMask_2020.lat.max())).values)

In [37]:
######### ********* SESA REGION

def make_topo_array(topo_arr,mask_arr):
    """Crop ``topo_arr`` to the lon/lat extent of ``mask_arr`` and return its values.

    Time steps whose cropped field is zero at every grid point are filled
    with 1 instead.

    Parameters
    ----------
    topo_arr : xarray.DataArray
        Field with ``lat`` and ``lon`` coordinates (plus any further dimensions).
    mask_arr : xarray object
        Provides the lon/lat bounds through its ``lon`` and ``lat`` coordinates.

    Returns
    -------
    numpy.ndarray
        Values of the cropped (and possibly filled) field.
    """
    lon_window = slice(mask_arr.lon.min(), mask_arr.lon.max())
    lat_window = slice(mask_arr.lat.min(), mask_arr.lat.max())
    cropped = topo_arr.sel(lon=lon_window, lat=lat_window)

    # True wherever at least one grid point of the slab differs from zero;
    # slabs that are zero everywhere are replaced by 1.
    has_nonzero = ~(cropped == 0).all(dim=('lat', 'lon'))
    return cropped.where(has_nonzero, 1).values

In [38]:
# SESA topography as numpy arrays, cropped to the extent of each year's mask.
arrayTOPO_sesa_2015 = make_topo_array(sesa_topo_2015,nMask_2015_sesa)
arrayTOPO_sesa_2016 = make_topo_array(sesa_topo_2016,nMask_2016_sesa)
arrayTOPO_sesa_2017 = make_topo_array(sesa_topo_2017,nMask_2017_sesa)
arrayTOPO_sesa_2018 = make_topo_array(sesa_topo_2018,nMask_2018_sesa)
arrayTOPO_sesa_2019 = make_topo_array(sesa_topo_2019,nMask_2019_sesa)
arrayTOPO_sesa_2020 = make_topo_array(sesa_topo_2020,nMask_2020_sesa)

In [39]:
def sel_gridarea(gridarea,mask):
    """Return the ``cell_area`` values of ``gridarea`` inside the extent of ``mask``.

    The values are divided by 1e6 (presumably m^2 -> km^2; confirm the units
    of ``cell_area``).

    Parameters
    ----------
    gridarea : xarray.Dataset
        Dataset with a ``cell_area`` variable on ``lat``/``lon`` coordinates.
    mask : xarray object
        Provides the lon/lat bounds through its ``lon`` and ``lat`` coordinates.

    Returns
    -------
    numpy.ndarray
        Scaled cell areas of the cropped region.
    """
    lon_window = slice(mask.lon.min(), mask.lon.max())
    lat_window = slice(mask.lat.min(), mask.lat.max())
    cropped_area = gridarea.sel(lon=lon_window, lat=lat_window).cell_area
    return cropped_area.values / 1e6

In [46]:
# Topography statistics for SESA 2015 via toolst.mask_var (project module
# tools_for_tobac, not visible here). The result is read later for its
# 'mean', 'median', 'max' and 'std' columns.
df_topo_sesa_2015 = toolst.mask_var(nMask_2015_sesa,arrayTOPO_sesa_2015,
      sel_gridarea(grid_area,nMask_2015_sesa),lon1=nMask_2015_sesa.coords['lon'].values,
                        lat1=nMask_2015_sesa.coords['lat'].values,rr_limit=0,
                        timeds=pd.Series(nMask_2015_sesa.time))

In [48]:
# Topography statistics for SESA 2016 (same toolst.mask_var call pattern as for 2015).
df_topo_sesa_2016 = toolst.mask_var(nMask_2016_sesa,arrayTOPO_sesa_2016,sel_gridarea(grid_area,nMask_2016_sesa),
                        lon1=nMask_2016_sesa.coords['lon'].values,
                        lat1=nMask_2016_sesa.coords['lat'].values,rr_limit=0,timeds=pd.Series(nMask_2016_sesa.time))

In [49]:
# Topography statistics for SESA 2017 (same toolst.mask_var call pattern as for 2015).
df_topo_sesa_2017 = toolst.mask_var(nMask_2017_sesa,arrayTOPO_sesa_2017,sel_gridarea(grid_area,nMask_2017_sesa),
                        lon1=nMask_2017_sesa.coords['lon'].values,
                        lat1=nMask_2017_sesa.coords['lat'].values,rr_limit=0,timeds=pd.Series(nMask_2017_sesa.time))

In [51]:
# Topography statistics for SESA 2018 (same toolst.mask_var call pattern as for 2015).
df_topo_sesa_2018 = toolst.mask_var(nMask_2018_sesa,arrayTOPO_sesa_2018,sel_gridarea(grid_area,nMask_2018_sesa),
                        lon1=nMask_2018_sesa.coords['lon'].values,
                        lat1=nMask_2018_sesa.coords['lat'].values,rr_limit=0,timeds=pd.Series(nMask_2018_sesa.time))

In [52]:
# Topography statistics for SESA 2019 (same toolst.mask_var call pattern as for 2015).
df_topo_sesa_2019 = toolst.mask_var(nMask_2019_sesa,arrayTOPO_sesa_2019,sel_gridarea(grid_area,nMask_2019_sesa),
                        lon1=nMask_2019_sesa.coords['lon'].values,
                        lat1=nMask_2019_sesa.coords['lat'].values,rr_limit=0,timeds=pd.Series(nMask_2019_sesa.time))

In [53]:
# Topography statistics for SESA 2020 (same toolst.mask_var call pattern as for 2015).
df_topo_sesa_2020 = toolst.mask_var(nMask_2020_sesa,arrayTOPO_sesa_2020,sel_gridarea(grid_area,nMask_2020_sesa),
                        lon1=nMask_2020_sesa.coords['lon'].values,
                        lat1=nMask_2020_sesa.coords['lat'].values,rr_limit=0,timeds=pd.Series(nMask_2020_sesa.time))

In [54]:
# Persist the SESA topography tables; the merge section below reads these
# files back with pd.read_pickle.
df_topo_sesa_2015.to_pickle('imerg_pkl/df_topo_sesa_2015.pkl')
df_topo_sesa_2016.to_pickle('imerg_pkl/df_topo_sesa_2016.pkl')
df_topo_sesa_2017.to_pickle('imerg_pkl/df_topo_sesa_2017.pkl')
df_topo_sesa_2018.to_pickle('imerg_pkl/df_topo_sesa_2018.pkl')
df_topo_sesa_2019.to_pickle('imerg_pkl/df_topo_sesa_2019.pkl')
df_topo_sesa_2020.to_pickle('imerg_pkl/df_topo_sesa_2020.pkl')

In [60]:
#### AMAZON region
# Topography statistics for the Amazon region, 2015, via toolst.mask_var
# (project module tools_for_tobac, not visible here). The result is read later
# for its 'mean', 'median', 'max' and 'std' columns.
df_topo_ab_2015 = toolst.mask_var(nMask_2015,arrayTOPO_ab_2015,sel_gridarea(grid_area,nMask_2015),
                      lon1=nMask_2015.coords['lon'].values,
                      lat1=nMask_2015.coords['lat'].values,rr_limit=0,timeds=pd.Series(nMask_2015.time))


In [61]:
# Topography statistics for the Amazon region, 2016-2020 (same
# toolst.mask_var call pattern as for 2015).
df_topo_ab_2016 = toolst.mask_var(nMask_2016,arrayTOPO_ab_2016,sel_gridarea(grid_area,nMask_2016),
                      lon1=nMask_2016.coords['lon'].values,
                      lat1=nMask_2016.coords['lat'].values,rr_limit=0,timeds=pd.Series(nMask_2016.time))
df_topo_ab_2017 = toolst.mask_var(nMask_2017,arrayTOPO_ab_2017,sel_gridarea(grid_area,nMask_2017),
                      lon1=nMask_2017.coords['lon'].values,
                      lat1=nMask_2017.coords['lat'].values,rr_limit=0,timeds=pd.Series(nMask_2017.time))
df_topo_ab_2018 = toolst.mask_var(nMask_2018,arrayTOPO_ab_2018,sel_gridarea(grid_area,nMask_2018),
                      lon1=nMask_2018.coords['lon'].values,
                      lat1=nMask_2018.coords['lat'].values,rr_limit=0,timeds=pd.Series(nMask_2018.time))
df_topo_ab_2019 = toolst.mask_var(nMask_2019,arrayTOPO_ab_2019,sel_gridarea(grid_area,nMask_2019),
                      lon1=nMask_2019.coords['lon'].values,
                      lat1=nMask_2019.coords['lat'].values,rr_limit=0,timeds=pd.Series(nMask_2019.time))
df_topo_ab_2020 = toolst.mask_var(nMask_2020,arrayTOPO_ab_2020,sel_gridarea(grid_area,nMask_2020),
                      lon1=nMask_2020.coords['lon'].values,
                      lat1=nMask_2020.coords['lat'].values,rr_limit=0,timeds=pd.Series(nMask_2020.time))


In [62]:
# Persist the Amazon topography tables; the merge section below reads these
# files back with pd.read_pickle.
df_topo_ab_2015.to_pickle('imerg_pkl/df_topo_ab_2015.pkl')
df_topo_ab_2016.to_pickle('imerg_pkl/df_topo_ab_2016.pkl')
df_topo_ab_2017.to_pickle('imerg_pkl/df_topo_ab_2017.pkl')
df_topo_ab_2018.to_pickle('imerg_pkl/df_topo_ab_2018.pkl')
df_topo_ab_2019.to_pickle('imerg_pkl/df_topo_ab_2019.pkl')
df_topo_ab_2020.to_pickle('imerg_pkl/df_topo_ab_2020.pkl')

#### Merge dataframes of OCS and topo information 

In [27]:
# Reload the topography tables written by the two to_pickle cells above. This
# lets the merge below run without recomputing the mask_var step
# (presumably the reason for this cell - confirm).
df_topo_ab_2015 = pd.read_pickle('imerg_pkl/df_topo_ab_2015.pkl')
df_topo_ab_2016 = pd.read_pickle('imerg_pkl/df_topo_ab_2016.pkl')
df_topo_ab_2017 = pd.read_pickle('imerg_pkl/df_topo_ab_2017.pkl')
df_topo_ab_2018 = pd.read_pickle('imerg_pkl/df_topo_ab_2018.pkl')
df_topo_ab_2019 = pd.read_pickle('imerg_pkl/df_topo_ab_2019.pkl')
df_topo_ab_2020 = pd.read_pickle('imerg_pkl/df_topo_ab_2020.pkl')

df_topo_sesa_2015 = pd.read_pickle('imerg_pkl/df_topo_sesa_2015.pkl')
df_topo_sesa_2016 = pd.read_pickle('imerg_pkl/df_topo_sesa_2016.pkl')
df_topo_sesa_2017 = pd.read_pickle('imerg_pkl/df_topo_sesa_2017.pkl')
df_topo_sesa_2018 = pd.read_pickle('imerg_pkl/df_topo_sesa_2018.pkl')
df_topo_sesa_2019 = pd.read_pickle('imerg_pkl/df_topo_sesa_2019.pkl')
df_topo_sesa_2020 = pd.read_pickle('imerg_pkl/df_topo_sesa_2020.pkl')

In [35]:
def add_cols(df,dfsm,name_mean='topo_mean',name_median='topo_median',name_std='topo_std',name_max='topo_max'):
    """Copy the statistics columns of ``dfsm`` into ``df`` as numeric columns.

    The columns ``mean``, ``median``, ``max`` and ``std`` of ``dfsm`` are
    converted with ``pd.to_numeric`` (unparseable entries become NaN) and
    stored in ``df`` under the given names, in that order.

    Rows are matched by index label, not by any key column, so both tables
    must share the same index for the values to line up.

    Parameters
    ----------
    df : pandas.DataFrame
        Table that receives the new columns; it is modified in place.
    dfsm : pandas.DataFrame
        Table providing the ``mean``, ``median``, ``max`` and ``std`` columns.
    name_mean, name_median, name_std, name_max : str
        Names of the new columns in ``df``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    column_map = (
        (name_mean, 'mean'),
        (name_median, 'median'),
        (name_max, 'max'),
        (name_std, 'std'),
    )
    for target, statistic in column_map:
        df[target] = pd.to_numeric(dfsm[statistic], errors='coerce')
    return df

In [32]:
# Sanity check: both tables should have the same number of rows, because
# add_cols pairs them up by index label.
print(len(df1_stage_h2015_sesa),len(df_topo_sesa_2015))

1801 1801


In [36]:
# Attach the topography statistics to the SESA track tables. add_cols matches
# rows by index label and modifies the table it is given.
# NOTE(review): this assumes both tables list the objects in the same row
# order - confirm against the output order of toolst.mask_var.
df1_stage_h2015_sesa = add_cols(df1_stage_h2015_sesa,df_topo_sesa_2015)
df1_stage_h2016_sesa = add_cols(df1_stage_h2016_sesa,df_topo_sesa_2016)
df1_stage_h2017_sesa = add_cols(df1_stage_h2017_sesa,df_topo_sesa_2017)
df1_stage_h2018_sesa = add_cols(df1_stage_h2018_sesa,df_topo_sesa_2018)
df1_stage_h2019_sesa = add_cols(df1_stage_h2019_sesa,df_topo_sesa_2019)
df1_stage_h2020_sesa = add_cols(df1_stage_h2020_sesa,df_topo_sesa_2020)

In [37]:
# Attach the topography statistics to the Amazon track tables. add_cols
# returns the very object it was given, so each `*_ab` name is an alias of the
# corresponding `df1_stage_h20xx` table (which now carries the new columns too).
# NOTE(review): rows are matched by index label - confirm that both tables
# list the objects in the same row order.
df1_stage_h2015_ab = add_cols(df1_stage_h2015,df_topo_ab_2015)
df1_stage_h2016_ab = add_cols(df1_stage_h2016,df_topo_ab_2016)
df1_stage_h2017_ab = add_cols(df1_stage_h2017,df_topo_ab_2017)
df1_stage_h2018_ab = add_cols(df1_stage_h2018,df_topo_ab_2018)
df1_stage_h2019_ab = add_cols(df1_stage_h2019,df_topo_ab_2019)
df1_stage_h2020_ab = add_cols(df1_stage_h2020,df_topo_ab_2020)

In [41]:
# Save the final Amazon tables (tracks + stage + direction + topography statistics).
df1_stage_h2015_ab.to_pickle('imerg_pkl/df_imerg_stage_AB_2015.pkl')
df1_stage_h2016_ab.to_pickle('imerg_pkl/df_imerg_stage_AB_2016.pkl')
df1_stage_h2017_ab.to_pickle('imerg_pkl/df_imerg_stage_AB_2017.pkl')
df1_stage_h2018_ab.to_pickle('imerg_pkl/df_imerg_stage_AB_2018.pkl')
df1_stage_h2019_ab.to_pickle('imerg_pkl/df_imerg_stage_AB_2019.pkl')
df1_stage_h2020_ab.to_pickle('imerg_pkl/df_imerg_stage_AB_2020.pkl')

In [42]:
# Save the final SESA tables (tracks + stage + direction + topography statistics).
df1_stage_h2015_sesa.to_pickle('imerg_pkl/df_imerg_stage_SESA_2015.pkl')
df1_stage_h2016_sesa.to_pickle('imerg_pkl/df_imerg_stage_SESA_2016.pkl')
df1_stage_h2017_sesa.to_pickle('imerg_pkl/df_imerg_stage_SESA_2017.pkl')
df1_stage_h2018_sesa.to_pickle('imerg_pkl/df_imerg_stage_SESA_2018.pkl')
df1_stage_h2019_sesa.to_pickle('imerg_pkl/df_imerg_stage_SESA_2019.pkl')
df1_stage_h2020_sesa.to_pickle('imerg_pkl/df_imerg_stage_SESA_2020.pkl')