In [1]:
#---------------------Import libraries --------------
import pandas as pd
import geopandas as gpd
import datetime 
import netCDF4
import numpy as np
import matplotlib.pyplot as plt
import itertools
import seaborn as sns
from scipy.stats import pearsonr

#--------------------- INSAR4SM functionalities --------------
from insar4sm.classes import INSAR4SM_stack, SM_point
from insar4sm.gridding import WGS84_to_UTM

In [2]:
def cftime_to_datetime(cfdatetime):
    '''
    Time conversion functionality.

    Builds a standard-library datetime.datetime from any object exposing
    year, month, day, hour, minute and second attributes (for example the
    objects returned by netCDF4.num2date). Only these six fields are copied.
    '''
    field_names = ('year', 'month', 'day', 'hour', 'minute', 'second')
    return datetime.datetime(*(getattr(cfdatetime, name) for name in field_names))


In [3]:
def get_ERA5_data (cell, ERA5_variables, ERA5_datasets):
    '''
    Collect the ERA5 time series of a single grid cell into one DataFrame.

    Args:
        cell: pair of integer indices; cell[0] and cell[1] select the second
            and third axis of every data variable (the callers in this
            notebook pass a latitude index and a longitude index).
        ERA5_variables: names of the variables to read from each dataset.
            'longitude', 'latitude' and 'expver' are skipped; 'time' is
            converted to datetimes and stored in the 'Datetimes' column.
        ERA5_datasets: iterable of opened netCDF4 datasets.

    Returns:
        DataFrame with one row per time step of every dataset, indexed by
        datetime and sorted chronologically. Data columns are named
        '<variable>__<units>'. An empty DataFrame is returned when
        ERA5_datasets is empty.
    '''
    skipped_variables = ('longitude', 'latitude', 'expver')
    frames = []

    for ERA5_dataset in ERA5_datasets:
        df_dict = {}

        for ERA5_variable in ERA5_variables:
            if ERA5_variable in skipped_variables:
                continue

            if ERA5_variable == 'time':
                time_var = ERA5_dataset.variables[ERA5_variable]
                dtime = netCDF4.num2date(time_var[:], time_var.units, calendar=time_var.calendar)
                df_dict['Datetimes'] = [cftime_to_datetime(cfdatetime) for cfdatetime in dtime.data]
            else:
                data_var = ERA5_dataset[ERA5_variable]
                temp_name = ERA5_variable + '__' + data_var.units
                # Slice the requested cell directly instead of loading the
                # full array first and indexing it afterwards.
                df_dict[temp_name] = np.squeeze(data_var[:, cell[0], cell[1]])

        # create a dataframe
        df_ERA5_temp = pd.DataFrame(df_dict)
        df_ERA5_temp.index = pd.to_datetime(df_ERA5_temp['Datetimes'])
        frames.append(df_ERA5_temp)

    # pd.concat raises on an empty list, so keep the empty-input result explicit
    if not frames:
        return pd.DataFrame()

    # concatenate once instead of growing a DataFrame inside the loop
    return pd.concat(frames).sort_index()

#### To reproduce the results of this notebook, download the `SM_NA` folder from http://147.102.106.42:5000/fsdownload/gxzvLswJM/public and replace the paths in the following cells with the location of your copy.

For example, '/RSL02/SM_NA/era5/era5_land_na_orbit_100.nc' should become 'path_to_your_SM_NA/era5/era5_land_na_orbit_100.nc'.

In [41]:
# Root of the downloaded SM_NA folder: change this single value to point the
# input paths defined in this cell at your own copy of the data.
SM_NA_DIR = '/RSL02/SM_NA'

# Area of interest and the ERA5-Land extracts (one file per Sentinel-1 orbit)
SM_AOI = SM_NA_DIR + '/Plotting/bbox_aoi.geojson'
ERA5_file_100 = SM_NA_DIR + '/era5/era5_land_na_orbit_100.nc'
ERA5_file_166 = SM_NA_DIR + '/era5/era5_land_na_orbit_166.nc'
ERA5_file_173 = SM_NA_DIR + '/era5/era5_land_na_orbit_173.nc'

# InSAR4SM soil-moisture inversions per orbit
InSAR4SM_100 = SM_NA_DIR + '/INSAR4SM_results_100_sq250_FordDryLake/sm_inversions_INSAR4SM_results_100_sq250_FordDryLake_250.shp'
InSAR4SM_173 = SM_NA_DIR + '/INSAR4SM_results_173_sq250_FordDryLake/sm_inversions_INSAR4SM_results_173_sq250_FordDryLake_250.shp'
InSAR4SM_166 = SM_NA_DIR + '/INSAR4SM_results_166_sq250_FordDryLake/sm_inversions_INSAR4SM_estimations_166_sq250_250.shp'

# Additional inversions ("extent" runs) for orbits 173 and 166
InSAR4SM_173_extend = SM_NA_DIR + '/INSAR4SM_estimations_173_sq250_extent/sm_inversions_INSAR4SM_estimations_173_sq250_extent_250.shp'
InSAR4SM_166_extend = SM_NA_DIR + '/INSAR4SM_estimations_166_sq250_extent/sm_inversions_INSAR4SM_estimations_166_sq250_extent_250.shp'

# [orbit number, time of day] pairs
orbit_combs = [['173','14:00:00'],
               ['100','14:00:00'],
               ['166','02:00:00']]

In [42]:
# Read the InSAR4SM soil-moisture inversions of every orbit
InSAR4SM_100_df = gpd.read_file(InSAR4SM_100)
InSAR4SM_173_df = gpd.read_file(InSAR4SM_173)
InSAR4SM_166_df = gpd.read_file(InSAR4SM_166)
InSAR4SM_173_extend_df = gpd.read_file(InSAR4SM_173_extend)
InSAR4SM_166_extend_df = gpd.read_file(InSAR4SM_166_extend)

# Append the "extent" inversions to the results of the same orbit
InSAR4SM_173_df = pd.concat([InSAR4SM_173_df, InSAR4SM_173_extend_df])
InSAR4SM_166_df = pd.concat([InSAR4SM_166_df, InSAR4SM_166_extend_df])

# Read the AOI once and derive its bounds from the same geometry
SM_AOI_geom = gpd.read_file(SM_AOI)['geometry']
SM_AOI_geom_bounds = SM_AOI_geom.bounds

# NOTE(review): ERA5_file_100 is defined but not loaded here -- confirm that
# leaving it out of ERA5_datasets is intentional.
ERA5_datasets = [netCDF4.Dataset(ERA5_file_166), netCDF4.Dataset(ERA5_file_173)]

# Reuse the already opened orbit-173 dataset for the grid description
# instead of opening the same file a second time.
ERA5_data_temp = ERA5_datasets[-1]
ERA5_variables = list(ERA5_data_temp.variables.keys())

ERA5_lons = ERA5_data_temp.variables['longitude'][:].data
ERA5_lats = ERA5_data_temp.variables['latitude'][:].data

# ERA5 grid rows/columns whose centre lies strictly inside the AOI bounds
ERA_pixel_inds1 = np.logical_and(ERA5_lats > SM_AOI_geom_bounds['miny'].values,
                                 ERA5_lats < SM_AOI_geom_bounds['maxy'].values)
ERA_pixel_inds2 = np.logical_and(ERA5_lons > SM_AOI_geom_bounds['minx'].values,
                                 ERA5_lons < SM_AOI_geom_bounds['maxx'].values)

# Every (latitude index, longitude index) combination inside the AOI
cellinds1 =  np.where (ERA_pixel_inds1)[0]
cellinds2 =  np.where (ERA_pixel_inds2)[0]
cells = list(itertools.product(cellinds1, cellinds2))

In [50]:

    # Single-cell walkthrough of the per-cell processing. Use the first ERA5
    # cell inside the AOI so that this cell does not rely on a `cell` variable
    # left over from an earlier run.
    cell = cells[0]

    # create a dataframe
    df_ERA5 = get_ERA5_data (cell, ERA5_variables, ERA5_datasets)

    lon_center = ERA5_data_temp.variables['longitude'][cell[1]].data
    lat_center = ERA5_data_temp.variables['latitude'][cell[0]].data
    print(lon_center, lat_center)
    # Half-width of the square window around the ERA5 cell centre
    dist = 0.05
    xmin = lon_center - dist
    ymin = lat_center - dist
    xmax = lon_center + dist
    ymax = lat_center + dist
    
    # Orbit 100 points inside the window; drop the geometry without mutating
    # the slice returned by .cx in place.
    InSAR4SM_100_df_cell = InSAR4SM_100_df.cx[xmin:xmax, ymin:ymax].drop(columns=['geometry'])
    if len(InSAR4SM_100_df_cell) == 0:
        # An explicit dtype keeps the later concatenation numeric
        InSAR4SM_100_df_cell_mean = pd.Series(dtype=float)
        datetimes_100 = []
    else:
        InSAR4SM_100_df_cell = InSAR4SM_100_df_cell.dropna(axis = 0, how = 'all')
        InSAR4SM_100_df_cell_mean = InSAR4SM_100_df_cell.mean()
        InSAR4SM_100_df_cell_std = InSAR4SM_100_df_cell.std()
        # Column names are 'D%Y%m%d' dates; shift them to 14:00
        dates_100 = pd.to_datetime(InSAR4SM_100_df_cell.columns, exact=True, format = 'D%Y%m%d' )
        datetimes_100 = [date_100+datetime.timedelta(hours = 14) for date_100 in dates_100]
        InSAR4SM_100_df_cell_mean.index = datetimes_100

-115.27 33.75


In [52]:
    # Orbit 173: points inside the window, averaged per acquisition date.
    # The geometry is dropped without mutating the .cx slice in place.
    InSAR4SM_173_df_cell = InSAR4SM_173_df.cx[xmin:xmax, ymin:ymax].drop(columns=['geometry'])
    InSAR4SM_173_df_cell = InSAR4SM_173_df_cell.dropna(axis = 0, how = 'all')
    InSAR4SM_173_df_cell_mean = InSAR4SM_173_df_cell.mean()
    InSAR4SM_173_df_cell_std = InSAR4SM_173_df_cell.std()
    # Column names are 'D%Y%m%d' dates; shift them to 14:00
    dates_173 = pd.to_datetime(InSAR4SM_173_df_cell.columns, exact=True, format = 'D%Y%m%d' )
    datetimes_173 = [date_173+datetime.timedelta(hours = 14) for date_173 in dates_173]
    InSAR4SM_173_df_cell_mean.index = datetimes_173

    # Orbit 166: same processing, dates shifted to 02:00
    InSAR4SM_166_df_cell = InSAR4SM_166_df.cx[xmin:xmax, ymin:ymax].drop(columns=['geometry'])
    InSAR4SM_166_df_cell = InSAR4SM_166_df_cell.dropna(axis = 0, how = 'all')
    InSAR4SM_166_df_cell_mean = InSAR4SM_166_df_cell.mean()
    InSAR4SM_166_df_cell_std = InSAR4SM_166_df_cell.std()
    dates_166 = pd.to_datetime(InSAR4SM_166_df_cell.columns, exact=True, format = 'D%Y%m%d' )
    datetimes_166 = [date_166+datetime.timedelta(hours = 2) for date_166 in dates_166]
    InSAR4SM_166_df_cell_mean.index = datetimes_166

In [None]:
# Stack the per-orbit means into one chronologically sorted table with a
# single 'InSAR4SM' column, then display it.
InSAR4SM_df_cell_mean = (
    pd.concat([InSAR4SM_100_df_cell_mean,
               InSAR4SM_173_df_cell_mean,
               InSAR4SM_166_df_cell_mean])
    .sort_index()
    .to_frame()
    .rename(columns={0: 'InSAR4SM'})
)
InSAR4SM_df_cell_mean

In [56]:
    # merge ERA5 observations with INSAR4SM estimations
    # (swvl1 is scaled by 100 -- presumably to express it in percent like the
    # InSAR4SM values; confirm)
    InSAR4SM_df_cell_mean['ERA5'] = df_ERA5.loc[InSAR4SM_df_cell_mean.index, 'swvl1__m**3 m**-3']*100
    
    # Label every row with its orbit. A single .loc[rows, column] assignment
    # writes into the frame itself; the chained form df['orbit'].loc[...] = x
    # assigns through an intermediate object and is not guaranteed to.
    InSAR4SM_df_cell_mean['orbit'] = 'S1_orbit_100'
    InSAR4SM_df_cell_mean.loc[datetimes_173, 'orbit'] = 'S1_orbit_173'
    InSAR4SM_df_cell_mean.loc[datetimes_166, 'orbit'] = 'S1_orbit_166'

In [58]:
# Agreement between InSAR4SM (predictions) and ERA5 (targets):
# root-mean-square error and Pearson correlation coefficient.
targets = InSAR4SM_df_cell_mean['ERA5'].values
predictions = InSAR4SM_df_cell_mean['InSAR4SM'].values
n = len(predictions)
rmse = np.linalg.norm(predictions - targets) / np.sqrt(n)
corr = pearsonr(predictions, targets)[0]


In [66]:
# Display the acquisition datetimes of the merged table (the 'InSAR4SM'
# column shares the index of its frame).
InSAR4SM_df_cell_mean.index

DatetimeIndex(['2018-07-04 14:00:00', '2018-07-10 02:00:00',
               '2018-07-16 14:00:00', '2018-07-22 02:00:00',
               '2018-07-28 14:00:00', '2018-08-03 02:00:00',
               '2018-08-09 14:00:00', '2018-08-15 02:00:00',
               '2018-08-21 14:00:00', '2018-09-02 14:00:00',
               '2018-09-08 02:00:00', '2018-09-14 14:00:00',
               '2018-09-20 02:00:00', '2018-09-26 14:00:00',
               '2018-10-02 02:00:00', '2018-10-08 14:00:00',
               '2018-10-14 02:00:00', '2018-10-20 14:00:00',
               '2018-10-26 02:00:00', '2018-11-01 14:00:00',
               '2018-11-07 02:00:00', '2018-11-13 14:00:00',
               '2018-11-19 02:00:00', '2018-11-25 14:00:00',
               '2018-12-01 02:00:00', '2018-12-07 14:00:00',
               '2018-12-13 02:00:00', '2018-12-19 14:00:00',
               '2018-12-25 02:00:00', '2018-12-31 14:00:00',
               '2019-01-06 02:00:00', '2019-01-12 14:00:00',
               '2019-01-

True

In [73]:
def _orbit_cell_mean(orbit_gdf, xmin, xmax, ymin, ymax, hour):
    '''
    Average the InSAR4SM estimates of one orbit inside a bounding box.

    Args:
        orbit_gdf: GeoDataFrame with a 'geometry' column and one column per
            acquisition date, named with the 'D%Y%m%d' pattern.
        xmin, xmax, ymin, ymax: bounding box, in the coordinates of orbit_gdf.
        hour: hour of the day added to every acquisition date.

    Returns:
        (cell_mean, datetimes): the per-date mean Series indexed by
        acquisition datetime, and the list of those datetimes. An empty float
        Series and an empty list are returned when no point falls in the box.
    '''
    # Drop the geometry without mutating the slice returned by .cx in place
    cell_df = orbit_gdf.cx[xmin:xmax, ymin:ymax].drop(columns=['geometry'])
    if len(cell_df) == 0:
        # An explicit dtype keeps the later concatenation numeric
        return pd.Series(dtype=float), []

    cell_df = cell_df.dropna(axis=0, how='all')
    cell_mean = cell_df.mean()
    dates = pd.to_datetime(cell_df.columns, exact=True, format='D%Y%m%d')
    datetimes = [date + datetime.timedelta(hours=hour) for date in dates]
    cell_mean.index = datetimes
    return cell_mean, datetimes


for cell in cells:

    # create a dataframe
    df_ERA5 = get_ERA5_data (cell, ERA5_variables, ERA5_datasets)

    lon_center = ERA5_data_temp.variables['longitude'][cell[1]].data
    lat_center = ERA5_data_temp.variables['latitude'][cell[0]].data
    print(lon_center, lat_center)
    # Half-width of the square window around the ERA5 cell centre
    dist = 0.05
    xmin = lon_center - dist
    ymin = lat_center - dist
    xmax = lon_center + dist
    ymax = lat_center + dist

    # Per-orbit means inside the window; orbits 100 and 173 are stamped at
    # 14:00 and orbit 166 at 02:00.
    InSAR4SM_100_df_cell_mean, datetimes_100 = _orbit_cell_mean(InSAR4SM_100_df, xmin, xmax, ymin, ymax, 14)
    InSAR4SM_173_df_cell_mean, datetimes_173 = _orbit_cell_mean(InSAR4SM_173_df, xmin, xmax, ymin, ymax, 14)
    InSAR4SM_166_df_cell_mean, datetimes_166 = _orbit_cell_mean(InSAR4SM_166_df, xmin, xmax, ymin, ymax, 2)

    InSAR4SM_df_cell_mean = (
        pd.concat([InSAR4SM_100_df_cell_mean,
                   InSAR4SM_173_df_cell_mean,
                   InSAR4SM_166_df_cell_mean])
        .sort_index()
        .to_frame()
        .rename(columns={0: 'InSAR4SM'})
    )

    # merge ERA5 observations with INSAR4SM estimations
    InSAR4SM_df_cell_mean['ERA5'] = df_ERA5.loc[InSAR4SM_df_cell_mean.index, 'swvl1__m**3 m**-3']*100

    # Label every row with its orbit using a single .loc[rows, column]
    # assignment, which writes into the frame itself.
    InSAR4SM_df_cell_mean['orbit'] = 'S1_orbit_100'
    InSAR4SM_df_cell_mean.loc[datetimes_173, 'orbit'] = 'S1_orbit_173'
    InSAR4SM_df_cell_mean.loc[datetimes_166, 'orbit'] = 'S1_orbit_166'

    # Acquisitions excluded from the metrics ("denoise"). As before, at most
    # one is dropped: the first of these datetimes present in the index.
    noisy_datetimes = [pd.to_datetime('2018-10-03T14:00:00'),
                       pd.to_datetime('2018-10-02 02:00:00')]
    scored = InSAR4SM_df_cell_mean
    for noisy_datetime in noisy_datetimes:
        if noisy_datetime in InSAR4SM_df_cell_mean.index:
            scored = InSAR4SM_df_cell_mean.drop([noisy_datetime])
            break

    # Root-mean-square error and Pearson correlation of InSAR4SM vs ERA5
    predictions = scored['InSAR4SM'].values
    targets = scored['ERA5'].values
    rmse = np.linalg.norm(predictions - targets) / np.sqrt(predictions.shape[0])
    corr, _ = pearsonr(predictions, targets)

    fig, ax = plt.subplots(figsize=(1.7,2))

    # ERA5 as black squares, InSAR4SM as points coloured by orbit (all dates
    # are plotted, including any left out of the metrics)
    InSAR4SM_df_cell_mean['ERA5'].plot(style='s', c='k', alpha=0.6, ms=3, ax=ax)
    sns.scatterplot(x=InSAR4SM_df_cell_mean.index,
                    y='InSAR4SM',
                    hue='orbit',
                    data=InSAR4SM_df_cell_mean,
                    size=2,
                    alpha=0.7,
                    ax=ax)
    ax.set(ylim=(0, 25))
    ax.get_legend().remove()
    # NOTE(review): both series are on the x100 scale used for ERA5 above, so
    # the unit shown in this label may need revisiting -- confirm.
    ax.annotate(" RMSE:{0:.1f} $m^3/m^3$ \n R:{1:.2f} ".format(rmse, corr), xy=(0.,0.7), xycoords='axes fraction',fontsize=8)

    ax.yaxis.label.set_visible(False)

    export_name = 'Era5_cell_denoise_{0:.2f}_{1:.2f}.svg'.format(np.round(lon_center,2),np.round(lat_center,2))
    print(export_name)
    fig.savefig('/RSL02/SM_NA/revisions_r2_figures/era5_plots/r2_{}'.format(export_name))
    # Close the figure so that figures do not accumulate across iterations
    plt.close(fig)

-115.27 33.75
Era5_cell_denoise_-115.27_33.75.svg
-115.17 33.75
Era5_cell_denoise_-115.17_33.75.svg
-115.07 33.75
Era5_cell_denoise_-115.07_33.75.svg
-114.97 33.75
Era5_cell_denoise_-114.97_33.75.svg
-114.87 33.75
Era5_cell_denoise_-114.87_33.75.svg
-114.77 33.75
Era5_cell_denoise_-114.77_33.75.svg
-115.27 33.65
Era5_cell_denoise_-115.27_33.65.svg
-115.17 33.65
Era5_cell_denoise_-115.17_33.65.svg
-115.07 33.65
Era5_cell_denoise_-115.07_33.65.svg
-114.97 33.65
Era5_cell_denoise_-114.97_33.65.svg
-114.87 33.65
Era5_cell_denoise_-114.87_33.65.svg
-114.77 33.65
Era5_cell_denoise_-114.77_33.65.svg
-115.27 33.55
Era5_cell_denoise_-115.27_33.55.svg
-115.17 33.55
Era5_cell_denoise_-115.17_33.55.svg
-115.07 33.55
Era5_cell_denoise_-115.07_33.55.svg
-114.97 33.55
Era5_cell_denoise_-114.97_33.55.svg
-114.87 33.55
Era5_cell_denoise_-114.87_33.55.svg
-114.77 33.55
Era5_cell_denoise_-114.77_33.55.svg
