In [None]:
import numpy as np
import pickle
from pathlib import Path
import pandas as pd
from scipy.stats import chi2
from matplotlib import pyplot as plt

In every bin, each data point is treated as a sample from the normal distribution $N(0,\sigma)$, so this is an interval estimation problem. The point estimate of $\sigma^2$ is
$$\frac{\sum_1^n x_i^2}{n}$$
In essence, this point estimate can be seen as the empirical semivariogram, since the semivariogram is defined as
$$E(f(x)-f(x-r))^2$$
The confidence lower-bound is
$$\frac{\sum_1^n x_i^2}{\chi_n^2(1-\alpha)}$$
where $x$ is the sample, $1-\alpha$ is the confidence level.
If the confidence lower bound is larger than $(3(1+L^{1/2}))^2$, where $L$ is the distance (the bin center), we can say with at least $1-\alpha$ confidence that the true $\sigma^2$ is larger than $(3(1+L^{1/2}))^2$.
https://faculty.elgin.edu/dkernler/statistics/ch09/9-3.html is a reference of the statistics method used here.

This lower bound can also be understood in terms of hypothesis testing.
We define the null hypothesis as $\sigma^2 \le \sigma_0^2$, where $\sigma_0^2$ is the value given by the requirement curve. We reject this null hypothesis if the samples show, with high confidence, that it is wrong.

The test is defined as: If $\sum_1^n x_i^2 > C$, we reject the null hypothesis.

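By statistical analysis, if we define $C = \chi_n^2(1-\alpha)\sigma_0^2$, where $\chi_n^2(1-\alpha)$ is the $(1-\alpha)$ quantile of the chi-square distribution with $n$ degrees of freedom, the significance level of the test is $\alpha$ (i.e. the confidence level is $1-\alpha$).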

The criteria are:
1. Fewer than a fraction `mratio` of the bins reject the null hypothesis (i.e. fail the requirement), and
2. The mean of the relative deviation $$(\frac{\sum_1^n x_i^2}{\chi_n^2(1-\alpha)}-\sigma_0^2)/\sigma_0^2$$ over the failed bins is less than `mdev`.
If both conditions hold, we say that the interferogram passes the test.

In [None]:
# Set Parameters
n_bins = 100 # number of bins
# Pass criterion 1: an interferogram can only pass if the number of bins whose
# lower bound reaches the requirement is below n_bins*mratio.
mratio = 0.3
# Pass criterion 2: the mean relative deviation over those bins must be below mdev.
mdev = 0.3

In [None]:
# Input directory is <current working directory>/calval/<calval_location>.
calval_dir = Path.cwd()/'calval'
# Active site; the commented-out assignments below are the alternatives.
calval_location = 'central_valley'
# calval_location = 'texas'
# calval_location = 'oklahoma'
# calval_location = 'purtorico'
work_dir = calval_dir/calval_location

In [None]:
# Load the three objects stored in approach2.pkl.
# NOTE(review): pickle.load can execute arbitrary code from the file, so only
# load pickles produced by a trusted run of the upstream notebook.
# NOTE(review): presumably dist[i] / rel_measure[i] are equally long 1-D arrays
# for interferogram i and ifgs_date is an (n_ifgs, 2) array of dates -- confirm
# against the code that writes this pickle.
with open(work_dir/'approach2.pkl','rb') as f:
    dist,rel_measure, ifgs_date = pickle.load(f)

In [None]:
# Number of interferograms: `dist` is indexed once per interferogram below.
n_ifgs = len(dist)

In [None]:
# n_bins equal-width distance bins spanning 0.1 to 50.0, given as n_bins+1 edges
# (presumably km, per the axis label of the commented-out plot -- confirm).
bins = np.linspace(0.1,50.0,num=n_bins+1)
# Width of each bin.
bins_interval = bins[1:] - bins[:-1]
# Mid-point of each bin; used as the distance at which the requirement is evaluated.
bins_center = bins[:-1]+bins_interval/2

In [None]:
# 1 - alpha is the probability passed to chi2.ppf when computing the lower bound.
alpha = 0.05

In [None]:
# Per-interferogram, per-bin statistics of the relative measurements.
#   n_all[i, j]      number of samples of interferogram i in bin j (j < n_bins)
#   n_all[i, n_bins] total number of samples of interferogram i that fell in any bin
#   est[i, j]        point estimate of sigma^2:            sum(x^2) / n
#   lowbound[i, j]   (1 - alpha) lower bound of sigma^2:   sum(x^2) / chi2_n(1 - alpha)
# Empty bins carry no information, so est/lowbound are NaN there; NaN propagates
# to anything derived from them and compares False with every threshold.
n_all = np.empty([n_ifgs, n_bins+1], dtype=int)
lowbound = np.empty([n_ifgs, n_bins])
est = np.empty([n_ifgs, n_bins])
rqmt = (3*(1+np.sqrt(bins_center)))**2 # square of the curve
for i in range(n_ifgs):
    # np.digitize returns j for bins[j-1] <= d < bins[j]; distances outside
    # [bins[0], bins[-1]) get index 0 or n_bins+1 and are skipped by the loop below.
    inds = np.digitize(dist[i], bins)
    for j in range(1, n_bins+1):
        rem = rel_measure[i][inds == j] # relative measurement for each bin
        len_rem = len(rem)
        n_all[i, j-1] = len_rem
        if len_rem == 0:
            # Avoid 0/0 (ZeroDivisionError) and chi2.ppf with df=0.
            lowbound[i, j-1] = np.nan
            est[i, j-1] = np.nan
            continue
        # Vectorised sum of squares, computed once and shared by both statistics.
        sum_sq = np.sum(rem**2)
        lowbound[i, j-1] = sum_sq/chi2.ppf(1-alpha, df=len_rem)
        est[i, j-1] = sum_sq/len_rem

    # Total over ALL n_bins bin counts; the slice [:-1] drops only the total
    # slot itself (the previous 0:-2 also dropped the last bin).
    n_all[i, -1] = np.sum(n_all[i, :-1])

In [None]:
def to_str(x: bool) -> str:
    """Return the table-cell class label for a pass/fail flag.

    The returned labels are used as per-cell CSS classes by the styled table
    further down (selectors ``.true`` / ``.false``).

    Args:
        x: Pass/fail flag; any truthy or falsy scalar is accepted, including
            NumPy booleans.

    Returns:
        ``'true '`` if ``x`` is truthy, otherwise ``'false '``. The trailing
        space is kept exactly as in the original labels.
    """
    # Truthiness instead of `== True` / `== False`: always returns a string,
    # never falls through to an implicit None.
    return 'true ' if x else 'false '

In [None]:
# for i in range(n_ifgs):
#     fig, ax = plt.subplots(figsize=[18, 5.5])
#     ax.plot(bins_center,rqmt,'r')
#     ax.scatter(bins_center,est[i],c='yellow')
#     ax.scatter(bins_center,lowbound[i],c='green')

#     ax.set_xlabel('Distance (km)')
#     ax.set_ylabel(r'$\sigma^2$ ($mm^2$)')
#     plt.legend(["Mission Requirement","Estimated","Lower Bound"])

In [None]:
# Relative deviation of the lower bound from the requirement, per interferogram
# and bin (rqmt has one value per bin and broadcasts across interferograms).
dev = (lowbound-rqmt)/rqmt
# True where the lower bound is below the requirement, i.e. the bin passes.
success_or_fail = dev < 0.0

In [None]:
# Per-interferogram summary of the bins whose lower bound reaches the requirement.
n_pos = np.empty(n_ifgs)                              # number of bins with dev >= 0
mean_dev = np.empty(n_ifgs)                           # mean dev over those bins, 0.0 if there are none
success_or_fail_total = np.empty(n_ifgs, dtype=bool)  # overall verdict per interferogram
for i in range(n_ifgs):
    row = dev[i]
    exceeding = row[row >= 0.0]
    n_pos[i] = len(exceeding)
    mean_dev[i] = exceeding.mean() if n_pos[i] != 0 else 0.0
    # Pass only when both criteria hold: few exceeding bins AND a small mean exceedance.
    success_or_fail_total[i] = n_pos[i] < n_bins*mratio and mean_dev[i] < mdev

In [None]:
# Append the per-interferogram summary as one extra trailing column:
# the mean deviation goes to `dev`, the overall verdict to `success_or_fail`.
# NOTE: `dev`, `success_or_fail` and `mean_dev` are rebound here, so running
# this cell a second time appends yet another column.
success_or_fail_total_2d = np.array([success_or_fail_total])  # shape (1, n_ifgs)
mean_dev = np.array([mean_dev])                               # shape (1, n_ifgs)
dev = np.hstack([dev, mean_dev.T])
success_or_fail = np.hstack([success_or_fail, success_or_fail_total_2d.T])
# Same layout as `success_or_fail`, with every flag replaced by its class label.
success_or_fail_str = [[to_str(flag) for flag in row] for row in success_or_fail]

In [None]:
# Column labels: one "lower-upper" bin range (two decimals) per bin,
# followed by 'mean' for the appended summary column.
columns = [f'{bins[i]:.2f}-{bins[i+1]:.2f}' for i in range(n_bins)]
columns.append('mean')

In [None]:
# Row labels: "<first date>-<second date>" of each interferogram, both as YYYYMMDD.
index = [
    '-'.join(ifgs_date[i, k].strftime('%Y%m%d') for k in (0, 1))
    for i in range(len(ifgs_date))
]

In [None]:
# Two tables with identical labels: the numeric deviations, and the matching
# per-cell class labels used to colour them.
dev_pd = pd.DataFrame(data=dev, index=index, columns=columns)
success_or_fail_pd = pd.DataFrame(
    data=success_or_fail_str,
    index=index,
    columns=columns,
)

In [None]:
# CSS rules for the two cell classes: green background for passing cells,
# red background for failing ones.
cell_css = [  # create internal CSS classes
    {'selector': '.true', 'props': 'background-color: #e6ffe6;'},
    {'selector': '.false', 'props': 'background-color: #ffe6e6;'},
]
s = dev_pd.style
s.set_table_styles(cell_css, overwrite=False)
# Attach the class label of every cell; as the last expression of the cell,
# the returned Styler is what the notebook renders.
s.set_td_classes(success_or_fail_pd)

Fraction of interferograms that pass the requirement (confidence level = 0.95):

In [None]:
# Fraction (0-1) of interferograms whose overall verdict is True.
np.count_nonzero(success_or_fail_total)/len(success_or_fail_total)

**Note**: some low-distance bins are rejected by approach 2.2 but not by approach 2.1. For example, in the first bin of central valley 20190122-20190203, 78 percent of the points are under the curve, yet the bin is rejected by approach 2.2.

The most likely reason is that, for low-distance bins, the mission requirement varies significantly within a bin, so it is not appropriate to assume that all points in such a bin follow the same distribution.

I increased the number of bins to 50 and found that the fraction of interferograms accepted by approach 2.2 rises to around 0.8, from 0.76 before. However, the phenomenon still exists. For approach 2.1, there is no significant difference.