# Plot acceptance ratio of the selective stack
2024.01.05 Kurama Okubo

This notebook plots the acceptance ratio of the selective stack for the rebuttal letter. 

- 2024.01.22: updated to download the dataframe from dasway

In [1]:
import datetime
import os

import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as dates
import matplotlib.dates as mdates
from mpl_toolkits.axes_grid1.inset_locator import inset_axes

from scipy import stats

%matplotlib inline
import seaborn as sns 
# import importParaviewColormap
import numpy as np
import pandas as pd
import h5py
import requests

import shutil
from tqdm import tqdm

import matplotlib as mpl

import time  # only needed for tzset() below; kept next to the TZ change it supports

os.environ['TZ'] = 'GMT' # change time zone to avoid confusion in unix_tvec conversion
# Changing the TZ environment variable is not guaranteed to affect the time
# conversion functions until tzset() is called. tzset() exists only on Unix,
# hence the guard.
if hasattr(time, "tzset"):
    time.tzset()

# Global matplotlib style for every figure of this notebook.
plt.rcParams.update({
    # fonts
    "font.family": 'Arial',
    # "font.sans-serif": "DejaVu Sans, Arial, Helvetica, Lucida Grande, Verdana, Geneva, Lucid, Avant Garde, sans-serif",
    "font.size": 12,

    # x ticks: inward, with visible minor ticks
    "xtick.direction": "in",
    "xtick.major.size": 4.75,
    "xtick.major.width": 0.75,
    "xtick.minor.size": 3,
    "xtick.minor.width": 0.4,
    "xtick.minor.visible": True,

    # y ticks: same settings as the x ticks
    "ytick.direction": "in",
    "ytick.major.size": 4.75,
    "ytick.major.width": 0.75,
    "ytick.minor.size": 3,
    "ytick.minor.width": 0.4,
    "ytick.minor.visible": True,

    # saved figures get a transparent background
    "savefig.transparent": True,

    # frame line width
    'axes.linewidth': 0.75,
})

In [2]:
#---set the file path of your case study list---#
# NOTE(review): csv_stats_id, starttime, endtime, cc_time_unit,
# averagestack_factor and averagestack_step are not referenced by the cells
# shown in this notebook; they are kept so the configuration stays unchanged.
csv_stats_id = 0

starttime = datetime.datetime(2002, 1, 1)
endtime = datetime.datetime(2022, 6, 1)

cc_time_unit=86400 # short-stacking time unit (presumably seconds, i.e. one day -- TODO confirm)
averagestack_factor=30 # length of time bin to compute mean and std
averagestack_step=15

output_imgdir = "./figure/" # directory where the figures are saved
#-------------------------------------------#

# exist_ok=True makes the cell safe to re-run and avoids the race between
# checking for the directory and creating it.
os.makedirs(output_imgdir, exist_ok=True)

# Download and read the acceptance ratio from the pandas dataframe

We download the dataframe from dasway to plot the acceptance ratio of the selective stack.

In [3]:
# reference: https://stackoverflow.com/a/62113293
def download(url: str, fname: str, timeout: float = 60):
    """Stream the content of `url` into the local file `fname` with a progress bar.

    Parameters
    ----------
    url : str
        Address of the file to download.
    fname : str
        Path of the output file; it is opened in binary write mode, so an
        existing file is overwritten.
    timeout : float, optional
        Timeout in seconds passed to `requests.get`, so that an unresponsive
        server does not block the notebook indefinitely.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status. The check happens before
        `fname` is opened, so an error page is never written to the output file.
    """
    # The context manager releases the connection even if the transfer fails.
    with requests.get(url, stream=True, timeout=timeout) as resp:
        resp.raise_for_status()
        # content-length may be missing; the bar then runs without a known total
        total = int(resp.headers.get('content-length', 0))
        # Can also replace 'file' with a io.BytesIO object
        with open(fname, 'wb') as file, tqdm(
            desc=fname,
            total=total,
            unit='iB',
            unit_scale=True,
            unit_divisor=1024,
        ) as bar:
            for data in resp.iter_content(chunk_size=1024):
                size = file.write(data)
                bar.update(size)

In [4]:
# Name of the dataframe file, shared by the remote URL and the local copy.
h5_name = 'monitoring_stats_uwbackup_2010-2022_mwcs_onlyacceptratio.h5'

# download the data from dasway
url = 'https://dasway.ess.washington.edu/shared/kokubo/parkfield_selectivestack_acceptanceratio/' + h5_name
foname = './' + h5_name
download(url, foname)

./monitoring_stats_uwbackup_2010-2022_mwcs_onlyacceptratio.h5: 100%|█| 43.6M/43.


In [5]:
# Load the downloaded acceptance-ratio table into a pandas dataframe.
h5_path = "./monitoring_stats_uwbackup_2010-2022_mwcs_onlyacceptratio.h5"
df_origin = pd.read_hdf(h5_path)


In [6]:
# # # Extract the accept ratio from the data csv
# # We extract the accept ratio from the data sheet to reduce the csv file size. We already converted from the csv to the h5 dataframe.

# root_csv = "./"
# csv_stats_list = [root_csv + "/monitoring_stats_uwbackup_2010-2022_mwcs_acceptratio.csv"]

# df_origin=pd.read_csv(csv_stats, dtype=str, comment='#')

# df_origin_trimmed = df_origin.drop(columns=['dvv_mwcs', 'dvv0_mwcs', 'dvv_err_mwcs', 'dvv0_err_mwcs', ])

# df_origin_trimmed.to_csv("./monitoring_stats_uwbackup_2010-2022_mwcs_onlyacceptratio.csv")

# df_origin_trimmed.to_hdf('monitoring_stats_uwbackup_2010-2022_mwcs_onlyacceptratio.h5', key='df', mode='w') 

In [7]:
# Preview the first rows of the loaded dataframe.
df_origin.head()

Unnamed: 0,date,stationpair,networks,components,freqband,acceptratio
0,2002-05-16T00:00:00.0,BP.CCRB-BP.CCRB,BP-BP,11,0.2-0.5,1.0
1,2002-05-16T00:00:00.0,BP.CCRB-BP.CCRB,BP-BP,11,0.5-0.9,1.0
2,2002-05-16T00:00:00.0,BP.CCRB-BP.CCRB,BP-BP,11,0.9-1.2,1.0
3,2002-05-16T00:00:00.0,BP.CCRB-BP.CCRB,BP-BP,11,1.2-2.0,1.0
4,2002-05-31T00:00:00.0,BP.CCRB-BP.CCRB,BP-BP,11,0.2-0.5,1.0


In [8]:
# Collect the distinct frequency bands found in the table.
# The position of a band in this array is used as its frequency id (freqid).
freqbands = np.unique(df_origin["freqband"])
freqbands

array(['0.2-0.5', '0.5-0.9', '0.9-1.2', '1.2-2.0'], dtype=object)

In [9]:
# Convert the date strings to UNIX time in seconds (float).
# The epoch difference divided by one second replaces `.view(int) / 1e9`:
# Series.view is deprecated, the width of `int` is platform dependent, and
# the 1e9 factor silently assumes nanosecond resolution of the datetimes.
date_parsed = pd.to_datetime(df_origin['date'], format="%Y-%m-%dT%H:%M:%S.%f")
df_origin['t'] = (date_parsed - pd.Timestamp("1970-01-01")) / pd.Timedelta(seconds=1)


In [10]:
# Reshape the long table into a wide one: one row per time stamp `t` and one
# column per (freqband, stationpair, components) combination, holding the
# acceptance ratio.
pivot_columns = ['freqband', 'stationpair', "components"]
df_acceptratio = df_origin.pivot(
    index='t',
    columns=pivot_columns,
    values=['acceptratio'],
)

In [11]:
# Preview the first rows of the pivoted acceptance-ratio table.
df_acceptratio.head()

Unnamed: 0_level_0,acceptratio,acceptratio,acceptratio,acceptratio,acceptratio,acceptratio,acceptratio,acceptratio,acceptratio,acceptratio,acceptratio,acceptratio,acceptratio,acceptratio,acceptratio,acceptratio,acceptratio,acceptratio,acceptratio,acceptratio,acceptratio
freqband,0.2-0.5,0.5-0.9,0.9-1.2,1.2-2.0,0.2-0.5,0.5-0.9,0.9-1.2,1.2-2.0,0.2-0.5,0.5-0.9,...,0.9-1.2,1.2-2.0,0.2-0.5,0.5-0.9,0.9-1.2,1.2-2.0,0.2-0.5,0.5-0.9,0.9-1.2,1.2-2.0
stationpair,BP.CCRB-BP.CCRB,BP.CCRB-BP.CCRB,BP.CCRB-BP.CCRB,BP.CCRB-BP.CCRB,BP.CCRB-BP.CCRB,BP.CCRB-BP.CCRB,BP.CCRB-BP.CCRB,BP.CCRB-BP.CCRB,BP.CCRB-BP.CCRB,BP.CCRB-BP.CCRB,...,BP.VCAB-BP.VCAB,BP.VCAB-BP.VCAB,BP.VCAB-BP.VCAB,BP.VCAB-BP.VCAB,BP.VCAB-BP.VCAB,BP.VCAB-BP.VCAB,BP.VCAB-BP.VCAB,BP.VCAB-BP.VCAB,BP.VCAB-BP.VCAB,BP.VCAB-BP.VCAB
components,11,11,11,11,12,12,12,12,13,13,...,31,31,32,32,32,32,33,33,33,33
t,Unnamed: 1_level_4,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4,Unnamed: 17_level_4,Unnamed: 18_level_4,Unnamed: 19_level_4,Unnamed: 20_level_4,Unnamed: 21_level_4
1011139000.0,,,,,,,,,,,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1012435000.0,,,,,,,,,,,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1013731000.0,,,,,,,,,,,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1015027000.0,,,,,,,,,,,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1016323000.0,,,,,,,,,,,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


## Plot acceptance ratio of selective stack

In [12]:
# Colors from the seaborn "colorblind" palette as an array, indexed per frequency band when plotting.
cpalette_freq = np.asarray(sns.color_palette("colorblind"))


In [13]:
# Histogram bin edges in percent: 5%-wide bins centered on 0, 5, ..., 100,
# i.e. 22 edges running from -2.5 to 102.5.
bins = np.linspace(-2.5, 102.5, 22)
bins

array([ -2.5,   2.5,   7.5,  12.5,  17.5,  22.5,  27.5,  32.5,  37.5,
        42.5,  47.5,  52.5,  57.5,  62.5,  67.5,  72.5,  77.5,  82.5,
        87.5,  92.5,  97.5, 102.5])

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(6, 4.5))

# Acceptance ratios [%] below this value are counted as stacks in which at
# least one CF was rejected.
full_acceptance_threshold = 99

# One entry per frequency band, in the order of `freqbands`.
# NOTE: despite its name, `average_rejection_all` stores the mean *acceptance*
# ratio [%] of the stacks with rejections; the names are kept because the
# following cells display these lists.
rejection_rate_all = []
average_rejection_all = []

for i, freqkey in enumerate(freqbands):

    # select all the columns (station pairs and components) of this frequency band
    is_band = df_acceptratio.columns.get_level_values("freqband") == freqkey
    df_acceptratio_freq = df_acceptratio.loc[:, is_band]
    acceptratio_arr = df_acceptratio_freq.to_numpy().flatten().astype(float) * 100 # scale to percentage
    acceptratio_arr = acceptratio_arr[~np.isnan(acceptratio_arr)] # remove nan

    if acceptratio_arr.size == 0:
        # No data for this band: avoid the division by zero below and keep the
        # output lists aligned with `freqbands`.
        rejection_rate_all.append(np.nan)
        average_rejection_all.append(np.nan)
        continue

    # equal weights summing to one, so that the bar heights are fractions of the samples
    weights = np.ones_like(acceptratio_arr) / len(acceptratio_arr)

    ax.hist(acceptratio_arr, bins=bins, range=(0, 100), density=False, weights=weights,
           ec="k", fc=cpalette_freq[i], alpha=0.2, label=f"{freqkey}Hz")

    # compute the fraction of CFs with at least one rejection
    has_rejection = acceptratio_arr < full_acceptance_threshold
    rejection_rate_all.append(np.count_nonzero(has_rejection) / acceptratio_arr.size)

    # compute the mean acceptance ratio of the cases with rejection
    acceptratio_arr_wrejection = acceptratio_arr[has_rejection]
    if acceptratio_arr_wrejection.size == 0:
        # nothing was rejected in this band: there is no mean to compute or draw
        average_rejection_all.append(np.nan)
        continue
    average_rejection = np.mean(acceptratio_arr_wrejection)
    average_rejection_all.append(average_rejection)
    ax.axvline(average_rejection, c=cpalette_freq[i], ls="--")

# annotate
ax.text(15, 0.35, "Mean acceptance ratio\nwithout the cases of 100%")

ax.legend(loc=0)
ax.set_xlabel("Acceptance ratio of selective stack [%]")
ax.set_ylabel("Frequency")
ax.set_xlim([-7.5, 107.5])
ax.set_ylim([0, 0.8])
plt.tight_layout()

# os.path.join avoids the doubled separator of string concatenation
foname = os.path.join(output_imgdir, "selectivestack_acceptratio.jpg")
plt.savefig(foname, dpi=150)

# NOTE(review): the bars use alpha=0.2; check that the EPS output renders the
# overlapping histograms as intended.
foname = os.path.join(output_imgdir, "selectivestack_acceptratio.eps")
plt.savefig(foname)

In [15]:
# Per frequency band (in the order of freqbands): fraction of the samples whose acceptance ratio is below 99%.
rejection_rate_all

[0.3335481201878897,
 0.31584318924666255,
 0.2888936969541999,
 0.30349465292694916]

In [16]:
# Per frequency band (in the order of freqbands): mean acceptance ratio [%] of the samples below 99%.
average_rejection_all

[61.579300064340075, 60.14367722013328, 60.74337364333606, 61.38320448176379]

# Summary

We evaluated the acceptance ratio of the selective stack, defined as (number of daily CFs satisfying the CC threshold) / (number of available daily CFs). About 70% of the monthly stacks accept all of the daily CFs. The remaining ~30% of the stacks have at least one daily CF rejected by the threshold, with a mean acceptance ratio of about 60%. The threshold thus helps stabilize a part of the monthly stacks.