# Plot dv/v measurement from TACC Frontera case study
2023.09.01 Kurama Okubo

This notebook plots the dv/v time history of the case study run on TACC Frontera in June 2021.

In [None]:
import datetime
import os

import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as dates
import matplotlib.dates as mdates
from mpl_toolkits.axes_grid1.inset_locator import inset_axes

%matplotlib inline
import seaborn as sns 
# import importParaviewColormap
import numpy as np
import pandas as pd
import h5py

import shutil
from tqdm import tqdm

import matplotlib as mpl

import time

os.environ['TZ'] = 'GMT' # change time zone to avoid confusion in unix_tvec conversion
# Assigning the environment variable alone is not guaranteed to change the
# time zone used by the local-time conversions of an already running
# interpreter (e.g. datetime.fromtimestamp); time.tzset() re-reads TZ.
if hasattr(time, "tzset"): # tzset is only available on Unix
    time.tzset()

# Global matplotlib style shared by every figure of this notebook.
# plt.rcParams["font.sans-serif"] = "DejaVu Sans, Arial, Helvetica, Lucida Grande, Verdana, Geneva, Lucid, Avant Garde, sans-serif"
plt.rcParams.update({
    # fonts
    "font.family": 'Arial',
    "font.size": 12,
    # x ticks: drawn inside the axes, with minor ticks shown
    "xtick.direction": "in",
    "xtick.major.size": 4.75,
    "xtick.major.width": 0.75,
    "xtick.minor.size": 3,
    "xtick.minor.width": 0.4,
    "xtick.minor.visible": True,
    # y ticks: same settings as the x ticks
    "ytick.direction": "in",
    "ytick.major.size": 4.75,
    "ytick.major.width": 0.75,
    "ytick.minor.size": 3,
    "ytick.minor.width": 0.4,
    "ytick.minor.visible": True,
    # saved figures keep an opaque background
    "savefig.transparent": False,
    # frame line width
    'axes.linewidth': 0.75,
})

In [None]:
# Master dv/v tables (one per dv/v method) that the TACC case studies are compared against.
root_csv_master = "../../../Post/Modelfit/plotcsv_masterdata"
csv_stats_master_list = [
    f"{root_csv_master}/dvvdata_all_{method}.csv"
    for method in ("stretching", "mwcs")
]

# Convert dv/v data
We pivot each dv/v data sheet into a wide table so that it can be plotted.

In [None]:
# Monitoring-stats csv files of the TACC case studies; the two-digit prefix is the case id.
root_csv_TACC = "../data/monitoring_stats_TACC/"
_tacc_case_files = (
    "02_monitoring_stats_raw_2010-2020_stretching_practitionercorrected.csv",
    "03_monitoring_stats_raw_2010-2020_mwcs.csv",
    "04_monitoring_stats_normalized_2010-2020_stretching_practitionercorrected.csv",
    "05_monitoring_stats_normalized_2010-2020_mwcs.csv",
    "06_monitoring_stats_raw_2006-2016_stretching_practitionercorrected.csv",
    "07_monitoring_stats_raw_2007-2010_stretching_practitionercorrected.csv",
    "08_monitoring_stats_raw_2017-2020_stretching_practitionercorrected.csv",
    "09_monitoring_stats_raw_withoutremovalEQ_2010-2020_stretching_practitionercorrected.csv",
    "10_monitoring_stats_raw_robust_2010-2020_stretching_practitionercorrected.csv",
    # "11_monitoring_stats_raw_compute_codaQ.csv",  # coda Q case is not processed
)
csv_stats_TACC_list = [root_csv_TACC + fname for fname in _tacc_case_files]
len(csv_stats_TACC_list)

In [None]:
# Parameters shared by the conversion and plotting cells.
# See also Post/ModelFit/code/plotfigure_convert_dvvdata.ipynb
starttime = datetime.datetime(2002, 1, 1) # start of the analysed period (also the left x-limit of the plots)
endtime = datetime.datetime(2022, 6, 1) # end of the analysed period (also the right x-limit of the plots)

cc_time_unit = 86400 # short-stacking time unit [s]
averagestack_factor = 30 # length of time bin to compute mean and std, in units of cc_time_unit
averagestack_step = 15 # step between successive time bins, in units of cc_time_unit

tSS = datetime.datetime(2003, 12, 22) # date of San Simeon earthquake
# Dates marked with vertical lines in the plots; the first one is tSS itself.
vlines = [tSS, datetime.datetime(2004, 9, 28)]

cc_threshold = 0.7 # threshold of correlation coefficient (stretching); dv/v below it is discarded
err_mwcs_eps = 2e-4 # threshold on the `err` column (mwcs); dv/v above it is discarded

output_datadir = "../plotcsv_TACC_masterdata"
output_imgdir = "../figure"

# exist_ok=True avoids the check-then-create race of `if not os.path.exists(...)`.
os.makedirs(output_datadir, exist_ok=True)
os.makedirs(output_imgdir, exist_ok=True)

In [None]:
# Convert every TACC monitoring-stats csv into a pivoted table and write it to
# `output_datadir`/dvvdata_all_<casename>.csv:
#   rows    : `t`, the epoch time in seconds parsed from the `date` column
#   columns : (value, freqband, stationpair, components) with value in {dvv, cc_dvv, err}
# A case whose output file already exists is not recomputed.

# Uniform time vector of the stacking windows. It does not depend on the input
# file, so it is built once here; the pivot below does not use it.
st_center = (averagestack_factor*cc_time_unit/86400)/2
date_range_st = starttime + datetime.timedelta(days=st_center) # day
datefreq = '%dD'%(averagestack_step*cc_time_unit/86400)
uniformdates = pd.date_range(start=date_range_st, end=endtime, freq=datefreq)
uniform_tvec_date = uniformdates.date
uniform_tvec = [datetime.datetime.fromordinal(x.toordinal()) for x in uniform_tvec_date] # convert from date to datetime
uniform_timestamp = np.array([datetime.datetime.timestamp(x) for x in uniform_tvec])

for csv_stats in csv_stats_TACC_list:

    casename = os.path.basename(csv_stats.split(".csv")[0])
    print(f"process {casename}")

    foname = output_datadir+f"/dvvdata_all_{casename}.csv"
    if os.path.isfile(foname):
        print(f"{foname} exists. skipping.")
        continue

    # The dv/v method is encoded in the file name.
    if "stretching" in csv_stats:
        dvvmethod = "stretching"
    elif "mwcs" in csv_stats:
        dvvmethod = "mwcs"
    else:
        print("unknown dvvmethod. skipping")
        continue

    print(dvvmethod)

    # Everything is read as text; the numeric columns are converted explicitly below.
    df_origin = pd.read_csv(csv_stats, dtype=str, comment='#')

    # scan frequency band (kept for inspection; not used by the conversion)
    freqbands = np.unique(df_origin.freqband)

    # Epoch seconds of each measurement. The subtraction/division form does not
    # depend on the platform integer size or on the datetime64 resolution,
    # unlike reinterpreting the values with Series.view(int).
    date_parsed = pd.to_datetime(df_origin['date'], format="%Y-%m-%dT%H:%M:%S.%f")
    df_origin['t'] = (date_parsed - pd.Timestamp("1970-01-01")) / pd.Timedelta(seconds=1)

    if dvvmethod == "stretching":
        df_origin['cc_dvv'] = df_origin['cc_ts'].astype(float)
        df_origin['dvv'] = df_origin['dvv_ts'].astype(float)
        df_origin['err'] = df_origin['err_ts'].astype(float)

    elif dvvmethod == "mwcs":
        # We chose dvv_mwcs here, but you can also use dvv0_mwcs, which impose crossing at zero.
        df_origin['dvv'] = (-1) * 100 * df_origin['dvv_mwcs'].astype(float) # convert the unit to [%]
        df_origin['err'] = 100 * df_origin['dvv_err_mwcs'].astype(float)
        # No correlation coefficient is taken from the mwcs table: the column is all NaN.
        # It is assigned directly because an in-place replace on the selected
        # column does not propagate to df_origin under pandas Copy-on-Write.
        df_origin['cc_dvv'] = np.nan
    # elif dvvmethod=="codaQ": # We skip the result of coda Q for the moment.

    df_pivoted = df_origin.pivot(index='t', columns=['freqband', 'stationpair', "components"], values=['dvv', 'cc_dvv', 'err'])

    # Write the pivoted table to the csv file
    df_pivoted.to_csv(foname)

# Plot the case study

In [None]:
# Preview of the seaborn "colorblind" palette; the value of this bare expression is the cell output.
sns.color_palette("colorblind")

In [None]:
# Colors of the "colorblind" palette as an array, indexed by row when drawing the dv/v curves.
cpalette_dvv = np.asarray(sns.color_palette("colorblind"))
cpalette_dvv

In [None]:
# Read uw backup cases: the master dv/v tables (stretching, mwcs) with 4 header rows and `t` as index.
_master_read_opts = dict(header=[0, 1, 2, 3], index_col=0)
df_stretching = pd.read_csv(csv_stats_master_list[0], **_master_read_opts)
df_mwcs = pd.read_csv(csv_stats_master_list[1], **_master_read_opts)

# Time axis of the UW curves, taken from the mwcs table.
# fromtimestamp() converts with the process-local time zone.
_epoch_seconds_uw = df_mwcs.index.get_level_values(0)
uniform_tvec_uw = list(map(datetime.datetime.fromtimestamp, _epoch_seconds_uw))

In [None]:
# Frequency bands to plot, one panel per band; the panel titles append "Hz" to each key.
freqbands = ["0.2-0.5", "0.5-0.9", "0.9-1.2", "1.2-2.0"]

In [None]:
# Read TACC case study
# For every TACC case, draw one figure with one panel per frequency band that
# compares the median and interquartile range of dv/v of the UW stretching, the
# UW mwcs and the TACC case, and save it to `output_imgdir`.


def _select_band(df, quantity, freqkey):
    """Return the columns of `df[quantity]` whose "freqband" level equals `freqkey`."""
    df_quantity = df.loc[:, quantity]
    in_band = df_quantity.columns.get_level_values("freqband") == freqkey
    return df_quantity.loc[:, in_band]


def _quartile_stats(df_dvv):
    """Return per-time-window statistics of `df_dvv` computed across its columns.

    The columns of the result are 0.25, 0.5, 0.75 (quartiles), "mean", "std"
    and "count" (number of non-NaN values in each time window).
    """
    stats = df_dvv.quantile([0.25, 0.5, 0.75], axis=1).T # 1st, 2nd, and 3rd quartiles
    stats.loc[:, "mean"] = df_dvv.mean(skipna=True, axis=1)
    stats.loc[:, "std"] = df_dvv.std(skipna=True, axis=1)
    stats.loc[:, "count"] = df_dvv.count(axis=1)
    return stats


def _pre_event_offset(median_values, tvec, t_event):
    """Return the NaN-ignoring mean of `median_values` at the times of `tvec` before `t_event`.

    `tvec` must be the time vector of `median_values` itself. Returns 0.0 when
    there is no finite sample before `t_event`, so that the curve is left
    unshifted instead of being turned into NaN.
    """
    is_before = np.array(tvec, dtype=object) < t_event
    pre_event = np.asarray(median_values, dtype=float)[is_before]
    if pre_event.size == 0 or np.all(np.isnan(pre_event)):
        return 0.0
    return np.nanmean(pre_event)


def _plot_median_iqr(ax, tvec, stats, offset, color, label):
    """Plot the offset-removed median of `stats` and shade the band between its 1st and 3rd quartiles."""
    ax.plot(tvec, stats[0.5].values-offset, "-", c=color, label=label, zorder=6, lw=1.4) # median
    ybot = stats[0.25].values-offset # 1st quartile
    ytop = stats[0.75].values-offset # 3rd quartile
    ax.fill_between(tvec, ybot, ytop, color=color, alpha=0.1, label="", ec="k", lw=0.5, zorder=5)
    ax.plot(tvec, ybot, c=color, alpha=0.1, lw=0.7, ls="-")
    ax.plot(tvec, ytop, c=color, alpha=0.1, lw=0.7, ls="-")


dvvdata_IsOffsetRemoval = True # subtract the mean of the median dv/v before tSS from each curve
# period of unstable dv/v (shaded in every panel)
vlines_unstable = [datetime.datetime(2017, 1, 1), datetime.datetime(2017, 6, 1)]

for csv_stats_TACC in csv_stats_TACC_list:

    casename = os.path.basename(csv_stats_TACC.split(".csv")[0])
    caseid = casename[0:2]
    finame = output_datadir+f"/dvvdata_all_{casename}.csv"
    print(f"plot {casename}")

    if "stretching" in csv_stats_TACC:
        dvvmethod = "stretching"
    elif "mwcs" in csv_stats_TACC:
        dvvmethod = "mwcs"
    else:
        # Without skipping, the method of the previous case would silently be reused.
        print("unknown dvvmethod. skipping")
        continue

    df_tacc = pd.read_csv(finame, header=[0, 1, 2, 3], index_col=0)
    # fromtimestamp() converts with the process-local time zone.
    uniform_tvec_tacc = [datetime.datetime.fromtimestamp(x) for x in df_tacc.index.get_level_values(0)]

    fig, axs = plt.subplots(4, 1, figsize=(8, 10), sharex=False)

    for i, freqkey in enumerate(freqbands):
        ax = axs[i]

        #---Extract the data with the freqkey---#
        df_dvvfreq_stretching = _select_band(df_stretching, "dvv", freqkey)
        df_ccfreq_stretching = _select_band(df_stretching, "cc_dvv", freqkey)

        df_dvvfreq_mwcs = _select_band(df_mwcs, "dvv", freqkey)
        df_errfreq_mwcs = _select_band(df_mwcs, "err", freqkey)

        df_dvvfreq_tacc = _select_band(df_tacc, "dvv", freqkey)

        #---Threshold out the dvv with cc and err for the cases with stretching and mwcs, respectively---#
        df_dvvfreq_stretching = df_dvvfreq_stretching.mask(df_ccfreq_stretching < cc_threshold)
        df_dvvfreq_mwcs = df_dvvfreq_mwcs.mask(df_errfreq_mwcs > err_mwcs_eps)

        if dvvmethod == "stretching":
            df_ccfreq_tacc = _select_band(df_tacc, "cc_dvv", freqkey)
            df_dvvfreq_tacc = df_dvvfreq_tacc.mask(df_ccfreq_tacc < cc_threshold)
        else:
            df_errfreq_tacc = _select_band(df_tacc, "err", freqkey)
            df_dvvfreq_tacc = df_dvvfreq_tacc.mask(df_errfreq_tacc > err_mwcs_eps)

        #---Compute stats---#
        df_dvvfreq_stretching_stats = _quartile_stats(df_dvvfreq_stretching)
        df_dvvfreq_mwcs_stats = _quartile_stats(df_dvvfreq_mwcs)
        df_dvvfreq_tacc_stats = _quartile_stats(df_dvvfreq_tacc)

        #---plot dvv---#
        if dvvdata_IsOffsetRemoval:
            # offset is evaluated with the mean of median dvv before tSS; every
            # data set is selected with its own time vector so that the TACC
            # table may have a different time axis from the UW tables.
            # NOTE(review): uniform_tvec_uw is assumed to be the time axis of
            # both UW tables - confirm they share the same index.
            offset_data1 = _pre_event_offset(df_dvvfreq_stretching_stats[0.5].values, uniform_tvec_uw, tSS)
            offset_data2 = _pre_event_offset(df_dvvfreq_mwcs_stats[0.5].values, uniform_tvec_uw, tSS)
            offset_data3 = _pre_event_offset(df_dvvfreq_tacc_stats[0.5].values, uniform_tvec_tacc, tSS)
        else:
            offset_data1 = 0
            offset_data2 = 0
            offset_data3 = 0

        # Plot stretching
        if not i == 0: # skip 0.2-0.5 Hz
            _plot_median_iqr(ax, uniform_tvec_uw, df_dvvfreq_stretching_stats, offset_data1,
                             cpalette_dvv[3], "stretching UW")

        # mwcs
        _plot_median_iqr(ax, uniform_tvec_uw, df_dvvfreq_mwcs_stats, offset_data2,
                         cpalette_dvv[0], "mwcs UW")

        # case study of tacc
        if dvvmethod == "stretching" and i == 0:
            print("stretching with 0.2-0.5Hz. skip")
        else:
            _plot_median_iqr(ax, uniform_tvec_tacc, df_dvvfreq_tacc_stats, offset_data3,
                             "k", f"{dvvmethod} TACC case{caseid}")

        # The legend is built once, after every labelled curve has been drawn.
        ax.legend(loc=4, labelcolor="k", fontsize=11)
        ax.axhline(0, c="k", lw=0.75, ls="--", zorder=-5)

        ax.set_xlim(starttime, endtime)
        ax.grid(True, axis="both", lw=0.5, c=[0.8, 0.8, 0.8], alpha=0.4, zorder=-20)

        # plot the date of San Simeon and Parkfield eq.
        for vline in vlines:
            ax.axvline(vline,
                       color='k',
                       linewidth=1.0,
                       linestyle='--',
                       zorder=2)

        ax.set_ylim(-0.25, 0.15)
        ax.set_yticks(np.linspace(-0.20, 0.15, 8))
        ax.set_ylabel("dv/v [%]")
        # period of unstable dv/v
        ax.fill_betweenx([-0.3, 0.3], vlines_unstable[0], vlines_unstable[1], fc="gray", alpha=0.7, ls="-", zorder=-10)

        ax.set_title(f'{freqkey}Hz')

    plt.tight_layout()
    fig.suptitle(f'{casename}', x=0.53)
    fig.tight_layout(rect=[0,0,1,1])

    plt.subplots_adjust(hspace=0.35)
    fig.align_ylabels()

    foname = (output_imgdir+f"/dvv_TACC_casestudy_comparison_{casename}.png")
    plt.savefig(foname, dpi=80, bbox_inches='tight')
    # NOTE(review): the figure is not closed here (presumably so that it is
    # displayed inline); add plt.close(fig) when running this as a script.