# Plot stats of dv/v model parameters with MCMC.

2023.4.4 Kurama Okubo

Plot master figures of MCMC inversion.

- 2023/04/19 update: debugged `dvvmodel` and now use the model parameters saved in the pickle files. Corrected the number of data points used in AIC and BIC to N = Ntot - Nnan.
- Unified the offset of the observations with that of the model to take into account the model parameter 'a0'.
- Unified the order of the dv/v plots with the mwcs wlin case.

In [28]:
# %load_ext autoreload
# %autoreload 2

import datetime
import os
import time

import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as dates
import matplotlib.dates as mdates
%matplotlib inline
from IPython.display import display, Math

import numpy as np
import pandas as pd
import h5py
import pickle

import shutil
from tqdm import tqdm
import glob

import matplotlib as mpl

import emcee # MCMC sampler
import corner

# import mcmc functions
from scattermatrix import *
from MCMC_func import *

# Global matplotlib style for every figure drawn below: Arial 12 pt text.
plt.rcParams["font.family"] = 'Arial'
# plt.rcParams["font.sans-serif"] = "DejaVu Sans, Arial, Helvetica, Lucida Grande, Verdana, Geneva, Lucid, Avant Garde, sans-serif"
plt.rcParams["font.size"] = 12

# The x and y axes use the same tick style (inward ticks, visible minor
# ticks), so apply one table of settings to both axis prefixes.
_tick_style = {
    "direction": "in",
    "major.size": 5,
    "major.width": 0.5,
    "minor.size": 2,
    "minor.width": 1,
    "minor.visible": True,
}
for _axis in ("xtick", "ytick"):
    for _name, _value in _tick_style.items():
        plt.rcParams[f"{_axis}.{_name}"] = _value

# Plot all dv/v fitting time history

In [29]:
# Run configuration: which dv/v method, model case and frequency band to
# summarize, plus the input/output locations.
dvvmethod = "stretching"
modelcase = "wlin"

modelparam_indir = "../modelparam_data"
freqband = "0.9-1.2"

# Thresholds on the residual statistics (mean absolute residual and residual variance).
residu_absmean_thresh = 0.05
residu_var_thresh = 0.002

outputdatadir = "../data_stats"
output_imgdir = "../figure_stats"

# exist_ok=True creates the directory only when it is missing, without the
# race-prone "check, then create" sequence.
os.makedirs(outputdatadir, exist_ok=True)
os.makedirs(output_imgdir, exist_ok=True)


In [30]:
# Search the model-parameter pickle files matching the selected method,
# frequency band and model case; the result is sorted by file name.
fi_list = np.sort(glob.glob(modelparam_indir+f"/MCMC_modelparam_*_{dvvmethod}_{freqband}Hz_{modelcase}.pickle"))

# Fail here with a clear message: the following cells index fi_list[0] and
# would otherwise stop with an uninformative IndexError.
if len(fi_list) == 0:
    raise FileNotFoundError(
        f"no MCMC_modelparam_*_{dvvmethod}_{freqband}Hz_{modelcase}.pickle file found in {modelparam_indir}"
    )

In [31]:
# meta_dict holds run-wide metadata; dvv_dict is filled per station pair later.
meta_dict = dict()
dvv_dict = dict()

# store meta data
meta_dict["dvvmethod"] = dvvmethod
meta_dict["modelcase"] = modelcase
meta_dict["freqband"] = freqband

# Number of model parameters for each supported model case.
if meta_dict["modelcase"] == "base":
    meta_dict["ndim"] = 12
elif meta_dict["modelcase"] == "wlin":
    meta_dict["ndim"] = 13
else:
    # Continuing without "ndim" would only fail later with a KeyError when
    # AIC/BIC are computed, so stop here with an explicit error.
    raise ValueError(f"model case not known: {meta_dict['modelcase']!r}")

meta_dict["modelkeys"] = get_keys(meta_dict["modelcase"])

# Read the time vectors and event times from the first file of the list.
# NOTE(review): presumably these entries are identical across all station
# pairs, since only the first file is read -- confirm.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
finame_meta = fi_list[0]
with open(finame_meta, 'rb') as fi:
    modelparam = pickle.load(fi)
    meta_dict["unix_tvec"] = modelparam["unix_tvec"]
    meta_dict["uniform_tvec"] = modelparam["uniform_tvec"]
    meta_dict["fitting_period_ind"] = modelparam["fitting_period_ind"]
    meta_dict["unix_tSS"] = modelparam["unix_tSS"]
    meta_dict["unix_tPF"] = modelparam["unix_tPF"]

# Show the stored keys as the notebook cell output.
meta_dict.keys()

dict_keys(['dvvmethod', 'modelcase', 'freqband', 'ndim', 'modelkeys', 'unix_tvec', 'uniform_tvec', 'fitting_period_ind', 'unix_tSS', 'unix_tPF'])

In [32]:
# Read the dv/v data of every station pair and compute the model time series
# from the maximum-likelihood model parameters.
stationpair_list = []

# Resolve the model case once, before the loop, so that an unknown case fails
# loudly instead of silently reusing `theta`/`model` from a previous iteration.
# `nfree` is the number of entries taken from the sampled parameter vector
# when a_{precip}, log10tmin1 and log10tmin2 are fixed.
if meta_dict["modelcase"] == "base":
    compute_model = model_base
    nfree = 9
elif meta_dict["modelcase"] == "wlin":
    compute_model = model_wlin
    nfree = 10
else:
    raise ValueError(f"model case not known: {meta_dict['modelcase']!r}")

for finame in fi_list:

    # The third "_"-separated token of the file name is the station pair.
    stationpair = os.path.basename(finame).split("_")[2]
    stationpair_list.append(stationpair)

    # NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
    with open(finame, 'rb') as fi:
        modelparam_st = pickle.load(fi)

    # Reassemble the maximum likelihood model parameters
    theta0 = modelparam_st["maxlnprob_theta"]
    if modelparam_st["fixparam01"]:
        # a_{precip}, log10tmin1 and log10tmin2 were fixed and are not part of
        # theta0: insert them back at positions 2, 6 and 9 of the full vector.
        theta = np.concatenate((theta0[0:2], [modelparam_st["a_{precip}_fixed"]], theta0[2:5], [modelparam_st["log10tmin1_fixed"]],
                          theta0[5:7], [modelparam_st["log10tmin2_fixed"]], theta0[7:nfree]), axis=None)
    else:
        theta = theta0

    dvv_dict[stationpair+"/dvv_data"] = modelparam_st["dvv_data"]
    dvv_dict[stationpair+"/err_data"] = modelparam_st["err_data"]
    dvv_dict[stationpair+"/maxlnprob_theta"] = theta

    # compute the model
    model = compute_model(theta, all=False, **modelparam_st)

    dvv_dict[stationpair+"/dvv_model"] = model

In [33]:
# plt.plot(modelparam_st["uniform_tvec"], modelparam_st["precip"], "r-" )
# plt.plot(modelparam_st["uniform_tvec"], modelparam_st["CAVG"], "b-" )

# Compute residuals, AIC and BIC

In [34]:
# Build a dataframe with one row per station pair holding the model
# parameters, AIC, BIC and residual statistics.
stats_columns = ["dvvmethod", "modelcase", "freqband",  'a0', 'p1', 'a_{precip}', 'p2', 't_{shiftdays}',
                'S1', 'log10tmin1', 'log10tmax1', 'S2', 'log10tmin2','log10tmax2', 'b_{lin}', 'logf', 'AIC', 'BIC', 'residu_absmean', 'residu_var']

# Rows are collected first and the frame is built once after the loop:
# concatenating onto an initially empty frame inside the loop is quadratic
# and relies on deprecated pandas handling of empty entries.
stats_rows = []
stats_index = []

for st in np.sort(stationpair_list):
    # st = stationpair_list[21]

    dvv_data = dvv_dict[st+"/dvv_data"]
    dvv_model = dvv_dict[st+"/dvv_model"]
    maxlnprob_theta = dvv_dict[st+"/maxlnprob_theta"]

    # NaN-aware statistics of the model-minus-data residual.
    residu = dvv_model-dvv_data
    residu_absmean = np.nanmean(np.abs(residu))
    residu_var = np.nanvar(residu)

    # compute AIC and BIC
    AIC = compute_AIC(dvv_data*1e-2, dvv_model*1e-2, meta_dict["ndim"]) # convert from % to nondim
    BIC = compute_BIC(dvv_data*1e-2, dvv_model*1e-2, meta_dict["ndim"]) # convert from % to nondim

    stdict = {"dvvmethod":dvvmethod, "modelcase":modelcase, "freqband":freqband, "AIC": AIC, "BIC": BIC,
              "residu_absmean": residu_absmean, "residu_var": residu_var}

    # store model parameters
    for i, key in enumerate(meta_dict["modelkeys"]):
        stdict[key] = maxlnprob_theta[i]

    stats_rows.append(stdict)
    stats_index.append(st)

    # plt.plot(dvv_data)
    # plt.plot(dvv_model)

df_dvvstats_all = pd.DataFrame(stats_rows, index=stats_index)

# Keep the documented column order; parameters absent for this model case
# become NaN columns, and any key not listed above is appended at the end.
extra_columns = [col for col in df_dvvstats_all.columns if col not in stats_columns]
df_dvvstats_all = df_dvvstats_all.reindex(columns=stats_columns + extra_columns)

In [35]:
# Show the assembled statistics table as the notebook cell output.
df_dvvstats_all

Unnamed: 0,dvvmethod,modelcase,freqband,a0,p1,a_{precip},p2,t_{shiftdays},S1,log10tmin1,log10tmax1,S2,log10tmin2,log10tmax2,b_{lin},logf,AIC,BIC,residu_absmean,residu_var
BP.CCRB-BP.CCRB,stretching,wlin,0.9-1.2,-0.050196,-0.001438,0.02426,0.001343,1.970812,0.011752,6.293,7.944015,0.024233,6.996,8.922855,1.2e-05,-4.044325,-7597.281362,-7543.774765,0.017204,0.000491
BP.CCRB-BP.EADB,stretching,wlin,0.9-1.2,-0.042284,-0.004332,0.02426,-0.000183,38.992332,0.01552,6.293,8.454239,0.045176,6.996,8.617927,1.1e-05,-4.199614,-7131.789862,-7078.66179,0.020481,0.000856
BP.CCRB-BP.FROB,stretching,wlin,0.9-1.2,-0.002084,-0.00333,0.02426,-0.000455,87.716885,0.006939,6.293,11.684897,0.038731,6.996,8.20789,1.1e-05,-7.32129,-5630.744642,-5580.628707,0.019857,0.000912
BP.CCRB-BP.JCSB,stretching,wlin,0.9-1.2,-0.005139,-0.007616,0.02426,-0.001434,89.68927,0.006426,6.293,10.136991,0.013955,6.996,9.10844,9e-06,-9.559013,-6119.589574,-6067.603398,0.034734,0.00237
BP.CCRB-BP.LCCB,stretching,wlin,0.9-1.2,-0.020279,-0.003624,0.02426,-0.00045,5.782466,0.008481,6.293,7.565929,0.01779,6.996,9.902057,1.3e-05,-4.255874,-7288.278229,-7235.150158,0.018931,0.000602
BP.CCRB-BP.MMNB,stretching,wlin,0.9-1.2,-0.034732,-0.003584,0.02426,-0.000724,0.549409,0.015478,6.293,8.647866,0.035215,6.996,8.624071,9e-06,-3.862281,-6883.447112,-6830.831274,0.020956,0.000801
BP.CCRB-BP.SCYB,stretching,wlin,0.9-1.2,-0.049128,-0.001308,0.02426,0.001106,44.032674,0.005336,6.293,8.433381,0.044076,6.996,8.4071,1e-05,-4.598888,-7534.431223,-7481.332731,0.013858,0.000331
BP.CCRB-BP.SMNB,stretching,wlin,0.9-1.2,-0.066075,-0.002519,0.02426,0.001101,0.924116,0.00789,6.293,7.692836,0.059297,6.996,8.0654,1.3e-05,-4.064846,-7060.528651,-7007.851492,0.017528,0.000573
BP.CCRB-BP.VCAB,stretching,wlin,0.9-1.2,-0.028684,-0.00294,0.02426,-0.000979,86.547785,0.004728,6.293,11.834803,0.082203,6.996,7.574138,1.2e-05,-3.377314,-6650.225962,-6597.306371,0.031819,0.002003
BP.EADB-BP.EADB,stretching,wlin,0.9-1.2,-0.039783,-0.002951,0.02426,-0.000563,54.279138,0.012663,6.293,8.571501,0.030789,6.996,8.930737,1.3e-05,-4.431401,-8319.352217,-8264.824947,0.015938,0.000401


In [36]:
# Write the statistics table to a CSV file in the output data directory; the
# file name encodes the dv/v method, frequency band and model case.
csv_name = "/MCMC_modelparam_all_{}_{}_{}.csv".format(dvvmethod, freqband, modelcase)
df_dvvstats_all.to_csv(outputdatadir+csv_name)