The following Python Jupyter notebook presents the workflow for the Correlation Coefficient analysis
as described in the Hypothesis Testing section of Chapter 4 (example for 500 simulations of
Earthquake Count).

The script calculates correlation coefficients (CC) between the vector of the observed seismicity
catalogue and the vectors of the perturbed catalogues and of the null/alternative hypothesis
catalogues. In the next step, the Kullback-Leibler divergence, Jensen-Shannon divergence, and
Kolmogorov-Smirnov statistic are calculated from the correlation coefficient distributions of the
perturbed catalogues and of the null, A, B and C hypotheses.
Input data include calculated values of Earthquake Count and Total Seismic Moment Release.

It can be run in Jupyter Notebook or JupyterLab (https://jupyter.org/).

In [60]:
# Load libraries
import numpy as np
import pandas as pd
import seaborn as sns
import string
from scipy import stats
from scipy.spatial.distance import jensenshannon as js
from scipy.stats import entropy
import glob

In [61]:
### Number of simulated catalogues (columns) analysed for every hypothesis
nsims = 500

### Input tables: Earthquake Count
### Observed and perturbed catalogues are read in full; the null-hypothesis table is
### reduced to its simulation columns (names matching 'sim_') straight away.
real_vals = pd.read_csv('results/EQCountResults/EQCount_Obs.csv', engine='python')
dfValsPert = pd.read_csv('results/EQCountResults/EQCount_Pert.csv', engine='python')
simH0 = pd.read_csv(
    'results/EQCountResults/EQCount_testH0.csv', engine='python'
).filter(regex='sim_')

### Result files of the alternative hypotheses, in the order A, B, C
filesTests = [
    'results/EQCountResults/EQCount_testA.csv',
    'results/EQCountResults/EQCount_testB.csv',
    'results/EQCountResults/EQCount_testC.csv',
]

In [73]:
"""
### Total Seismic Moment Release
real_vals = pd.read_csv('results/M0CountResults/M0Count_Obs.csv', engine='python')
simH0 = pd.read_csv('results/M0CountResults/M0Count_testH0.csv', engine='python')
simH0 = simH0.filter(regex='sim_')
filesTests = ['results/M0CountResults/M0Count_testA.csv',
              'results/M0CountResults/M0Count_testB.csv',
              'results/M0CountResults/M0Count_testC.csv']
dfValsPert = pd.read_csv('results/M0CountResults/M0Count_Pert.csv', engine='python')
"""

"\n### Total Seismic Moment Release\nreal_vals = pd.read_csv('results/M0CountResults/M0Count_Obs.csv', engine='python')\nsimH0 = pd.read_csv('results/M0CountResults/M0Count_testH0.csv', engine='python')\nsimH0 = simH0.filter(regex='sim_')\nfilesTests = ['results/M0CountResults/M0Count_testA.csv',\n              'results/M0CountResults/M0Count_testB.csv',\n              'results/M0CountResults/M0Count_testC.csv']\ndfValsPert = pd.read_csv('results/M0CountResults/M0Count_Pert.csv', engine='python')\n"

In [74]:
### Correlation mode setting
### NOTE(review): not referenced by the cells visible here -- confirm it is still needed
correlation_mode = 'same'
### Container for the results, filled per catalogue set ('H0', 'A', 'B', 'C', 'Pert')
dRes = dict()

Correlation Analysis

In [77]:
### Pearson correlation of every simulated catalogue with the observed catalogue,
### followed by divergence statistics between the perturbed-catalogue CC values and
### the CC values of each hypothesis (H0, A, B, C).
###
### NOTE(review): scipy.stats.entropy and scipy.spatial.distance.jensenshannon are
### documented as operating on probability vectors (inputs are normalised to sum to 1),
### and jensenshannon returns the Jensen-Shannon *distance*, i.e. the square root of
### the divergence. Here both receive the raw, simulation-ordered CC values rather than
### binned distributions. The calls are kept exactly as they were so that previously
### reported numbers stay reproducible -- TODO confirm this is the intended definition
### of KLD / JSD.


def correlation_with_observed(simulated, observed, n_sims):
    """Correlate the first ``n_sims`` columns of ``simulated`` with ``observed``.

    Parameters
    ----------
    simulated : pandas.DataFrame
        Table holding one simulated catalogue per column.
    observed : array-like
        Observed values; must have as many entries as ``simulated`` has rows.
    n_sims : int
        Number of leading columns to analyse.

    Returns
    -------
    list
        Pearson correlation coefficient (``np.corrcoef``) for each analysed column.

    Raises
    ------
    ValueError
        If ``simulated`` has fewer than ``n_sims`` columns.
    """
    simValues = simulated.values
    if simValues.shape[1] < n_sims:
        raise ValueError(
            "requested {} simulations but only {} simulation columns are available".format(
                n_sims, simValues.shape[1]))
    return [np.corrcoef(simValues[:, col], observed)[0][1] for col in range(n_sims)]


def compare_with_perturbed(pert_cc, other_cc):
    """Return ``(KLD, JSD, KSstat)`` for perturbed CC values versus another CC list.

    KLD comes from ``scipy.stats.entropy(pert_cc, other_cc)``, JSD from
    ``scipy.spatial.distance.jensenshannon`` and KSstat is the statistic of the
    two-sided two-sample Kolmogorov-Smirnov test.
    """
    kld = entropy(pert_cc, other_cc)
    jsd = js(pert_cc, other_cc)
    ksStat = stats.ks_2samp(pert_cc, other_cc, alternative='two-sided')[0]
    return kld, jsd, ksStat


### Observed catalogue vector, shared by every comparison below
observedVector = real_vals.sim_0.values

### Null hypothesis catalogues analysis
CCH0List = correlation_with_observed(simH0, observedVector, nsims)

### Perturbed catalogues analysis (keep only the simulation columns)
dfValsPert = dfValsPert.filter(regex='sim_')
CCPertList = correlation_with_observed(dfValsPert, observedVector, nsims)

### Kullback-Leibler & Jensen-Shannon + Kolmogorov-Smirnov statistic calculation
### for the null hypothesis
KLDH0, JSDH0, KSStatH0 = compare_with_perturbed(CCPertList, CCH0List)

### Store the null hypothesis results
dRes['H0'] = {"CC": CCH0List,
              "JSD": JSDH0,
              "KLD": KLDH0,
              "KSstat": KSStatH0
             }

### Synthetic catalogues analysis
### Iterate through the A, B, C hypotheses (letters are paired with the files in order)
for filenr, file in zip(string.ascii_uppercase, filesTests):
    dfValsTest = pd.read_csv(file)
    dfValsTest = dfValsTest.filter(regex='sim_')
    CCSimList = correlation_with_observed(dfValsTest, observedVector, nsims)

    ### Kullback-Leibler & Jensen-Shannon + Kolmogorov-Smirnov statistic calculation
    ### for the alternative hypothesis
    KLD, JSD, KSStat = compare_with_perturbed(CCPertList, CCSimList)

    ### Store the results of this hypothesis
    dRes[filenr] = {"KLD": KLD,
                    "JSD": JSD,
                    "KSstat": KSStat,
                    "CC": CCSimList}

### The perturbed catalogues are the reference, so only their CC values are stored
dRes['Pert'] = {"CC": CCPertList}

In [78]:
### Arrange the results as a table with one row per catalogue set
dfResults = pd.DataFrame(dRes)
dfResultsABC = dfResults[['A', 'B', 'C', 'H0', 'Pert']].transpose()

### Summary statistics of the correlation coefficients of each catalogue set
### (np.std with its default settings, i.e. the population standard deviation)
meanColumn = 'Mean Pearson Correlation Coef.'
stdColumn = 'Mean Pearson Correlation Coef. std'
fillOrder = ('A', 'B', 'C', 'Pert', 'H0')

for label in fillOrder:
    dfResultsABC.loc[label, meanColumn] = np.mean(dRes[label]['CC'])
for label in fillOrder:
    dfResultsABC.loc[label, stdColumn] = np.std(dRes[label]['CC'])

### Keep only the scalar columns for display (the raw 'CC' lists are dropped)
dfResultsABCH0 = dfResultsABC[['KLD', 'JSD', 'KSstat', meanColumn, stdColumn]]



In [79]:
### Display the summary table: KLD, JSD, KS statistic, and the mean and standard
### deviation of the correlation coefficients for each catalogue set
dfResultsABCH0

Unnamed: 0,KLD,JSD,KSstat,Mean Pearson Correlation Coef.,Mean Pearson Correlation Coef. std
A,0.184418,0.2085,0.962,0.263922,0.166763
B,0.177624,0.203343,0.944,0.300858,0.178898
C,0.162241,0.195076,0.918,0.34157,0.19322
H0,0.316223,0.267511,0.976,0.171681,0.146112
Pert,,,,0.844249,0.05809
