In [1]:
import pandas as pd
import numpy as np
from scipy.stats import pearsonr

In [3]:
# Read the catalogue CSV and turn every cell that is exactly '-' into NaN so
# that the placeholder is treated as a missing value from here on.
cat_sse_plusTranch = pd.read_csv('cat_sse_plusTranch.csv').replace('-', np.nan)

# Event-side columns: these become the rows of the correlation tables that
# are built from df1 further down in this cell.
df1 = cat_sse_plusTranch[
    ['depth', 'mag', 'strike', 'dip', 'length', 'width', 'slip', 'duration']
]

# Subduction-side columns to correlate against the event columns in df1.
# The "(miss N%)" tags are carried over from the original notes; presumably
# they give the share of missing values in that column (verify against data).
col_to_corr = [
    'A',         # age (miss 7%)
    'alphaS',    # shallow slab dip measured between 0 and 125 km depth (miss 15%)
    'ZT',        # trench depth (miss 0)
    'Tchannel',  # subduction channel sediment thickness, potentially subject
                 # to large variations (miss 50%)
    'Phi',       # thermal parameter: product of A with the vertical component
                 # of the trench-normal subduction velocity
    'M56_vs',    # subduction velocity, accounting for upper plate deformation
    'M56_vc',    # convergence velocity
    'Vup1',      # upper plate velocity (miss 0)
    'Vsub1',     # subducting plate velocity (miss 0)
    'Vt1',       # trench velocity (miss 0)
]

df2 = cat_sse_plusTranch[col_to_corr]

# Numeric views of both column sets. A column that still holds text (for
# example because it contained '-' placeholders when the CSV was parsed) would
# make np.isnan and pearsonr fail, so anything that cannot be parsed as a
# number is coerced to NaN and then excluded by the per-pair mask below.
df1_num = df1.apply(pd.to_numeric, errors='coerce')
df2_num = df2.apply(pd.to_numeric, errors='coerce')

# All pairwise Pearson correlations: rows are the df1 columns, columns are the
# df2 columns. The labels are taken from the frames themselves so the tables
# cannot drift out of sync with the column selections. Both tables start as
# float NaN; a pair that cannot be evaluated simply stays NaN.
results_corr = pd.DataFrame(index=df1.columns, columns=df2.columns, dtype=float)
results_pv = pd.DataFrame(index=df1.columns, columns=df2.columns, dtype=float)

for col1 in df1.columns:
    for col2 in df2.columns:
        s1 = df1_num[col1]
        s2 = df2_num[col2]
        # Pairwise deletion: keep only rows where both variables are present.
        mask = s1.notna() & s2.notna()
        s1 = s1[mask]
        s2 = s2[mask]
        # pearsonr raises when given fewer than two paired observations;
        # leave such pairs as NaN instead of aborting the whole table.
        if len(s1) < 2:
            continue
        corr, p_value = pearsonr(s1, s2)
        results_corr.loc[col1, col2] = corr
        results_pv.loc[col1, col2] = p_value

# Long format: one entry per (df1 column, df2 column) pair.
stacked_corr = results_corr.stack().astype(float)
stacked_pv = results_pv.stack().astype(float)

# Keep only pairs whose correlation is stronger than the threshold in either
# direction (strictly greater in absolute value).
corr_threshold = 0.2
mask = stacked_corr.abs() > corr_threshold
filtered_corr = stacked_corr.loc[mask]
filtered_pv = stacked_pv.loc[mask]

# Correlation and p-value side by side, strongest positive correlation first.
filtered_df = pd.DataFrame({'correlation': filtered_corr, 'p_value': filtered_pv})
filtered_df = filtered_df.sort_values(by='correlation', ascending=False)

# Show result
print(filtered_df)

                   correlation       p_value
strike   Vt1          0.461496  8.311358e-36
slip     A            0.382602  1.047341e-23
         Vsub1        0.343661  5.650082e-21
         M56_vc       0.337703  2.888397e-20
mag      M56_vs       0.337042  1.981712e-26
dip      alphaS       0.320513  1.172271e-15
slip     ZT           0.311661  2.403623e-17
mag      M56_vc       0.298410  8.301548e-21
slip     M56_vs       0.293245  1.899105e-15
         Tchannel     0.269041  1.105027e-08
duration Vup1         0.257701  4.169033e-11
mag      Phi          0.244900  1.488326e-06
         Vsub1        0.217567  1.518467e-11
         Tchannel     0.208747  5.022367e-06
         Vup1         0.200505  5.449826e-10
depth    ZT          -0.204610  2.368097e-10
length   A           -0.221683  5.578616e-08
strike   A           -0.274788  1.201622e-11
depth    A           -0.288044  5.230403e-18
         alphaS      -0.307598  4.659722e-19
         Phi         -0.315259  3.818321e-10
strike   P