### Code to calculate the partial correlation of temperature with ENSO, IOD and SAM indices 

This code uses data from the mooring 'NRSKAI', calculates the monthly temperature anomalies, and then computes the Pearson r correlations and the partial correlations between those anomalies and the ENSO (SOI), IOD and SAM indices.

In [1]:
import sys
import pandas as pd
sys.path.insert(0, "../sa_upwelling")   # Allows using the files in the Python package
import utils

from matplotlib import pyplot as plt
#import pingouin as pg
from scipy.stats import pearsonr
import numpy as np

In [2]:
# Load the hourly time-series data products through the project helper.
# `hourly_datasets` is keyed by mooring code (it is iterated with .keys() and
# indexed per mooring in the next cell); `hourly_files` is not used in this
# part of the notebook.
hourly_files, hourly_datasets = utils.load_data_products()

Loading local hourly-timeseries data for mooring 'NRSKAI'.
Loading local hourly-timeseries data for mooring 'SAM8SG'.
Loading local hourly-timeseries data for mooring 'SAM5CB'.
Loading local hourly-timeseries data for mooring 'SAM2CP'.
Loading local hourly-timeseries data for mooring 'SAM6IS'.
Loading local hourly-timeseries data for mooring 'SAM3MS'.
Loading local hourly-timeseries data for mooring 'SAM7DS'.


In [3]:
# Extract the temperature timeseries for every mooring. Each call is made with
# save=True (the helper reports the CSV it saved), and the returned DataFrame
# is kept in `temp_timeseries`, keyed by mooring code, for later cells.
temp_timeseries = {
    mooring: utils.extract_timeseries_df(hourly_datasets[mooring], save=True)
    for mooring in hourly_datasets.keys()
}

Saved timeseries to ../Datasets/NRSKAI_TEMP_101-111m.csv
Saved timeseries to ../Datasets/SAM8SG_TEMP_38-48m.csv
Saved timeseries to ../Datasets/SAM5CB_TEMP_90-100m.csv
Saved timeseries to ../Datasets/SAM2CP_TEMP_90-100m.csv
Saved timeseries to ../Datasets/SAM6IS_TEMP_73-83m.csv
Saved timeseries to ../Datasets/SAM3MS_TEMP_158-168m.csv
Saved timeseries to ../Datasets/SAM7DS_TEMP_512-522m.csv


In [4]:
# Read the NRSKAI mooring temperature record and resample it to monthly means
mor = pd.read_csv('../Datasets/NRSKAI_TEMP_101-111m.csv')
mor['TIME'] = pd.to_datetime(mor['TIME'])
mor.index = mor['TIME']

# `mor` still carries the datetime TIME column next to the index. Average the
# numeric columns only, selected explicitly, instead of relying on pandas to
# drop non-numeric columns during the aggregation: that implicit behaviour is
# deprecated, and with pandas >= 2.0 (numeric_only=False by default) the TIME
# column would be averaged and kept, breaking the anomaly calculation below.
mor_mon = mor.select_dtypes(include='number').resample('M').mean()

# DEPTH is not needed for the temperature analysis
mor_mon = mor_mon.drop(['DEPTH'], axis=1)
mor_mon

  mor_mon = mor.resample('M').mean()


Unnamed: 0_level_0,TEMP
TIME,Unnamed: 1_level_1
2008-08-31,14.256375
2008-09-30,14.363663
2008-10-31,14.873148
2008-11-30,15.532777
2008-12-31,16.153825
...,...
2023-01-31,14.030186
2023-02-28,14.010088
2023-03-31,15.036819
2023-04-30,17.123268


In [5]:
# Climatological mean for each calendar month (1-12) across all years
monthly_means = mor_mon.groupby(mor_mon.index.month).mean()

# Anomalies: subtract from every monthly value the climatological mean of its
# own calendar month. transform('mean') broadcasts each group's mean back onto
# the original rows by group label, so the result stays aligned even when some
# calendar months are absent from the record (a positional lookup such as
# `.iloc[month - 1]` is only correct when all 12 months are present).
anomalies = mor_mon - mor_mon.groupby(mor_mon.index.month).transform('mean')
anomalies

Unnamed: 0_level_0,TEMP
TIME,Unnamed: 1_level_1
2008-08-31,-0.515712
2008-09-30,-0.333575
2008-10-31,-0.145784
2008-11-30,0.424062
2008-12-31,1.009310
...,...
2023-01-31,-0.284630
2023-02-28,0.718094
2023-03-31,1.430747
2023-04-30,1.202743


In [6]:
# ENSO index (SOI): read the CSV, parse the 'date' column and use it as the index.
# NOTE(review): the recorded output shows a warning raised by the to_datetime
# call - presumably about date-format inference; confirm the CSV's date format
# and consider passing an explicit `format=`.
soi_raw = pd.read_csv('../Datasets/SOI_index.csv')
soi = soi_raw.assign(date=pd.to_datetime(soi_raw['date'])).set_index('date')

  soi['date'] = pd.to_datetime(soi['date'])


In [7]:
def _read_index_csv(csv_path):
    """Read a climate-index CSV and return it indexed by its parsed 'date' column."""
    frame = pd.read_csv(csv_path)
    frame['date'] = pd.to_datetime(frame['date'])
    return frame.set_index('date')


# IOD index
iod = _read_index_csv('../Datasets/iod_index.csv')

# SAM index
sam = _read_index_csv('../Datasets/SAM_index.csv')

sam

  iod['date'] = pd.to_datetime(iod['date'])
  sam['date'] = pd.to_datetime(sam['date'])


Unnamed: 0_level_0,sam_index
date,Unnamed: 1_level_1
1990-01-31,-0.23
1990-02-28,1.95
1990-03-31,0.80
1990-04-30,-3.34
1990-05-31,-3.35
...,...
2020-08-31,-2.20
2020-09-30,-0.25
2020-10-31,1.79
2020-11-30,1.14


In [8]:
# Trim every series to the period shared by the temperature anomalies and the
# SOI record, then place them side by side in a single DataFrame.
# Note: the window is derived from `anomalies` and `soi` only; months inside it
# that IOD or SAM do not cover appear as NaN in `new_df`.
start = max(anomalies.index.min(), soi.index.min())
end = min(anomalies.index.max(), soi.index.max())
window = slice(start, end)

anomalies_new, soi_new, iod_new, sam_new = (
    frame.loc[window] for frame in (anomalies, soi, iod, sam)
)

new_df = pd.concat([anomalies_new, soi_new, iod_new, sam_new], axis=1)

In [9]:
# Keep only the months for which TEMP and all three indices are available
df_cleaned = new_df.dropna()

# Correlation matrix of the four variables; this column order also fixes the
# row/column order of the matrices derived from it below
variables = ['soi_index', 'iod_index', 'sam_index', 'TEMP']
correlation_matrix = df_cleaned[variables].corr()

# Inverse of the correlation matrix (precision matrix P)
inv_corr_matrix = np.linalg.inv(correlation_matrix.values)

# Partial correlation of i and j given the remaining variables:
#     r_ij = -P_ij / sqrt(P_ii * P_jj)
# evaluated for every pair at once (the diagonal entries come out as -1 and
# are not meaningful).
precision_diag = np.diag(inv_corr_matrix)
partial_corr_matrix = pd.DataFrame(
    -inv_corr_matrix / np.sqrt(np.outer(precision_diag, precision_diag)),
    index=variables,
    columns=variables,
)

# Partial correlation of each index with TEMP, looked up by label
partial_corr_enso = partial_corr_matrix.loc['soi_index', 'TEMP']
partial_corr_sam = partial_corr_matrix.loc['sam_index', 'TEMP']
partial_corr_iod = partial_corr_matrix.loc['iod_index', 'TEMP']

In [10]:
# Direct (pairwise) correlations between the three climate indices and TEMP,
# i.e. the matrix from DataFrame.corr() before controlling for the other indices
correlation_matrix

Unnamed: 0,soi_index,iod_index,sam_index,TEMP
soi_index,1.0,-0.289515,0.132069,0.144907
iod_index,-0.289515,1.0,-0.096046,-0.074311
sam_index,0.132069,-0.096046,1.0,-0.186243
TEMP,0.144907,-0.074311,-0.186243,1.0


In [11]:
# Report the partial correlation of each climate index with TEMP
partial_results = (
    ("Partial correlation between ENSO and TEMP:", partial_corr_enso),
    ("Partial correlation between SAM and TEMP:", partial_corr_sam),
    ("Partial correlation between IOD and TEMP:", partial_corr_iod),
)
for message, value in partial_results:
    print(message, value)

Partial correlation between ENSO and TEMP: 0.15446042842624913
Partial correlation between SAM and TEMP: -0.21200555135006952
Partial correlation between IOD and TEMP: -0.048079332658071586
