In [40]:
from preamble import *
from scipy.stats import pearsonr
from sklearn.linear_model import LinearRegression

In [3]:
# Read the three input tables used by the rest of the notebook.
nino4 = pd.read_csv('Data/ENSO/Nino4/data_nino4.csv')
pdo = pd.read_csv('Data/PDO/data_PDO.csv')
duration = pd.read_csv(
    'Data/computed-data/computed_annual-intensification-duration_JTWC.csv'
)

## Seasonal Regression (July-September)

In [31]:
# Preview the first five rows of the intensification-duration table.
duration.head(n=5)

Unnamed: 0,YEAR,Intensification Duration,9-year average Int Duration
0,1951,1.124008,
1,1952,1.159188,
2,1953,1.758929,
3,1954,0.718171,
4,1955,1.498843,1.426263


In [18]:
# Keep only July-September rows (between() includes both end months),
# then average each index within every year.
nino4_jas = nino4[nino4['month'].between(7, 9)]
pdo_jas = pdo[pdo['month'].between(7, 9)]

nino4_jas_by_year = nino4_jas.groupby('year')
season_nino4_anom = nino4_jas_by_year['nino4_anom'].mean().reset_index()
season_nino4_temp = nino4_jas_by_year['nino4_temp'].mean().reset_index()
season_pdo = pdo_jas.groupby('YEAR')['PDO'].mean().reset_index()

In [22]:
# Lower-case the PDO column labels so the year key matches the Nino4 tables.
season_pdo = season_pdo.rename(columns={'YEAR': 'year', 'PDO': 'pdo'})

### Nino4 anomaly + PDO

In [23]:
# One row per year carrying both predictors: seasonal Nino4 anomaly and seasonal PDO.
variables = pd.merge(season_nino4_anom, season_pdo, on='year')

In [35]:
# Pair each year's predictors with that same year's intensification duration.
# `variables` and `duration` are separate tables, so join them on the year key
# instead of relying on both having identical row order and year coverage;
# a positional pairing would silently regress against the wrong years.
train = variables.merge(
    duration[['YEAR', 'Intensification Duration']],
    left_on='year',
    right_on='YEAR',
    how='inner'
)

# Predictor matrix (one row per year) and the matching target vector.
X_train = train[['nino4_anom', 'pdo']].values
y_train = train['Intensification Duration'].values

In [37]:
# Ordinary least-squares fit of duration on the two seasonal predictors.
lr = LinearRegression()
lr.fit(X_train, y_train)

In [47]:
# Collect the fitted parameters and the in-sample score, then report them.
coefficients = lr.coef_
intercept = lr.intercept_
train_score = lr.score(X_train, y_train)  # R^2 on the training data

print("Coefficients: %s" % coefficients)
print("Intercept   : %s" % intercept)
print("Score       : %f" % train_score)

Coefficients: [ 0.29538467 -0.00061367]
Intercept   : 1.58376275697
Score       : 0.217030


### Nino4 temperature + PDO

In [42]:
# One row per year carrying both predictors: seasonal Nino4 temperature and seasonal PDO.
variables = pd.merge(season_nino4_temp, season_pdo, on='year')

In [48]:
# Pair each year's predictors with that same year's intensification duration.
# `variables` and `duration` are separate tables, so join them on the year key
# instead of relying on both having identical row order and year coverage;
# a positional pairing would silently regress against the wrong years.
train = variables.merge(
    duration[['YEAR', 'Intensification Duration']],
    left_on='year',
    right_on='YEAR',
    how='inner'
)

# Predictor matrix (one row per year) and the matching target vector.
X_train = train[['nino4_temp', 'pdo']].values
y_train = train['Intensification Duration'].values

In [49]:
# Ordinary least-squares fit of duration on the two seasonal predictors.
lr = LinearRegression()
lr.fit(X_train, y_train)

In [50]:
# Collect the fitted parameters and the in-sample score, then report them.
coefficients = lr.coef_
intercept = lr.intercept_
train_score = lr.score(X_train, y_train)  # R^2 on the training data

print("Coefficients: %s" % coefficients)
print("Intercept   : %s" % intercept)
print("Score       : %f" % train_score)

Coefficients: [ 0.29585186 -0.00078739]
Intercept   : -6.93601070921
Score       : 0.217053


In [8]:
# Pearson r between annual intensification duration and the PDO value of a
# single calendar month, computed for each month 1..12 in turn.
# pearsonr returns (r, p-value); only r is kept.
pdo_correlation = [
    pearsonr(
        duration['Intensification Duration'],
        pdo.loc[pdo['month'] == month, 'PDO'].values
    )[0]
    for month in range(1, 13)
]

In [9]:
# Tabulate the monthly correlations against their month numbers (1-12).
months = np.arange(1, 13)
corr_table = pd.DataFrame({'month': months, 'corr_pdo': pdo_correlation})

In [10]:
# Index by month and show the rounded correlations as a single wide row.
monthly_corr = corr_table.set_index('month')
monthly_corr.round(2).T

month,1,2,3,4,5,6,7,8,9,10,11,12
corr_pdo,0.28,0.18,0.27,0.29,0.19,0.16,0.18,0.25,0.24,0.23,0.28,0.33


### Quarterly Correlation

In [16]:
# Pearson r between annual intensification duration and the quarterly-mean PDO,
# one value per calendar quarter.
q_pdo_correlation = []

for i in np.arange(4):
    # Quarter i spans months 3i+1 .. 3(i+1): 1-3, 4-6, 7-9, 10-12
    # (between() includes both end months).
    in_quarter = pdo['month'].between(3 * i + 1, 3 * (i + 1))
    # Select 'PDO' before aggregating so only that column is averaged; taking
    # the mean of every column first is wasted work and fails on newer pandas
    # if the table holds any non-numeric column.
    temp = pdo[in_quarter].groupby('YEAR')['PDO'].mean().values
    # pearsonr returns (r, p-value); keep only r.
    q_pdo_correlation.append(pearsonr(duration['Intensification Duration'], temp)[0])

In [17]:
# Tabulate the quarterly correlations against readable quarter labels.
quarter_labels = ['Jan-Mar', 'Apr-Jun', 'Jul-Sep', 'Oct-Dec']
q_corr_table = pd.DataFrame({
    'quarter': quarter_labels,
    'corr_pdo': q_pdo_correlation
})

In [22]:
# Index by quarter and show the rounded correlations as a single wide row.
q_corr = q_corr_table.set_index('quarter')
q_corr.round(2).T

quarter,Jan-Mar,Apr-Jun,Jul-Sep,Oct-Dec
corr_pdo,0.26,0.23,0.24,0.31


### Annual Correlation

In [20]:
# NOTE(review): this list is never filled in this cell; kept in case a later
# cell refers to it.
y_pdo_correlation = []

# Annual-mean PDO per year. Select 'PDO' before aggregating so only that column
# is averaged; taking the mean of every column first is wasted work and fails
# on newer pandas if the table holds any non-numeric column.
annual_pdo = pdo.groupby('YEAR')['PDO'].mean().values

# Calculate correlation of the annual-mean PDO with duration
# (pearsonr returns (r, p-value); only r is reported).
print('PDO correlation coefficient: {:.3f}'.format(
    pearsonr(duration['Intensification Duration'], annual_pdo)[0]))

PDO correlation coefficient: 0.317
