In [1]:
from IPython.display import HTML

HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')

In [3]:
from preamble import *
from scipy.stats import pearsonr
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.tree import DecisionTreeRegressor

depths = [2,3,4,5,6,7]

In [4]:
pdo = pd.read_csv('Data/PDO/data_PDO.csv')
wind = pd.read_csv('Data/wind-shear/computed_seasonal-wind-shear.csv')
duration = pd.read_csv('Data/computed-data/computed_annual-intensification-duration_JTWC.csv')
nino4 = pd.read_csv('Data/ENSO/Nino4/data_nino4.csv')
wpsh = pd.read_csv('Data/tropical-high/data_subtropical-high.csv')

In [5]:
# Extract 3 regions of wind shears
wind1 = wind[wind['latitude'].between(-15,0)]
wind1 = wind1[wind1['longitude'].between(180,215)]

wind2 = wind[wind['latitude'].between(0,10)]
wind2 = wind2[wind2['longitude'].between(130,160)]

wind3 = wind[wind['latitude'].between(15,25)]
wind3 = wind3[wind3['longitude'].between(125,140)]

In [6]:
season_nino4 = nino4[nino4['month'].between(7,9)].groupby('year')['nino4_anom'].mean().reset_index()
season_wind1 = wind1.groupby('year')['wind'].mean().rename('wind1').reset_index()
season_wind2 = wind2.groupby('year')['wind'].mean().rename('wind2').reset_index()
season_wind3 = wind3.groupby('year')['wind'].mean().rename('wind3').reset_index()
season_pdo = pdo[pdo['month'].between(7,9)].groupby('YEAR')['PDO'].mean().reset_index()
season_pdo.columns = ['year', 'pdo']

In [7]:
variables = season_nino4.merge(season_wind1,on='year')
variables = variables.merge(season_wind2,on='year')
variables = variables.merge(season_wind3,on='year')
variables = variables.merge(season_pdo,on='year')

### PDO + Nino4 + Wind Shear - Duration Linear Regression

In [8]:
X_train = variables.loc[:,variables.columns!='year'].values
y_train = duration['Intensification Duration'].values

lr = LinearRegression().fit(X_train, y_train)

print("Coefficients: %s" % lr.coef_)
print("Intercept   : %s" % lr.intercept_)
print("Correlation : %f" % np.sqrt(lr.score(X_train, y_train)))

Coefficients: [ 0.22686028 -0.01252035  0.01003655 -0.05745775 -0.03463197]
Intercept   : 2.0061137994
Correlation : 0.640036


In [11]:
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)

poly = PolynomialFeatures(degree=2).fit(X_train_scaled)
X_train_poly = poly.transform(X_train_scaled)

ridge = Ridge(alpha=0.001).fit(X_train_poly,y_train)
print('Ridge coefficients: {}'.format(ridge.coef_))
print("Ridge Intercept   : %s" % ridge.intercept_)
print('Ridge correlation: {:.3f}  \n'.format(np.sqrt(ridge.score(X_train_poly,y_train))))




lasso = Lasso(alpha=0.0001,max_iter=1e5).fit(X_train_poly,y_train)
print('Lasso coefficients: {}'.format(lasso.coef_))
print("Lasso Intercept   : %s" % lasso.intercept_)
print('Lasso correlation: {:.3f}'.format(np.sqrt(lasso.score(X_train_poly,y_train))))

Ridge coefficients: [ 0.          5.44613616  3.89963848  1.14576433  3.97551549  4.93127413
 -1.98988752 -2.24477483  0.46856293 -1.92487093 -1.6796353  -0.88759003
 -1.07496463 -0.93191741 -2.32048945 -1.49341259  1.10026001  0.06407592
 -1.50542194 -3.634435   -1.67467452]
Ridge Intercept   : -3.25195548014
Ridge correlation: 0.770  

Lasso coefficients: [ 0.          3.87068118  2.14571809  0.91125046  2.512378    3.34196051
 -1.45476538 -1.59674562  0.18660254 -1.0777622  -0.84073993 -0.46269187
 -0.78240285 -0.06907865 -1.38502204 -1.25648266  0.98314238  0.20115162
 -1.19086216 -2.93731327 -1.36336811]
Lasso Intercept   : -1.63964049227
Lasso correlation: 0.755


In [12]:
for depth in depths:
    tree = DecisionTreeRegressor(max_depth=depth)
    tree.fit(X_train, y_train)
    print('Tree depth {} correlation: {}'.format(depth, tree.score(X_train, y_train)))

Tree depth 2 correlation: 0.47143139393323
Tree depth 3 correlation: 0.6409782841100349
Tree depth 4 correlation: 0.7470375307028133
Tree depth 5 correlation: 0.8386265097220851
Tree depth 6 correlation: 0.905667538586068
Tree depth 7 correlation: 0.9470038869971669


### WPSH + PDO + Nino4 + Wind Shear - Duration Linear Regression

In [13]:
wpsh = wpsh[~wpsh['yr'].isnull()]
wpsh = wpsh[['yr','ix']]
wpsh.columns = ['year','wpsh']

In [14]:
variables_m = variables.merge(wpsh,on='year')

In [20]:
X_train_m = variables_m[['wpsh']].values
y_train_m = duration[duration['YEAR']>=1979]['Intensification Duration'].values

lr = LinearRegression().fit(X_train_m, y_train_m)

print("Coefficients: %s" % lr.coef_)
print("Intercept   : %s" % lr.intercept_)
print("R2 Score    : %f" % lr.score(X_train_m, y_train_m))
print("Correlation : %f" % np.sqrt(lr.score(X_train_m, y_train_m)))

Coefficients: [-0.10791636]
Intercept   : 1.55254703143
R2 Score    : 0.134223
Correlation : 0.366364


In [16]:
X_train_m = variables_m.loc[:,variables_m.columns!='year'].values
y_train_m = duration[duration['YEAR']>=1979]['Intensification Duration'].values

lr = LinearRegression().fit(X_train_m, y_train_m)

print("Coefficients: %s" % lr.coef_)
print("Intercept   : %s" % lr.intercept_)
print("R2 Score    : %f" % lr.score(X_train_m, y_train_m))
print("Correlation : %f" % np.sqrt(lr.score(X_train_m, y_train_m)))

Coefficients: [ 0.16949049 -0.04911683 -0.00713789 -0.0744341  -0.05238422 -0.02349279]
Intercept   : 2.80549052912
R2 Score    : 0.563653
Correlation : 0.750769


In [17]:
scaler = MinMaxScaler()
X_train_m_scaled = scaler.fit_transform(X_train_m)

poly = PolynomialFeatures(degree=2).fit(X_train_m_scaled)
X_train_m_poly = poly.transform(X_train_m_scaled)

ridge = Ridge(alpha=0.001).fit(X_train_m_poly,y_train_m)
print('Ridge coefficients: {}'.format(ridge.coef_))
print("Ridge Intercept   : %s" % ridge.intercept_)
print('Ridge correlation: {:.3f}  \n'.format(np.sqrt(ridge.score(X_train_m_poly,y_train_m))))

lasso = Lasso(alpha=0.001,max_iter=1e5).fit(X_train_m_poly,y_train_m)
print('Lasso coefficients: {}'.format(lasso.coef_))
print("Lasso Intercept   : %s" % lasso.intercept_)
print('Lasso correlation: {:.3f}'.format(np.sqrt(lasso.score(X_train_m_poly,y_train_m))))

Ridge coefficients: [ 0.          0.05413038  2.53047181  1.26459198  6.68581449  0.31136036
 -2.76255494  0.74964224  0.5238027   2.14321191 -2.79874241 -1.21217389
  0.49062635 -0.52398543 -4.58298867 -5.04617092 -0.28092859  0.47572443
 -3.78517619 -0.26146235  4.89843145 -0.57647825 -2.07988802 -1.56837907
 -1.87096997 -2.8459963   3.15148691  1.83452634]
Ridge Intercept   : 0.333954366212
Ridge correlation: 0.966  

Lasso coefficients: [ 0.          0.         -0.05262032  0.          0.          0.
 -0.08222789  0.41949209 -0.38777002  0.          0.          0.          0.
 -0.06826993 -0.41518555 -0.55847017 -0.         -0.         -0.31368998
  0.          0.5639923   0.         -0.21002256 -0.         -0.         -0.4180621
 -0.          0.        ]
Lasso Intercept   : 1.78584381275
Lasso correlation: 0.809


In [18]:
for depth in depths:
    tree = DecisionTreeRegressor(max_depth=depth)
    tree.fit(X_train_m, y_train_m)
    print('Tree depth {} correlation: {}'.format(depth, tree.score(X_train_m, y_train_m)))

Tree depth 2 correlation: 0.6714995695851276
Tree depth 3 correlation: 0.8774104232010537
Tree depth 4 correlation: 0.9665527252907438
Tree depth 5 correlation: 0.9925599849442722
Tree depth 6 correlation: 0.9988269349261193
Tree depth 7 correlation: 0.9999495106417977
