In [None]:
# Render a small script and a button in this cell's output so the reader can
# hide or show the notebook's code-input areas (the `div.input` elements).
# NOTE(review): the script calls `$`, so it presumably relies on jQuery being
# provided by the notebook front end — confirm for the viewer being targeted.
from IPython.display import HTML

# `code_show` starts as true and `code_toggle` is registered to run on document
# ready, so the inputs are hidden on load; each form submission flips the state.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')

In [1]:
from preamble import *
from scipy.stats import pearsonr
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge

In [2]:
# Read the four input tables in one pass; each path is relative to the
# notebook's working directory and the unpacking order matches the path order.
pdo, wind, duration, nino4 = (
    pd.read_csv(csv_path)
    for csv_path in (
        'Data/PDO/data_PDO.csv',
        'Data/wind-shear/computed_seasonal-wind-shear.csv',
        'Data/computed-data/computed_annual-intensification-duration_JTWC.csv',
        'Data/ENSO/Nino4/data_nino4.csv',
    )
)

In [3]:
# Cut three latitude/longitude boxes out of the wind-shear table.
# `Series.between` includes both end points, so rows on a box edge are kept.
wind1 = wind[wind['latitude'].between(-15, 0) & wind['longitude'].between(180, 215)]

wind2 = wind[wind['latitude'].between(0, 10) & wind['longitude'].between(130, 160)]

wind3 = wind[wind['latitude'].between(15, 25) & wind['longitude'].between(125, 140)]

In [7]:
# Reduce every predictor to one mean value per year.
# Nino4 and PDO carry a month column, so only months 7-9 are averaged; the
# wind-shear boxes are averaged over all of their rows for each year.
season_nino4 = (
    nino4.loc[nino4['month'].between(7, 9)]
    .groupby('year')['nino4_anom']
    .mean()
    .reset_index()
)

# `reset_index(name=...)` names the averaged column after its box.
season_wind1 = wind1.groupby('year')['wind'].mean().reset_index(name='wind1')
season_wind2 = wind2.groupby('year')['wind'].mean().reset_index(name='wind2')
season_wind3 = wind3.groupby('year')['wind'].mean().reset_index(name='wind3')

# The PDO table uses upper-case column names; lower-case them so the frame can
# be merged with the others on 'year'.
season_pdo = (
    pdo.loc[pdo['month'].between(7, 9)]
    .groupby('YEAR')['PDO']
    .mean()
    .reset_index()
    .rename(columns={'YEAR': 'year', 'PDO': 'pdo'})
)

In [12]:
# Join all predictors into one table keyed on 'year'. `merge` defaults to an
# inner join, so only years present in every input survive.
variables = (
    season_nino4
    .merge(season_wind1, on='year')
    .merge(season_wind2, on='year')
    .merge(season_wind3, on='year')
    .merge(season_pdo, on='year')
)

In [29]:
# Preview the first five rows of the merged predictor table.
variables.head(n=5)

Unnamed: 0,year,nino4_anom,wind1,wind2,wind3,pdo
0,1951,-0.316667,12.878017,14.471279,5.803357,-0.25
1,1952,-0.613333,9.445609,15.21035,8.500693,-0.913333
2,1953,0.033333,11.539983,15.313083,9.473967,0.053333
3,1954,-1.033333,15.361635,13.197492,9.403017,-0.143333
4,1955,-0.87,17.743377,9.147422,7.833979,-2.183333


### Linear Regression of Intensification Duration on PDO, Nino4, and Wind Shear

In [33]:
# Fit an ordinary least-squares model of intensification duration on every
# column of `variables` except 'year', then report the in-sample fit.
# `.loc` replaces the `.ix` indexer, which was deprecated in pandas 0.20 and
# removed in pandas 1.0; with a boolean column mask the selection is the same.
X_train = variables.loc[:, variables.columns != 'year'].values
# NOTE(review): the target is taken from `duration` by row position and is never
# merged on 'year'; this presumably relies on `duration` holding exactly the same
# years, in the same order, as `variables` — confirm.
y_train = duration['Intensification Duration'].values

lr = LinearRegression().fit(X_train, y_train)

# Score once and reuse it; this is R^2 measured on the training data itself.
r2_train = lr.score(X_train, y_train)

print("Coefficients: %s" % lr.coef_)
print("Intercept   : %s" % lr.intercept_)
print("R2 Score    : %f" % r2_train)
# The square root of the training R^2 is what this notebook reports as the
# correlation.
print("Correlation : %f" % np.sqrt(r2_train))

Coefficients: [ 0.22686028 -0.01252035  0.01003655 -0.05745775 -0.03463197]
Intercept   : 2.0061137994
R2 Score    : 0.409646
Correlation : 0.640036


### Interaction and Polynomial Features (Ridge and Lasso)

In [60]:
# Rescale the predictors with MinMaxScaler, expand them to all degree-2
# polynomial and interaction terms, then fit ridge and lasso models on the
# expanded features.
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)

poly = PolynomialFeatures(degree=2).fit(X_train_scaled)
X_train_poly = poly.transform(X_train_scaled)

print(X_train_poly.shape)

# First score line printed below: ridge. Scores are computed on the training
# data, so they measure in-sample fit only.
ridge = Ridge(alpha=0.001).fit(X_train_poly,y_train)
print('Regression correlation result: {:.3f}'.format(np.sqrt(ridge.score(X_train_poly,y_train))))

# Second score line printed below: lasso.
# `max_iter` must be an integer: the original float `1e5` is rejected by the
# parameter validation in recent scikit-learn releases.
lasso = Lasso(alpha=0.0001,max_iter=100000).fit(X_train_poly,y_train)
print('Regression correlation result: {:.3f}'.format(np.sqrt(lasso.score(X_train_poly,y_train))))
print('Lasso coefficients: {}'.format(lasso.coef_))
print('Ridge coefficients: {}'.format(ridge.coef_))

(60, 21)
Regression correlation result: 0.770
Regression correlation result: 0.755
Lasso coefficients: [ 0.          3.87068118  2.14571809  0.91125046  2.512378    3.34196051
 -1.45476538 -1.59674562  0.18660254 -1.0777622  -0.84073993 -0.46269187
 -0.78240285 -0.06907865 -1.38502204 -1.25648266  0.98314238  0.20115162
 -1.19086216 -2.93731327 -1.36336811]
Ridge coefficients: [ 0.          5.44613616  3.89963848  1.14576433  3.97551549  4.93127413
 -1.98988752 -2.24477483  0.46856293 -1.92487093 -1.6796353  -0.88759003
 -1.07496463 -0.93191741 -2.32048945 -1.49341259  1.10026001  0.06407592
 -1.50542194 -3.634435   -1.67467452]


### Decision Tree Regression

In [68]:
from sklearn.tree import DecisionTreeRegressor

# Fit a depth-limited regression tree on the unscaled predictors.
# scikit-learn permutes the features at every split, so equally good splits can
# be resolved differently from run to run; a fixed `random_state` makes the
# fitted tree, and therefore the score below, repeatable.
tree = DecisionTreeRegressor(max_depth=5, random_state=0)
tree.fit(X_train, y_train)
# R^2 on the training data itself (in-sample fit).
tree.score(X_train, y_train)

0.83862650972208508