In [None]:
import functools

import numpy as np
import pandas as pd
import pylab as pl
import xray

In [None]:
# Cap pandas' tabular display at 8 rows so frame outputs in later cells stay short.
pd.options.display.max_rows = 8

In [None]:
# `%pylab inline` turns on inline figures and injects numpy/matplotlib names
# into the notebook namespace; the unqualified `rcParams` used on the next line
# is not imported anywhere else in the visible cells, so it presumably comes
# from this magic.
%pylab inline
# Default figure size as (width, height): wide, short panels.
rcParams['figure.figsize'] = (14.0, 3.0)

In [None]:
from sklearn import preprocessing, decomposition

In [None]:
from sklearn.linear_model import LinearRegression, Perceptron, SGDRegressor, LogisticRegression, PassiveAggressiveRegressor
from sklearn.svm import SVR, NuSVR  #, LinearSVR
from sklearn.neural_network import MultilayerPerceptronRegressor

In [None]:
# Root of the local PALS dataset tree. Both files below live under it, so this
# is the single line to edit when running the notebook on another machine.
DATA_DIR = '/home/naught101/phd/data/PALS/datasets'

# Meteorological variables; met_df is what test_model uses as its inputs.
met_vars = ['SWdown', 'Tair', 'LWdown', 'Wind', 'Rainf', 'PSurf', 'Qair']
met_data = xray.open_dataset(DATA_DIR + '/met/TumbaFluxnet.1.4_met.nc')
# Move the x/y/z index levels into ordinary columns, then keep only the
# selected variables. `.loc` is the label-based selector; the older `.ix`
# accessor is deprecated and absent from current pandas releases.
met_df = met_data.to_dataframe().reset_index(['x','y','z']).loc[:, met_vars]

# Flux variables; test_model picks its target column from flux_df.
flux_vars = ['Qh', 'Qle', 'Rnet', 'NEE']
flux_data = xray.open_dataset(DATA_DIR + '/flux/TumbaFluxnet.1.4_flux.nc')
# Same treatment as above; only the x/y levels are reset for this file.
flux_df = flux_data.to_dataframe().reset_index(['x','y']).loc[:, flux_vars]


In [None]:
# Peek at the first two rows of the flux table.
flux_df.head(2)

In [None]:
# Peek at the first two rows of the met table.
met_df.head(2)

In [None]:
import time

def timeit(f):
    """Decorator that prints how long each call to `f` takes.

    The wrapped function is called with the original positional and keyword
    arguments and its return value is passed through unchanged. After a
    successful call, the elapsed wall-clock time (from ``time.time()``) is
    printed as ``model took: <seconds> sec``. If `f` raises, the exception
    propagates and nothing is printed.

    Parameters
    ----------
    f : callable
        Function to time.

    Returns
    -------
    callable
        Wrapper with the same call signature as `f`.
    """
    # functools.wraps copies __name__, __doc__, etc. onto the wrapper so that
    # help() and tracebacks show the decorated function rather than `timed`.
    @functools.wraps(f)
    def timed(*args, **kw):
        ts = time.time()
        result = f(*args, **kw)
        te = time.time()
        print('model took: {:2.4f} sec'.format(te-ts))
        return result
    return timed

In [None]:
@timeit
def get_model_prediction(model, X_train, Y_train, X_validate, scale=True, PCA=True):
    """Fit `model` on the training data and predict for the validation inputs.

    Parameters
    ----------
    model : estimator
        Object exposing ``fit(X=..., y=...)`` and ``predict(X=...)``.
    X_train, Y_train : array-like
        Training inputs and targets.
    X_validate : array-like
        Inputs to predict for.
    scale : bool, default True
        Standardise the inputs and the target with ``StandardScaler`` before
        fitting, and map the predictions back to the original target units.
    PCA : bool, default True
        Project the inputs with ``decomposition.PCA()`` before fitting.

    Returns
    -------
    array
        Predictions for `X_validate`, with whatever dimensionality
        ``model.predict`` produced.
    """
    X_t = X_train
    X_v = X_validate
    Y = Y_train

    if scale:
        X_scaler = preprocessing.StandardScaler()
        X_t = X_scaler.fit_transform(X_t)
        # Re-use the statistics learned from the training set. Re-fitting on
        # the validation set would scale it differently from the data the
        # model was trained on.
        X_v = X_scaler.transform(X_v)

        Y_scaler = preprocessing.StandardScaler()
        Y = np.asarray(Y)
        if Y.ndim == 1:
            # StandardScaler expects 2-D input: scale the target as a single
            # column, then hand the model the same 1-D shape it was given.
            Y = Y_scaler.fit_transform(Y.reshape(-1, 1)).ravel()
        else:
            Y = Y_scaler.fit_transform(Y)
    if PCA:
        X_pca = decomposition.PCA()
        X_t = X_pca.fit_transform(X_t)
        # Project validation data onto the components fitted on the training
        # set; a second fit would yield a different set of components.
        X_v = X_pca.transform(X_v)

    model.fit(X=X_t, y=Y)
    # model.get_params()

    pred = model.predict(X=X_v)
    if scale:
        pred = np.asarray(pred)
        if pred.ndim == 1:
            # Same 2-D requirement as above; the caller gets a 1-D result back.
            pred = Y_scaler.inverse_transform(pred.reshape(-1, 1)).ravel()
        else:
            pred = Y_scaler.inverse_transform(pred)

    return pred

In [None]:
def test_model(name, model, scale=True, PCA=True, y_var='Qh'):
    """Train `model` on the first 70% of the data, then report and plot its fit.

    Inputs come from the notebook-level `met_df` and the target column from
    `flux_df`. Prints `name`, the timing line emitted by get_model_prediction,
    a separator and the RMSE on the held-out rows, then plots observed against
    predicted values.

    Parameters
    ----------
    name : str
        Label printed before the results.
    model : estimator
        Passed straight to get_model_prediction.
    scale, PCA : bool, default True
        Preprocessing switches forwarded to get_model_prediction.
    y_var : str, default 'Qh'
        Column of `flux_df` to predict; also used to name the plotted series.

    Returns
    -------
    None
    """
    X = met_df
    Y = np.array(flux_df[y_var])

    # Split by row order, not randomly: the first 7/10 of the rows train the
    # model and the remaining rows are held out (integer arithmetic).
    train_len = (7*len(X)//10)

    X_train = X[:train_len]
    X_validate = X[train_len:]
    Y_train = Y[:train_len]
    Y_validate = Y[train_len:]

    print(name)
    Y_pred = get_model_prediction(model, X_train, Y_train, X_validate, scale=scale, PCA=PCA)
    print('---')
    # Some estimators return a 2-D array; keep only the first column.
    if len(Y_pred.shape) > 1:
        Y_pred = Y_pred[:,0]
    # Explicit numpy calls, so this does not depend on names injected into the
    # namespace by `%pylab`.
    print('RMSE: {:.2f}'.format(np.sqrt(np.mean((Y_pred-Y_validate)**2))))
    # NOTE(review): the slice starts at 1, so the first held-out sample is not
    # plotted; confirm this is intended.
    plot_data = pd.DataFrame({y_var+'_obs': Y_validate[1:350], y_var+'_pred': Y_pred[1:350]})
    pl.plot(plot_data)
    pl.legend(plot_data.columns)
    pl.show()
    

In [None]:
# Baseline: linear regression on the met variables with neither scaling nor PCA.
test_model("LinearRegression", LinearRegression(), scale=False, PCA=False)

In [None]:
# Same model with standardised inputs and target, still without PCA.
test_model("LinearRegression", LinearRegression(), scale=True, PCA=False)

In [None]:
# Unscaled inputs passed through PCA before fitting.
test_model("LinearRegression", LinearRegression(), scale=False, PCA=True)

In [None]:
# SGD regressor with default settings; scale keeps its default (True), PCA is off.
test_model("SGDRegressor", SGDRegressor(), PCA=False)

In [None]:
#test_model("LogisticRegression", LogisticRegression())

In [None]:
#test_model("PassiveAggressiveRegressor", PassiveAggressiveRegressor())

In [None]:
test_model("SVR - linear Support Vector Regression", SVR())

In [None]:
test_model("SVR - linear Support Vector Regression", SVR(), PCA=False)

In [None]:
# Polynomial-kernel SVR; scale and PCA stay at test_model's defaults (both True).
test_model("SVR - poly", SVR(kernel='poly'))

In [None]:
# Default multilayer perceptron with scaling and PCA both on (test_model defaults).
test_model("MultilayerPerceptronRegressor - default",
           MultilayerPerceptronRegressor())

In [None]:
# Default perceptron: scaling on, PCA off.
test_model("MultilayerPerceptronRegressor - default",
           MultilayerPerceptronRegressor(),
           scale=True, PCA=False)

In [None]:
# Default perceptron: scaling off, PCA on.
test_model("MultilayerPerceptronRegressor - default",
           MultilayerPerceptronRegressor(),
           scale=False, PCA=True)

In [None]:
# Default perceptron on the untransformed inputs (no scaling, no PCA).
test_model("MultilayerPerceptronRegressor - default",
           MultilayerPerceptronRegressor(),
           scale=False, PCA=False)

In [None]:
# Perceptron with activation='logistic'; scaling on (default), PCA off.
test_model("MultilayerPerceptronRegressor - logistic",
           MultilayerPerceptronRegressor(activation='logistic'),
           PCA=False)

In [None]:
# hidden_layer_sizes=(20,20,20,): presumably three hidden layers of 20 units
# each, as the label suggests. Scaling on (default), PCA off.
test_model("MultilayerPerceptronRegressor - 3 hidden layer",
           MultilayerPerceptronRegressor(hidden_layer_sizes=(20,20,20,)),
           PCA=False)

In [None]:
# hidden_layer_sizes=(10,10,); scaling on (default), PCA off.
test_model("MultilayerPerceptronRegressor - 2 small hidden layer",
           MultilayerPerceptronRegressor(hidden_layer_sizes=(10,10,)),
           PCA=False)

In [None]:
# hidden_layer_sizes=(10,30,); scaling on (default), PCA off.
# NOTE(review): this prints exactly the same label as the (10,10,) run in the
# previous cell, so the two outputs cannot be told apart by name.
test_model("MultilayerPerceptronRegressor - 2 small hidden layer",
           MultilayerPerceptronRegressor(hidden_layer_sizes=(10,30,)),
           PCA=False)

In [None]:
# NOTE(review): this cell does not match the rest of the visible notebook.
# `models` is never defined, and get_model_prediction is called with two
# positional arguments although its signature requires model, X_train, Y_train
# and X_validate.
pl.plot(get_model_prediction(models['LinearRegression'], 'Qh'))
pl.show()

In [None]:
# NOTE(review): `flux_test_set` is not defined in the visible cells. Also,
# pd.rolling_mean is a legacy top-level pandas function that newer pandas
# releases no longer provide (DataFrame.rolling(...).mean() is its successor);
# confirm which pandas version this notebook targets.
# Intended effect: 48-sample rolling mean of the two columns, sliced by label.
pd.rolling_mean(flux_test_set[['Qh_pred','Qh']], 48)['2005-01-01 23':'2005-01-02']

In [None]:
# Plots the 72-sample rolling mean of the same two columns.
pl.plot(pd.rolling_mean(flux_test_set[['Qh_pred','Qh']], 72))

In [None]:
pl.show()

In [None]:
# Interactive documentation lookups; they only display help text and do not
# feed into any other cell.
help('bla'.format)

In [None]:
# IPython `?` syntax: shows the docstring of str.format.
?str.format

In [None]:
help(MultilayerPerceptronRegressor)

In [None]:
# Scratch 6x1 integer column vector holding the values 1..6.
a = np.arange(1, 7).reshape(-1, 1)

In [None]:
# Display the shape of the scratch array `a`.
a.shape

In [None]:
# Display the array itself.
a

In [None]:
# print() writes this string as plain text; the tag is not rendered as HTML.
print('<h2>blah</h2>')