In [None]:
import numpy as np
import pylab as pl
import xray
import pandas as pd

In [None]:
# NOTE: besides enabling inline figures, %pylab star-imports numpy and matplotlib.pylab
# into the global namespace, so any unqualified numeric helper used in later cells
# resolves through this line.
%pylab inline
# Truncate displayed DataFrames to at most 8 rows.
pd.options.display.max_rows = 8

In [None]:
from sklearn import preprocessing

In [None]:
from sklearn.linear_model import LinearRegression, Perceptron, SGDRegressor, LogisticRegression, PassiveAggressiveRegressor
from sklearn.svm import SVR, NuSVR

from multilayer_perceptron import MultilayerPerceptronRegressor


In [None]:
# Root of the local PALS dataset tree; change this one value to relocate the data.
PALS_DATA_DIR = '/home/naught101/phd/data/PALS/datasets'

# Meteorological driver columns kept as model inputs.
met_vars = ['SWdown', 'Tair', 'LWdown', 'Wind', 'Rainf', 'PSurf', 'Qair']
met_data = xray.open_dataset(PALS_DATA_DIR + '/met/TumbaFluxnet.1.4_met.nc')
# Move the x/y/z index levels into columns, then keep only the driver columns.
# `.loc` replaces `.ix`, which is deprecated and absent from later pandas releases.
met_df = met_data.to_dataframe().reset_index(['x', 'y', 'z']).loc[:, met_vars]

# Flux columns available as prediction targets.
flux_vars = ['Qh', 'Qle', 'Rnet', 'NEE']
flux_data = xray.open_dataset(PALS_DATA_DIR + '/flux/TumbaFluxnet.1.4_flux.nc')
# Same treatment as the met data; the flux frame is reset on x/y only.
flux_df = flux_data.to_dataframe().reset_index(['x', 'y']).loc[:, flux_vars]


In [None]:
# Split by calendar year: 2002-2004 for fitting, 2005 held out for evaluation.
# Row selection goes through `.loc` so a date string can never be mistaken for a
# column label (plain `df['2005']` row lookup is deprecated in later pandas).
met_train_set = met_df.loc['2002':'2004']
met_test_set = met_df.loc['2005']
flux_train_set = flux_df.loc['2002':'2004']
flux_test_set = flux_df.loc['2005']

In [None]:
import functools
import time

def timeit(f):
    """Decorator that prints how long each call to ``f`` takes.

    Args:
        f: Callable to wrap.

    Returns:
        A wrapper that forwards all positional and keyword arguments to ``f``,
        prints the elapsed wall-clock time in seconds, and returns ``f``'s result.
        Nothing is printed if ``f`` raises.
    """
    # functools.wraps keeps f's __name__ and __doc__ on the wrapper, so help()
    # and tracebacks still show the decorated function rather than `timed`.
    @functools.wraps(f)
    def timed(*args, **kw):
        ts = time.time()
        result = f(*args, **kw)
        te = time.time()
        print('model took: {:2.4f} sec'.format(te-ts))
        return result
    return timed

In [None]:
@timeit
def get_model_prediction(model, var):
    """Fit ``model`` to predict one flux variable from the met drivers.

    Inputs and target are standardised with scalers fitted on the training
    sets only. The test-set prediction is transformed back to the target's
    original scale before it is returned.

    Args:
        model: Unfitted estimator exposing ``fit(X, y)`` and ``predict(X)``.
        var: Name of the column of ``flux_train_set`` / ``flux_test_set`` to predict.

    Returns:
        A copy of the ``var`` column of ``flux_test_set`` with an additional
        ``var + "_pred"`` column holding the model's prediction.
    """
    # TODO: avoid external variables
    met_scaler = preprocessing.StandardScaler().fit(met_train_set)
    # Fit the target scaler on the variable actually being predicted; it was
    # previously always fitted on 'Qh', which mis-scaled every other target.
    flux_var_scaler = preprocessing.StandardScaler().fit(flux_train_set[[var]])

    # Scale the target as a 2-D column (what the scaler was fitted on), then
    # flatten it to the 1-D vector passed as the regression target.
    y_train = flux_var_scaler.transform(flux_train_set[[var]]).ravel()
    model.fit(X=met_scaler.transform(met_train_set), y=y_train)

    # Reshape to a single column so the inverse transform sees the same layout
    # the scaler was fitted with, whatever shape the model returned.
    pred_scaled = np.asarray(model.predict(X=met_scaler.transform(met_test_set)))
    pred = flux_var_scaler.inverse_transform(pred_scaled.reshape(-1, 1)).ravel()

    # Work on a copy so the shared test set is left untouched.
    plot_data = flux_test_set[[var]].copy()
    plot_data[var + "_pred"] = pred

    return plot_data

In [None]:
def test_model(name, model, var='Qh'):
    """Fit ``model``, print its test RMSE and plot the start of the test period.

    Args:
        name: Label printed before the results.
        model: Unfitted estimator, passed on to ``get_model_prediction``.
        var: Flux column to predict; defaults to ``'Qh'``.
    """
    print(name)
    data = get_model_prediction(model, var)
    # Use numpy explicitly instead of the names injected by the %pylab star import.
    rmse = np.sqrt(np.mean((data[var] - data[var + '_pred']) ** 2))
    print('RMSE: {:.2f}'.format(rmse))
    # Plot the first 192 rows; the previous slice `[1:192]` silently dropped row 0.
    pl.plot(data.iloc[:192])
    pl.legend(data.columns)
    pl.show()
    

In [None]:
# Ordinary least-squares fit with default settings.
test_model(
    name="LinearRegression",
    model=LinearRegression(),
)

In [None]:
# Stochastic-gradient-descent linear regressor with default settings.
test_model(
    name="SGDRegressor",
    model=SGDRegressor(),
)

In [None]:
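# Disabled: LogisticRegression is a classifier, so presumably it was dropped because
# the flux target is continuous (TODO confirm before re-enabling).
#test_model("LogisticRegression", LogisticRegression())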

In [None]:
# Passive-aggressive regressor with default settings.
test_model(
    name="PassiveAggressiveRegressor",
    model=PassiveAggressiveRegressor(),
)

In [None]:
# Support-vector regression with the default kernel.
test_model(
    name="SVR",
    model=SVR(),
)

In [None]:
# The model fitted here is SVR with a polynomial kernel, so the printed label says
# SVR (it previously read "NuSVR - poly" although no NuSVR was constructed).
test_model("SVR - poly", SVR(kernel='poly'))

In [None]:
# Multilayer perceptron with every hyper-parameter left at its default.
test_model(
    name="MultilayerPerceptronRegressor - default",
    model=MultilayerPerceptronRegressor(),
)

In [None]:
# Request the logistic activation explicitly; without it this run was identical
# to the "default" run despite its label.
test_model("MultilayerPerceptronRegressor - logistic", MultilayerPerceptronRegressor(activation='logistic'))

In [None]:
# Multilayer perceptron with three hidden layers of 20 units each.
test_model(
    name="MultilayerPerceptronRegressor - 3 hidden layer",
    model=MultilayerPerceptronRegressor(hidden_layer_sizes=(20, 20, 20)),
)

In [None]:
# Multilayer perceptron with two hidden layers of 10 units each.
test_model(
    name="MultilayerPerceptronRegressor - 2 small hidden layer",
    model=MultilayerPerceptronRegressor(hidden_layer_sizes=(10, 10)),
)

In [None]:
# No `models` mapping is defined anywhere in this notebook, so fit a fresh
# LinearRegression here instead of looking one up (avoids a NameError on a clean run).
pl.plot(get_model_prediction(LinearRegression(), 'Qh'))
pl.show()

In [None]:
# flux_test_set never gains a 'Qh_pred' column (get_model_prediction works on a copy),
# so build the observed/predicted frame explicitly before smoothing it.
# NOTE(review): LinearRegression is assumed to be the model of interest here — confirm.
qh_linear = get_model_prediction(LinearRegression(), 'Qh')
# 48-row rolling mean, shown for the selected date range.
pd.rolling_mean(qh_linear[['Qh_pred', 'Qh']], 48)['2005-01-01 23':'2005-01-02']

In [None]:
# flux_test_set never gains a 'Qh_pred' column (get_model_prediction works on a copy),
# so build the observed/predicted frame explicitly before smoothing it.
# NOTE(review): LinearRegression is assumed to be the model of interest here — confirm.
qh_linear = get_model_prediction(LinearRegression(), 'Qh')
# 72-row rolling mean of prediction and observation over the whole test set.
pl.plot(pd.rolling_mean(qh_linear[['Qh_pred', 'Qh']], 72))

In [None]:
# Show any figure that is still pending.
# NOTE(review): this cell creates no figure of its own; with inline plotting it
# probably has nothing left to draw — confirm and consider removing.
pl.show()

In [None]:
# Scratch cell: prints the built-in help for str.format; not part of the analysis.
help('bla'.format)

In [None]:
# Scratch cell: IPython help lookup for str.format; not part of the analysis.
?str.format