# Library

In [None]:
%matplotlib inline
from sklearn import neural_network
from sklearn import linear_model
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import plotly.plotly as py
from sklearn.metrics import mean_squared_error
from math import sqrt

# Global

In [None]:
# Test-set predictions of each fitted model, keyed by a short model tag.
predict = {}

# Function

# Data preprocessing

In [None]:
# Load the three input tables, in this order: date features, the PM2.5
# time-series table, and the meteorology table.
# NOTE(review): parse_dates=True is passed without index_col, so the
# 'datetime' column presumably stays as plain strings (a later cell compares
# it against string literals) -- confirm against the pandas version in use.
date_feature, time_series, meteorology = (
    pd.read_csv(csv_path, parse_dates=True)
    for csv_path in (
        "../data/date_feature.csv",
        "../data/pm2.5_timeseries.csv",
        "../data/Erlin.csv",
    )
)

In [None]:
# Keep only the three 't-N' columns (presumably the lagged PM2.5 readings --
# confirm against the CSV). Selecting them explicitly already leaves
# 'datetime' out, so no separate filtering step is needed beforehand.
time_series = time_series[['t-3', 't-2', 't-1']]
# Weather covariates plus 'PM2.5', which is used as the regression target.
meteorology = meteorology[['AMB_TEMP', 'RAINFALL', 'RH', 'WIND_SPEED', 'PM2.5']]

In [None]:
# Place the three tables side by side (column-wise); rows are matched on
# their index.
data = pd.concat(
    objs=[meteorology, time_series, date_feature],
    axis=1,
)
# Last expression of the cell: shows the first rows as a sanity check.
data.head()

In [None]:
# Index labels of the first row whose 'datetime' equals each boundary
# timestamp of the evaluation period. An IndexError is raised if a timestamp
# is not present in the data.
start = data.loc[data["datetime"].values == "2016-06-01 00:00:00"].index[0]
end = data.loc[data["datetime"].values == "2016-12-31 23:00:00"].index[0]

In [None]:
# Chronological hold-out split: rows before `start` are used for fitting
# (rows containing missing values are dropped); rows from `start` up to, but
# not including, `end` are held out for evaluation.
# NOTE(review): the slices are positional, so this assumes the default 0..n-1
# index where labels and positions coincide -- confirm.
# Disabled alternative that also trains on the rows from `end` onward:
#   train = pd.concat([data[:start], data[end:]], axis=0).dropna()
train = data.iloc[:start].dropna()
test = data.iloc[start:end]

In [None]:
# Every column except the timestamp and the target is used as a model input.
exclude = ['datetime', 'PM2.5']
features = [column for column in data.columns if column not in exclude]

# Single-argument print() produces the same output on Python 2 and Python 3.
print(features)

In [None]:
# Column the models are trained to predict.
target = "PM2.5"

# Design matrices and target vectors for the fitting and evaluation periods.
X_train = train[features]
y_train = train[target]
X_test = test[features]
y_test = test[target]

# Regression Tree

In [None]:
# AdaBoost ensemble built on decision-tree regressors, with library defaults.
RT = AdaBoostRegressor(DecisionTreeRegressor())
RT.fit(X_train, y_train)

# Report feature importances from least to most important.
zipped = sorted(zip(features, RT.feature_importances_), key=lambda pair: pair[1])
# Single-argument print() calls give the same output on Python 2 and 3.
print('Feature : Importances')
for name, importance in zipped:
    print('%s : %s' % (name, importance))

predict["RT"] = RT.predict(X_test)

# Linear Regression

In [None]:
# Ordinary least-squares baseline on the same features.
LR = linear_model.LinearRegression()
LR.fit(X_train, y_train)

# Report fitted coefficients in ascending order (most negative first).
zipped = sorted(zip(features, LR.coef_), key=lambda pair: pair[1])
# Single-argument print() calls give the same output on Python 2 and 3.
print('Feature : Coefficients')
for name, coefficient in zipped:
    print('%s : %s' % (name, coefficient))

predict["LR"] = LR.predict(X_test)

# MLP Regression

In [None]:
# Multi-layer perceptron regressor with ReLU activations; all other
# hyper-parameters are library defaults.
# NOTE(review): the inputs are fed in unscaled here -- StandardScaler is
# imported at the top of the notebook but not applied in this cell; confirm
# whether that is intentional.
MLP = neural_network.MLPRegressor(activation='relu')
MLP.fit(X_train, y_train)
# One formatted string keeps the output identical on Python 2 and Python 3
# (the model description starts on its own line).
print('Model: \n%s' % (MLP,))

predict["MLP"] = MLP.predict(X_test)

# Plot

In [None]:
# 2x2 grid: one panel per model (prediction vs. real series) plus a combined
# panel with every model overlaid on the real series.
plt.rcParams['figure.figsize'] = [16.0, 8.0]
colors = {'MLP':'C3', 'RT':'C2', 'LR':'C1', 'Real':'C0'}
# Converted once; the same list is drawn in every panel.
real_values = y_test.tolist()

fig = plt.figure()
# Full-figure axes used only to carry the shared title and axis labels.
ax = fig.add_subplot(111)
ax.title.set_text(target)
# Hide the frame and ticks of the big axes. Booleans are required here: the
# strings 'off'/'on' are truthy and are not treated as "off" by current
# matplotlib releases.
for side in ('top', 'bottom', 'left', 'right'):
    ax.spines[side].set_color('none')
ax.tick_params(labelcolor='w', top=False, bottom=False, left=False, right=False)
ax.set_xlabel('Index')
ax.set_ylabel('Value')

# Per-model panels, each with its own legend.
panels = []
for position, key in ((221, 'RT'), (222, 'LR'), (223, 'MLP')):
    panel = fig.add_subplot(position)
    panel.plot(predict[key], colors[key], label=key)
    panel.plot(real_values, colors['Real'], label='Real')
    panel.legend(loc='upper left')
    panels.append(panel)
# Keep the original per-panel names available to any later cell.
ax1, ax2, ax3 = panels

# Combined panel. dict.items() works on both Python 2 and Python 3.
ax4 = fig.add_subplot(224)
for key, value in predict.items():
    ax4.plot(value, colors[key], label=key)
ax4.plot(real_values, colors['Real'], label='Real')
ax4.legend(loc='upper left')
plt.tight_layout()
plt.show()

In [None]:
# Scatter of predicted (x) against real (y) values for every model, with the
# identity line and a dashed band around it.
plt.rcParams['figure.figsize'] = [16.0, 8.0]
plt.xlabel('Predict')
plt.ylabel('Real')

# Converted once instead of once per model.
real_values = y_test.tolist()

# dict.items() works on both Python 2 and Python 3.
for key, value in predict.items():
    plt.plot(value, real_values, colors[key] + '.', label=key)

plt.legend(loc='upper left')
# Diagonal extent: from 0 up to the largest real value, but at least 24.
a, b = 0, max(max(real_values), 24)
# Vertical shift of 5*sqrt(2) puts each dashed line at a perpendicular
# distance of 5 from the identity line.
mrange = 5 * sqrt(2)
plt.plot([a, b], [a, b], 'b-')
plt.plot([a, b - 3], [a + mrange, b + mrange - 3], 'r--')
plt.plot([a + 3, b], [a - mrange + 3, b - mrange], 'r--')
plt.show()

# Measure

In [None]:
# Report the test-set error of every model. The value printed is the square
# root of the mean squared error, so it is labelled as RMSE.
# dict.items() works on both Python 2 and Python 3.
for key, value in predict.items():
    # Arguments follow the documented (y_true, y_pred) order.
    print("'%s' root mean squared error: %.6f"
          % (key, sqrt(mean_squared_error(y_test, value))))