# Model Pipeline

This notebook is used for training candidate models for air quality dataset.

In [1]:
MODEL_NAME = 'baseline'

## Modules

In [2]:
import numpy as np
import pandas as pd

from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.model_selection import cross_val_score, train_test_split, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

Using TensorFlow backend.


## Ensuring reproducibility

In [3]:
CUSTOM_SEED = 42
np.random.seed(CUSTOM_SEED)

## Load Dataset

In [4]:
path = '../dataset/Air quality/'
names = ["No",  "year",  "month",  "day",  "hour",  "PM2.5",  "PM10",  "SO2",  "NO2",  "CO",  "O3",  "TEMP",  "PRES",  "DEWP",  "RAIN",  "wd",  "WSPM",  "station"]
dataset = pd.read_csv(path + 'cleansed_air_quality.csv',  names=names)
dataset

Unnamed: 0,No,year,month,day,hour,PM2.5,PM10,SO2,NO2,CO,O3,TEMP,PRES,DEWP,RAIN,wd,WSPM,station
0,5844,2013,10,30,11,41.0,49.0,19.0,51.1197,700.0,6.0,13.2,1022.4,-0.1,0.0,NE,1.4,Wanliu
1,27824,2016,5,3,7,15.0,26.0,2.0,11.0000,300.0,72.0,15.5,993.5,-1.1,0.0,NW,3.7,Dingling
2,25841,2016,2,10,16,95.0,95.0,59.0,46.0000,3100.0,61.0,9.3,1012.4,-12.6,0.0,ESE,1.4,Shunyi
3,26986,2016,3,29,9,10.0,36.0,12.0,34.0000,500.0,45.0,15.7,1014.6,-6.0,0.0,WNW,0.8,Aotizhongxin
4,23848,2015,11,19,15,49.0,49.0,2.0,40.0000,1700.0,2.0,4.6,1019.2,4.0,0.3,W,0.9,Wanliu
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
378693,30527,2016,8,23,22,83.0,106.0,6.0,77.0000,1200.0,49.0,26.9,1001.2,21.4,0.0,NNE,1.1,Nongzhanguan
378695,18071,2015,3,23,22,41.0,87.0,6.0,56.0000,500.0,42.0,10.8,1024.8,-14.1,0.0,S,1.8,Shunyi
378697,7768,2014,1,18,15,105.0,112.0,38.0,77.0000,1600.0,43.0,4.6,1027.4,-13.8,0.0,SSE,2.3,Nongzhanguan
378698,19377,2015,5,17,8,123.0,139.0,15.0,33.0000,900.0,68.0,20.4,996.6,15.0,0.0,S,2.2,Dongsi


### Splitting Dataset

In [5]:
# Split dataset
feat_cols = ["year",  "month",  "day",  "hour",  "PM10",  "SO2",  "NO2",  "CO",  "O3",  "TEMP",  "PRES",  "DEWP",  "RAIN",  "WSPM"]
excluded_cols = ["wd", "station"]
target = "PM2.5"
X, y = dataset[feat_cols], dataset[target]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
print ("Ukuran data training {}, data testing {}".format(X_train.shape, X_test.shape))

Ukuran data training (249613, 14), data testing (62404, 14)


## Load Models

Please copy your models here as a function.

In [6]:
# define base model
def baseline_model(input_dim):
    def _model():
        # create model
        model = Sequential()
        model.add(Dense(50, input_dim=input_dim, kernel_initializer='normal', activation='relu'))
        model.add(Dense(1, kernel_initializer='normal'))
        # Compile model
        model.compile(loss='mean_squared_error', optimizer='adam')
        return model
    return _model

In [7]:
baseline_model(16)

<function __main__.baseline_model.<locals>._model()>

## Preprocessing

In [None]:
# evaluate model with standardized dataset
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasRegressor(build_fn=baseline_model(X_train.shape[1]), epochs=50, batch_size=5, verbose=1)))
pipeline = Pipeline(estimators)
kfold = KFold(n_splits=10)
results = cross_val_score(pipeline, X_train, y_train, cv=kfold)
print("Larger: %.2f (%.2f) MSE" % (results.mean(), results.std()))

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/5

Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/5

Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50

## Evaluation