In [14]:
!pip3 install keras

Collecting keras
  Using cached https://files.pythonhosted.org/packages/5e/10/aa32dad071ce52b5502266b5c659451cfd6ffcbf14e6c8c4f16c0ff5aaab/Keras-2.2.4-py2.py3-none-any.whl
Collecting keras-applications>=1.0.6 (from keras)
  Using cached https://files.pythonhosted.org/packages/90/85/64c82949765cfb246bbdaf5aca2d55f400f792655927a017710a78445def/Keras_Applications-1.0.7-py2.py3-none-any.whl
Collecting keras-preprocessing>=1.0.5 (from keras)
  Using cached https://files.pythonhosted.org/packages/c0/bf/0315ef6a9fd3fc2346e85b0ff1f5f83ca17073f2c31ac719ab2e4da0d4a3/Keras_Preprocessing-1.0.9-py2.py3-none-any.whl
Installing collected packages: keras-applications, keras-preprocessing, keras
Successfully installed keras-2.2.4 keras-applications-1.0.7 keras-preprocessing-1.0.9


---

This Notebook shows implementation of SSN Stacker.

In [1]:
BUCKET_NAME = "msil_raw"
FOLDER_NAME = "training_data"
XGBSTACKER = "stack_xgb_data.csv"
LSTMSTACKER = "stack_lstm_data.csv"

In [2]:
import google.datalab.storage as storage
import pandas as pd
from io import BytesIO

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xgb
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import time
from datetime import datetime
from scipy import integrate
import pickle
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras import optimizers

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
# setting up the parameters
plt.rcParams["figure.figsize"] = (10, 10)
pd.set_option("display.max_rows", 200)
pd.set_option("display.max_columns", 200)
pd.set_option("precision", 15)
sns.set_style("darkgrid")

In [4]:
mybucket = storage.Bucket(BUCKET_NAME)
data_csv = mybucket.object(FOLDER_NAME + "/" + XGBSTACKER)

uri = data_csv.uri
%gcs read --object $uri --variable data

stack_01 = pd.read_csv(BytesIO(data))
stack_01.head()

Unnamed: 0,y,yhat
0,93.0,92.99948880076408
1,93.0,92.99898770451544
2,93.0,92.99849197268486
3,93.0,92.99798074364662
4,93.0,92.99748611450195


In [5]:
mybucket = storage.Bucket(BUCKET_NAME)
data_csv = mybucket.object(FOLDER_NAME + "/" + LSTMSTACKER)

uri = data_csv.uri
%gcs read --object $uri --variable data

stack_02 = pd.read_csv(BytesIO(data))
stack_02.head()

Unnamed: 0,y,yhat
0,93.0,92.99729640828444
1,93.0,92.99459254019891
2,93.0,92.99188925069757
3,93.0,92.98918619798496
4,93.0,92.98648235551079


In [6]:
print("length of XGB Predictions is  {}".format(len(stack_01)))
print("length of LSTM Predictions is  {}".format(len(stack_02)))

length of XGB Predictions is  2203313
length of LSTM Predictions is  3032534


In [7]:
# making both dataset of same len
stack_02 = stack_02[:2203313]

In [8]:
stack_02.columns = ["actual2", "LSTM_pred"]
stack_01.columns = ["actual", "XGB_pred"]

df = pd.concat((stack_01, stack_02), axis = 1).reset_index(drop = True)
df = df.drop(columns = ["actual2"])

df = df[["XGB_pred", "LSTM_pred", "actual"]]
df.columns = ["XGB_pred", "LSTM_pred", "label"]
df.sample(10)

Unnamed: 0,XGB_pred,LSTM_pred,label
1346797,54.04267994165421,62.65068508186378,54.46649741814082
572358,51.11194770932197,53.18105208231136,51.02795673271328
1866146,35.33365345597266,51.040432356670486,33.49211110777518
1171115,83.55629034638403,83.63246642327867,82.34303302310268
1272661,83.30045257210733,84.68028315976262,82.99877405275508
2130054,89.39916926026346,89.3938756597694,89.39999999999996
1723166,63.65639660954476,65.48574954229407,60.31371203066762
1045753,79.76912841796873,74.54442038983106,79.87073654008974
2057520,61.463809067010885,66.06382625396364,59.46079905581677
1613779,85.64009679555893,85.28295052265747,86.01916494981396


In [9]:
df.isnull().sum()

XGB_pred     0
LSTM_pred    0
label        0
dtype: int64

In [None]:
x = df[["XGB_pred", "LSTM_pred"]].values
y = df["label"].values.reshape(-1, 1)

mms_1 = MinMaxScaler(feature_range = (0, 1))
mms_2 = MinMaxScaler(feature_range = (0, 1))
mms_1 = mms_1.fit(x)
mms_2 = mms_2.fit(y)
pickle.dump(mms_1, open("ssn_scaler_x.pickle.dat", "wb"))
pickle.dump(mms_2, open("ssn_scaler_y.pickle.dat", "wb"))

x=  mms_2.transform(x)
y = mms_2.transform(y)

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.25, random_state = 42)

In [11]:
model = Sequential()
model.add(Dense(128, input_dim = x_train.shape[1], kernel_initializer = 'normal', activation = 'relu'))
model.add(Dense(16, kernel_initializer = 'normal', activation = 'relu'))
model.add(Dense(1, kernel_initializer = 'normal'))
# Compile model
model.compile(loss='mean_squared_error', optimizer="adam")

In [12]:
model.fit(x_train, y_train, epochs=10, batch_size=32)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7feef537cef0>

In [13]:
predictions = model.predict(x_test)
predictions = mms_2.inverse_transform(predictions)
y_test = mms_2.inverse_transform(y_test)

In [14]:
predictions = predictions.ravel()
y_test = y_test.ravel()

In [16]:
result_df = pd.DataFrame({
  "y": y_test,
  "yhat": predictions
})

In [17]:
result_df.sample(10)

Unnamed: 0,y,yhat
462879,83.91728198413962,82.82864379882812
454178,72.42914647014301,70.73723602294922
550137,64.8,65.13129425048828
22096,89.87218444087027,89.37261962890625
423203,61.19755820982017,64.2813720703125
7391,34.547774969806085,35.281394958496094
480516,90.47450647188542,90.73003387451173
413620,80.56603870823379,81.44491577148438
343872,72.5709408110486,72.81790161132812
323251,44.071349805958725,43.46305084228516


In [18]:
(((result_df["y"] - result_df["yhat"])**2).mean())**0.5

2.223739796964447

## Saving the Model

In [19]:
# serialize model to JSON
model_json = model.to_json()
with open("ann_test.json", "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
model.save_weights("ann_test.h5")
print("Model is saved !!!")

Model is saved !!!


In [42]:
!gsutil cp 'model_lstm_stack.json' 'gs://msil_raw/training_data/model_lstm_stack_final.json'

Copying file://model_lstm_stack.json [Content-Type=application/json]...
/ [1 files][  1.5 KiB/  1.5 KiB]                                                
Operation completed over 1 objects/1.5 KiB.                                      
