In [1]:
from mlsquare.losses.keras import quantile_loss
import numpy as np
%load_ext autoreload
%autoreload 2

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
2019-09-24 10:08:47,129	INFO node.py:423 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-09-24_10-08-47_3112/logs.
2019-09-24 10:08:47,241	INFO services.py:363 -- Waiting for redis server at 127.0.0.1:12911 to respond...
2019-09-24 10:08:47,365	INFO services.py:363 -- Waiting for redis server at 127.0.0.1:36383 to respond...
2019-09-24 10:08:47,369	INFO services.py:760 -- Starting Redis shard with 20.0 GB max memory.
2019-09-24 10:08:47,398	INFO services.py:1384 -- Starting the Plasma object store with 1.0 GB memory using /tmp.


In [24]:
%matplotlib inline

import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
from statsmodels.regression.quantile_regression import QuantReg

# Load the Engel data set bundled with statsmodels and standardize every
# column (subtract the column mean, divide by the column std). The
# standardized frame `df` is what the Keras cells further down train on.
data = sm.datasets.engel.load_pandas().data
df = (data - data.mean()) / data.std()

# Reference fits: linear quantile regression of foodexp on income.
mod = smf.quantreg('foodexp ~ income', df)
# One fit per quantile; `res` stays bound to the last fit (q=.9).
for tau in (.1, .5, .9):
    res = mod.fit(q=tau)
    print(res.summary())

                         QuantReg Regression Results                          
Dep. Variable:                foodexp   Pseudo R-squared:               0.4945
Model:                       QuantReg   Bandwidth:                      0.2613
Method:                 Least Squares   Sparsity:                        1.303
Date:                Tue, 24 Sep 2019   No. Observations:                  235
Time:                        10:42:50   Df Residuals:                      233
                                        Df Model:                            1
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -0.4315      0.026    -16.758      0.000      -0.482      -0.381
income         0.7546      0.046     16.376      0.000       0.664       0.845
                         QuantReg Regression Results                          
Dep. Variable:                foodexp   Pseudo R-squ

# Median Regression using Keras MAE loss

In [3]:
# Median regression: a single linear unit trained with Keras' built-in MAE loss
from keras.models import Sequential
from keras.layers import Dense
# fix random seed for reproducibility
# NOTE(review): this seeds NumPy only; the Keras backend may need its own
# seed for fully repeatable runs -- confirm for the backend in use.
SEED = 7
np.random.seed(SEED)
# Predictor and response come from the standardized Engel frame `df`.
X = np.array(df.loc[:,"income"])
Y = np.array(df.loc[:,"foodexp"])
# create model: one Dense unit with linear activation, i.e. y = w*x + b
model = Sequential()
model.add(Dense(1, input_dim=1, activation='linear'))
# Compile model
model.compile(loss='mae', optimizer='sgd', metrics=['mae'])
# Fit the model
model.fit(X, Y, epochs=100, batch_size=20,verbose=0)
# evaluate the model on the same data it was trained on
scores = model.evaluate(X, Y)
# scores[1] is the 'mae' metric; it is multiplied by 100 for display only.
print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
# Learned kernel and bias, to compare against the q=.5 statsmodels fit.
print(model.get_weights())

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.

mean_absolute_error: 27.03%
[array([[1.0520353]], dtype=float32), array([0.03000003], dtype=float32)]


# Median Regression Using custom Quantile Loss

In [5]:
from mlsquare.losses.keras import quantile_loss
# Same single-unit linear model as the MAE cell, trained with the library's
# quantile loss at the median instead.
model = Sequential()
model.add(Dense(1, input_dim=1, activation='linear'))
# quantile_loss is called with the target quantile to build the loss, the
# same way the multi-quantile cell uses it; 0.5 selects the median.
model.compile(optimizer='sgd', metrics=['mae'],loss=quantile_loss(quantile=0.5))
# Fit the model
model.fit(X, Y, epochs=100, batch_size=20,verbose=0)
# evaluate the model on the training data
scores = model.evaluate(X, Y)
# scores[1] is the 'mae' metric; it is multiplied by 100 for display only.
print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
print(model.get_weights())


mean_absolute_error: 27.04%
[array([[1.033776]], dtype=float32), array([0.02433334], dtype=float32)]


# Quantile Regression, one model for each Quantile

In [33]:
from keras.layers import Input, Dense
from keras.models import Model
# One shared 1-d input feeding three independent single-unit linear heads,
# one head per quantile.
head_names = ('q1', 'q2', 'q3')
head_quantiles = (0.1, 0.5, 0.9)
x = Input(shape=(1,))
q1, q2, q3 = [Dense(1, activation='linear', name=head)(x) for head in head_names]
model = Model(inputs=x, outputs=[q1, q2, q3])
# Each head is paired with its own quantile loss; all heads weigh the same.
loss = {head: quantile_loss(quantile=tau) for head, tau in zip(head_names, head_quantiles)}
loss_weights = {head: 1.0 for head in head_names}
model.compile(optimizer='sgd', metrics=['mae'], loss=loss, loss_weights=loss_weights)
# Every head is trained against the same target vector.
targets = [Y, Y, Y]
model.fit(X, targets, epochs=500, batch_size=20, verbose=0)
# Evaluate on the training data and report the entry at index 1 of the
# metric list, scaled by 100 for display.
scores = model.evaluate(X, targets)
metric_name, metric_value = model.metrics_names[1], scores[1]
print("\n%s: %.2f%%" % (metric_name, metric_value*100))
# Kernel/bias pairs of the heads.
print(model.get_weights())


q1_loss: 5.96%
[array([[0.7531248]], dtype=float32), array([-0.43300548], dtype=float32), array([[1.0518758]], dtype=float32), array([0.02616658], dtype=float32), array([[1.2921655]], dtype=float32), array([0.4230025], dtype=float32)]


# Single-Input, Multiple-Output Interface: all quantiles share the same input

In [30]:
from keras.layers import Input, Dense
from keras.models import Model
from mlsquare.losses.keras import quantile_ensemble_loss
# Quantiles handed to the ensemble loss.
quantiles = np.array([0.1, 0.5, 0.9])
# A single Dense layer with three linear units on the shared 1-d input.
x = Input(shape=(1,))
q = Dense(units=3, activation='linear', name='q1')(x)
model = Model(inputs=x, outputs=q)
loss = quantile_ensemble_loss(quantile=quantiles)
model.compile(loss=loss, optimizer='sgd', metrics=['mae'])
# Train, then evaluate on the same data.
model.fit(X, Y, epochs=500, batch_size=20, verbose=0)
scores = model.evaluate(X, Y)
# Report the entry at index 1 of the metric list, scaled by 100 for display.
metric_name, metric_value = model.metrics_names[1], scores[1]
print("\n%s: %.2f%%" % (metric_name, metric_value*100))
# Kernel and bias of the three-unit layer.
print(model.get_weights())


mean_absolute_error: 41.57%
[array([[0.6503008, 1.0055718, 1.1047503]], dtype=float32), array([-0.51633525,  0.02011111,  0.40517914], dtype=float32)]


# Single-Input, Multiple-Output Interface: each quantile can be fed by an arbitrary DNN; a cross-over penalty is applied at the end

In [43]:
from keras.layers import Input, Dense, Concatenate, concatenate
from keras.models import Model
from keras import initializers
# Three separate single-unit linear heads on one shared 1-d input.
x = Input(shape=(1,))
q1, q2, q3 = [Dense(1, activation='linear', name=head)(x) for head in ('q1', 'q2', 'q3')]
cat = concatenate([q1, q2, q3])
# Frozen linear layer with identity kernel and zero bias: it forwards the
# concatenated heads unchanged as one 3-wide output for the ensemble loss.
passthrough = Dense(
    3,
    activation='linear',
    name='final',
    kernel_initializer=initializers.Identity(gain=1.0),
    bias_initializer=initializers.Zeros(),
    trainable=False,
)
q = passthrough(cat)
model = Model(inputs=x, outputs=q)
quantiles = np.array([0.1, 0.5, 0.9])
loss = quantile_ensemble_loss(quantile=quantiles)
model.compile(loss=loss, optimizer='sgd', metrics=['mae'])
# Train, then evaluate on the same data.
model.fit(X, Y, epochs=500, batch_size=20, verbose=0)
scores = model.evaluate(X, Y)
# Report the entry at index 1 of the metric list, scaled by 100 for display.
metric_name, metric_value = model.metrics_names[1], scores[1]
print("\n%s: %.2f%%" % (metric_name, metric_value*100))
# Head kernels/biases followed by the frozen identity layer's weights.
print(model.get_weights())


mean_absolute_error: 40.44%
[array([[0.69779]], dtype=float32), array([-0.47458422], dtype=float32), array([[1.0511156]], dtype=float32), array([0.02738887], dtype=float32), array([[1.29023]], dtype=float32), array([0.42405877], dtype=float32), array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]], dtype=float32), array([0., 0., 0.], dtype=float32)]
