### custom DNDT keras layer

* **A typical custom dot product layer--example**

In [4]:
import keras
keras.__version__

'2.3.1'

In [51]:
class MyLayer(Layer):
    """Minimal custom Keras layer that multiplies its input by a trainable kernel.

    No bias and no activation are applied: ``call`` returns ``K.dot(x, kernel)``.

    Args:
        output_dim: number of output units (columns of the kernel).
        **kwargs: forwarded unchanged to the base ``Layer`` constructor.
    """

    def __init__(self, output_dim, **kwargs):
        self.output_dim = output_dim
        super(MyLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the kernel of shape ``(input_shape[1], output_dim)``."""
        self.kernel = self.add_weight(name='kernel',
                                      shape=(input_shape[1], self.output_dim),
                                      initializer='uniform',
                                      trainable=True)
        self.built = True

    def call(self, x):
        """Return the dot product of the input batch with the kernel."""
        return K.dot(x, self.kernel)

    def compute_output_shape(self, input_shape):
        """Keep the batch axis; the feature axis becomes ``output_dim``."""
        return (input_shape[0], self.output_dim)

    def get_config(self):
        """Return the layer config including ``output_dim``.

        Without this entry the layer could not be re-created from its config,
        because ``__init__`` requires ``output_dim``.
        """
        config = super(MyLayer, self).get_config()
        config['output_dim'] = self.output_dim
        return config

In [233]:
from keras import backend as K
from keras.engine.topology import Layer

from keras.layers import Input, Dense
from keras.models import Model

In [56]:
# Symbolic two-feature input routed through the custom dot-product layer (3 units).
l1 = Input(shape=(2,))
latent = MyLayer(output_dim=3)(l1)

In [59]:
# Wrap input -> MyLayer in a Model so its weights and predictions can be inspected.
mod = Model(outputs=latent, inputs=l1)
mod.summary()

Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_18 (InputLayer)        (None, 2)                 0         
_________________________________________________________________
my_layer_6 (MyLayer)         (None, 3)                 6         
Total params: 6
Trainable params: 6
Non-trainable params: 0
_________________________________________________________________


In [62]:
# layers[1] is the MyLayer instance; get_weights() returns a list holding its kernel.
custom_layer = mod.layers[1]
wgt = custom_layer.get_weights()
wgt  # weights as initialised by the custom layer

[array([[ 0.00611025,  0.02683054,  0.02120567],
        [ 0.03125073, -0.02671376, -0.04855492]], dtype=float32)]

In [74]:
# NOTE(review): `x` is only assigned in the iris cells further down this notebook,
# so this cell depends on those cells having been run first — confirm execution order.
t1= x[:1]
mod.predict(t1)#prediction at end of custom layer

array([[0.0148045 , 0.03222001, 0.01997696]], dtype=float32)

In [71]:
# Shapes of the single input row and of the kernel (first entry of the weight list).
print('shapes: ',t1.shape, wgt[0].shape)

shapes:  (1, 2) (2, 3)


* Cross checking results

In [72]:
import numpy as np
# wgt is the *list* returned by get_weights(), so NumPy treats it as a (1, 2, 3) array
# and the product carries an extra axis; np.dot(t1, wgt[0]) would give the plain
# (1, 3) result that matches mod.predict(t1).
np.dot(t1, wgt)

array([[[0.0148045 , 0.03222001, 0.01997696]]])

____________________

* **Keras implementation of Custom DT layer**

In [294]:
import keras
from keras.layers import Input, Dense
from keras.models import Model
from functools import reduce
from keras import backend as K

from keras.layers import Layer
keras.__version__

'2.3.1'

In [311]:
#check where is num_Class arg passed into initialization into mlsquare.DT?
class DT(Layer):
    """Soft decision-tree (DNDT) layer for standalone Keras.

    Every input feature is softly binned against its own trainable cut points;
    the per-feature bin memberships are combined with a row-wise Kronecker
    product into leaf memberships, which are then multiplied by a trainable
    leaf-score matrix.

    All tensor maths uses backend ops (not NumPy) so that the computation can be
    applied to symbolic tensors and trained through.

    Args:
        cuts_per_feat: sequence with the number of cut points for each input feature.
        t: softmax temperature used by the soft binning.
        num_class: number of output columns of the leaf-score matrix.
        **kwargs: forwarded unchanged to the base ``Layer`` constructor.
    """

    def __init__(self, cuts_per_feat, t=0.1, num_class=3, **kwargs):
        self.temperature = t
        self.num_cut = cuts_per_feat
        # Plain Python int so it can be used directly as a weight dimension.
        self.num_leaf = int(np.prod(np.array(cuts_per_feat) + 1))
        self.num_class = num_class
        super(DT, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the leaf-score matrix and one cut-point vector per feature."""
        self.leaf_score = self.add_weight(name='leaf_score',
                                          shape=(self.num_leaf, self.num_class),
                                          initializer='random_uniform',
                                          trainable=True)
        self.cut_points_list = [
            self.add_weight(name='cut_points_{}'.format(idx),
                            shape=(cut_val,),
                            initializer='random_uniform',
                            trainable=True)
            for idx, cut_val in enumerate(self.num_cut)
        ]
        super(DT, self).build(input_shape)

    def kron_prod(self, a, b):
        """Row-wise Kronecker product of ``a`` (N, J) and ``b`` (N, K) -> (N, J*K)."""
        # Same as einsum('ij,ik->ijk') followed by flattening the last two axes.
        outer = K.expand_dims(a, 2) * K.expand_dims(b, 1)
        width = K.int_shape(a)[1] * K.int_shape(b)[1]
        return K.reshape(outer, (-1, width))

    def binn(self, x, cut_points, temperature=0.1):
        """Softly assign each row of ``x`` to one of ``D + 1`` bins.

        Args:
            x: N-by-1 column holding a single feature.
            cut_points: D-dim vector of cut points for that feature.
            temperature: softmax temperature.

        Returns:
            N-by-(D+1) tensor of softmax bin memberships.
        """
        # Imported here because this cell's import block does not bring `tf` into scope.
        import tensorflow as tf

        D = K.int_shape(cut_points)[0]
        W = K.constant(np.linspace(1.0, D + 1.0, D + 1).reshape(1, -1))
        cut_points = tf.sort(cut_points)  # make sure cut_points is monotonically increasing
        # Biases are the running sums of [0, -c_1, ..., -c_D].
        b = K.cumsum(K.concatenate([K.constant([0.0]), -cut_points], axis=0), axis=0)
        h = K.dot(x, W) + b
        return K.softmax(h / temperature)

    def call(self, inputs):
        """Return leaf memberships multiplied by the leaf-score matrix."""
        per_feature_bins = [
            self.binn(inputs[:, idx:idx + 1], cut_points, self.temperature)
            for idx, cut_points in enumerate(self.cut_points_list)
        ]
        leaf = reduce(self.kron_prod, per_feature_bins)
        return K.dot(leaf, self.leaf_score)

    def compute_output_shape(self, input_shape):
        """Keep the batch axis; the feature axis becomes ``num_class``."""
        return (input_shape[0], self.num_class)

    def get_config(self):
        """Return the layer config including the constructor arguments."""
        config = super(DT, self).get_config()
        config.update({'cuts_per_feat': list(self.num_cut),
                       't': self.temperature,
                       'num_class': self.num_class})
        return config

In [313]:
#l1 =Input(shape=(2,))
#latent= DT([1,1], num_class=3, t=0.1)(l1)

#pred = Dense(3, activation='sigmoid')(latent)

#model_dt_2 = Model(inputs=l1, outputs= pred)
#model_dt_2.summary()

* The layer above has some inconsistency when combined with a Dense layer that follows it.
* Its results need to be evaluated with TF 2.0, alongside the results from the author's demo.py.

* So the following training is done with a model defined using tf.keras & keras conventions 

In [262]:
from sklearn.datasets import load_iris

iris= load_iris()

In [263]:
# Keep only columns 2 and 3 of the iris features (petal length, petal width).
x = iris.data[:, 2:4]
y = iris.target

In [170]:
from keras.utils import to_categorical
from keras import losses
from keras import optimizers

# One-hot encode the integer class labels.
# NOTE(review): y is overwritten in place, so re-running this cell on its own would
# encode the already one-hot array again — re-run the cell that assigns y first.
y = to_categorical(y)
print('X & y shapes:', x.shape, y.shape)

X & y shapes: (150, 2) (150, 3)


In [163]:
# NOTE(review): model_dt_2 is only built in the commented-out cell above, so this cell
# relies on state left over from an earlier run — confirm before re-executing.
opt= optimizers.Adam()
loss= losses.categorical_crossentropy 
model_dt_2.compile(optimizer=opt, loss=loss, metrics=['accuracy'])

In [171]:
# Train on the petal features and one-hot targets: 16 shuffled epochs, batches of 8.
model_dt_2.fit(x,y,batch_size=8, epochs=16, verbose=1, shuffle=True)

Instructions for updating:
Use tf.cast instead.
Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16


<keras.callbacks.callbacks.History at 0x7fe3e5f10cf8>

_________


#### `tensorflow.keras` implementation of Custom DT layer: with TF 2.0 

In [15]:
import tensorflow as tf
from tensorflow.keras import layers
from functools import reduce
tf.__version__

'2.0.0'

In [106]:
class DT(layers.Layer):
    """Soft decision-tree (DNDT) layer for ``tf.keras``.

    Every input feature is softly binned against its own trainable cut points;
    the per-feature bin memberships are combined with a row-wise Kronecker
    product into leaf memberships, which are then multiplied by a trainable
    leaf-score matrix.

    Args:
        cuts_per_feat: sequence with the number of cut points for each input
            feature; its length must equal the number of input features.
        num_class: number of classes / units, i.e. the output dimension.
        t: softmax temperature used by the soft binning.
        **kwargs: forwarded unchanged to the base ``Layer`` constructor.
    """

    def __init__(self, cuts_per_feat, num_class=3, t=0.1, **kwargs):
        super(DT, self).__init__(**kwargs)
        self.num_class = num_class  # determines output dims
        self.num_cut = cuts_per_feat
        self.temperature = t
        # Number of leaves = prod(cuts + 1). Kept as a plain Python int so it is
        # usable as a weight dimension and does not store a tensor on the layer.
        num_leaf = 1
        for n_cuts in cuts_per_feat:
            num_leaf *= int(n_cuts) + 1
        self.num_leaf = num_leaf

    def build(self, input_shape):
        """Create the leaf-score matrix and one cut-point vector per feature."""
        assert input_shape[1]==len(self.num_cut), 'Num. of defined cut points and input feature count is unequal; Define cut points for each input feature'

        self.leaf_score = self.add_weight(name='leaf_score',
                                          shape=(self.num_leaf, self.num_class),
                                          initializer='random_uniform',
                                          trainable=True)
        self.cut_points_list = [
            self.add_weight(name='cut_points_{}'.format(idx),
                            shape=(cut_val,),
                            initializer='random_uniform',
                            trainable=True)
            for idx, cut_val in enumerate(self.num_cut)
        ]

        self.built = True

    def kron_prod(self, a, b):
        """Row-wise Kronecker product of ``a`` (N, J) and ``b`` (N, K) -> (N, J*K)."""
        res = tf.einsum('ij,ik->ijk', a, b)
        # Static widths are known here, so the flattened size is a plain int.
        width = int(a.shape[1]) * int(b.shape[1])
        return tf.reshape(res, [-1, width])

    def binn(self, x, cut_points, temperature):
        """Softly assign each row of ``x`` to one of ``D + 1`` bins.

        Args:
            x: N-by-1 matrix (column vector) holding a single feature.
            cut_points: D-dim vector (D is the number of cut points).
            temperature: softmax temperature.

        Returns:
            N-by-(D+1) tensor of softmax bin memberships.
        """
        D = cut_points.get_shape().as_list()[0]
        W = tf.reshape(tf.linspace(1.0, D + 1.0, D + 1), [1, -1])

        cut_points = tf.sort(cut_points)  # makes sure cut_points is monotonically increasing
        # Biases are the running sums of [0, -c_1, ..., -c_D].
        b = tf.cumsum(tf.concat([tf.constant(0.0, shape=[1]), -cut_points], 0))

        h = tf.matmul(x, W) + b
        return tf.nn.softmax(h / temperature)

    def call(self, inputs):
        """Return leaf memberships multiplied by the leaf-score matrix."""
        per_feature_bins = [
            self.binn(inputs[:, idx:idx + 1], cut_points, self.temperature)
            for idx, cut_points in enumerate(self.cut_points_list)
        ]
        leaf = reduce(self.kron_prod, per_feature_bins)
        return tf.matmul(leaf, self.leaf_score)

    def compute_output_shape(self, input_shape):
        """Keep the batch axis; the feature axis becomes ``num_class``."""
        return (input_shape[0], self.num_class)

    def get_config(self):
        """Return the layer config including the constructor arguments."""
        config = super(DT, self).get_config()
        config.update({'cuts_per_feat': list(self.num_cut),
                       'num_class': self.num_class,
                       't': self.temperature})
        return config
        

In [108]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model

# Iris model: 2 petal features -> DT (one cut point per feature) -> Dense(3).
l1 = Input(shape=(2,))  # `(2)` is just the int 2; pass a real 1-tuple
latent = DT([1, 1], num_class=3, t=0.1)(l1)
# NOTE(review): sigmoid outputs are paired with categorical_crossentropy below;
# softmax is the usual choice for one-hot targets — confirm this is intended.
pred = Dense(3, activation='sigmoid')(latent)

model_dt_3 = Model(inputs=l1, outputs=pred)
model_dt_3.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_6 (InputLayer)         [(None, 2)]               0         
_________________________________________________________________
dt_5 (DT)                    (None, 3)                 14        
_________________________________________________________________
dense_1 (Dense)              (None, 3)                 12        
Total params: 26
Trainable params: 26
Non-trainable params: 0
_________________________________________________________________


In [114]:
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import losses
from tensorflow.keras import optimizers
from sklearn.datasets import load_iris

iris = load_iris()

# Keep only columns 2 and 3 of the iris features (petal length, petal width)
# and one-hot encode the integer class labels.
x = iris.data[:, 2:4]
y = to_categorical(iris.target)
print('X & y shapes:', x.shape, y.shape)

X & y shapes: (150, 2) (150, 3)


In [116]:
# Adam with default settings and categorical cross-entropy on the one-hot targets.
opt = optimizers.Adam()
loss = losses.categorical_crossentropy
model_dt_3.compile(loss=loss, optimizer=opt, metrics=['accuracy'])

# 16 shuffled epochs with mini-batches of 8 samples.
model_dt_3.fit(x, y, epochs=16, batch_size=8, shuffle=True, verbose=1)

Train on 150 samples
Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16


<tensorflow.python.keras.callbacks.History at 0x7f1906a4b438>

* **Trying the Titanic dataset (3 input features) with the above layer**

In [163]:
import pandas as pd
# Read the Titanic training CSV from a path relative to the working directory
# and preview the first three rows.
data2 = pd.read_csv('titanic//train.csv')
data2.head(3)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S


In [142]:
from sklearn.preprocessing import LabelEncoder
# Replace the string 'Sex' column with integer codes so it can feed the DT layer.
# Note that this overwrites the column of data2 in place.
le= LabelEncoder()
data2['Sex'] = le.fit_transform(data2['Sex'])
data2.head(3)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",1,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",0,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",0,26.0,0,0,STON/O2. 3101282,7.925,,S


In [122]:
# List the available column names before picking the model's input features.
data2_cols= list(data2.columns)
data2_cols

['PassengerId',
 'Survived',
 'Pclass',
 'Name',
 'Sex',
 'Age',
 'SibSp',
 'Parch',
 'Ticket',
 'Fare',
 'Cabin',
 'Embarked']

In [155]:
# Three input features; the target is the one-hot encoded 'Survived' column.
feature_cols = ['Pclass', 'Sex', 'SibSp']
x = data2[feature_cols].values
y = to_categorical(data2[['Survived']].values)
print('X & y shapes:', x.shape, y.shape)

X & y shapes: (891, 3) (891, 2)


In [157]:
# Titanic model: 3 features -> DT (one cut point per feature) -> Dense(2).
l1 = Input(shape=(3,))  # `(3)` is just the int 3; pass a real 1-tuple
latent = DT([1, 1, 1], num_class=2, t=0.1)(l1)
# NOTE(review): sigmoid outputs are paired with categorical_crossentropy below;
# softmax is the usual choice for one-hot targets — confirm this is intended.
pred = Dense(2, activation='sigmoid')(latent)

model_dt_4 = Model(inputs=l1, outputs=pred)
model_dt_4.summary()

Model: "model_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_12 (InputLayer)        [(None, 3)]               0         
_________________________________________________________________
dt_11 (DT)                   (None, 2)                 19        
_________________________________________________________________
dense_6 (Dense)              (None, 2)                 6         
Total params: 25
Trainable params: 25
Non-trainable params: 0
_________________________________________________________________


In [158]:
# Adam with default settings and categorical cross-entropy on the one-hot targets.
opt = optimizers.Adam()
loss = losses.categorical_crossentropy
model_dt_4.compile(loss=loss, optimizer=opt, metrics=['accuracy'])

# 8 shuffled epochs with mini-batches of 8 samples.
model_dt_4.fit(x, y, epochs=8, batch_size=8, shuffle=True, verbose=1)

Train on 891 samples
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<tensorflow.python.keras.callbacks.History at 0x7f18d7e89dd8>

* varying `cuts_per_features`

In [161]:
# Same Titanic model, now with 2, 2 and 3 cut points for the three features.
l1 = Input(shape=(3,))  # `(3)` is just the int 3; pass a real 1-tuple
latent = DT([2, 2, 3], num_class=2, t=0.1)(l1)
# NOTE(review): sigmoid outputs are paired with categorical_crossentropy below;
# softmax is the usual choice for one-hot targets — confirm this is intended.
pred = Dense(2, activation='sigmoid')(latent)

model_dt_4 = Model(inputs=l1, outputs=pred)
model_dt_4.summary()

Model: "model_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_14 (InputLayer)        [(None, 3)]               0         
_________________________________________________________________
dt_13 (DT)                   (None, 2)                 79        
_________________________________________________________________
dense_8 (Dense)              (None, 2)                 6         
Total params: 85
Trainable params: 85
Non-trainable params: 0
_________________________________________________________________


In [162]:
# Adam with default settings and categorical cross-entropy on the one-hot targets.
opt = optimizers.Adam()
loss = losses.categorical_crossentropy
model_dt_4.compile(loss=loss, optimizer=opt, metrics=['accuracy'])

# 8 shuffled epochs with mini-batches of 8 samples.
model_dt_4.fit(x, y, epochs=8, batch_size=8, shuffle=True, verbose=1)

Train on 891 samples
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<tensorflow.python.keras.callbacks.History at 0x7f18d4764588>

___________