In [42]:
import keras.backend as K
from keras.layers import Layer, Dense, TimeDistributed, Concatenate, InputSpec, Wrapper, RNN,Conv1D,Lambda,Add,Input,Flatten
import numpy as np
import tensorflow as tf
from keras.models import Model
from keras.initializers import Ones, Zeros

In [147]:
class ScaledDotProductAttention(Layer):
    def __init__(self,**kwargs):
        super(ScaledDotProductAttention,self).__init__(**kwargs)
    def call(self,x):
        """
            Attention(Q,K,V)=softmax(Q*K^T / sqrt(d_k))*V
        """
        q,k,v=x
        
        d_k=q.shape.as_list()[2]
        weights=K.batch_dot(q,k)
        
        weights=K.softmax(weights/np.sqrt(d_k))
        
        output=K.batch_dot(weights,v)

        return output

In [148]:
class MultiHeadAttention(Layer):
    def __init__(self,h,d_k,**kwargs):
        self.h=h
        self.d_k=d_k
        self.d_v=d_k
        self.d_model=self.h*d_k
        self._q_layers=[]
        self._k_layers=[]
        self._v_layers=[]
        self.sdpa_layer=ScaledDotProductAttention()
        self._output=TimeDistributed(Dense(self.d_model))
        for _ in range(self.h):
            self._q_layers.append(TimeDistributed(Dense(self.d_k,activation="relu",use_bias=False)))
            self._k_layers.append(TimeDistributed(Dense(self.d_k,activation="relu",use_bias=False)))
            self._v_layers.append(TimeDistributed(Dense(self.d_v,activation="relu",use_bias=False)))
            
        super(MultiHeadAttention,self).__init__(**kwargs)
    def build(self,input_shape):
        
        super(MultiHeadAttention, self).build(input_shape)
    
    def call(self,x):
        """
            MultiHeadAttention(q,k,v)=concat(head_1,...head_h)*W_0
            head_i=Attention(q*W_q_i,k*W_k_i,v*W_v_i)
        """
        [q,k,v]=x
        
        outputs=[]
        attentions=[]
        for i in range(self.h):
            qi=self._q_layers[i](q)
            ki=self._k_layers[i](k)
            vi=self._v_layers[i](v)
            output=self.sdpa_layer([qi,ki,vi])
            outputs.append(output)
        
        concatenated_outputs=Concatenate()(outputs)
        output=self._output(concatenated_outputs)
        return output
        

In [181]:
class PositionWiseFeedForward(Layer):
    def __init__(self,d_model=512,d_ff=2048,**kwargs):
        self.d_model=d_model,
        self.d_ff=d_ff
        self.conv1=Conv1D(filters=d_ff,activation='relu',use_bias=True,kernel_size=1)
        self.conv2=Dense(units=d_model,use_bias=True)
        super(PositionWiseFeedForward,self).__init__(**kwargs)
        
    def build(self,input_shape):
        super(PositionWiseFeedForward,self).build(input_shape)
        
    def call(self,x):
        xshape = x.shape
        print(xshape)
        result = tf.reshape(x, [xshape[0].value, xshape[2].value, xshape[1].value])
        
        temp_x=self.conv1(result)
        print(temp_x.shape)
        x=self.conv2(temp_x)
        
        return x
        

In [182]:
class LayerNormalization(Layer):
    def __init__(self, eps=1e-6, **kwargs):
        self.eps = eps
        super(LayerNormalization, self).__init__(**kwargs)
    def build(self, input_shape):
        self.gamma = self.add_weight(name='gamma', shape=input_shape[-1:],
                                     initializer=Ones(), trainable=True)
        self.beta = self.add_weight(name='beta', shape=input_shape[-1:],
                                    initializer=Zeros(), trainable=True)
        super(LayerNormalization, self).build(input_shape)
    def call(self, x):
        mean = K.mean(x, axis=-1, keepdims=True)
        std = K.std(x, axis=-1, keepdims=True)
        return self.gamma * (x - mean) / (std + self.eps) + self.beta

In [183]:
class EncoderLayer(Layer):
    def __init__(self,h=8,d_k=64,d_hidden=2048,**kwargs):
        self.h=h
        self.d_k=d_k
        self.d_model=self.h*self.d_k
        
        self.d_hidden=d_hidden
        self.mha=MultiHeadAttention(self.h,self.d_k)
        self.ln_1=LayerNormalization()
        self.add_1=Add()
        self.ffwd=PositionWiseFeedForward(d_model=self.d_model,d_ff=self.d_hidden)
        self.ln_2=LayerNormalization()
        self.add_2=Add()
        super(EncoderLayer,self).__init__(**kwargs)
        
        
    def call(self,x):
        y=self.mha([x,x,x])
        y=self.add_1([x,y])
        y=self.ln_1(y)
        
        x=self.ffwd(y)
        print(x.shape)
        x=self.add_2([x,y])
        y=self.ln_2(x)
        
        return y
        
        

In [184]:
x=Input(shape=(150,1),batch_shape=(32,150,1),name='input')

In [185]:
temp=EncoderLayer(h=8,d_k=1,d_hidden=64)

In [186]:
temp_o=temp(x)

(32, 150, 8)
(32, 8, 64)
(32, 8, 8)


ValueError: Dimensions must be equal, but are 8 and 150 for 'encoder_layer_48/add_98/add' (op: 'Add') with input shapes: [32,8,8], [32,150,8].

In [None]:
temp_o.shape

In [74]:
x.shape

TensorShape([Dimension(None), Dimension(150), Dimension(1)])

In [12]:
class DecoderLayer(Layer):
    def __init__(self,h=8,d_k=64,d_hidden=2048,**kwargs):
        self.h=h
        self.d_k=64
        self.d_model=self.h*self.d_k
        self.d_hidden=d_hidden
        self.mha_1=MultiHeadAttention(self.h,self.d_k)
        self.ln_1=LayerNormalization()
        self.add_1=Add()
        self.mha_2=MultiHeadAttention(self.h,self.d_k)
        self.ln_2=LayerNormalization()
        self.add_2=Add()
        self.ffwd=PositionWiseFeedForward(d_model=self.d_model,d_ff=self.d_hidden)
        self.ln_3=LayerNormalization()
        self.add_3=Add()
        super(DecoderLayer,self).__init__(**kwargs)
    
    def call(self,inp):
        x,encoder_output=inp
        y=self.mha_1([x,x,x])
        y=self.add_1([x,y])
        y=self.ln_1(y)
        
        x=self.mha_2([encoder_output,encoder_output,y])
        x=self.add_2([x,y])
        x=self.ln_2(x)
        
        y=self.ffwd(x)
        y=self.add_3([x,y])
        y=self.ln_3(y)
        
        return y
        

In [13]:
class Encoder(Layer):
    def __init__(self,n=6,h=8,d_k=64,d_hidden=2048,**kwargs):
        self.n=n
        self.h=h
        self.d_k=d_k
        self.d_hidden=d_hidden
        self.layers=[]
        for i in range(n):
            self.layers.append(EncoderLayer(h=self.h,d_k=self.d_k,d_hidden=self.d_hidden))
        super(Encoder,self).__init__(**kwargs)
    
    def call(self,x):
        for layer in self.layers:
            x=layer(x)
        return x

In [14]:
class Decoder(Layer):
    def __init__(self,n=6,h=8,d_k=64,d_hidden=2048,**kwargs):
        self.n=n
        self.h=h
        self.d_k=d_k
        self.d_hidden=d_hidden
        self.layers=[]
        for i in range(n):
            self.layers.append(DecoderLayer(h=self.h,d_k=self.d_k,d_hidden=self.d_hidden))
        super(Decoder,self).__init__(**kwargs)
        
    def call(self,x):
        y,encoder_output=x
        for layer in self.layers:
            y=layer([y,encoder_output])
        return y

In [15]:
from keras.utils import np_utils

In [16]:
import keras.preprocessing.sequence as sq

In [17]:
npzdata=np.load("data.npz")
data=npzdata['data']
labels=npzdata['labels']

In [18]:
maxlen=150
data=sq.pad_sequences(data,maxlen=maxlen,padding='post',truncating='post',dtype='float64')

In [19]:
data=data.reshape(data.shape[0],data.shape[1],1)

In [20]:
data.shape

(249996, 150, 1)

In [21]:
dict_label={}
n=0
set_labels=list(set(labels))
for l in set_labels:
    dict_label[l]=n
    n+=1

In [22]:
labels_new=[]
for l in labels:
    labels_new.append(dict_label[l])

label_1=np_utils.to_categorical(labels_new)

In [23]:
def generateData(data,labels,batch_size=32):
    nb_instances = data.shape[0]
    nb_classes = labels.shape[1]
    sample_shape = data[0].shape
    batch_data_shape = tuple([batch_size] + list(sample_shape))
    batch_label_shape = (batch_size, nb_classes)
    # Infinite loop
    while True:
        # Generate an exploration order
        indices = np.arange(nb_instances)

        np.random.shuffle(indices)

        # Generate batches
        imax = int(len(indices) / batch_size)
        for i in range(imax):
            # Form a batch
            x = np.empty(batch_data_shape)
            y = np.empty(batch_label_shape)
            for j, k in enumerate(indices[i * batch_size: (i + 1) * batch_size]):
                x[j] = data[k]
                y[j] = labels[k]
            if x.shape != batch_data_shape:
                print(x.shape)
                exit(0)
            yield x, y

In [24]:
nb_classes=label_1.shape[1]

In [25]:
from sklearn import model_selection
X_train, X_test, y_train, y_test =model_selection.train_test_split(data, label_1, test_size=0.33)
trainGen=generateData(X_train,y_train)
valGen=generateData(X_test,y_test)

In [26]:
X_train.shape[1:]

(150, 1)

In [126]:
x=Input(shape=(150,1),batch_shape=(32,150),name='input')

In [127]:
enc=Encoder(n=6,h=8,d_k=1,d_hidden=64)

In [128]:
enc_out=enc(x)

AssertionError: 

In [30]:

flat_l=Flatten()
y_temp=flat_l(enc_out)

In [31]:
y=Dense(nb_classes,activation='softmax',name='output')(y_temp)

In [32]:
model=Model(inputs=[x],outputs=[y])

In [33]:
from keras.optimizers import Adam
metrics=['accuracy']
optimizer=Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
model.compile(loss="categorical_crossentropy",optimizer=optimizer,metrics=metrics)

In [34]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (InputLayer)           (None, 150, 1)            0         
_________________________________________________________________
encoder_1 (Encoder)          (None, 150, 1)            0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 150)               0         
_________________________________________________________________
output (Dense)               (None, 101)               15251     
Total params: 15,251
Trainable params: 15,251
Non-trainable params: 0
_________________________________________________________________


In [35]:
history=model.fit_generator(generator=trainGen,steps_per_epoch=X_train.shape[0]//32,validation_data=valGen,validation_steps=X_test.shape[0]//32,epochs=1)

Epoch 1/1


InvalidArgumentError: Input to reshape is a tensor with 2457600 values, but the requested shape has 307200
	 [[Node: encoder_1/encoder_layer_2/position_wise_feed_forward_2/dense_53/Reshape_2 = Reshape[T=DT_FLOAT, Tshape=DT_INT32, _device="/job:localhost/replica:0/task:0/device:GPU:0"](encoder_1/encoder_layer_2/position_wise_feed_forward_2/dense_53/MatMul, encoder_1/encoder_layer_2/position_wise_feed_forward_2/dense_53/Reshape_2/shape)]]
	 [[Node: loss/mul/_533 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_2822_loss/mul", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]

In [125]:
from tensor2tensor.models.transformer import Transformer

ImportError: This version of TensorFlow Probability requires TensorFlow version >= 1.12.0; Detected an installation of version 1.10.0. Please upgrade TensorFlow to proceed.