In [138]:
import keras.backend as K
from keras.layers import Layer, Dense, TimeDistributed, Concatenate, InputSpec, Wrapper, RNN,Conv1D,Lambda,Add,Input
import numpy as np
import tensorflow as tf

In [167]:
class ScaledDotProductAttention(Layer):
    def __init__(self,**kwargs):
        super(ScaledDotProductAttention,self).__init__(**kwargs)
    def call(self,x):
        """
            Attention(Q,K,V)=softmax(Q*K^T / sqrt(d_k))*V
        """
        q,k,v=x
        
        d_k=q.shape.as_list()[2]
        
        weights=K.batch_dot(q,k,axes=[2,2])
        
        weights=K.softmax(weights/np.sqrt(d_k))
        
        output=K.batch_dot(weights,v)
        
        return output

In [175]:
class MultiHeadAttention(Layer):
    def __init__(self,h,d_k,**kwargs):
        self.h=h
        self.d_k=d_k
        self.d_v=d_k
        self.d_model=self.h*d_k
        self._q_layers=[]
        self._k_layers=[]
        self._v_layers=[]
        self.sdpa_layer=ScaledDotProductAttention()
        self._output=TimeDistributed(Dense(self.d_model))
        for _ in range(self.h):
            self._q_layers.append(TimeDistributed(Dense(self.d_k,activation="relu",use_bias=False)))
            self._k_layers.append(TimeDistributed(Dense(self.d_k,activation="relu",use_bias=False)))
            self._v_layers.append(TimeDistributed(Dense(self.d_v,activation="relu",use_bias=False)))
            
        super(MultiHeadAttention,self).__init__(**kwargs)
    def build(self,input_shape):
        
        super(MultiHeadAttention, self).build(input_shape)
    
    def call(self,x):
        """
            MultiHeadAttention(q,k,v)=concat(head_1,...head_h)*W_0
            head_i=Attention(q*W_q_i,k*W_k_i,v*W_v_i)
        """
        [q,k,v]=x
        print(q.shape)
        outputs=[]
        attentions=[]
        for i in range(self.h):
            qi=self._q_layers[i](q)
            ki=self._k_layers[i](k)
            vi=self._v_layers[i](v)
            output=self.sdpa_layer([qi,ki,vi])
            outputs.append(output)
        
        concatenated_outputs=Concatenate()(outputs)
        output=self._output(concatenated_outputs)
        return output
        

In [268]:
class PositionWiseFeedForward(Layer):
    def __init__(self,d_model=512,d_ff=2048,**kwargs):
        self.d_model=d_model,
        self.d_ff=d_ff
        self.conv1=Dense(units=d_ff,activation='relu')
        self.conv2=Dense(units=d_model)
        super(PositionWiseFeedForward,self).__init__(**kwargs)
        
    def build(self,input_shape):
        super(PositionWiseFeedForward,self).build(input_shape)
        
    def call(self,x):
        temp_x=self.conv1(x)
        print(temp_x.shape)
        x=self.conv2(temp_x)
        print(x.shape)
        return x
        

In [269]:
class LayerNormalization(Layer):
    def __init__(self,**kwargs):
        super(LayerNormalization,self).__init__(**kwargs)
    
    def build(self,input_shape):
        self.w=self.add_weight(name='normalization_weights',shape=(input_shape[-1],),initializer=Ones(),trainable=True)
        self.b=self.add_weight(name='bias',shape=(input_shape[-1],),initializer=Zeros(),trainable=True)
        super(LayerNormalization,self).build(input_shape)
    
    def call(self,x):
        print(x.shape)
        mean=K.mean(x,axis=-1)
        std=K.std(x,axis=-1)
        print(std.shape)
        output=(x-mean)/(std+1e-8)
        return output

In [270]:
class EncoderLayer(Layer):
    def __init__(self,h=8,d_k=64,d_hidden=2048,**kwargs):
        self.h=h
        self.d_k=d_k
        self.d_model=self.h*self.d_k
        
        self.d_hidden=d_hidden
        self.mha=MultiHeadAttention(self.h,self.d_k)
        #self.ln_1=LayerNormalization()
        self.add_1=Add()
        self.ffwd=PositionWiseFeedForward(d_model=self.d_model,d_ff=self.d_hidden)
        #self.ln_2=LayerNormalization()
        self.add_2=Add()
        super(EncoderLayer,self).__init__(**kwargs)
        
        
    def call(self,x):
        y=self.mha([x,x,x])
        print(y.shape)
        y=self.add_1([x,y])
        #y=self.ln_1(y)
        
        x=self.ffwd(y)
        x=self.add_2([x,y])
        #y=self.ln_2(x)
        
        return x
        
        

In [271]:
x=Input(shape=(150,8))

In [272]:
temp=PositionWiseFeedForward(d_model=8,d_ff=64)

In [273]:
temp_o=temp(x)

(?, 150, 64)


ValueError: setting an array element with a sequence.

In [259]:
temp_o.shape

TensorShape([Dimension(None), Dimension(150), Dimension(8)])

In [260]:
x.shape

TensorShape([Dimension(None), Dimension(150), Dimension(1)])

In [180]:
class DecoderLayer(Layer):
    def __init__(self,h=8,d_k=64,d_hidden=2048,**kwargs):
        self.h=h
        self.d_k=64
        self.d_model=self.h*self.d_k
        self.d_hidden=d_hidden
        self.mha_1=MultiHeadAttention(self.h,self.d_k)
        self.ln_1=LayerNormalization()
        self.add_1=Add()
        self.mha_2=MultiHeadAttention(self.h,self.d_k)
        self.ln_2=LayerNormalization()
        self.add_2=Add()
        self.ffwd=PositionWiseFeedForward(d_model=self.d_model,d_ff=self.d_hidden)
        self.ln_3=LayerNormalization()
        self.add_3=Add()
        super(DecoderLayer,self).__init__(**kwargs)
    
    def call(self,x,encoder_output):
        
        y=self.mha_1([x,x,x])
        y=self.add_1([x,y])
        y=self.ln_1(y)
        
        x=self.mha_2([encoder_output,encoder_output,y])
        x=self.add_2([x,y])
        x=self.ln_2(x)
        
        y=self.ffwd(x)
        y=self.add_3([x,y])
        y=self.ln_3(y)
        
        return y
        

In [181]:
class Encoder(Layer):
    def __init__(self,n=6,h=8,d_k=64,d_hidden=2048,**kwargs):
        self.n=n
        self.h=h
        self.d_k=d_k
        self.d_hidden=d_hidden
        self.layers=[]
        for i in range(n):
            self.layers.append(EncoderLayer(h=self.h,d_k=self.d_k,d_hidden=self.d_hidden))
        super(Encoder,self).__init__(**kwargs)
    
    def call(self,x):
        for layer in self.layers:
            x=layer(x)
        return x

In [182]:
class Decoder(Layer):
    def __init__(self,n=6,h=8,d_k=64,d_hidden=2048,**kwargs):
        self.n=n
        self.h=h
        self.d_k=d_k
        self.d_hidden=d_hidden
        self.layers=[]
        for i in range(n):
            self.layers.append(DecoderLayer(h=self.h,d_k=self.d_k,d_hidden=self.d_hidden))
        super(Decoder,self).__init__(**kwargs)
        
    def call(self,x,encoder_output):
        for layer in self.layers:
            x=layer(x,encoder_output)
        return x

In [183]:
from keras.utils import np_utils

In [106]:
import keras.preprocessing.sequence as sq

In [39]:
npzdata=np.load("data.npz")
data=npzdata['data']
labels=npzdata['labels']

In [40]:
maxlen=150
data=sq.pad_sequences(data,maxlen=maxlen,padding='post',truncating='post',dtype='float64')

In [41]:
data=data.reshape(data.shape[0],data.shape[1],1)

In [42]:
data.shape

(249996, 150, 1)

In [43]:
dict_label={}
n=0
set_labels=list(set(labels))
for l in set_labels:
    dict_label[l]=n
    n+=1

In [44]:
labels_new=[]
for l in labels:
    labels_new.append(dict_label[l])

label_1=np_utils.to_categorical(labels_new)

In [45]:
def generateData(data,labels,batch_size=32):
    nb_instances = data.shape[0]
    nb_classes = labels.shape[1]
    sample_shape = data[0].shape
    batch_data_shape = tuple([batch_size] + list(sample_shape))
    batch_label_shape = (batch_size, nb_classes)
    # Infinite loop
    while True:
        # Generate an exploration order
        indices = np.arange(nb_instances)

        np.random.shuffle(indices)

        # Generate batches
        imax = int(len(indices) / batch_size)
        for i in range(imax):
            # Form a batch
            x = np.empty(batch_data_shape)
            y = np.empty(batch_label_shape)
            for j, k in enumerate(indices[i * batch_size: (i + 1) * batch_size]):
                x[j] = data[k]
                y[j] = labels[k]
            if x.shape != batch_data_shape:
                print(x.shape)
                exit(0)
            yield x, y

In [46]:
nb_classes=label_1.shape[1]

In [47]:
from sklearn import model_selection
X_train, X_test, y_train, y_test =model_selection.train_test_split(data, label_1, test_size=0.33)
trainGen=generateData(X_train,y_train)
valGen=generateData(X_test,y_test)

In [48]:
X_train.shape[1:]

(150, 1)

In [195]:
from keras.layers import Input,Dense
from keras.models import Model
from keras.initializers import Ones, Zeros

In [196]:
x=Input(shape=(150,1),name='input')

In [197]:
enc=Encoder(n=6,h=8,d_k=1,d_hidden=64)

In [198]:
dec=Decoder(n=6,h=8,d_k=1,d_hidden=64)

In [200]:
enc_out=enc(x)

(?, 150, 1)


ValueError: Dimensions must be equal, but are 512 and 150 for 'encoder_15_1/encoder_layer_85/layer_normalization_409/sub_1' (op: 'Sub') with input shapes: [?,150,512], [?,150].

In [135]:
temp=ScaledDotProductAttention()

In [137]:
temp_out=temp(x)

TypeError: Tensor objects are not iterable when eager execution is not enabled. To iterate over this tensor use tf.map_fn.