In [1]:
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.models import Model


In [2]:
# Layer name that make_embedding() assigns to the embedding layer it creates.
DEFAULT_EMB_NAME = 'easytc_embedding'

def make_embedding(inputs,
                   vocab_size: int,
                   embed_size: int,
                   maxlen: int,
                   weights=None):
    """Create an embedding layer named ``DEFAULT_EMB_NAME`` and apply it to ``inputs``.

    The only decision made here is whether the ``weights`` argument is
    forwarded to the layer: it is passed (wrapped in a list) when given and
    omitted otherwise.

    Arguments:
        inputs -- tensor the embedding layer is called on
        vocab_size {int} -- vocab size (passed as ``input_dim``)
        embed_size {int} -- embed size (passed as ``output_dim``)
        maxlen {int} -- max sequence length (passed as ``input_length``)

    Keyword Arguments:
        weights {None or np array} -- Initial weight of the embedding.  (default: {None})

    Returns:
        Tensor -- result of calling the embedding layer on ``inputs``
    """
    layer_kwargs = dict(
        input_dim=vocab_size,
        output_dim=embed_size,
        input_length=maxlen,
        name=DEFAULT_EMB_NAME)

    # Forward the weights only when the caller supplied them.
    if weights is not None:
        layer_kwargs['weights'] = [weights]

    return tf.keras.layers.Embedding(**layer_kwargs)(inputs)


def textcnn(num_classes, vocab_size, embed_size, maxlen,
            num_filters, filter_sizes, dropout_rate=0.5,
            embedding_matrix=None):
    """Implementation of a CNN for sentence classification.

    Architecture: embedding -> one Conv1D + global max-pooling branch per
    filter size -> concatenation -> dropout -> dense -> softmax.

    Arguments:
        num_classes -- number of units of the final Dense layer (one per class)
        vocab_size -- vocabulary size of the embedding
        embed_size -- embedding dimension
        maxlen -- length of the input sequences; the model input has shape (maxlen,)
        num_filters -- number of filters of every Conv1D branch
        filter_sizes -- iterable of kernel sizes; one Conv1D branch is built per entry

    Keyword Arguments:
        dropout_rate -- fraction of the pooled features dropped during training (default: {0.5})
        embedding_matrix -- optional initial embedding weights (default: {None})

    Returns:
        tf.keras.models.Model -- model whose output is a softmax over ``num_classes``

    Raises:
        ValueError -- if ``filter_sizes`` is empty
    """

    inputs = tf.keras.layers.Input(shape=(maxlen,))
    embed = make_embedding(inputs,
                           vocab_size=vocab_size,
                           embed_size=embed_size,
                           maxlen=maxlen,
                           weights=embedding_matrix)

    # One convolution + global max-pooling branch per filter size.
    pooled_outputs = []
    for fz in filter_sizes:
        conv = tf.keras.layers.Conv1D(
                      num_filters, fz,
                      padding='valid',
                      activation="relu")(embed)
        pooled_outputs.append(tf.keras.layers.GlobalMaxPooling1D()(conv))

    if not pooled_outputs:
        raise ValueError("filter_sizes must contain at least one filter size")

    # Some Keras versions refuse to concatenate a single tensor, so a lone
    # branch is used directly.
    if len(pooled_outputs) == 1:
        h_pool = pooled_outputs[0]
    else:
        h_pool = tf.keras.layers.Concatenate()(pooled_outputs)

    # Dropout regularizes the pooled features, not the class logits: dropping
    # logits would randomly zero a class score right before the softmax.
    # Keras' Dropout takes the fraction to DROP, so dropout_rate is passed as is.
    dropout = tf.keras.layers.Dropout(dropout_rate)(h_pool)
    dense = tf.keras.layers.Dense(num_classes)(dropout)
    outputs = tf.keras.layers.Activation('softmax')(dense)

    model = tf.keras.models.Model(inputs=inputs, outputs=outputs)
    return model

In [3]:
# Build a small two-class TextCNN and show its layer table.
m = textcnn(num_classes=2, vocab_size=10000, embed_size=50, maxlen=50,
            num_filters=150, filter_sizes=[2,3,4])
# summary() prints the table itself and returns None, so wrapping it in
# print() would only append a stray "None" line.
m.summary()

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 50)]         0                                            
__________________________________________________________________________________________________
easytc_embedding (Embedding)    (None, 50, 50)       500000      input_1[0][0]                    
__________________________________________________________________________________________________
conv1d (Conv1D)                 (None, 49, 150)      15150       easytc_embedding[0][0]           
_____________________________________________________________________________________________

In [102]:
# Presumably the parameter count of the final Dense layer of the TextCNN built
# above: 3 filter sizes x 150 filters = 450 inputs, plus 1 bias, for each of
# the 2 classes -- TODO confirm against the full model summary.
(450+1)*2

902

In [103]:
class MinimalRNNCell(tf.keras.layers.Layer):
    """Minimal recurrent cell computing ``inputs . kernel + prev_output . recurrent_kernel + bias``.

    No activation is applied. The cell keeps a single state (its previous
    output) of size ``units`` and is meant to be wrapped in
    ``tf.keras.layers.RNN``.

    Arguments:
        units -- size of the output and of the recurrent state
        **kwargs -- forwarded to ``tf.keras.layers.Layer``
    """

    def __init__(self, units, **kwargs):
        super(MinimalRNNCell, self).__init__(**kwargs)
        self.units = units
        # Size of the single recurrent state carried between time steps.
        self.state_size = units

    def build(self, input_shape):
        """Create the input kernel, the recurrent kernel and the bias.

        Arguments:
            input_shape -- shape of the per-step input; only its last entry is used
        """
        self.kernel = self.add_weight(shape=(input_shape[-1], self.units),
                                      initializer='uniform',
                                      name='kernel')
        self.recurrent_kernel = self.add_weight(
            shape=(self.units, self.units),
            initializer='uniform',
            name='recurrent_kernel')

        # Every weight gets its own name; the bias must not share the
        # recurrent kernel's name or the two variables become indistinguishable
        # wherever weights are looked up by name.
        self.bias = self.add_weight(
            shape=(1, self.units),
            initializer='uniform',
            name='bias')
        self.built = True

    def call(self, inputs, states):
        """Run one time step.

        Arguments:
            inputs -- input for the current step
            states -- list of states; only ``states[0]`` (the previous output) is read

        Returns:
            tuple -- ``(output, [output])``: the step output and the new state list
        """
        prev_output = states[0]
        h = K.dot(inputs, self.kernel)
        output = h + K.dot(prev_output, self.recurrent_kernel) + self.bias
        return output, [output]


In [104]:
# Wrap the custom cell in an RNN layer and feed it inputs of per-sample shape (10, 5).
inputs = tf.keras.layers.Input(shape=(10,5,))
rnn = tf.keras.layers.RNN(MinimalRNNCell(10))
outputs = rnn(inputs)

model = tf.keras.models.Model(inputs=inputs, outputs=outputs)
# summary() prints the table itself and returns None, so wrapping it in
# print() would only append a stray "None" line.
model.summary()

Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 10, 5)]           0         
_________________________________________________________________
rnn_1 (RNN)                  (None, 10)                160       
Total params: 160
Trainable params: 160
Non-trainable params: 0
_________________________________________________________________
None


In [55]:
import numpy as np
def get_angles(pos, i, d_model):
    """Return ``pos / 10000 ** (2 * (i // 2) / d_model)``.

    Indices ``2j`` and ``2j + 1`` share the same rate because of the integer
    division by two.

    Arguments:
        pos -- position(s); positional_encoding passes a column vector
        i -- dimension index/indices; positional_encoding passes a row vector
        d_model -- model dimension used to normalise the exponent

    Returns:
        ``pos`` multiplied by the per-index rate (NumPy broadcasting applies)
    """
    pair_index = i // 2
    exponent = (2 * pair_index) / np.float32(d_model)
    inverse_frequency = 1 / np.power(10000, exponent)
    return pos * inverse_frequency

def positional_encoding(position, d_model):
    """Build a sinusoidal positional-encoding table.

    Arguments:
        position -- number of positions (rows) to encode
        d_model -- number of encoding dimensions (columns)

    Returns:
        NumPy array of shape (1, position, d_model): sine in the even
        columns, cosine in the odd columns.
    """
    positions = np.arange(position)[:, np.newaxis]
    dimensions = np.arange(d_model)[np.newaxis, :]
    table = get_angles(positions, dimensions, d_model)

    # Even column indices (2i) get the sine of the angle.
    table[:, 0::2] = np.sin(table[:, 0::2])

    # Odd column indices (2i+1) get the cosine of the angle.
    table[:, 1::2] = np.cos(table[:, 1::2])

    # Prepend an axis of size 1.
    return table[np.newaxis, ...]

# Encode 100 positions with 10 dimensions and check the resulting shape.
res = positional_encoding(100, 10)
print(res.shape)

(1, 100, 10)


In [71]:
# Build the positional-encoding table step by step:
# rate = 1 / 10000^(2i / d_model) for each of the d_model dimensions.
length = 100
d_model = 512

pos = np.arange(length)[:, np.newaxis] # (length, 1)

d_is = np.arange(d_model)
rates = 1 / np.power(10000, 2*(d_is//2) / np.float32(d_model))
rates = np.expand_dims(rates, axis=0) # (1, d_model)

pes = pos * rates
print(pes.shape) # (length, d_model)

pes[:, 0::2] = np.sin(pes[:, 0::2]) # even columns (2i): sine
pes[:, 1::2] = np.cos(pes[:, 1::2]) # odd columns (2i+1): cosine


(100, 512)
(100, 512)


In [65]:
# Raw angle table for 100 positions and 10 dimensions (before sin/cos);
# the row for position 5 is printed as a spot check.
angle_rads = get_angles(np.arange(100)[:, np.newaxis],
                        np.arange(10)[np.newaxis, :],
                        10)
print(angle_rads.shape)
print(angle_rads[5])

(100, 10)
[5.00000000e+00 5.00000000e+00 7.92446596e-01 7.92446596e-01
 1.25594322e-01 1.25594322e-01 1.99053585e-02 1.99053585e-02
 3.15478672e-03 3.15478672e-03]


In [28]:
# Dimension indices 0..9 laid out as a row vector.
i = np.arange(10)[np.newaxis, :]
print(i.shape)

(1, 10)


In [30]:
# Per-dimension rate 1 / 10000^(2*(i//2) / 10); indices 2j and 2j+1 share a rate.
angle_rates = 1 / np.power(10000, (2 * (i//2)) / np.float32(10))

In [31]:
# Check the shape of the rates computed from i.
print(angle_rates.shape)

(1, 10)


In [32]:
# Positions 0..99 laid out as a column vector.
pos = np.arange(100)[:, np.newaxis]

In [33]:
# Check the shape of the position column.
print(pos.shape)

(100, 1)


In [35]:
# Column (pos) times row (angle_rates) broadcasts to one angle per (position, dimension) pair.
r = pos*angle_rates

In [36]:
# Check the shape of the broadcast product.
print(r.shape)

(100, 10)


In [89]:
def softmax(x):
    """Softmax over the last axis of ``x``.

    The maximum along the last axis is subtracted before exponentiating,
    which keeps ``np.exp`` from overflowing and leaves the result unchanged.
    """
    shifted = x - x.max(axis=-1, keepdims=True)
    exponentials = np.exp(shifted)
    normaliser = exponentials.sum(axis=-1, keepdims=True)
    return exponentials / normaliser

def scaled_dot_product_attention(q, k, v, mask=None):
    """Scaled dot-product attention: ``softmax(q . k^T / sqrt(d_k)) . v``.

    Arguments:
        q -- queries, shape (..., n_q, d_k)
        k -- keys, shape (..., n_k, d_k); must have at least two dimensions
        v -- values, shape (..., n_k, d_v)

    Keyword Arguments:
        mask -- optional array broadcastable to (..., n_q, n_k); it is
            multiplied by -1e9 and added to the scaled scores, so entries
            equal to 1 are effectively removed by the softmax (default: {None})

    Returns:
        Attention output of shape (..., n_q, d_v)
    """
    # Swap only the last two axes so leading batch dimensions stay in place;
    # for 2-D keys this is the same as a plain transpose.
    qk = np.matmul(q, np.swapaxes(k, -1, -2)) # matmul
    dk = k.shape[-1]
    s_a_l = qk / np.sqrt(dk) # scale
    # Compare against None explicitly: the truth value of a multi-element
    # array is ambiguous and would raise ValueError.
    if mask is not None:
        # Out-of-place add so the mask may broadcast freely.
        s_a_l = s_a_l + np.asarray(mask) * -1e9 # mask
    attention_weights = softmax(s_a_l) # softmax
    output = np.matmul(attention_weights, v)
    return output

# Seed NumPy's global RNG so the printed values are the same on every run.
np.random.seed(0)
q = np.random.rand(10,10)  # 10 queries of depth 10
k = np.random.rand(20,10)  # 20 keys of depth 10
v = np.random.rand(20,50)  # 20 values of depth 50

output = scaled_dot_product_attention(q, k, v)

print(output.shape)
print(output[0])


(10, 50)
[0.43278054 0.57456335 0.43982745 0.56008432 0.56474883 0.50937905
 0.42628247 0.55650716 0.41340909 0.47868524 0.44457602 0.427981
 0.44789383 0.49969561 0.46462884 0.51235246 0.49596763 0.43443592
 0.53966605 0.4748008  0.59409979 0.59771888 0.51035007 0.52228274
 0.59524209 0.49502827 0.50830026 0.47618258 0.54661573 0.48640033
 0.34033975 0.52204994 0.65990259 0.42549693 0.46292148 0.37291171
 0.53471167 0.47048881 0.65243824 0.47722346 0.46430436 0.52954279
 0.44462303 0.56845949 0.48579302 0.50170649 0.52376332 0.49521957
 0.61476987 0.46595983]


In [91]:
# With keepdims=True the row-wise max keeps a trailing axis of size 1, so it
# broadcasts against every column of x in the subtraction below.
x = np.array([[1,2,3], [4,5,6]])
print(x.max(axis=-1, keepdims=True))
x - x.max(axis=-1, keepdims=True)

[[3]
 [6]]


array([[-2, -1,  0],
       [-2, -1,  0]])

In [93]:
# A length-1 array broadcasts across the length-3 array.
np.array([1,2,3]) - np.array([3])

array([-2, -1,  0])