In [1]:
import tensorflow as tf
import os
import tensorflow_datasets as tfds

In [2]:
embedding_layer = tf.keras.layers.Embedding(1000,5)

In [3]:
result = embedding_layer(tf.constant([1,2,3]))  # 알아서 1,2,3 번 들고나옴

In [4]:
DIRECTORY_URL = 'https://storage.googleapis.com/download.tensorflow.org/data/illiad/'
FILE_NAMES = ['cowper.txt', 'derby.txt', 'butler.txt']

for name in FILE_NAMES:
    text_dir = tf.keras.utils.get_file(name, origin=DIRECTORY_URL+name)

parent_dir = os.path.dirname(text_dir)
parent_dir

'C:\\Users\\LeeWonSeok\\.keras\\datasets'

In [5]:
def labeler(example, index):
    return example, tf.cast(index, tf.int64)  

labeled_data_sets = []

for i, file_name in enumerate(FILE_NAMES):
    lines_dataset = tf.data.TextLineDataset(os.path.join(parent_dir, file_name))
    labeled_dataset = lines_dataset.map(lambda ex: labeler(ex, i))
    labeled_data_sets.append(labeled_dataset)

In [6]:
BUFFER_SIZE = 50000
BATCH_SIZE = 64
TAKE_SIZE = 5000

In [7]:
all_labeled_data = labeled_data_sets[0]
for labeled_dataset in labeled_data_sets[1:]:
    all_labeled_data = all_labeled_data.concatenate(labeled_dataset)

all_labeled_data = all_labeled_data.shuffle(
    BUFFER_SIZE, reshuffle_each_iteration=False)  
# reshuffle_each_iteration=False iter 할때마다 shffle안하는듯

In [8]:
for ex in all_labeled_data.take(5):
  print(ex)

(<tf.Tensor: id=89, shape=(), dtype=string, numpy=b'It was when they were doing the last part of the course on their way'>, <tf.Tensor: id=90, shape=(), dtype=int64, numpy=2>)
(<tf.Tensor: id=91, shape=(), dtype=string, numpy=b"Enrag'd, Achilles Hector shall subdue;">, <tf.Tensor: id=92, shape=(), dtype=int64, numpy=1>)
(<tf.Tensor: id=93, shape=(), dtype=string, numpy=b'They all shall fight; and if thou fail, shalt know'>, <tf.Tensor: id=94, shape=(), dtype=int64, numpy=1>)
(<tf.Tensor: id=95, shape=(), dtype=string, numpy=b'Saved him, and with the golden \xc3\xa6gis broad'>, <tf.Tensor: id=96, shape=(), dtype=int64, numpy=0>)
(<tf.Tensor: id=97, shape=(), dtype=string, numpy=b'and killed Iphidamas by striking him on the neck. So there the poor'>, <tf.Tensor: id=98, shape=(), dtype=int64, numpy=2>)


## Encode text lines as numbers

Machine learning models work on numbers, not words, so the string values need to be converted into lists of numbers. To do that, map each unique word to a unique integer.

### Build vocabulary

First, build a vocabulary by tokenizing the text into a collection of individual unique words. There are a few ways to do this in both TensorFlow and Python. For this tutorial:

1. Iterate over each example's `numpy` value.
2. Use `tfds.features.text.Tokenizer` to split it into tokens.
3. Collect these tokens into a Python set, to remove duplicates.
4. Get the size of the vocabulary for later use.

In [9]:
tokenizer = tfds.features.text.Tokenizer()

vocabulary_set = set()
# 유니크한 token update
for text_tensor, _ in all_labeled_data:
    some_tokens = tokenizer.tokenize(text_tensor.numpy())
    vocabulary_set.update(some_tokens)

vocab_size = len(vocabulary_set)
vocab_size

17178

### Encode examples

Create an encoder by passing the `vocabulary_set` to `tfds.features.text.TokenTextEncoder`. The encoder's `encode` method takes in a string of text and returns a list of integers.

In [10]:
encoder = tfds.features.text.TokenTextEncoder(vocabulary_set)

In [11]:
example_text = next(iter(all_labeled_data))[0].numpy()
print(example_text)

b'It was when they were doing the last part of the course on their way'


In [12]:
encoded_example = encoder.encode(example_text)
print(encoded_example) # 단어 Token 번호로 encoding

[6205, 13844, 2444, 528, 13878, 2861, 16294, 16660, 14568, 923, 16294, 14306, 889, 5258, 7986]


In [13]:
def encode(text_tensor, label):
    encoded_text = encoder.encode(text_tensor.numpy())
    return encoded_text, label

def encode_map_fn(text, label):
    return tf.py_function(encode, inp=[text, label], Tout=(tf.int64, tf.int64))
# python 사용자 정의함수를 사용할수있게 해주는ㄴ tf.py_function

all_encoded_data = all_labeled_data.map(encode_map_fn)

In [14]:
train_data = all_encoded_data.skip(TAKE_SIZE).shuffle(BUFFER_SIZE) # skip skip 하는만큼
train_data = train_data.padded_batch(BATCH_SIZE, padded_shapes=([-1],[]))

test_data = all_encoded_data.take(TAKE_SIZE) # take는 take 하는만큼 가지고옴
test_data = test_data.padded_batch(BATCH_SIZE, padded_shapes=([-1],[]))
# padded_batch는 각 문서마다 길이 다른걸 고려한 거임


In [15]:
sample_text, sample_labels = next(iter(test_data))

sample_text[0], sample_labels[0]

(<tf.Tensor: id=99562, shape=(16,), dtype=int64, numpy=
 array([ 6205, 13844,  2444,   528, 13878,  2861, 16294, 16660, 14568,
          923, 16294, 14306,   889,  5258,  7986,     0], dtype=int64)>,
 <tf.Tensor: id=99566, shape=(), dtype=int64, numpy=2>)

In [16]:
vocab_size += 1 # for zero padding

In [227]:
model = tf.keras.Sequential()

In [228]:
embedding_dim = 100
model.add(tf.keras.layers.Embedding(vocab_size,embedding_dim))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)))
model.add(tf.keras.layers.Dense(64,activation='relu'))
model.add(tf.keras.layers.Dense(64,activation='relu'))
model.add(tf.keras.layers.Dense(3,activation="softmax"))

In [229]:
model.summary()

Model: "sequential_21"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_20 (Embedding)     (None, None, 100)         1717900   
_________________________________________________________________
bidirectional_19 (Bidirectio (None, 128)               84480     
_________________________________________________________________
dense_33 (Dense)             (None, 64)                8256      
_________________________________________________________________
dense_34 (Dense)             (None, 64)                4160      
_________________________________________________________________
dense_35 (Dense)             (None, 3)                 195       
Total params: 1,814,991
Trainable params: 1,814,991
Non-trainable params: 0
_________________________________________________________________


In [230]:
model.compile(optimizer="adam",loss="sparse_categorical_crossentropy",
             metrics=["accuracy"])

In [231]:
history=model.fit(train_data,epochs=5,validation_data=test_data)

Epoch 1/5


    168/Unknown - 16s 16s/step - loss: 1.0998 - accuracy: 0.29 - 16s 8s/step - loss: 1.0954 - accuracy: 0.3438 - 16s 5s/step - loss: 1.0907 - accuracy: 0.354 - 16s 4s/step - loss: 1.0893 - accuracy: 0.335 - 16s 3s/step - loss: 1.0857 - accuracy: 0.337 - 16s 3s/step - loss: 1.0820 - accuracy: 0.333 - 16s 2s/step - loss: 1.0799 - accuracy: 0.339 - 16s 2s/step - loss: 1.0746 - accuracy: 0.353 - 16s 2s/step - loss: 1.0721 - accuracy: 0.357 - 16s 2s/step - loss: 1.0696 - accuracy: 0.354 - 17s 2s/step - loss: 1.0657 - accuracy: 0.353 - 17s 1s/step - loss: 1.0605 - accuracy: 0.367 - 17s 1s/step - loss: 1.0601 - accuracy: 0.366 - 17s 1s/step - loss: 1.0580 - accuracy: 0.371 - 17s 1s/step - loss: 1.0511 - accuracy: 0.379 - 17s 1s/step - loss: 1.0445 - accuracy: 0.389 - 17s 988ms/step - loss: 1.0371 - accuracy: 0.39 - 17s 936ms/step - loss: 1.0336 - accuracy: 0.39 - 17s 889ms/step - loss: 1.0294 - accuracy: 0.40 - 17s 847ms/step - loss: 1.0201 - accuracy: 0.41 - 17s 808ms/step - loss: 1.0099 - a

    502/Unknown - 31s 93ms/step - loss: 0.5980 - accuracy: 0.697 - 31s 93ms/step - loss: 0.5980 - accuracy: 0.697 - 31s 93ms/step - loss: 0.5975 - accuracy: 0.697 - 31s 92ms/step - loss: 0.5969 - accuracy: 0.698 - 31s 92ms/step - loss: 0.5966 - accuracy: 0.698 - 31s 92ms/step - loss: 0.5959 - accuracy: 0.699 - 31s 92ms/step - loss: 0.5954 - accuracy: 0.699 - 31s 92ms/step - loss: 0.5952 - accuracy: 0.699 - 32s 92ms/step - loss: 0.5946 - accuracy: 0.699 - 32s 91ms/step - loss: 0.5944 - accuracy: 0.700 - 32s 91ms/step - loss: 0.5940 - accuracy: 0.700 - 32s 91ms/step - loss: 0.5936 - accuracy: 0.700 - 32s 91ms/step - loss: 0.5935 - accuracy: 0.700 - 32s 91ms/step - loss: 0.5931 - accuracy: 0.700 - 32s 91ms/step - loss: 0.5928 - accuracy: 0.701 - 32s 91ms/step - loss: 0.5923 - accuracy: 0.701 - 32s 91ms/step - loss: 0.5919 - accuracy: 0.701 - 32s 90ms/step - loss: 0.5915 - accuracy: 0.702 - 32s 90ms/step - loss: 0.5915 - accuracy: 0.702 - 32s 90ms/step - loss: 0.5914 - accuracy: 0.702 - 32

    669/Unknown - 39s 77ms/step - loss: 0.5420 - accuracy: 0.734 - 39s 77ms/step - loss: 0.5417 - accuracy: 0.734 - 39s 77ms/step - loss: 0.5412 - accuracy: 0.734 - 39s 77ms/step - loss: 0.5411 - accuracy: 0.734 - 39s 77ms/step - loss: 0.5409 - accuracy: 0.734 - 39s 77ms/step - loss: 0.5406 - accuracy: 0.735 - 39s 77ms/step - loss: 0.5403 - accuracy: 0.735 - 39s 77ms/step - loss: 0.5399 - accuracy: 0.735 - 39s 77ms/step - loss: 0.5396 - accuracy: 0.735 - 39s 77ms/step - loss: 0.5394 - accuracy: 0.735 - 39s 77ms/step - loss: 0.5390 - accuracy: 0.735 - 39s 77ms/step - loss: 0.5388 - accuracy: 0.735 - 39s 77ms/step - loss: 0.5385 - accuracy: 0.736 - 40s 77ms/step - loss: 0.5382 - accuracy: 0.736 - 40s 77ms/step - loss: 0.5381 - accuracy: 0.736 - 40s 77ms/step - loss: 0.5378 - accuracy: 0.736 - 40s 76ms/step - loss: 0.5376 - accuracy: 0.736 - 40s 76ms/step - loss: 0.5372 - accuracy: 0.737 - 40s 76ms/step - loss: 0.5368 - accuracy: 0.737 - 40s 76ms/step - loss: 0.5364 - accuracy: 0.737 - 40

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [232]:
eval_loss, eval_acc = model.evaluate(test_data)





In [233]:
print('\nEval loss: {:.3f}, Eval accuracy: {:.3f}'.format(eval_loss, eval_acc))


Eval loss: 0.463, Eval accuracy: 0.837


In [27]:
model = tf.keras.Sequential()

In [28]:
embedding_dim = 100
model.add(tf.keras.layers.Embedding(vocab_size,embedding_dim)) # 
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64,return_sequences=True)))
#model.add(AttentionLayer(128,50,3,0.01))
model.add(AttentionWithContext())
model.add(tf.keras.layers.Dense(64,activation='relu'))
model.add(tf.keras.layers.Dense(64,activation='relu'))
model.add(tf.keras.layers.Dense(3,activation="softmax"))

In [29]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, None, 100)         1717900   
_________________________________________________________________
bidirectional_1 (Bidirection (None, None, 128)         84480     
_________________________________________________________________
attention_with_context (Atte (None, 128)               16640     
_________________________________________________________________
dense_3 (Dense)              (None, 64)                8256      
_________________________________________________________________
dense_4 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_5 (Dense)              (None, 3)                 195       
Total params: 1,831,631
Trainable params: 1,831,631
Non-trainable params: 0
____________________________________________

In [30]:
model.compile(optimizer="adam",loss="sparse_categorical_crossentropy",
             metrics=["accuracy"])

In [31]:
model.trainable_variables

[<tf.Variable 'embedding_2/embeddings:0' shape=(17179, 100) dtype=float32, numpy=
 array([[ 0.0349843 ,  0.04488614,  0.03097877, ..., -0.0277553 ,
         -0.0226357 ,  0.04396429],
        [-0.01445959,  0.04020934,  0.04250174, ...,  0.00063106,
         -0.02241715, -0.01841908],
        [-0.00345569, -0.03979648, -0.0403442 , ..., -0.02423195,
         -0.00714089, -0.02077893],
        ...,
        [ 0.03167503, -0.02202144, -0.00708278, ...,  0.00565876,
          0.02888315, -0.02882534],
        [-0.03373444, -0.04459515,  0.01304585, ..., -0.00698472,
         -0.03796037,  0.01844081],
        [ 0.00186817,  0.0140711 ,  0.03505471, ...,  0.04577067,
         -0.03590371,  0.02632571]], dtype=float32)>,
 <tf.Variable 'bidirectional_1/forward_lstm_1/kernel:0' shape=(100, 256) dtype=float32, numpy=
 array([[-0.07243173,  0.01996113,  0.11412027, ...,  0.02014096,
          0.0772936 , -0.10448751],
        [-0.0416146 ,  0.02809347,  0.09636031, ..., -0.019767  ,
         -0.

In [32]:
history=model.fit(train_data,epochs=5,validation_data=test_data)

Epoch 1/5


    168/Unknown - 15s 15s/step - loss: 1.0978 - accuracy: 0.42 - 15s 7s/step - loss: 1.0959 - accuracy: 0.3906 - 15s 5s/step - loss: 1.0931 - accuracy: 0.427 - 15s 4s/step - loss: 1.0912 - accuracy: 0.433 - 15s 3s/step - loss: 1.0910 - accuracy: 0.421 - 15s 3s/step - loss: 1.0881 - accuracy: 0.427 - 15s 2s/step - loss: 1.0858 - accuracy: 0.412 - 15s 2s/step - loss: 1.0830 - accuracy: 0.406 - 15s 2s/step - loss: 1.0795 - accuracy: 0.411 - 15s 2s/step - loss: 1.0766 - accuracy: 0.404 - 15s 1s/step - loss: 1.0738 - accuracy: 0.394 - 15s 1s/step - loss: 1.0762 - accuracy: 0.384 - 15s 1s/step - loss: 1.0724 - accuracy: 0.388 - 15s 1s/step - loss: 1.0664 - accuracy: 0.395 - 15s 1s/step - loss: 1.0639 - accuracy: 0.389 - 15s 964ms/step - loss: 1.0572 - accuracy: 0.39 - 15s 910ms/step - loss: 1.0525 - accuracy: 0.39 - 15s 861ms/step - loss: 1.0490 - accuracy: 0.38 - 16s 818ms/step - loss: 1.0433 - accuracy: 0.38 - 16s 778ms/step - loss: 1.0378 - accuracy: 0.38 - 16s 743ms/step - loss: 1.0320 -

    335/Unknown - 21s 122ms/step - loss: 0.7040 - accuracy: 0.62 - 21s 122ms/step - loss: 0.7047 - accuracy: 0.62 - 21s 121ms/step - loss: 0.7036 - accuracy: 0.62 - 21s 121ms/step - loss: 0.7032 - accuracy: 0.62 - 21s 120ms/step - loss: 0.7023 - accuracy: 0.62 - 21s 120ms/step - loss: 0.7016 - accuracy: 0.62 - 21s 119ms/step - loss: 0.7000 - accuracy: 0.62 - 21s 119ms/step - loss: 0.6995 - accuracy: 0.62 - 21s 118ms/step - loss: 0.6994 - accuracy: 0.62 - 21s 118ms/step - loss: 0.6993 - accuracy: 0.62 - 21s 117ms/step - loss: 0.6986 - accuracy: 0.62 - 21s 117ms/step - loss: 0.6978 - accuracy: 0.62 - 21s 116ms/step - loss: 0.6973 - accuracy: 0.63 - 21s 116ms/step - loss: 0.6968 - accuracy: 0.63 - 21s 116ms/step - loss: 0.6961 - accuracy: 0.63 - 21s 115ms/step - loss: 0.6957 - accuracy: 0.63 - 21s 115ms/step - loss: 0.6951 - accuracy: 0.63 - 21s 114ms/step - loss: 0.6944 - accuracy: 0.63 - 21s 114ms/step - loss: 0.6940 - accuracy: 0.63 - 21s 113ms/step - loss: 0.6934 - accuracy: 0.63 - 21

    502/Unknown - 26s 78ms/step - loss: 0.6312 - accuracy: 0.681 - 26s 78ms/step - loss: 0.6312 - accuracy: 0.681 - 26s 78ms/step - loss: 0.6308 - accuracy: 0.681 - 26s 78ms/step - loss: 0.6306 - accuracy: 0.681 - 26s 78ms/step - loss: 0.6301 - accuracy: 0.681 - 26s 78ms/step - loss: 0.6297 - accuracy: 0.682 - 26s 77ms/step - loss: 0.6292 - accuracy: 0.682 - 27s 77ms/step - loss: 0.6291 - accuracy: 0.682 - 27s 77ms/step - loss: 0.6287 - accuracy: 0.682 - 27s 77ms/step - loss: 0.6279 - accuracy: 0.683 - 27s 77ms/step - loss: 0.6278 - accuracy: 0.683 - 27s 77ms/step - loss: 0.6276 - accuracy: 0.683 - 27s 77ms/step - loss: 0.6272 - accuracy: 0.683 - 27s 77ms/step - loss: 0.6265 - accuracy: 0.684 - 27s 76ms/step - loss: 0.6261 - accuracy: 0.684 - 27s 76ms/step - loss: 0.6256 - accuracy: 0.684 - 27s 76ms/step - loss: 0.6251 - accuracy: 0.685 - 27s 76ms/step - loss: 0.6247 - accuracy: 0.685 - 27s 76ms/step - loss: 0.6242 - accuracy: 0.685 - 27s 76ms/step - loss: 0.6239 - accuracy: 0.685 - 27

    669/Unknown - 32s 63ms/step - loss: 0.5803 - accuracy: 0.714 - 32s 63ms/step - loss: 0.5801 - accuracy: 0.715 - 32s 63ms/step - loss: 0.5796 - accuracy: 0.715 - 32s 63ms/step - loss: 0.5794 - accuracy: 0.715 - 32s 63ms/step - loss: 0.5790 - accuracy: 0.715 - 32s 63ms/step - loss: 0.5785 - accuracy: 0.715 - 32s 62ms/step - loss: 0.5783 - accuracy: 0.716 - 32s 62ms/step - loss: 0.5782 - accuracy: 0.716 - 32s 62ms/step - loss: 0.5783 - accuracy: 0.716 - 32s 62ms/step - loss: 0.5780 - accuracy: 0.716 - 32s 62ms/step - loss: 0.5779 - accuracy: 0.716 - 32s 62ms/step - loss: 0.5778 - accuracy: 0.716 - 32s 62ms/step - loss: 0.5775 - accuracy: 0.716 - 32s 62ms/step - loss: 0.5772 - accuracy: 0.717 - 32s 62ms/step - loss: 0.5767 - accuracy: 0.717 - 32s 62ms/step - loss: 0.5762 - accuracy: 0.717 - 32s 62ms/step - loss: 0.5761 - accuracy: 0.717 - 32s 62ms/step - loss: 0.5755 - accuracy: 0.718 - 32s 62ms/step - loss: 0.5752 - accuracy: 0.718 - 32s 62ms/step - loss: 0.5750 - accuracy: 0.718 - 32

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5




In [267]:
model.trainable_variables

[<tf.Variable 'embedding_25/embeddings:0' shape=(17179, 100) dtype=float32, numpy=
 array([[-0.09567659,  0.00166395, -0.1060098 , ...,  0.00327255,
         -0.01806049,  0.04753019],
        [-0.05738601,  0.14040187, -0.0940075 , ..., -0.09722354,
         -0.11437219,  0.09325634],
        [ 0.0654543 , -0.04119615,  0.05365785, ...,  0.04737078,
          0.08713071, -0.04407173],
        ...,
        [-0.07416567,  0.14865762, -0.09810467, ..., -0.09540524,
         -0.11657237,  0.09974745],
        [-0.00858533,  0.0738495 , -0.06046059, ..., -0.07151202,
         -0.06736542,  0.03489744],
        [ 0.03109111, -0.04099759,  0.02248403, ..., -0.00060716,
          0.05503817, -0.05373425]], dtype=float32)>,
 <tf.Variable 'bidirectional_24/forward_lstm_24/kernel:0' shape=(100, 256) dtype=float32, numpy=
 array([[ 0.10242917, -0.0090631 , -0.11622679, ...,  0.02952322,
         -0.02083237, -0.00182961],
        [ 0.02337981, -0.14690922, -0.16672744, ..., -0.03077628,
         

In [33]:
eval_loss, eval_acc = model.evaluate(test_data)





In [34]:
print('\nEval loss: {:.3f}, Eval accuracy: {:.3f}'.format(eval_loss, eval_acc))


Eval loss: 0.495, Eval accuracy: 0.833


In [177]:
tf.constant([np.array([1,2,3])]).numpy()

array([[1, 2, 3]])

In [60]:
import numpy as np

In [64]:
tf.nn.softmax(tf.constant([[1.,2.,3.],[4.,5.,6.]]))

<tf.Tensor: id=1113018, shape=(2, 3), dtype=float32, numpy=
array([[0.09003057, 0.24472848, 0.66524094],
       [0.09003057, 0.24472848, 0.66524094]], dtype=float32)>

In [22]:
tf.multiply(tf.ones([3,10,64]),tf.zeros([3,10,3]))

InvalidArgumentError: Incompatible shapes: [3,10,64] vs. [3,10,3] [Op:Mul]

In [80]:
np.array([tf.random.normal([3,10])]).shape

(1, 3, 10)

In [None]:
#  채널 3개로 각 tiem stamp attention weight append하기
#  [batch,attention_r,timestamp]
#  [timestamp 차원 기준으로 sottmax 적용하기]
#  tranpose 사용하여 [batch,timestamp,attion_r]
#  for 문사용 multiply
#  bi LSTM 결과와 multiply 함 attention_r과
# tf.reduce_sum timestamp 기준으로 결과는 
#  [batch,LSTM_hidden_unitsize]

In [82]:
tf.transpose(np.array([tf.random.normal([3,10])]),(2,1,0))

<tf.Tensor: id=1113371, shape=(10, 3, 1), dtype=float32, numpy=
array([[[-0.17747594],
        [-0.11517678],
        [ 0.13129044]],

       [[ 1.5023729 ],
        [-1.1652751 ],
        [-1.3593    ]],

       [[ 0.18969494],
        [-1.2070906 ],
        [-0.6470972 ]],

       [[-0.45896432],
        [-2.1450145 ],
        [-0.3252337 ]],

       [[-0.11928146],
        [ 0.3295987 ],
        [-1.2945286 ]],

       [[-2.1132977 ],
        [ 0.78426933],
        [ 0.28413895]],

       [[-1.4640176 ],
        [-0.77243257],
        [-0.3816451 ]],

       [[-1.2258996 ],
        [ 1.6782123 ],
        [ 0.22770302]],

       [[ 0.5065589 ],
        [ 0.45654002],
        [-0.86232543]],

       [[ 0.8482531 ],
        [-0.4906019 ],
        [-1.3737457 ]]], dtype=float32)>

In [29]:
init=tf.keras.initializers.glorot_normal()
a=tf.constant(init([10,10,128]))

In [31]:
b =  tf.Variable(init([128,50]))

In [39]:
b_=tf.tile(tf.expand_dims(b,0),[10,2,2])

In [41]:
tf.matmul(a,b_)

InvalidArgumentError: In[0] mismatch In[1] shape: 128 vs. 256: [10,10,128] [10,256,100] 0 0 [Op:BatchMatMulV2] name: MatMul/

In [42]:
X  = tf.keras.Input(shape=(None,784))
#shape = [ tf.shape(X)[k] for k in range(4)]
#Y = tf.reshape(X , [shape[0], shape[1]*shape[2], shape[3]])

In [44]:
shape = [ tf.shape(X)[k] for k in range(3)]

In [47]:
Y = tf.reshape(X , [shape[0], shape[1]*shape[2]])

In [58]:
a=tf.constant([[[1.,5.,7.],[1.,2.,3.]],[[2.,5.,7.],[1.,2.,3.]]])
b=tf.nn.softmax(a,axis=1)

In [213]:
c=init([1,3])
c

<tf.Tensor: id=8475491, shape=(1, 3), dtype=float32, numpy=array([[-0.1343434 ,  0.0753022 ,  0.36052102]], dtype=float32)>

In [214]:
b + c

<tf.Tensor: id=8475492, shape=(2, 2, 3), dtype=float32, numpy=
array([[[0.3656566 , 1.0278764 , 1.3425348 ],
        [0.3656566 , 0.12272807, 0.37850723]],

       [[0.5967152 , 1.0278764 , 1.3425348 ],
        [0.13459803, 0.12272807, 0.37850723]]], dtype=float32)>

In [215]:
0.98201376 + 

<tf.Tensor: id=5716, shape=(2, 2, 3), dtype=float32, numpy=
array([[[0.5       , 0.95257413, 0.98201376],
        [0.5       , 0.04742587, 0.01798621]],

       [[0.7310586 , 0.95257413, 0.98201376],
        [0.26894143, 0.04742587, 0.01798621]]], dtype=float32)>

In [13]:
class AttentionLayer(tf.keras.layers.Layer):
    # 첨 class 선언신 변수생성
    def __init__(self, num_unit,input_units):
        super(AttentionLayer, self).__init__()
        self.num_unit = num_unit
        self.input_units = input_units
        
    def build(self, input_shape):  
        self.kernel1 = self.add_weight("kernel1",
                    initializer=tf.keras.initializers.he_normal(seed=1337),                               
                                  shape=[self.input_units,
                                         self.num_unit])
        self.kernel2 = self.add_weight("kernel2",
                                       shape =[self.num_unit])
                    
        
    def call(self, inputs):
        atten_total = tf.matmul(inputs[:,1,:],self.kernel1)+self.kernel2
        lens=inputs.shape[1]
        for i in range(1,lens):        
            try:
                atten = tf.matmul(inputs[:,i,:],self.kernel1)+self.kernel2
                atten_total = tf.concat([atten,atten_total],axis=1)
            except:
                break
        atten_weights_result = tf.nn.softmax(atten_total)
        #return atten_weights_result
        atten_weights_result = tf.expand_dims(atten_weights_result,2)
        #atten_weights_result = tf.transpose(np.array([atten_weights_result.numpy()]),(1,2,0))
        outputs = tf.multiply(inputs,atten_weights_result)
        return outputs

In [18]:
class AttentionLayer(tf.keras.layers.Layer):
    # 첨 class 선언신 변수생성
    def __init__(self,input_units, num_unit,output_unit,lams):
        super(AttentionLayer, self).__init__()
        self.input_units = input_units
        self.num_unit = num_unit
        self.output_unit = output_unit
        self.lambdas = lams
        
    def build(self, input_shape):  
        self.kernel1 = self.add_weight("kernel1",
                    initializer = tf.keras.initializers.glorot_normal(),
                    regularizer = tf.keras.regularizers.l2(self.lambdas),
                    shape =[self.input_units,self.num_unit])
        self.bias1 = self.add_weight("bias1",initializer = tf.zeros_initializer(),shape=[1,self.num_unit])
        
        
        
        self.kernel2 = self.add_weight("kernel2",
                    initializer = tf.keras.initializers.glorot_normal(),
                    regularizer = tf.keras.regularizers.l2(self.lambdas),
                    shape = [self.num_unit,self.output_unit])
        self.bias2 = self.add_weight("bias2",initializer = tf.zeros_initializer(),shape=[1,self.output_unit])
        
                    
        
    def call(self, inputs):
        shape = [ tf.shape(inputs)[k] for k in range(len(inputs.shape))]
        
        shape_list = [shape[0]] + [1 for i in range(len(inputs.shape)-1)]
        kernel1_ = tf.tile(tf.expand_dims(self.kernel1,0),shape_list)
        kernel2_ = tf.tile(tf.expand_dims(self.kernel2,0),shape_list)
        
        h1 = tf.matmul(inputs,kernel1_) + self.bias1
        h1 = tf.nn.tanh(h1)
        out = tf.matmul(h1,kernel2_) + self.bias2
        out = tf.nn.softmax(out,axis=1)
        flat_output_total = tf.multiply(inputs,tf.expand_dims(out[:,:,0],axis=2))
        flat_output_total = tf.reduce_sum(flat_output_total,axis=1)
        for i in range(1,self.output_unit):
            flat_output= tf.multiply(inputs,tf.expand_dims(out[:,:,i],axis=2))
            flat_output = tf.reduce_sum(flat_output,axis=1)
            flat_output_total = tf.concat([flat_output_total,flat_output],axis=1)
        
        return flat_output_total

In [90]:
cc = AttentionLayer(128,50,5,0.01)

In [93]:
dd=cc(init([50,10,128]))

In [26]:
class AttentionWithContext(tf.keras.layers.Layer):

    def __init__(self,
                 W_regularizer=None, u_regularizer=None, b_regularizer=None,
                 W_constraint=None, u_constraint=None, b_constraint=None,
                 bias=True, **kwargs):


        self.init = tf.keras.initializers.get('glorot_uniform')

        self.W_regularizer = tf.keras.regularizers.get(W_regularizer)
        self.u_regularizer = tf.keras.regularizers.get(u_regularizer)
        self.b_regularizer = tf.keras.regularizers.get(b_regularizer)

        self.W_constraint = tf.keras.constraints.get(W_constraint)
        self.u_constraint = tf.keras.constraints.get(u_constraint)
        self.b_constraint = tf.keras.constraints.get(b_constraint)

        self.bias = bias
        super(AttentionWithContext, self).__init__(**kwargs)


    def build(self, input_shape):
        assert len(input_shape) == 3
        self.W = self.add_weight(shape = (input_shape[-1], input_shape[-1],),
                                 initializer=self.init,
                                 name='{}_W'.format(self.name),
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        
        if self.bias:
            self.b = self.add_weight(shape = (input_shape[-1],),
                                     initializer='zero',
                                     name='{}_b'.format(self.name),
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)
        #self.u에 대한 input shape 조정
        self.u = self.add_weight(shape = (input_shape[-1],),
                                 initializer=self.init,
                                 name='{}_u'.format(self.name),
                                 regularizer=self.u_regularizer,
                                 constraint=self.u_constraint)

        super(AttentionWithContext, self).build(input_shape)


    def call(self, x):
        uit = tf.tensordot(x, self.W,1)
        if self.bias:
            uit += self.b
        uit = tf.keras.activations.tanh(uit)
        ait = tf.tensordot(uit, self.u,1)
        a = tf.keras.activations.softmax(ait)
        a = tf.expand_dims(a,-1)
        weighted_input = x * a

        return tf.reduce_sum(weighted_input, axis=1)


    def compute_output_shape(self, input_shape):
        return input_shape[0], input_shape[-1]