Commit

attention
philipperemy committed May 23, 2017
1 parent fd72f30 commit 8dafa5f
Showing 4 changed files with 93 additions and 1 deletion.
2 changes: 1 addition & 1 deletion .gitignore
@@ -5,7 +5,7 @@ __pycache__/

# C extensions
*.so

.idea/
# Distribution / packaging
.Python
env/
6 changes: 6 additions & 0 deletions .idea/vcs.xml


Binary file added assets/1.png
86 changes: 86 additions & 0 deletions attention.py
@@ -0,0 +1,86 @@
import numpy as np

np.random.seed(1337)  # for reproducibility
from keras import backend as K  # explicit import of the backend used in get_activations
from keras.models import *
from keras.layers.core import *
from keras.layers import Input, Dense, merge

input_dims = 32


def get_activations(model, inputs, print_shape_only=False):
    # Documentation is available online on Github at the address below.
    # From: https://github.com/philipperemy/keras-visualize-activations
    print('----- activations -----')
    activations = []
    inp = model.input
    outputs = [layer.output for layer in model.layers]  # all layer outputs
    funcs = [K.function([inp] + [K.learning_phase()], [out]) for out in outputs]  # evaluation functions
    if len(inputs.shape) == 3:
        batch_inputs = inputs[np.newaxis, ...]
    else:
        batch_inputs = inputs
    layer_outputs = [func([batch_inputs, 1.])[0] for func in funcs]
    for layer_activations in layer_outputs:
        activations.append(layer_activations)
        if print_shape_only:
            print(layer_activations.shape)
        else:
            print(layer_activations)
    return activations


def build_model():
    inputs = Input(shape=(input_dims,))

    # ATTENTION PART STARTS HERE
    # A Dense softmax layer yields one attention weight per input dimension,
    # and the element-wise merge (mode='mul') re-weights the inputs with them.
    alpha = Dense(32, activation='softmax', name='alpha')(inputs)
    r = merge([inputs, alpha], output_shape=32, name='r', mode='mul')
    # ATTENTION PART FINISHES HERE

    r = Dense(64)(r)
    output = Dense(1, activation='sigmoid')(r)
    model = Model(input=[inputs], output=output)
    return model


if __name__ == '__main__':
    def get_data(n, attention_column=1):
        """
        Data generation: x is purely random except that the value at index
        ``attention_column`` equals the target y. The network should therefore
        learn that target = x[:, attention_column] and focus most of its
        attention on that input dimension.
        :param n: the number of samples to generate.
        :param attention_column: index of the input dimension that carries the target.
        :return: x: model inputs, y: model targets
        """
        x = np.random.standard_normal(size=(n, input_dims))
        y = np.random.randint(low=0, high=2, size=(n, 1))
        x[:, attention_column] = y[:, 0]
        return x, y


    N = 10000
    inputs_1, outputs = get_data(N)

    m = build_model()
    m.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    print(m.summary())

    m.fit([inputs_1], outputs, epochs=20, batch_size=64, validation_split=0.5)

    testing_inputs_1, testing_outputs = get_data(1)

    # The attention vector is the second activation returned by get_activations;
    # the first one is the output of the Input layer itself.
    attention_vector = get_activations(m, testing_inputs_1, print_shape_only=True)[1].flatten()
    print('attention =', attention_vector)

    # Plot the attention vector as a bar chart over the input dimensions.
    import matplotlib.pyplot as plt
    import pandas as pd

    pd.DataFrame(attention_vector, columns=['attention (%)']).plot(
        kind='bar', title='Attention Mechanism as a function of input dimensions.')
    plt.show()
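
The merge helper used above is no longer available in later Keras releases. A minimal sketch, not part of this commit and assuming a Keras 2.x install, of the same Dense-softmax attention block written with the multiply layer:

from keras.layers import Input, Dense, multiply
from keras.models import Model

input_dims = 32


def build_model_keras2():
    inputs = Input(shape=(input_dims,))
    # One softmax weight per input dimension: the attention vector.
    alpha = Dense(input_dims, activation='softmax', name='alpha')(inputs)
    # Re-weight the inputs element-wise with the attention weights.
    r = multiply([inputs, alpha], name='r')
    r = Dense(64)(r)
    output = Dense(1, activation='sigmoid')(r)
    return Model(inputs=inputs, outputs=output)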

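The attention weights can also be read out without evaluating every layer of the model. A minimal sketch, not part of this commit, that builds a sub-model sharing the trained weights and stopping at the Dense layer named 'alpha':

from keras.models import Model


def get_attention_vector(model, x):
    # Sub-model from the original inputs to the softmax attention layer.
    attention_model = Model(model.input, model.get_layer('alpha').output)
    return attention_model.predict(x).flatten()

# Usage (hypothetical): attention_vector = get_attention_vector(m, testing_inputs_1)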