In [None]:
# Keras 2 note: the old `from keras.layers import Merge` no longer applies; import keras.layers.Dot for the dot-product merge instead.
from keras.layers import Dot, Input
from keras.layers.core import Dense, Reshape
from keras.layers.embeddings import Embedding
from keras.models import Model
from keras import backend as K

Using TensorFlow backend.


In [None]:
# Embedding-table dimensions: number of rows (distinct ids) and width of each vector.
vocab_size, embed_size = 5000, 300

In [None]:
# Clear the Keras backend session before building the model.
K.clear_session()


def _embed_and_flatten(token_input):
    """Embed a single-id input and drop the length-1 axis.

    Creates a new Embedding layer on every call, so each branch gets its own
    weights. Returns both the raw (batch, 1, embed_size) embedding output and
    its (batch, embed_size) reshaped form.
    """
    embedded = Embedding(vocab_size, embed_size,
                         embeddings_initializer='glorot_uniform')(token_input)
    flattened = Reshape((embed_size,))(embedded)
    return embedded, flattened


# First branch: one integer id per sample.
word_input = Input(shape=(1,))
x1, x2 = _embed_and_flatten(word_input)

# Second branch: same structure, separate embedding table.
context_input = Input(shape=(1,))
y1, y2 = _embed_and_flatten(context_input)

# Dot product of the two vectors, fed to a single sigmoid unit.
dot = Dot(axes=1)([x2, y2])
out = Dense(1, activation='sigmoid')(dot)

model = Model(inputs=[word_input, context_input], outputs=out)
model.summary()






Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 1)            0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 1, 300)       1500000     input_1[0][0]                    
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, 1, 300)       1500000     input_2[0][0]                    
_______________________________________________________________________________________

In [None]:
# The network ends in a single sigmoid unit, i.e. it outputs a probability for
# a 0/1 target, so binary cross-entropy is the matching loss (squared error on a
# sigmoid output gives weak gradients when the unit saturates).
model.compile(loss = 'binary_crossentropy', optimizer = 'adam')




In [None]:
from keras.preprocessing.text import * 
from keras.preprocessing.sequence import skipgrams

In [None]:
# One-sentence toy corpus used by the tokenizer and skip-gram cells.
text = "I love green eggs and ham."

In [None]:
# Fit a tokenizer on the toy corpus; fit_on_texts expects a list of texts,
# so the single sentence is wrapped in a list.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts = [text])

In [None]:
# word -> integer id mapping built by the tokenizer (ids start at 1, as the
# displayed dict shows). This is the tokenizer's own dict, not a copy.
word2id = tokenizer.word_index
word2id

{'and': 5, 'eggs': 4, 'green': 3, 'ham': 6, 'i': 1, 'love': 2}

In [None]:
# Invert word2id so integer ids can be mapped back to their words.
id2word = dict((idx, word) for word, idx in word2id.items())
id2word

{1: 'i', 2: 'love', 3: 'green', 4: 'eggs', 5: 'and', 6: 'ham'}

In [None]:
# Split the sentence into word tokens, then look up each token's integer id.
tokens = text_to_word_sequence(text)
wids = [word2id[token] for token in tokens]
wids

[1, 2, 3, 4, 5, 6]

In [None]:
# skipgrams expects vocabulary_size = (largest word index + 1). Tokenizer ids
# start at 1, so the largest id equals len(word2id); passing len(word2id) alone
# would keep the last word ('ham', id 6) from ever being drawn as a negative sample.
# pairs  : [word id, other word id] couples
# labels : 1 when the other word came from the same window, 0 when it was drawn at random
pairs, labels = skipgrams(wids, len(word2id) + 1)
print(len(pairs), len(labels))

56 56


In [None]:
# Preview the first ten couples as "(word (id), word (id)) -> label".
row_template = '({:s} ({:d}), {:s} ({:d})) -> {:d}'
for idx in range(10):
    first_id, second_id = pairs[idx]
    print(row_template.format(
        id2word[first_id], first_id, id2word[second_id], second_id, labels[idx]
    ))

(love (2), ham (6)) -> 1
(eggs (4), ham (6)) -> 1
(green (3), i (1)) -> 0
(i (1), i (1)) -> 0
(eggs (4), and (5)) -> 1
(eggs (4), and (5)) -> 0
(and (5), and (5)) -> 0
(green (3), love (2)) -> 0
(ham (6), and (5)) -> 1
(green (3), love (2)) -> 1


In [None]:
from keras.models import Sequential
from keras.layers.core import Lambda

In [None]:
# Settings for the model built in the next cell: embedding-table rows, vector
# width, and window size (that model takes 2 * window_size ids per sample).
vocab_size, embed_size, window_size = 5000, 300, 1

In [None]:
# Clear the Keras backend session, then assemble the model from an ordered layer list.
K.clear_session()
model = Sequential([
    # (batch, 2 * window_size) ids -> (batch, 2 * window_size, embed_size)
    Embedding(input_dim = vocab_size, output_dim = embed_size,
              embeddings_initializer = 'glorot_uniform',
              input_length = 2 * window_size),
    # average over axis 1 (the input_length positions) -> (batch, embed_size)
    Lambda(lambda ctx : K.mean(ctx, axis = 1), output_shape = (embed_size,)),
    # softmax over vocab_size outputs
    Dense(vocab_size, activation = 'softmax'),
])
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 2, 300)            1500000   
_________________________________________________________________
lambda_1 (Lambda)            (None, 300)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 5000)              1505000   
Total params: 3,005,000
Trainable params: 3,005,000
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Softmax output over vocab_size classes is paired with categorical cross-entropy; Adam optimizer.
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy')


