In [123]:
import numpy as np
import re
import itertools
import collections
import keras

from keras.models import Sequential
from keras.layers import Dense, Activation

In [91]:
M = np.load('word2vec.npy')
word2index = dict()
with open('word_lst.txt') as file:
    for counter, line in enumerate(file):
        word = line.strip()
        word2index[word] = counter

def word2vec(word):
    idx = word2index.get(word)
    if idx is None:
        return np.zeros(200)
    return M[idx]

        
def get_vec(word):
    try:
        retval = M[word2index[word]]
    except KeyError:
        retval = np.zeros(200)

In [5]:
def import_dataset(filename):
    
    start_tag = '<S>'
    end_tag = '</S>'
    
    sentence_lst = [] # Sentence is a list word which is list of candidate roots
    sentence_correct_lst = []
    with open(filename) as file:
        start_parsing = False
        for line in file:
            if start_parsing:
                if line.startswith(end_tag):
                    start_parsing = False
                    sentence_lst.append(sentence)
                    sentence_correct_lst.append(sentence_correct)
                else:
                    root_set = set()
                    candidate_lst = line.split()[1:]
                    for parse in candidate_lst:
                        try:
                            root_candidate = parse[:parse.index('+')]
                        except ValueError:
                            continue
                        root_set.add(root_candidate.lower())
                    if root_set:
                        sentence.append(list(root_set))
                    
                        correct = candidate_lst[0][:candidate_lst[0].index('+')]
                        sentence_correct.append(correct.lower())
                        
                
            else:
                if line.startswith(start_tag):
                    start_parsing = True
                    sentence = []
                    sentence_correct = []
                    
        return sentence_correct_lst, sentence_lst

In [7]:
train_dataset_filename = 'dataset/train.merge'
train_sentence_correct_lst, train_sentence_lst = import_dataset(train_dataset_filename)

In [17]:
train_sentence_lst

[[['döviz'], ['asker', 'askerlik'], ['20'], ['bin'], ['mark']],
 [['milli', 'mil'],
  ['savun'],
  ['bakan', 'bakanlık'],
  ['döviz'],
  ['asker', 'askerlik'],
  ['yarar'],
  ['için', 'iç'],
  ['yurt'],
  ['dış', 'dışı'],
  ['bulun', 'bul'],
  ['süre', 'sür'],
  ['3'],
  ['yıl'],
  ['1'],
  ['yıl'],
  ['in'],
  [','],
  ['döviz'],
  ['asker', 'askerlik'],
  ['rakam'],
  ['iki'],
  ['kat', 'katı'],
  ['çık'],
  ['planla']],
 [['evren', 'evre'], ['değer', 'değ'], ['-'], ['ankara']],
 [['milli', 'mil'],
  ['savun'],
  ['bakan', 'bakanlık'],
  ['döviz'],
  ['asker', 'askerlik'],
  ['konu'],
  ['çöz', 'çözüm'],
  ['ara'],
  ['girdi', 'gir']],
 [['bakan', 'bakanlık'],
  ['birçok'],
  ['kişi'],
  [','],
  ['bulun', 'bul'],
  ['ve'],
  ['çalış'],
  ['ülke'],
  ['vatandaşlık', 'vatandaş'],
  ['geç'],
  ['türkiye'],
  ['asker', 'askerlik'],
  ['yüküm'],
  ['ye', 'yer', 'yerin'],
  ['getir'],
  ['üz', 'üzer'],
  ['alternatif'],
  ['üz', 'üzer'],
  ['çalış'],
  ['başla']],
 [['türk'],
  ['vatandaş

In [109]:

model = Sequential([
    Dense(800, input_shape=(400,)),
    Activation('relu'),
    Dense(40),
    Activation('relu'),
    Dense(2),
    Activation('softmax'),
])
model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

def generate_samples(sentences):
    for sentence in sentences:
        for w1, w2 in zip(sentence, sentence[1:]):
            itert = itertools.product(w1,w2)
            yield next(itert), 1
            for others in itert:
                yield others, 0
samples = list(generate_samples(train_sentence_lst))

In [25]:
samples[:10]

[(('döviz', 'asker'), 1),
 (('döviz', 'askerlik'), 0),
 (('asker', '20'), 1),
 (('askerlik', '20'), 0),
 (('20', 'bin'), 1),
 (('bin', 'mark'), 1),
 (('milli', 'savun'), 1),
 (('mil', 'savun'), 0),
 (('savun', 'bakan'), 1),
 (('savun', 'bakanlık'), 0)]

In [110]:
def gen_subsamples():
    subsample_size = 10000
    for subsample in range(0, len(samples), subsample_size):
        subsamples = samples[subsample: subsample + subsample_size]
        train_data = np.array([np.append(word2vec(w1), word2vec(w2)) for (w1,w2),_ in subsamples])
        train_labels = np.array([[v==0, v==1] for _, v in subsamples])
        print(subsample, len(samples))
        yield train_data, train_labels
#train_data, train_labels = next(gen_subsamples())
#model.fit(train_data, train_labels, epochs=10, batch_size=32)
for train_data, train_labels in gen_subsamples():
    model.fit(train_data, train_labels, epochs=10, batch_size=32)

0 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
10000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
20000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
30000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
40000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
50000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
60000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
70000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


80000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
90000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
100000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
110000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
120000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
130000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
140000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
150000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 

160000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
170000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
180000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
190000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
200000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
210000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
220000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
230000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10


Epoch 10/10
240000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
250000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
260000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
270000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
280000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
290000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
300000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
310000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epo

Epoch 10/10
320000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
330000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
340000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
350000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
360000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
370000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
380000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
390000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10


Epoch 9/10
Epoch 10/10
400000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
410000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
420000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
430000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
440000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
450000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
460000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
470000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epo

Epoch 9/10
Epoch 10/10
480000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
490000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
500000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
510000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
520000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
530000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
540000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
550000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10


Epoch 8/10
Epoch 9/10
Epoch 10/10
560000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
570000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
580000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
590000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
600000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
610000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
620000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
630000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epo

Epoch 8/10
Epoch 9/10
Epoch 10/10
640000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
650000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
660000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
670000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
680000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
690000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
700000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
710000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10


Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
720000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
730000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
740000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
750000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
760000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
770000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
780000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
790000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epo

Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
800000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
810000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
820000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
830000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
840000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
850000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
860000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
870000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
880000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
890000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
900000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
910000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
920000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
930000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
940000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
950000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epo

Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
960000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
970000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
980000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
990000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
1000000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
1010000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
1020000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
1030000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10

Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
1040000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
1050000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
1060000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
1070000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
1080000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
1090000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
1100000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
1110000 1164279
Epoch 1/10
Epoch 2/10
Epoch 

Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
1120000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
1130000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
1140000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
1150000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
1160000 1164279
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [111]:
model.save("dense_800_40_2.keras")

In [124]:
model = keras.models.load_model("dense_800_40_2.keras")

In [125]:
model.predict(np.array([np.append(word2vec("asd"), word2vec("asd"))]))

array([[0.27538309, 0.72461694]], dtype=float32)

In [85]:
dataset_filename = 'dataset/test.merge'
sentence_correct_lst, sentence_lst = import_dataset(dataset_filename)

In [116]:
class ScoreModel:
    def __init__(self, verbose=False):
        self.verbose = verbose
    
    def predict(self, sentence):
        max_score = float('-inf')
        predict_sentence = None
        self._cache = {}
        for element in itertools.product(*sentence):
            score = self.calc_sentence_score(element)
            if self.verbose:
                print(element)
                print('Score: %.2f' % score)
                print()
            if score > max_score:
                max_score = score
                predict_sentence = list(element)
        return predict_sentence
    
    def pair_score(self, word1, word2):
        if (word1, word2) in self._cache:
            return self._cache[(word1, word2)]
        try:
            vec1 = M[word2index[word1]]
            vec2 = M[word2index[word2]]
        except:
            return 0
        #return np.abs(np.dot(vec1, vec2))
        self._cache[(word1, word2)] = model.predict(np.array([np.append(vec1, vec2)]))[0][1]
        return self._cache[(word1, word2)]

    def calc_sentence_score(self, sentence):
        score = 0

        if len(sentence) <= 1:
            return score

        for i in range(len(sentence) - 1):
            word1 = sentence[i]
            word2 = sentence[i + 1]
            score += self.pair_score(word1, word2)

        return score

In [117]:
correct_count = 0
false_count = 0

mdl = ScoreModel()
for num, (sentence, sentence_correct) in enumerate(zip(sentence_lst, sentence_correct_lst)):
    predict_sentence = mdl.predict(sentence)
    for word1, word2 in zip(predict_sentence, sentence_correct):
        if word1 == word2:
            correct_count += 1
        else:
            false_count += 1

total_count = correct_count + false_count
accuracy = correct_count / total_count

print('Total number of words : %s' % total_count)
print('Correctly predicted : %s' % correct_count)
print('Accuracy : %.3f' % accuracy)

Total number of words : 861
Correctly predicted : 809
Accuracy : 0.940


In [118]:
def parse_sentence(sentence):
    sentence = '\'' + sentence + '\''
    word_lst = !./trnltk/parser.py {sentence}
    #print(word_lst)
    retval = []
    for word in word_lst:
        root_lst = word.split()
        retval.append(root_lst)
    return retval

    yüklenen yükle+Verb^DB+Verb+Pass+Pos^DB+Adj+PresPart yük+Noun+A3sg+Pnon+Nom^DB+Verb+Acquire+Pos^DB+Adj+PresPart

    Sentence Examples
    
    dolar fiyatları beş TL seviyesinde bulunurken Euro fiyatları altı TL seviyesinde hareket ediyor
    yorulunca alın damarları gözükmeye başladı
    istediğiniz kadar ürün alın 
    sözlerine çok alındı (Wrong)
    ya iyi olarak ölürsün ya da kötüye dönüşecek kadar uzun yaşarsın
    nedir amacımız bunu göndermekle uzaylılara karsı bir sinerji yaratalım dostluk olsun mu
    kafanızı kullansaydınız o taşların doğada bulunan 4 elementi simgelediğini anlardınız

In [119]:
sentence = 'istediğiniz kadar ürün alın'
parsed_sentence = parse_sentence(sentence)

combination_count = 1
for word in parsed_sentence:
  combination_count *= len(word)
print('Total number of possibilities : %s' % combination_count)

print(parsed_sentence)
print(sentence)

vmodel = ScoreModel(verbose=True)
prediction = vmodel.predict(parsed_sentence)

print('-------')
print('Predicted roots : ')
print(prediction)

Total number of possibilities : 7
[['/bin/bash:', './trnltk/parser.py:', 'No', 'such', 'file', 'or', 'directory']]
istediğiniz kadar ürün alın
('/bin/bash:',)
Score: 0.00

('./trnltk/parser.py:',)
Score: 0.00

('No',)
Score: 0.00

('such',)
Score: 0.00

('file',)
Score: 0.00

('or',)
Score: 0.00

('directory',)
Score: 0.00

-------
Predicted roots : 
['/bin/bash:']
