/
LSTMMain.py
129 lines (108 loc) · 4.74 KB
/
LSTMMain.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/usr/bin/env python3
import numpy as np
from org.mk.training.dl.rnn_cell import LSTMCell
from org.mk.training.dl.rnn import dynamic_rnn
#from org.mk.training.dl.rnn import compute_gradients
from org.mk.training.dl.rnn import print_gradients
from org.mk.training.dl.rnn import zero_state_initializer
from org.mk.training.dl.rnn import LSTMStateTuple
from org.mk.training.dl.common import loss
from org.mk.training.dl.common import softmax
from org.mk.training.dl.common import input_one_hot
from org.mk.training.dl.common import cross_entropy_loss
from org.mk.training.dl.common import WeightsInitializer
from org.mk.training.dl import init_ops
from org.mk.training.dl.optimizer import BatchGradientDescent
from org.mk.training.dl.core import Dense
import sys
import collections
# data I/O
# The path of the training corpus is the first command-line argument; fail
# with a usage message instead of a bare IndexError when it is missing.
if len(sys.argv) < 2:
    raise SystemExit("usage: %s <train_file>" % sys.argv[0])
train_file = sys.argv[1]
# Read the raw text through a context manager so the file handle is closed
# as soon as the content has been loaded (the original left it open).
with open(train_file, 'r') as _train_fh:
    data = _train_fh.read()
# Fixed kernel for the output Dense layer: 5 rows x 10 columns.
# NOTE(review): the last two rows are identical in the original values and
# are reproduced unchanged here -- confirm that the duplication is intended.
out_weights = np.array([
    [-0.09588283, -2.2044923, -0.74828255, 0.14180686, -0.32083616,
     -0.9444244, 0.06826905, -0.9728962, -0.18506959, 1.0618515],
    [1.156649, 3.2738173, -1.2556943, -0.9079511, -0.82127047,
     -1.1448543, -0.60807484, -0.5885713, 1.0378786, -0.7088431],
    [1.006477, 0.28033388, -0.1804534, 0.8093307, -0.36991575,
     0.29115433, -0.01028167, -0.7357091, 0.92254084, -0.10753923],
    [0.19266959, 0.6108299, 2.2495654, 1.5288974, 1.0172302,
     1.1311738, 0.2666629, -0.30611828, -0.01412263, 0.44799015],
    [0.19266959, 0.6108299, 2.2495654, 1.5288974, 1.0172302,
     1.1311738, 0.2666629, -0.30611828, -0.01412263, 0.44799015],
])

# Fixed bias for the output Dense layer, kept as a single row of 10 values.
out_biases = np.array([
    0.1458478, -0.3660951, -2.1647317, -1.9633691, -0.24532059,
    0.14005205, -1.0961286, -0.43737876, 0.7028531, -1.8481724,
]).reshape(1, -1)
def read_data(fname):
    """Read a text file into a flat array of lowercase words.

    Args:
        fname: Path of the text file to read.

    Returns:
        A 1-D NumPy array holding every whitespace-separated word of the
        file, lowercased, in reading order.
    """
    with open(fname) as f:
        # Flatten while reading.  Building one list per line and handing the
        # nested list to np.array() only works when every line has the same
        # number of words; lines of differing length (or blank lines) produce
        # a ragged array that cannot be reshaped into a flat word list.
        words = [word for line in f for word in line.lower().split()]
    data = np.array(words)
    print(data)
    return data
def build_dataset(train_data):
    """Build word -> id and id -> word lookup tables for a corpus.

    Ids are assigned in sorted order of the distinct words, starting at 0.
    Word frequencies and the intermediate values are printed as a trace.

    Args:
        train_data: Iterable of hashable, sortable words.

    Returns:
        A ``(dictionary, reverse_dictionary)`` tuple: the first maps each
        word to its id, the second maps each id back to its word.
    """
    frequencies = collections.Counter(train_data).most_common()
    print("count:", frequencies)

    dictionary = {}
    print("dictionary:", dictionary)

    vocabulary = sorted(set(train_data))
    print("sortedword:", vocabulary)

    # Position in the sorted vocabulary is the word's id.
    for index, token in enumerate(vocabulary):
        print("word:", token)
        dictionary[token] = index

    reverse_dictionary = {index: token for token, index in dictionary.items()}
    return dictionary, reverse_dictionary
# Load the corpus and derive the word <-> id lookup tables.
train_data = read_data(train_file)
dictionary, reverse_dictionary = build_dataset(train_data)
vocab_size = len(dictionary)
print("dictionary:", dictionary)
print("reverse_dictionary:", reverse_dictionary)
print("vocab_size", vocab_size)

# Optimisation settings.
learning_rate = 0.001
training_iters = 200  # short run; 50000 was the previously noted alternative
display_step = 100    # not referenced by the visible code

# Window length (words fed per step) and LSTM hidden size.
n_input, n_hidden = 3, 5
# One full sample is n_input input words plus the following target word.
end_offset = n_input + 1

# Seeded generator so that re-drawn offsets are reproducible.
rnd = np.random.RandomState(42)

# Loop state.  acc_total and loss_total are initialised here but are not
# updated anywhere in the visible code.
step = acc_total = loss_total = 0
# Fixed starting position; a random start would be
# rnd.randint(0, n_input + 1).
offset = 2
print("offset:", offset)
# Model construction and training loop.
# NOTE(review): leading indentation is missing in this copy of the file, so
# the extent of the `with` body and of the `while` body cannot be read off
# the text below -- restore it from the original file before running.
# Weights created under this context presumably use Constant(0.1) -- confirm
# against WeightsInitializer; `vs` is not referenced again in the visible code.
with WeightsInitializer(initializer=init_ops.Constant(0.1)) as vs:
cell = LSTMCell(n_hidden,debug=True)
gdo=BatchGradientDescent(learning_rate)
# Output layer initialised from the fixed out_weights / out_biases arrays.
# NOTE(review): the layer is built with 10 while pred is later reshaped to
# vocab_size, so the corpus presumably needs exactly 10 distinct words -- confirm.
out_l = Dense(10,kernel_initializer=init_ops.Constant(out_weights),bias_initializer=init_ops.Constant(out_biases))
while step < training_iters:
# When the window plus its target word would run past the end of the data,
# restart from a random offset in [0, n_input].
if offset > (len(train_data) - end_offset):
offset = rnd.randint(0, n_input + 1)
print("offset:", offset)
# Encode the n_input words starting at offset via input_one_hot, then reshape
# to [-1, n_input, vocab_size].
symbols_in_keys = [input_one_hot(dictionary[str(train_data[i])],vocab_size) for i in range(offset, offset + n_input)]
symbols_in_keys = np.reshape(np.array(symbols_in_keys), [-1, n_input, vocab_size])
print("symbols_in_keys:",symbols_in_keys)
# The target is the id of the word that immediately follows the input window.
target=dictionary[str(train_data[offset + n_input])]
result, state = dynamic_rnn(cell, symbols_in_keys)
# Only h is used below; c is unpacked but not referenced again.
(c, h) = state.c,state.h
print("final:", repr(result),state,h.shape)
# Final feed-forward layer: applied to h to produce the prediction that is
# compared with the target.
pred=out_l(h)
print("pred:",pred)
# cross_entropy_loss returns yhat and the loss for a batch of sequences
# (per the original note it applies the softmax internally -- confirm).
# pred   - batch, seq, input_size
# labels - batch, seq (transformed internally before being compared with the
#          predictions, per the original note)
yhat,cel=cross_entropy_loss(pred.reshape([1,1,vocab_size]),np.array([[target]]))
print("yhat:",yhat)
print("CEL:",cel)
# yhat   - expected size: batch, seq, size
# target - expected size: batch, seq
gradients=gdo.compute_gradients(yhat,np.array([[target]]))
gdo.apply_gradients(gradients)
print_gradients(gradients)
# Count the iteration and move the window past the sample just used
# (n_input input words plus one target word).
step += 1
offset += (n_input + 1)
print("Optimization Finished!")