<a href="https://colab.research.google.com/github/kchaitanya954/Machine-learning-technologies/blob/main/Encoder_Decoder_sorting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from random import randint
from numpy import array, argmax
from numpy import array_equal
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import TimeDistributed
from keras.layers import RepeatVector

In [None]:
# generate a sequence of random integers
def generate_sequence(length, n_unique):
    """Return a list of `length` random integers.

    Each value is drawn independently with `randint`, inclusive on both
    ends, from the range 0 .. n_unique - 1. Values may repeat.
    """
    values = []
    for _ in range(length):
        values.append(randint(0, n_unique - 1))
    return values

In [None]:
# one hot encode sequence
def one_hot_encode(sequence, n_unique):
    """One hot encode a sequence of integer class indices.

    Builds one row of `n_unique` zeros per element of `sequence` and sets
    the position given by that element to 1. The rows are returned as a
    numpy array.
    """
    rows = []
    for index in sequence:
        row = [0] * n_unique
        # plain list indexing: an index outside the row raises IndexError
        row[index] = 1
        rows.append(row)
    return array(rows)

In [None]:
# decode a one hot encoded sequence and return its values in ascending order
def one_hot_decode(encoded_seq):
    """Decode one hot (or probability) vectors into a sorted list of indices.

    Takes the argmax of every vector in `encoded_seq` and returns the
    resulting indices sorted in ascending order. The original position of
    each vector is therefore NOT preserved in the result.
    """
    indices = [argmax(vector) for vector in encoded_seq]
    indices.sort()
    return indices

In [None]:
# generate a random sequence of 5 integers, each drawn from 0..49 inclusive
sequence = generate_sequence(5, 50)
print(sequence)

[28, 7, 0, 40, 8]


In [None]:
# one hot encode the sequence: one row per value, 50 columns, a single 1 per row
encoded = one_hot_encode(sequence, 50)
print(encoded)

[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 1 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0]]


In [None]:
# decode back to integer values; note that one_hot_decode returns them sorted
# in ascending order rather than in their original positions
decoded = one_hot_decode(encoded)
print(decoded)

[0, 7, 8, 28, 40]


In [23]:
# prepare data for the LSTM
def get_pair(n_in, n_out, cardinality):
    """Build one (input, target) training pair for the sorting task.

    Args:
        n_in: number of integers in the random input sequence.
        n_out: accepted for signature compatibility but not used; the target
            always has `n_in` timesteps because sorting keeps every element.
        cardinality: number of distinct integer values (0 .. cardinality - 1)
            and therefore the width of each one hot vector.

    Returns:
        A tuple (X, y) of one hot encoded arrays, each reshaped to
        (1, n_in, cardinality): X holds the random sequence and y holds the
        same values in ascending order.
    """
    # generate random sequence
    sequence_in = generate_sequence(n_in, cardinality)
    # the target is the input sorted in ascending order; sorting the integers
    # directly avoids an encode/decode round trip through a fixed width of 50
    sequence_out = sorted(sequence_in)
    # one hot encode
    X = one_hot_encode(sequence_in, cardinality)
    y = one_hot_encode(sequence_out, cardinality)
    # reshape as 3D: a batch containing a single sample
    X = X.reshape((1, X.shape[0], X.shape[1]))
    y = y.reshape((1, y.shape[0], y.shape[1]))
    return X, y

In [24]:
# configure problem
n_features = 50
n_timesteps_in = 5
# sorting keeps every element, so the output has exactly as many timesteps as
# the input (a fixed value such as 2 would contradict the decoder below)
n_timesteps_out = n_timesteps_in
# define model
model = Sequential()
# encoder: reads the whole input sequence
model.add(LSTM(150, input_shape=(n_timesteps_in, n_features)))
# feed the encoder output to the decoder once per output timestep
model.add(RepeatVector(n_timesteps_out))
# decoder: emits one vector per output timestep
model.add(LSTM(150, return_sequences=True))
# softmax over the n_features classes, applied at every timestep
model.add(TimeDistributed(Dense(n_features, activation='softmax')))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])


In [None]:
# train LSTM
# Each of the 500 iterations draws one fresh random pair and fits on it for a
# single epoch, so every fit call sees a batch of exactly one sample.
# NOTE(review): the evaluation output recorded in this notebook reports 0.00%
# accuracy, which suggests 500 single-sample updates may be too few -- confirm
# and consider training for more iterations.
for epoch in range(500):
	# generate new random sequence
	X,y = get_pair(n_timesteps_in, n_timesteps_out, n_features)
	# fit model for one epoch on this sequence
	model.fit(X, y, epochs=1, verbose=2)

In [26]:
# evaluate LSTM
# A prediction counts as correct only when every timestep matches the sorted
# target, in order. The class indices are compared directly via argmax because
# one_hot_decode() sorts its result, which would hide ordering mistakes and
# score a model as correct for merely emitting the right values in any order.
total, correct = 100, 0
for _ in range(total):
    X, y = get_pair(n_timesteps_in, n_timesteps_out, n_features)
    yhat = model.predict(X, verbose=0)
    # index [0] selects the single sample in the batch
    expected = argmax(y[0], axis=-1)
    predicted = argmax(yhat[0], axis=-1)
    if array_equal(expected, predicted):
        correct += 1
print('Accuracy: %.2f%%' % (float(correct)/float(total)*100.0))

Accuracy: 0.00%


In [27]:
# spot check some examples
# Show the model's raw per-timestep predictions next to the sorted target.
# argmax is used instead of one_hot_decode() because the latter sorts its
# result, which would make every prediction look ordered even when it is not.
for _ in range(10):
    X, y = get_pair(n_timesteps_in, n_timesteps_out, n_features)
    yhat = model.predict(X, verbose=0)
    # index [0] selects the single sample in the batch; tolist() yields plain
    # Python ints so the printed lists stay readable
    expected = argmax(y[0], axis=-1).tolist()
    predicted = argmax(yhat[0], axis=-1).tolist()
    print('Expected:', expected, 'Predicted', predicted)

Expected: [10, 20, 24, 26, 31] Predicted [10, 10, 10, 31, 31]
Expected: [2, 14, 27, 30, 32] Predicted [10, 10, 27, 32, 32]
Expected: [11, 28, 40, 44, 48] Predicted [10, 23, 40, 40, 48]
Expected: [9, 15, 25, 37, 39] Predicted [10, 25, 25, 37, 48]
Expected: [2, 25, 33, 45, 48] Predicted [12, 33, 33, 33, 49]
Expected: [1, 4, 12, 13, 24] Predicted [3, 13, 13, 13, 23]
Expected: [0, 8, 13, 19, 20] Predicted [3, 10, 10, 19, 19]
Expected: [18, 25, 37, 42, 45] Predicted [10, 25, 25, 44, 44]
Expected: [7, 15, 19, 38, 38] Predicted [17, 23, 38, 38, 38]
Expected: [1, 8, 17, 22, 49] Predicted [17, 17, 17, 17, 48]
