In [9]:
# Larger LSTM network for character-level text generation (originally written for Alice in Wonderland; this notebook loads "men_in_the_sun_edited.txt")
import numpy
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import LSTM
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.utils import to_categorical


In [10]:
# Load the UTF-8 training text. Case is left as-is (the lowercasing step below is disabled).
# filename = "wonderland.txt"
# A forward slash is used as the separator: "\m" is not a valid escape sequence,
# and a backslash only acts as a path separator on Windows.
filename = "Dataset/men_in_the_sun_edited.txt"
# The context manager closes the file handle as soon as the text has been read.
with open(filename, 'r', encoding='utf-8') as text_file:
    raw_text = text_file.read()
# raw_text = raw_text.lower()

In [11]:
# Build the vocabulary (every distinct character of the text, sorted) and the
# two lookup tables that translate between characters and their integer ids.
chars = sorted(set(raw_text))
char_to_int = {symbol: idx for idx, symbol in enumerate(chars)}
int_to_char = dict(enumerate(chars))

In [12]:
# Report the corpus length and the number of distinct characters.
n_chars, n_vocab = len(raw_text), len(chars)
print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)

Total Characters:  71818
Total Vocab:  61


In [13]:
# Slide a seq_length-wide window over the text one character at a time:
# each window (as integer ids) is an input pattern, and the character that
# immediately follows the window is its target.
seq_length = 100
# Encode the whole text once; every window is then just a slice of this list.
encoded_text = [char_to_int[symbol] for symbol in raw_text]
window_starts = range(n_chars - seq_length)
dataX = [encoded_text[begin:begin + seq_length] for begin in window_starts]
dataY = [encoded_text[begin + seq_length] for begin in window_starts]
n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)

Total Patterns:  71718


In [14]:
# reshape X to be [samples, time steps, features]
X = numpy.reshape(dataX, (n_patterns, seq_length, 1))
# normalize: ids run from 0 to n_vocab - 1, so dividing by n_vocab scales them into [0, 1)
X = X / float(n_vocab)
# one hot encode the output variable
# num_classes is pinned to the vocabulary size: left to be inferred, the width
# would be (largest id present in dataY) + 1, which is smaller than n_vocab
# whenever the highest-sorted characters never occur as a target.
y = to_categorical(dataY, num_classes=n_vocab)

In [15]:
# define model: a stack of five LSTM layers followed by a softmax over the
# output classes (one unit per column of y)
model = Sequential()
# Input layer: one sample is (time steps, features), taken from X's shape.
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
# Three identical intermediate layers; each must emit the full sequence
# because another recurrent layer consumes it.
for _ in range(3):
    model.add(LSTM(512, return_sequences=True))
    model.add(Dropout(0.2))
# Last recurrent layer: return_sequences is left at its default here.
model.add(LSTM(512))
model.add(Dropout(0.2))
model.add(Dense(y.shape[1], activation='softmax'))

# summary() prints the table itself and returns None, so wrapping it in
# print() would add a stray "None" line after the table.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_5 (LSTM)                (None, 100, 256)          264192    
_________________________________________________________________
lstm_6 (LSTM)                (None, 100, 512)          1574912   
_________________________________________________________________
dropout_4 (Dropout)          (None, 100, 512)          0         
_________________________________________________________________
lstm_7 (LSTM)                (None, 100, 512)          2099200   
_________________________________________________________________
dropout_5 (Dropout)          (None, 100, 512)          0         
_________________________________________________________________
lstm_8 (LSTM)                (None, 100, 512)          2099200   
_________________________________________________________________
dropout_6 (Dropout)          (None, 100, 512)         

In [16]:
# load the network weights
# A forward slash is used as the separator: "\w" is not a valid escape sequence,
# and a backslash only acts as a path separator on Windows.
filename = "Trained Models/weights-improvement-36-0.5387-bigger.hdf5"
model.load_weights(filename)
# Compile with the loss/optimizer pair used for this categorical model.
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [17]:
# pick a random seed pattern to start generation from
# numpy.random.randint excludes its upper bound, so the bound must be
# len(dataX) (not len(dataX) - 1) for the last pattern to be selectable.
start = numpy.random.randint(0, len(dataX))
# Copy the row so that later in-place edits of the working pattern cannot
# alter the dataset entry it was taken from.
pattern = list(dataX[start])
print("Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")

Seed:
" ان زجاجها المدور قد تشقق د - شقوقاً
مضلعة صغيرة  الفوهة المفتوحة بقيت تخفق بالفراغ لحظة  كان وجه أبي "


In [18]:
# generate characters
# Number of characters to produce.
n_generate = 1000
# Decode the seed before the loop starts sliding the window, so it can be
# written under the "seed:" header of the output file.
seed_text = ''.join(int_to_char[value] for value in pattern)
generatedText = ""
for _ in range(n_generate):
    # Shape the current window as a single sample: (1, time steps, 1 feature),
    # scaled the same way as the training inputs.
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # Greedy decoding: take the most probable next character.
    index = int(numpy.argmax(prediction))
    result = int_to_char[index]
    # print() is used for streaming output so the cell does not rely on a
    # `sys` import that this notebook never performs.
    print(result, end="", flush=True)
    generatedText = generatedText + result
    # Slide the window by building a new list; appending in place would also
    # modify the dataX row that `pattern` may still refer to.
    pattern = pattern[1:] + [index]
print("\nDone.")

# The context manager guarantees the file is flushed and closed.
with open("Arabic_generated_text.txt", "w", encoding='utf-8') as out_file:
    out_file.write("seed: \n" + seed_text + "\n\n\n" + "generated:\n" + generatedText)

 الخيزران يراقبه 

- 

KeyboardInterrupt: 