<a href="https://colab.research.google.com/github/paruliansaragi/DL-Notebooks/blob/master/TextGeneration.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import keras
keras.__version__

Using TensorFlow backend.


'2.2.4'

In [0]:
!unzip alllines.txt.zip

Archive:  alllines.txt.zip
  inflating: alllines.txt            


In [0]:
f = open("alllines.txt", "r")
print(f.read(500))

"ACT I"
"SCENE I. London. The palace."
"Enter KING HENRY, LORD JOHN OF LANCASTER, the EARL of WESTMORELAND, SIR WALTER BLUNT, and others"
"So shaken as we are, so wan with care,"
"Find we a time for frighted peace to pant,"
"And breathe short-winded accents of new broils"
"To be commenced in strands afar remote."
"No more the thirsty entrance of this soil"
"Shall daub her lips with her own children's blood,"
"Nor more shall trenching war channel her fields,"
"Nor bruise her flowerets with the ar


In [0]:
import keras
import numpy as np

text = open("alllines.txt", "r").read().lower()[:500000]
print('Corpus length:', len(text))

Corpus length: 500000


In [0]:
text[:50]

'"act i"\n"scene i. london. the palace."\n"enter king'

In [0]:
# Length of extracted character sequences
maxlen = 60

# We sample a new sequence every `step` characters
step = 3

# This holds our extracted sequences
sentences = []

# This holds the targets (the follow-up characters)
next_chars = []

for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('Number of sequences:', len(sentences))

Number of sequences: 166647


In [0]:
sentences[:5]#creates 60 character long sentence lengths whilst appending i before the max length

['"act i"\n"scene i. london. the palace."\n"enter king henry, lo',
 't i"\n"scene i. london. the palace."\n"enter king henry, lord ',
 '"\n"scene i. london. the palace."\n"enter king henry, lord joh',
 'scene i. london. the palace."\n"enter king henry, lord john o',
 'ne i. london. the palace."\n"enter king henry, lord john of l']

In [0]:
# List of unique characters in the corpus
chars = sorted(list(set(text)))
#set()#Build an unordered collection of unique elements.
print('Unique characters:', len(chars))
# Dictionary mapping unique characters to their index in `chars`
char_indices = dict((char, chars.index(char)) for char in chars)

Unique characters: 44


In [0]:
char_indices

In [0]:
# Next, one-hot encode the characters into binary arrays.
print('Vectorization...')
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

Vectorization...


In [0]:
from keras import layers

model = keras.models.Sequential()
model.add(layers.LSTM(128, input_shape=(maxlen, len(chars))))
model.add(layers.Dense(len(chars), activation='softmax'))

In [0]:
optimizer = keras.optimizers.RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

In [0]:
def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [0]:
import random
import sys

for epoch in range(1, 60):
    print('epoch', epoch)
    # Fit the model for 1 epoch on the available training data
    model.fit(x, y,
              batch_size=128,
              epochs=1)

    # Select a text seed at random
    start_index = random.randint(0, len(text) - maxlen - 1)
    generated_text = text[start_index: start_index + maxlen]
    print('--- Generating with seed: "' + generated_text + '"')

    for temperature in [0.2, 0.5, 1.0, 1.2]:
        print('------ temperature:', temperature)
        sys.stdout.write(generated_text)

        # We generate 400 characters
        for i in range(400):
            sampled = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(generated_text):
                sampled[0, t, char_indices[char]] = 1.

            preds = model.predict(sampled, verbose=0)[0]
            next_index = sample(preds, temperature)
            next_char = chars[next_index]

            generated_text += next_char
            generated_text = generated_text[1:]

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

epoch 1
Epoch 1/1
--- Generating with seed: "nry and dame margaret kneel'd to me"
"and on my head did set"
------ temperature: 0.2
nry and dame margaret kneel'd to me"
"and on my head did setaa aa haaaaooahaaaoa aoh aaohaahaoa a a aaahaohaooahaaoaahaaaaa  aaoaaaaaahoaahaa aahaa haaa  aaoaah aaaa oaahaooaaaaaaaaaaaaaaoaaohhaaooaahaaaaaaaaaaaa aaaaa   haaaaaa  oaaaaaa oaoao aaaaoaaaa aoaoaaaaaaaahoaaaoooaaaaa  aaaaaaa aaahoaaahahaahahoahaahaaohaaaooaaaaaaoa aahhahhaaaa aoaao aaa aooaaaaoaaaooo ahaoaaaaaaoaahaaaa aoooaa aaaaaa aahaaa aaa.aha ahoo o aaaa haaooho aaahoa haaaoaoaoaaaaaaoaaa
------ temperature: 0.5
ahaaa aaa.aha ahoo o aaaa haaooho aaahoa haaaoaoaoaaaaaaoaaaa
nh  a  ohahoh . anoadaa.  aha  ah aah hh hhaao oaa   eaahhi ooaarah a oao haaaaooaaahaaa.eaena h  ah h taoah ato ahohhao ooaaoaoaaooaaaa  aaheotoe oaoahhaoaaa holao
oaa
hh aahooo
a aaaoteoao aoha a caaaaaoaa al a  haaaaohooh aaahhahhmal eimaaeeaoa nat taaahhhh aaa aloa oa  oeo o d aaaht ohhaa  hoaaheh avr
ahahaaaaaahaaa 

  This is separate from the ipykernel package so we can avoid doing imports until


h s hh   h    hshh s shh hhs h hhh  hhh  h hhh h h h  h shshhhhhth hs   h   hh  hs thh  s    hs shh shhsh  h     h h  hhshh hh hh h  shh h  hhh  h       h shhhhhh  h hsshth shhhs s     sh   h s   s hhh t h h   hh hh h  h s   h  hhshshsh h   hhhss s hs   hh  hh  hhh hh  hh s   h hhh  hs ss  h  hhhsh s h  s  h h ss h s  ss   hh  sh hhs   s s  h shs   s hsh ths   shh     
------ temperature: 0.5
 ss h s  ss   hh  sh hhs   s s  h shs   s hsh ths   shh     t  httnn sh  s hshhs    hshh heti
rstsohd,hs  a sdsdht  snhst ht  hh  hdst  hh t
hsssh u t  stsshnfhhhssod 
 " td  , hssh r eh  hhrsshr,hnhrsun s nhhfh"tis ssshtuhshbrhshhhh ethtrs h bf t hsth"rrd srs hhh srt  httst sss rrsssf

 n"rt t" r hthh
shtoh trhrsrsshh h hss hfhssrtthuhhs tc enf
 hrseetnsnh h hshs e    tsn   sh,   hh h ss  tshs shsr tsr
 r nshs hssth,her "
 
ees, h   st hhwh rrhhe "rt
i "h
------ temperature: 1.0
sr tsr
 r nshs hssth,her "
 
ees, h   st hhwh rrhhe "rt
i "hh "hbt h
a 
r
sh
v th"sdhht la hn asnttuel "s"u  am rwsisn


KeyboardInterrupt: ignored