In [1]:
import numpy as np

def reweight_distribution(original_distribution, temperature=0.5):
    distribution = np.log(original_distribution) / temperature
    distribution = np.exp(distribution)
    return distribution / np.sum(distribution)

# 字符级LSTM文本生成

In [1]:
import keras
import numpy as np

path = keras.utils.get_file('nietzsche.txt',
                            origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
text = open(path).read().lower()    #lower() 方法转换字符串中所有大写字符为小写
print('Corpus length:', len(text))

Using TensorFlow backend.


Corpus length: 600893


In [2]:
text[:1000]

'preface\n\n\nsupposing that truth is a woman--what then? is there not ground\nfor suspecting that all philosophers, in so far as they have been\ndogmatists, have failed to understand women--that the terrible\nseriousness and clumsy importunity with which they have usually paid\ntheir addresses to truth, have been unskilled and unseemly methods for\nwinning a woman? certainly she has never allowed herself to be won; and\nat present every kind of dogma stands with sad and discouraged mien--if,\nindeed, it stands at all! for there are scoffers who maintain that it\nhas fallen, that all dogma lies on the ground--nay more, that it is at\nits last gasp. but to speak seriously, there are good grounds for hoping\nthat all dogmatizing in philosophy, whatever solemn, whatever conclusive\nand decided airs it has assumed, may have been only a noble puerilism\nand tyronism; and probably the time is at hand when it will be once\nand again understood what has actually sufficed for the basis of such\

In [3]:
maxlen = 60

step = 3

sentences = []

next_chars = []

for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
    
print('Number of sequences:', len(sentences))

Number of sequences: 200278


In [20]:
sentences[:2]

['preface\n\n\nsupposing that truth is a woman--what then? is the',
 'face\n\n\nsupposing that truth is a woman--what then? is there ']

In [21]:
next_chars[:2]

['r', 'n']

In [4]:
chars = sorted(list(set(text)))
print('Unique characters:', len(chars))
char_indices = dict((char, chars.index(char)) for char in chars)
char_indices

Unique characters: 58


{'\n': 0,
 ' ': 1,
 '!': 2,
 '"': 3,
 "'": 4,
 '(': 5,
 ')': 6,
 ',': 7,
 '-': 8,
 '.': 9,
 '0': 10,
 '1': 11,
 '2': 12,
 '3': 13,
 '4': 14,
 '5': 15,
 '6': 16,
 '7': 17,
 '8': 18,
 '9': 19,
 ':': 20,
 ';': 21,
 '=': 22,
 '?': 23,
 '[': 24,
 ']': 25,
 '_': 26,
 'a': 27,
 'b': 28,
 'c': 29,
 'd': 30,
 'e': 31,
 'f': 32,
 'g': 33,
 'h': 34,
 'i': 35,
 'j': 36,
 'k': 37,
 'l': 38,
 'm': 39,
 'n': 40,
 'o': 41,
 'p': 42,
 'q': 43,
 'r': 44,
 's': 45,
 't': 46,
 'u': 47,
 'v': 48,
 'w': 49,
 'x': 50,
 'y': 51,
 'z': 52,
 '忙': 53,
 '毛': 54,
 '盲': 55,
 '脝': 56,
 '茅': 57}

In [5]:
print('Vectorization...')
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

Vectorization...


In [9]:
x[0][:2]  #p 42  r 44

array([[False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False,  True, False, False,
        False, False, False, False, False, False, False, False, False,
        False, False, False, False],
       [False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False,  True,
        False, False, False, False, False, False, False, False, False,
        False, False, False, False]], dtype=bool)

In [10]:
from keras import layers
from keras.models import Sequential

model = Sequential()
model.add(layers.LSTM(128, input_shape=(maxlen, len(chars))))
model.add(layers.Dense(len(chars), activation='softmax'))

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128)               95744     
_________________________________________________________________
dense_1 (Dense)              (None, 58)                7482      
Total params: 103,226
Trainable params: 103,226
Non-trainable params: 0
_________________________________________________________________


In [11]:
from keras import Input, layers
from keras.models import Model, Sequential

input_tensor = Input(shape=(maxlen, len(chars)))
layer1 = layers.LSTM(128)(input_tensor)
output_tensor = layers.Dense(len(chars), activation='softmax')(layer1)

model = Model(input_tensor, output_tensor)

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 60, 58)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 128)               95744     
_________________________________________________________________
dense_2 (Dense)              (None, 58)                7482      
Total params: 103,226
Trainable params: 103,226
Non-trainable params: 0
_________________________________________________________________


In [12]:
optimizer = keras.optimizers.RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

In [6]:
def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [14]:
preds = np.asarray([0.1, 0.2, 0.3, 0.4]).astype('float64')
np.random.multinomial(1, preds, 10)

array([[1, 0, 0, 0],
       [0, 0, 1, 0],
       [0, 0, 1, 0],
       [0, 0, 0, 1],
       [0, 0, 1, 0],
       [0, 0, 0, 1],
       [0, 0, 0, 1],
       [0, 0, 1, 0],
       [0, 1, 0, 0],
       [0, 0, 0, 1]])

In [15]:
import random
import sys

for epoch in range(1, 60):
    print('epoch', epoch)
    model.fit(x, y, batch_size=128, epochs=1)
    start_index = random.randint(0, len(text) - maxlen - 1)
    generated_text = text[start_index: start_index + maxlen]
    print('--- Generating with seed: "' + generated_text + '"')
    
    for temperature in [0.2, 0.5, 1.0, 1.2]:
        print('------ temperature:', temperature)
        sys.stdout.write(generated_text)
        
        for i in range(400):
            sampled = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(generated_text):
                sampled[0, t, char_indices[char]] = 1
            
            preds = model.predict(sampled, verbose=0)[0]
            next_index = sample(preds, temperature)
            next_char = chars[next_index]
            
            generated_text += next_char
            generated_text = generated_text[1:]
            
            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

epoch 1
Epoch 1/1
--- Generating with seed: "l the habits of such a spirit resist the absurdissimum, in
t"
------ temperature: 0.2
l the habits of such a spirit resist the absurdissimum, in
the present and something to the potting the content and present interrect and something to the present and the potting that the power to the present in the woll the something and the polatical to the pole of the present interress of the present interrestions and and the present has to the present and that the present and the posting to the present and the present interrections and the present of t
------ temperature: 0.5
e present and the present interrections and the present of the good and so refleed mandess of the science and operate of the lame and the each a way to forther and interress of the disturent at a retarting the forme in preself the distance
of all present and the poltar to day have of the many the way anight be so man to the doen the expeasitical to greak and was gerrance, mane the 

always one, this psocon of wholr cluarimously put of the eopers which foll, the
prefouct, that, nature experience and functions and stromptylears, in matters in the perfictly who are look of
------ temperature: 1.2
d stromptylears, in matters in the perfictly who are look of the methang) is himl if, norlyonage does neceldis
ones" as clowe, is the moreing, ordinary hask,
such emple, to be more symptoms if valing highor, inner to sympatanted by that in please beings by read end,  ial invy of wench, many-misstrar, thousands: we great
edeplisiritations--it was
more seffients whare rale" to views gradually darsts), towict?--vanmannants than, moramity has
widlenm an incorr
epoch 9
Epoch 1/1
--- Generating with seed: "olution of man to the similar, the ordinary, the average, th"
------ temperature: 0.2
olution of man to the similar, the ordinary, the average, the states of the discovering the world of the soul, who must be a most delication of the sense of the sense and spirit of the present 

  This is separate from the ipykernel package so we can avoid doing imports until


h last endure and the method.

an only as a condition different and therefore. that in the sense of they cannot
a condition of the science and who are not translat the basis it is against enough that it is has under the past and all the p
------ temperature: 1.0
s against enough that it is has under the past and all the pusacible, in the highty nogayition of homeraly with opinions with the pointer to
religious and
opinion to-native dightersme and immense, should be one say, humantices under
but like sympathy is capacity has been thinking just at presence and notarmed by the most brunder to my spiritual spoten to sutstracle when phins reason whoms.

inal and wepparance is of in express, regarding also, people of fi
------ temperature: 1.2
nd wepparance is of in express, regarding also, people of firaring spiritations,
and it is eyes vencuous  s, it she tates--from them indence. but the
nobline and aid, we sensi last couns, thems always e
d? placilations in
the unmcatenly he carns itself

must get the delivefjuitly (in ranking commin it would cluarry in
inactoracation in order to religious as vourald abo
epoch 19
Epoch 1/1
--- Generating with seed: " spiritualization by the symbolism of art, perhaps as
music,"
------ temperature: 0.2
 spiritualization by the symbolism of art, perhaps as
music, and the soul the soul, and the conscience of the belief and profound and such a privile the same interpretation of the same any man and soul of the present man and soul of the existence of the same all the stom, the spirit of the same principles and the spirit into the soul of the soul, and all the spirit of the more sense of the present soul of the same intellect and sense of a more intellect an
------ temperature: 0.5
 soul of the same intellect and sense of a more intellect and wantings of the sense of the mankind so the fathers and of the opinion of his another and the belief--not been opposite our opinion of such as a conditions to
from the easily such a mand as instructive t

ciest every prived little hoored but discitdanded thanism--world of
lighness,"
possiriouslys one prevition, like,--as is gives to realishes of
serious tastes, i have
to do sochehrultpicnity
to it--or new
respect: they freedonary is tangely, should tuite with an other, one who do the arcistsi,ness of truth impressiod of you eason perijman. for .wholicsite, felieved a motpiniod where ones, in suichstand, and true to 
epoch 23
Epoch 1/1
--- Generating with seed: "d then bit by bit grow
healthy--i mean healthier. it is wisd"
------ temperature: 0.2
d then bit by bit grow
healthy--i mean healthier. it is wisdom." it is a strength and there is the antithesic and strength to the consciously and man and superior to the superior of the superiority of the strength and there are such a contemporation of the consciously which we must be the superiority of the spirit of the superior to a person which is the conscious and superior the first and there is the experience of the present the superior the

 the sense of the most manifest and the superiority and the influent to the most propour of the faculty and in morality of the same in the scienc of the might be in the fact to the sense of the pare and conscience of the will to a sense of the mentirally in bad to so above all "good to be already to be more thing the same to like to the distrustian to he must desire for the spirit of the present the society and soul is like to the soul. in the desire of th
------ temperature: 1.0
he society and soul is like to the soul. in the desire of the seducy, it, and at the distinctibal and mentints--and it who prestance, false
heaven not
from gain to pain to plenk the present,
they not for we favis-." every man and follep significance--the greates and first, bringsand also, he dilf for generatives, asseromant.


5e near to prich   genesralc spavidy," althic we furlious conducpationaxont, unholutigation benevens, with the relation and excepti
------ temperature: 1.2
xont, unholutigation benevens,

wheveich good instance, at germans, to spe autund mamow it: possession of truth;
go good place from a pudir of order newrist
same "enjoy, it
homoqwording, too, refl
epoch 38
Epoch 1/1
--- Generating with seed: "to give victory to knowledge, often
amounts to no more than "
------ temperature: 0.2
to give victory to knowledge, often
amounts to no more than the world in the spirit the conscience of the most powerful to the interesting the father and the most super-demons the presented in the present, the spirit and the present the conscience of the most stupidity and the perhaps a body and presented that the conscience of the sense of the present itself of the art of the seriousness of the most antiboted and the spirit of the fact that it is a consci
------ temperature: 0.5
ost antiboted and the spirit of the fact that it is a conscience and in the distance as the success of the such any anything the spirit of the master to the such conduct to possible, and with every generally because th

the could notenning with a enjoyment quare of nebegorical its
sanbs music chantamed it, to ariubaty knohiwd
in man god-there share
currepalemment
arronutr to refined--prripgle digutc in the higher
sense-"below?s to rewingous or desire pide of the vanie which is-doctrine.=--it is enomically, sleape
of
suffering of enjoyment gues of the
great fither a
faiths of any of all the need of
the ponean."
in itterued if-corten
the
pretents it.

ititu; of
no
sensible 
epoch 42
Epoch 1/1
--- Generating with seed: ",--ye, too, really find the noise of its wheels
insupportabl"
------ temperature: 0.2
,--ye, too, really find the noise of its wheels
insupportable the most and the success of the interposes an action to the self-desire and the action of the suppose the most desires to the superficial and self-conduct the world. the precisely the moral tendency of the world of the world of the suppose in the philosophical the most thing and the superficial man that the world of the position of the world o


13

=the hinties of with causation of the surpring and erverst than with a worldt these we recognishes of an onged fexercary or hows ingood and further syll."

144

=something gan see, the produce of churrums must be somethe central and de olds the past--ansimited depudion, with longin-lows f
------ temperature: 1.2
and de olds the past--ansimited depudion, with longin-lows forcy primozings"--this and
with hounces his
own
securiated are.

20ror
hall-disciplinion--donediness), moquese as metabilation--who nothing underseads suffer (to
mankird "t, as we-pieshensl.

68. immediate, that they deeced therein self--agine, withon
ipiner. on formingly, into nituloptic icka(whald a motives that who must daneed, con"lamite! in slaf: whyeverebrivoped by the question , his epiles 
epoch 46
Epoch 1/1
--- Generating with seed: "rstitions which have
hitherto flourished with almost tropica"
------ temperature: 0.2
rstitions which have
hitherto flourished with almost tropical individual and the fact of

practice of supreme justice, the father and more will to the sense of the more origin of the spirit and the morality and the philosophers of the world and the more whole and any one wishes to be a sense of the same and complete in the superiority of the person of the present sense of the same origin of the philosophers to the spirit of the same will to the spirit and the world and the constance and who man who believe that the st
------ temperature: 0.5
 world and the constance and who man who believe that the stome which and there betugher by a hating and man who are decedion of the store that the interpretation of the world betwilt! one wishes to be something who so the person or a higher and there is a he will done more of the "man is a so easily strike of their saded as they are something with a sacrifice of the very our own and stronger of the commonle the capacity. a sense. the badness, there is not
------ temperature: 1.0
he commonle the capacity. a sense. the badness, there is 

In [16]:
model.save('p20181018.h5')

In [7]:
from keras.models import load_model
model = load_model('p20181018.h5')

In [9]:
start_index = 10
maxlen = 60
generated_text = text[start_index: start_index + maxlen]
print(generated_text)

sampled = np.zeros((1, maxlen, len(chars)))
for t, char in enumerate(generated_text):
    sampled[0, t, char_indices[char]] = 1
sampled

supposing that truth is a woman--what then? is there not gro


array([[[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]])

In [23]:
sampled.shape

(1, 60, 58)

In [26]:
model.predict(sampled, verbose=0)

array([[0.0000000e+00, 1.3454086e-12, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 1.5377448e-16, 1.0681714e-24,
        1.5564617e-20, 2.1883440e-25, 2.3838775e-24, 8.2914340e-16,
        9.9273131e-20, 9.7093523e-25, 1.8981710e-26, 1.2249539e-17,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        1.3228315e-07, 1.6785821e-21, 0.0000000e+00, 1.8444809e-01,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 3.4226248e-01,
        3.6959390e-33, 0.0000000e+00, 0.0000000e+00, 6.5049402e-02,
        0.0000000e+00, 0.0000000e+00, 4.3683366e-20, 0.0000000e+00,
        8.3595319e-07, 3.8485715e-01, 5.8052078e-37, 0.0000000e+00,
        2.3381699e-02, 1.6872093e-07, 1.5793412e-17, 5.9272093e-14,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 8.8889625e-09, 6.7145551e-09, 4.8545745e-09,
        4.7197402e-09, 1.3393892e-08]], dtype=fl

In [27]:
model.predict(sampled, verbose=0)[0]

array([0.0000000e+00, 1.3454086e-12, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 1.5377448e-16, 1.0681714e-24,
       1.5564617e-20, 2.1883440e-25, 2.3838775e-24, 8.2914340e-16,
       9.9273131e-20, 9.7093523e-25, 1.8981710e-26, 1.2249539e-17,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       1.3228315e-07, 1.6785821e-21, 0.0000000e+00, 1.8444809e-01,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 3.4226248e-01,
       3.6959390e-33, 0.0000000e+00, 0.0000000e+00, 6.5049402e-02,
       0.0000000e+00, 0.0000000e+00, 4.3683366e-20, 0.0000000e+00,
       8.3595319e-07, 3.8485715e-01, 5.8052078e-37, 0.0000000e+00,
       2.3381699e-02, 1.6872093e-07, 1.5793412e-17, 5.9272093e-14,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 8.8889625e-09, 6.7145551e-09, 4.8545745e-09,
       4.7197402e-09, 1.3393892e-08], dtype=float32)

In [28]:
model.predict(sampled, verbose=0)[0].shape  #Unique characters: 58

(58,)

In [30]:
model.predict(sampled, verbose=0).shape

(1, 58)

In [29]:
y.shape

(200278, 58)

In [31]:
y[0].shape

(58,)

In [12]:
import sys
temperature = 0.2
for i in range(4000):
    sampled = np.zeros((1, maxlen, len(chars)))
    for t, char in enumerate(generated_text):
        sampled[0, t, char_indices[char]] = 1

    preds = model.predict(sampled, verbose=0)[0]
    next_index = sample(preds, temperature)
    next_char = chars[next_index]

    generated_text += next_char
    generated_text = generated_text[1:]

    sys.stdout.write(next_char)
    sys.stdout.flush()

茅 and t茅mes pro (therue concer ait f pheh茅e

  This is separate from the ipykernel package so we can avoid doing imports until


       co sosten c茅be becon t茅 and thenial茅t and and the o the toane tui2at an and    che eret茅t and takian an of8er
t in heo3vfer  to
he ares the an the m p te5 茅ment whit茅as be the te mong 茅mi animatuioe as thinthe tnt t茅pe the ute t due wrnu茅t it la茅 any ther aner an3bere rartingeelont ohe  stilh 茅ge茅 and the  on th=cone the 
aoptertyn and evalf cain re 茅cenaw byfant茅 ther e- rom9n and th茅ther mena cane4 ortheneedpritenhe8r paint newhe茅e thatu  an      d茅 an off plentocis malre茅d9m tone poin the9 wher[o hemw a
authe wowe pwer cen8rne of wen men9 ofhere30 the in 5g =the atrqlustitios weme f  ep8 allsn th te oun t sondi ratt3 an so whan oe  eatis o 茅; there8pp the  tx outigithesniolinues the xin9茅t and ther ten  ] aevestiti[a th4th at thitiorhecer:s coaeee  theoe dt prim 茅gremesd on茅 and the  on de  
. the    th af affte of un roven an茅ment of an  oe5, of anan ao[e ant wowe anperle t tofi茅 thrredunt ol no be and the aolishpisnce stin0 an e9'herst and on thee his the san the sonvestial