In [23]:
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
import numpy as np
import random

In [2]:
# Read the whole corpus into memory and normalise it to lower case.
filename = "ShortCommentsFiltered"
# A context manager guarantees the file handle is closed once the text is read.
with open(filename) as corpus_file:
    raw_text = corpus_file.read()
raw_text = raw_text.lower()

# All unique characters in the text.
# Sorted so that the character <-> index mapping is deterministic: iteration
# order of a plain set is not guaranteed to be the same from one interpreter
# run to the next, which would silently scramble the meaning of the model's
# inputs/outputs when previously saved weights are loaded.
chars = sorted(set(raw_text))

# mapping character to integer
char_indices = dict((c, i) for i, c in enumerate(chars))
# mapping integer back to character
indices_char = dict((i, c) for i, c in enumerate(chars))

# number of previous characters required to predict the next character
maxlen = 100

In [3]:
def get_text_chunks(raw_text_part):
    """Build one-hot training arrays from a piece of text.

    Every window of ``maxlen`` consecutive characters (stride 1) becomes one
    input sequence, and the character immediately following the window is its
    target.

    Parameters
    ----------
    raw_text_part : str
        Text to cut into windows. Every character must be a key of the
        module-level ``char_indices`` mapping.

    Returns
    -------
    X : numpy.ndarray of bool, shape (nb_sequences, maxlen, len(chars))
        One-hot encoded input windows.
    y : numpy.ndarray of bool, shape (nb_sequences, len(chars))
        One-hot encoded next character for each window.

    ``nb_sequences`` is ``len(raw_text_part) - maxlen``, or 0 when the text is
    not longer than ``maxlen``.
    """
    # Windows are sliced on the fly below instead of being collected into
    # intermediate Python lists first; the resulting arrays are the same.
    nb_sequences = max(0, len(raw_text_part) - maxlen)
    print('nb sequences:', nb_sequences)
    # Builtin ``bool`` is used as the dtype: the ``np.bool`` alias was
    # deprecated and later removed from NumPy.
    X = np.zeros((nb_sequences, maxlen, len(chars)), dtype=bool)
    y = np.zeros((nb_sequences, len(chars)), dtype=bool)
    for i in range(nb_sequences):
        for t, char in enumerate(raw_text_part[i: i + maxlen]):
            X[i, t, char_indices[char]] = 1
        y[i, char_indices[raw_text_part[i + maxlen]]] = 1
    return X, y

In [4]:

# Model definition: a single LSTM layer reading (maxlen, len(chars)) one-hot
# windows, followed by dropout and a softmax over the character vocabulary.
#
# For a bigger dataset a deeper stack can be inserted after the first
# LSTM/Dropout pair (not used here), e.g.:
#   LSTM(512, return_sequences=True)  + Dropout(0.5)
#   LSTM(512, return_sequences=True)  + Dropout(0.20)
#   LSTM(256, return_sequences=False) + Dropout(0.5)
model = Sequential()
for layer in (
    LSTM(512, input_shape=(maxlen, len(chars)), return_sequences=False),
    Dropout(0.5),
    Dense(len(chars)),
    Activation('softmax'),
):
    model.add(layer)

# Next step: either compile directly, or load saved weights and then compile.


In [14]:
def sample(preds, temperature=1.0):
    """Sample an index from a probability array, reweighted by a temperature.

    Parameters
    ----------
    preds : array-like of float
        Probabilities over the vocabulary (e.g. a softmax output).
    temperature : float, optional
        Values below 1 sharpen the distribution, values above 1 flatten it.
        A temperature of 0 (or less) selects the most probable index
        deterministically instead of dividing by zero.

    Returns
    -------
    Index (as returned by ``np.argmax``) of the sampled entry.
    """
    preds = np.asarray(preds).astype('float64')
    if temperature <= 0:
        # Greedy choice: the limit of the reweighted distribution as T -> 0.
        return np.argmax(preds)
    # log(0) is -inf, which correctly turns into probability 0 after exp();
    # silence only the divide warning it triggers.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature
    # Shift by the maximum before exponentiating so that very small
    # temperatures do not underflow every term to 0 (giving 0/0 = NaN).
    # The shift cancels out in the normalisation below.
    logits = logits - np.max(logits)
    exp_preds = np.exp(logits)
    probs = exp_preds / np.sum(exp_preds)
    # One multinomial draw yields a one-hot row; argmax recovers its index.
    probas = np.random.multinomial(1, probs, 1)
    return np.argmax(probas)

In [34]:
def generatetext(text_partition, i, model, length=500):
    """Generate and print text from a (partially) trained model.

    A random window of ``maxlen`` characters is taken from the module-level
    ``raw_text`` as the seed. The model then predicts one character at a time;
    each predicted character is appended and the window slides forward by one.

    Parameters
    ----------
    text_partition : int
        Only used in the progress line that is printed.
    i : int
        Only used in the progress line that is printed; during training the
        epoch counter can be passed to keep watch on the quality of the model.
    model :
        A trained model; ``model.predict(x, verbose=0)`` is called with a
        ``(1, maxlen, len(chars))`` one-hot array.
    length : int, optional
        Number of characters to generate after the seed (default 500).
    """
    # Randomly pick the seed window from the corpus.
    start_index = random.randint(0, len(raw_text) - maxlen - 1)
    seed_text = raw_text[start_index: start_index + maxlen]
    # Collect the pieces in a list and join once at the end.
    generated = [seed_text]
    print("EPOCH : ", i, " | TEXT PARTITION : ", text_partition)
    for _ in range(length):
        # One-hot encode the current window. It is always exactly ``maxlen``
        # characters long, so it is used as-is rather than sliced with a
        # hard-coded length that could disagree with ``maxlen``.
        x = np.zeros((1, maxlen, len(chars)))
        for t, char in enumerate(seed_text):
            x[0, t, char_indices[char]] = 1.
        # Predict the distribution of the next character and sample from it.
        preds = model.predict(x, verbose=0)[0]
        next_index = sample(preds)
        next_char = indices_char[next_index]
        generated.append(next_char)
        # Slide the window: drop the oldest character, append the new one.
        seed_text = seed_text[1:] + next_char
    print('follow up with: ' + ''.join(generated))

In [35]:
# Configure training: cross-entropy over the one-hot next-character targets,
# RMSprop optimiser, and accuracy reported as an additional metric.
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [36]:
# model.load_weights('data_augmentation_7_0.h5')

In [37]:
# The corpus is trained on in partitions; each partition is one-hot encoded
# separately by get_text_chunks before being passed to model.fit.
number_of_partition = 8
epochs = 5
# Floor division keeps chunkSize an integer on both Python 2 and Python 3;
# with true division the slice indices below would be floats and raise
# a TypeError. Any remainder characters at the end of the text are not used.
chunkSize = len(raw_text) // number_of_partition
for i in range(0, epochs):
    for text_partition in range(number_of_partition):
        chunk_start = text_partition * chunkSize
        chunk_end = (text_partition + 1) * chunkSize
        X, y = get_text_chunks(raw_text[chunk_start:chunk_end])
        model.fit(X, y, batch_size=1000, nb_epoch=1, verbose=0)
        # Print a generated sample after every partition to monitor progress.
        generatetext(text_partition, i, model)
    # Saved once per epoch; text_partition still holds the index of the last
    # partition here, so the file name is data_augmentation_<last>_<epoch>.h5.
    model.save_weights('small/data_augmentation_'+str(text_partition)+"_"+str(i)+'.h5')

('nb sequences:', 2646813)
('EPOCH : ', 0, ' | TEXT PARTITION : ', 0)
follow up with: as a good play as usual. tyler perry plays never be boring. wish he could have a new play out every history, and forlong and excellent acting.
had not seen the movie on a time with the music at his best. but it has a realistic film, some.
i love this movie which you pay back creating, this film will nother get familiars to watch again. excellent movie. i aw love.
what a great ending, great dvd for the magices great people. feel s to walten house and clarnom monster finding the ce of home scept, of your fast bed shy econolic and morgan freeman great should, shows this - a movie i heart felt tru
('nb sequences:', 2646813)
('EPOCH : ', 0, ' | TEXT PARTITION : ', 1)
follow up with:  in life . to great  people showing friend ship , and closeness in hard times  to the end
this is a from the best of the best film with my anniousoberantem's ineey did. ticklo collectoo, don't get the marlen, auch.  tim allen i

('nb sequences:', 2646813)
('EPOCH : ', 1, ' | TEXT PARTITION : ', 4)
follow up with: es at a faster rate. waiting for the next one.
ok this movie was totally worth the money. i love dake it very much. it is a true to watch!depp makes they do it again.
saw this for years on tv get this movie.  it is barking movies and these very very glorual american series
definitely enjoys the movie.  great show.  jrs awesome quality for the next sitcom that i can foun  great cheaping  a fun.
en my only 1 season perfect as well, i would recommend this to anyone who has this on dvd now.  every one needs old staging to the
love this series. it is a very dark movie. sooe for the series. loving m
('nb sequences:', 2646813)
('EPOCH : ', 1, ' | TEXT PARTITION : ', 5)
follow up with: rest is finally one dvd.
dickens' stories in film will entertain almost anone who loves a good story. where we way love and tenget so many of 2 mencisich
i enjoyed this again - breakis captivating and well protuce. cate blark p

('nb sequences:', 2646813)
('EPOCH : ', 3, ' | TEXT PARTITION : ', 0)
follow up with: ound then the dvd  and  better this is a very good movie tps the best
another fantastic dip into the twilight fans
this is a classic true story, the plot sides to sex life make your teen i liked it! great classic surprises!!!!
love this movie..i think the well enough. all the made is great! oy brilliant.
my husband and i had fot give it one this version really reminding this movie longer will watch it again and again. i lived when the lets you buy it.
this is a very good movie... tell my if you like classic movies this is a great movie mories could get a lot xuman.
the way to disappoint a: a w
('nb sequences:', 2646813)
('EPOCH : ', 3, ' | TEXT PARTITION : ', 1)
follow up with: you loved the older montalbano, you will like the young one as well.  the series is just as well done.
this was owned the best hitnic wereson civil war. there ended their refines.with this movie was clear and cheesy and sali an

('nb sequences:', 2646813)
('EPOCH : ', 4, ' | TEXT PARTITION : ', 4)
follow up with: ime. each episode left you satisfied.
i loved the show when it was on and i'm very much enjoying  ago.  my sacing fred is one of the friends agains on great shaller.
this is a great movie. it is inspirational and wonderful but as starring older senses. why thanks quited.
a great series.  i think it has all ramei and the discs, or captainy.
the case was great and arrived in great condition.  i am still in a film of begin. good comedy and it's really great!
paig is a wonderful set of that einard'in is out at first in the movie concern, this is no.
very good, super and romantic cartoon.  still a 
('nb sequences:', 2646813)
('EPOCH : ', 4, ' | TEXT PARTITION : ', 5)
follow up with: eat.
he plays a little different part in this film. and as always he does well. good story.  good action.brand nets.
this selm she applay and i enjoyed it very much. anyone that likes his true who still they snefflless.
this is

('| EPOCH : ', 1, 'TEXT PARTITION : ', 1)
follow up with: opment is good and the interaction is fun. enjoyed this season!
great! it is quite interesting and the fish true acting on dvds do not relate to it in this before this gibtes.
good boughter queen old time.  how great i watched this one!  this is the slow movie funny like it is outstanding fans!
enjoyed watching this movie over and over again to me.the cettarious are very good. if you are anytrics but amazon does another :)
another classic christmas movie that the whole family enjoyate and too saw this and love what is a great.
my favorite our niphers did it again. it's because johns to teir to
