In [1]:
import numpy as np
from collections import defaultdict
import time
import sys

from keras.models import Model, load_model
from keras.layers import Input, Embedding, Conv1D, Multiply
from keras.optimizers import Adam
from keras.callbacks import LambdaCallback

from keras_tqdm import TQDMNotebookCallback

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
dataset_path = 'C:/Datasets/amazonreviews'
with open('{0}/train.ft.txt'.format(dataset_path), encoding='utf-8') as f:
    train_str = f.read()
with open('{0}/test.ft.txt'.format(dataset_path), encoding='utf-8') as f:
    test_str = f.read()

In [3]:
def data_parse(s):
    d = s.split('__label__')[1:]
    y = [int(s[0])-1 for s in d]
    #titles = [s.split(': ', 1)[0][2:] for s in d]
    x = [s.split(': ', 1)[1] for s in d]
    return (x, y)

(train_x, train_y) = data_parse(train_str)
(test_x, test_y) = data_parse(test_str)

In [4]:
del train_str
del test_str

In [5]:
num_groups = 1
num_layers = 8
num_filters = 128

batch_size = 16
batch_len = 2**num_layers * 2

model_id = int(time.time())

In [6]:
# remove ~10% longest, sort by length
def data_filter(x, y):
    mask = [len(s) < 800 for s in x]
    x = [s for i, s in enumerate(x) if mask[i]]
    y = [s for i, s in enumerate(y) if mask[i]]
    sort = np.argsort([len(s) for s in x])
    x = [x[i] for i in sort]
    y = [y[i] for i in sort]
    return (x, y)

(train_x, train_y) = data_filter(train_x, train_y)
(test_x, test_y) = data_filter(test_x, test_y)

In [7]:
print(len(train_x), len(train_y))

3302164 3302164


In [8]:
#np.mean([(ord(c)>31 and ord(c)<127) for c in ''.join(s for s in train_x)])
# 99.7% within this set
chars = [chr(i+32) for i in range(126 - 32 + 1)]
print(''.join(chars))
char2idx = defaultdict(lambda: 2, {c: i+2 for i, c in enumerate(chars)})
idx2char = defaultdict(lambda: ' ', {i+2: c for i, c in enumerate(chars)})
input_dim = len(chars) + 2

 !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~


In [9]:
def generate_data(batch_size=batch_size, test=False):
    if test:
        x, y = test_x, test_y
    else:
        x, y = train_x, train_y
    while True:
        bx = np.zeros((batch_size, batch_len), dtype='int32')
        by_text = np.zeros((batch_size, batch_len, input_dim), dtype='int32')
        by_sent = np.zeros((batch_size, batch_len, 2), dtype='int32')
        for batch_idx in range(batch_size):
            data_idx = np.random.randint(len(x))
            slice_s = max(0, len(x[data_idx]) - batch_len)
            if slice_s != 0:
                slice_s = np.random.randint(slice_s)
            slice_e = min(slice_s + batch_len, len(x[data_idx]))
            x_sub = x[data_idx][slice_s:slice_e]
            bx[batch_idx][-len(x_sub):] = [char2idx[c] for c in x_sub]
            for k, c in enumerate(x_sub[1:]):
                by_text[batch_idx][k + batch_len - len(x_sub)][char2idx[c]] = 1
            if slice_e == len(x[data_idx]):
                by_text[batch_idx, -1, 1] = 1
            else:
                by_text[batch_idx, -1, char2idx[x[data_idx][-1]]]
            by_sent[batch_idx, batch_len - len(x_sub):, y[data_idx]] = 1
        yield (bx, [by_text, by_sent])

In [10]:
bx, by = next(generate_data())
print(''.join([idx2char[i] for i in bx[0]]))
print(''.join([idx2char[np.argmax(v)] for v in by[0][0]]))

                                  Everyone I knew had this album when I was a teenage stoner and it's not only because it's what all the other stoners expected you to have - it's just a damn good album and thanks to all those Zep heads out there, it's now been remastered with a little help by some of the members. So I get "Rock & Roll", "Stairway To Heaven" and all the others in full glory. Makes me wanna smoke up, but maybe I'll just pour myself some wine and go back to the Carly Simon albums - age sucks. 
                                  veryone I knew had this album when I was a teenage stoner and it's not only because it's what all the other stoners expected you to have - it's just a damn good album and thanks to all those Zep heads out there, it's now been remastered with a little help by some of the members. So I get "Rock & Roll", "Stairway To Heaven" and all the others in full glory. Makes me wanna smoke up, but maybe I'll just pour myself some wine and go back to the Carly Si

In [11]:
def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [12]:
l_in = Input(shape=(None,), dtype='int32')
h = Embedding(input_dim=input_dim, output_dim=num_filters)(l_in)
for i in range(num_groups):
    for j in range(num_layers):
        l_filter = Conv1D(filters=num_filters, kernel_size=2,
            dilation_rate=2**j, padding='causal', activation='tanh')(h)
        l_gate = Conv1D(filters=num_filters, kernel_size=2,
            dilation_rate=2**j, padding='causal', activation='sigmoid')(h)
        h = Multiply()([l_filter, l_gate])
l_out_text = Conv1D(filters=input_dim, kernel_size=1, activation='softmax')(h)
l_out_sent = Conv1D(filters=2, kernel_size=1, activation='softmax')(h)

model = Model(inputs=l_in, outputs=[l_out_text, l_out_sent])

adam = Adam(0.001)
model.compile(optimizer=adam, loss='categorical_crossentropy')

In [13]:
print(model.summary())

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, None)          0                                            
____________________________________________________________________________________________________
embedding_1 (Embedding)          (None, None, 128)     12416       input_1[0][0]                    
____________________________________________________________________________________________________
conv1d_1 (Conv1D)                (None, None, 128)     32896       embedding_1[0][0]                
____________________________________________________________________________________________________
conv1d_2 (Conv1D)                (None, None, 128)     32896       embedding_1[0][0]                
___________________________________________________________________________________________

In [14]:
def generate_text(epoch=None, logs=None, length=400):
    print()
    print('-- epoch {0}\n'.format(epoch))
    bx, by = next(generate_data(batch_size=1, test=True))
    bx = bx[0][-(2**num_layers):]
    seed_str = ''.join([idx2char[i] for i in bx])
    print('-- seed: {0}'.format(seed_str))
    print()
    for diversity in [0.2, 0.5, 1.0, 1.2]:
        print('-- diversity: {0}'.format(diversity))
        #sys.stdout.write(seed_str)
        for i in range(length):
            y, y_sent = model.predict(np.array([bx]))
            # don't sample mask
            y[0, -1, 0] = 0
            y = sample(y[0, -1])
            if y == 1:
                break
            c = idx2char[y]
            sys.stdout.write(c)
            sys.stdout.flush()
            bx = bx[:][1:]
            bx = np.concatenate((bx, [y]), axis=-1)
        print('\n')

gen_callback = LambdaCallback(on_epoch_end=generate_text)

In [15]:
def save_model(epoch=None, logs=None):
    model.save('models/wntv_{0}.h5'.format(model_id))
    print('saved model')

save_callback = LambdaCallback(on_epoch_end=save_model)

In [16]:
def trainfor(epochs, save=True, gen=True):
    callbacks = [TQDMNotebookCallback()]
    if save:
        callbacks.append(save_callback)
    if gen:
        callbacks.append(gen_callback)
    model.fit_generator(generate_data(), int(1e4), epochs,
        verbose=0, callbacks=callbacks)

In [17]:
trainfor(10)

saved model

-- epoch 0

-- seed:                                                                                                                                             Received battery and it was dead. I charged it.Still dead. I cleaned the contacts, which seemed dirty...still dead. 

-- diversity: 0.2


  This is separate from the ipykernel package so we can avoid doing imports until


It didn't got thero at only store it faipment and the najpror Eur Magno6* 

-- diversity: 0.5
Bas written Protendated the vifting Giilelie Spinabras dongs ar the box on the HP+-won't have have something door wrekence doests to strolt/Went curled-even one's head shome. 

-- diversity: 1.0
I have style. Ergliate and News (clape stalls or does big chelds" to cxace I havem when mysteing as a grarner heagal hot fillers look pregation Clams. I would else sing...when onced at have seeing that is settings up up standing, (expensives suppose I'ch role that like the Wars were not but it with an orided and 2gibo is one a resport weimm said I would no quality payes. 

-- diversity: 1.2
My bous meanks. Aftit kich, I can't have, mucoed, could check. bolly the brand cards and eye, lame. I feel could even exannel book and furming it upswquave that if it is times to not the imition and years that drant! There shutty harelilled hold, reading for cwe cooking I can, but I will understand there upsion poli

saved model

-- epoch 1

-- seed: od" was absolutly astonishing. The re-birth of southern rock even. This, this, sounds like all the other crap you damn kids are listening to theese days-(that is a minus sign)a year or two. And for all y'all who don't like it, espodeeza.(that is a period) 

-- diversity: 0.2
would should several on many stud as a weak curate the keeps, the day page is a birth which I had to get all with terrible shows 

-- diversity: 0.5
because some of 3 on the various but charge. My Carblilate. I'll be homes. I cd sericle thit together emihes. I howledes the electric creativisouls loted... withouse, we contain failer my tovemic, and i am a compeletus out this delight --- no go of Fiult 4 (Good Aafs everything the USS baren's maangry, and herpy and how she left, makin' is a depictly who want to pund coDe worth what weody copy I h

-- diversity: 1.0
ave to head in recieve mebrability. We jecies or ridiculousble in with no good dvies, but is also gameshow that should" p

saved model

-- epoch 2

-- seed: azon as soon as it was possible. It was the biggest disappointment in my life, when I watched the movie. It was looking exactly the same as on VHS! Even subtitles were encoded on the screen. The chinese bootleg was a lot better than this original release! 

-- diversity: 0.2
I part, button imodges to reality. 

-- diversity: 0.5
Although in their hospysees to it on 1/2), stay fast. But the beliec one of my chocked. And the shagwortre! Ohe exception, if you away with how two must black and gui kissing in, it isn't not support for Xpoaebo maaning the large handle three day vocal. The book is flaft last tIttaked the test. So installed by a little cheap. 

-- diversity: 1.0
Too causing business. I sent would do it over the heaust. 

-- diversity: 1.2
This book defender after world-tettever the while thought properden you have a glasses for styone who has the same of the best pumedlits answered getting the rest brand historlotty, glush and grace to detail. 

saved model

-- epoch 3

-- seed: y been knitting for a couple of years, but thought this book would give me some helpful advise. I would reccommend (one of my most favorite books) "Teach yourself Visually Knitting" by Sharon Turner...lots of pictures with easy to understand intructions.. 

-- diversity: 0.2
I wouldn't be long Orylus, and armg flaus've done standlible! I gave it working on time at advance this album was hole colling with inction and then to know. I useg, what you, on literational choices years, Christians for are jell food and who true to the Savins, therefy varial discan, however, then your fans, it's said time to the lengthrooms coasts approach two much getting it to fursion I recy 

-- diversity: 0.5
music on pictured on the mail account, but this stutated bring is done turning up his reader star and great buy graorylighting and for the fisticss up plannerely on world. 

-- diversity: 1.0
My levelation, and should praying the scene, percet imaginate, and skin. After

saved model

-- epoch 4

-- seed: ow the question is why buy this mouse if you can't make use of all the functions?! Of course an optical mouse is a great improvement over the former mechanical ones, but you have alternatives to this specific one.I have purchased a similar product (an opti

-- diversity: 0.2
on, was is so many waffles that I wanzoial around read many different examples wheels on "Dodwooa you are of Trait bookabeautiful Histical works.This is the base is truly rune out what just about 1pend this kept well, but you could not personallythlages if her how to say and still think in the world close, and Bewago less is seriously type. 

-- diversity: 0.5


-- diversity: 1.0
All of the markine dench they really great through the presups is a fantasser bad) and sit, these backs I've ever relies. The shorte. The actual's only thin a business I finally one could have to do in the battery album 

-- diversity: 1.2
(these volument belt is riods. A part of shift 



saved model

-- epoch 5

-- seed:  so bad that it seems as if this guy is trying to degrade Josh. I don't know why---Josh only brings joy to anyone who hears him. I hope nobody is fooled into buying this by the title. I agree with other reviewers who wish there was a "zero star" category. 

-- diversity: 0.2
And her so instead on cliwes right by Leeon Drive feel/fan of under lives with triday has been a LOT nardoff, then it will be know special fantastic. I wouldn't as amortding them because I would make it just recently for the worst Cheer, and has been reading, since I like the cent from the bigital as left less only then your storget customer, but its due to disposable. I've reading this book, usar

-- diversity: 0.5
ly for a few digital copy unknown, it's next. My for it. The design hideoized. Wound quality well!! 

-- diversity: 1.0
While wonning remackine for cont and keepers, reading to decommend role thing that I review, the thrash is. Haarliate the voices of Movisy Johnlock, y

saved model

-- epoch 6

-- seed:                                                                                       Absolutely fantastic series.BUT THE SOUND TRACK THAT ACCOMPANIES IT IS SO ANNOYING THAT I'D LIKE TO CHOKE OUT THE PRODUCER .AT TIMES IT ACTUALLY DROWNS OUT THE DIALOGUE. 

-- diversity: 0.2
BUY I WOWN FASE STORY AREVER YOU LEVEN WAS THIS IS DVD ENDROMRE A VRETAVE IF YOU STORLIFTLityme EDP ALP CREWE 

-- diversity: 0.5
READE AS (IT GET Ro23 OF ANM FOR SWOLBOD - ACCRABALY DICRAVER COBONTONS REVERYED OROE IT. Mblairs text would after FOR SMB but I have AM (..but the score singing a family just got have alocking. Still, but they are making and layassourply would be a new. 

-- diversity: 1.0
I am now usually, he would purct park it, draws NNH ES,it after heavy to have to say that I like more him ridden. It's large will make not as a decusine can-run and docuygrainer disposually you feel easily, and interesting. Best gravo. Don't easier when it great, and along with ways. 

saved model

-- epoch 7

-- seed: n three days,AND in bed, after a bad accident. It has taken me two years to write mine and as a result of reading her book I was empowered to start another e-book which I completed in just a couple of weeks.Thank you Ponn for your courage and inspiration. 

-- diversity: 0.2


-- diversity: 0.5
It's though a result seem at the top bulboard to see I underfeverable and $226 on insight adaptruncs and doing scratchen the help this with a for the search and off too. In 16 along ruines and fighting. It was another of Transfer is really the movies and it works to stand about together. I'm knowledge songs that game, and his old very disappointed by the jutrefippa) off for 12 software, Timardrein

-- diversity: 1.0
d of. Is the good being as it information. What a detailed finished. Ocal abr. If uso expercial book would just happening on the invalued and cast and you turn it.Nice pieces with the actualary a complicated I am "Spais. I enjoy the old scope better 

saved model

-- epoch 8

-- seed: acters. The sense that you never really know someone...or something. There is much that you will never know. The book flowed perfectly from chapter to chapter and I couldn't put it down. Thanks Alice Hoffman for making me stay up past my bedtime once again

-- diversity: 0.2
, and it sand really concern. It's so well CDGOA Ston's any cartoon printing and the first they just time.Totally threw the most. Even all 1 drag also in a descriptions. While apixigals were pretty giving and the worst album reinforce all that care and avoid for the coffee style in the. NEVSVEN years all to Jahrough him is a meeler. 

-- diversity: 0.5
So, it was him to the result yet portrayy. Villed is the intepart all but this book is not accyy anything like 30 part isn't!!!!~ Jechism, nicely even knys this. 

-- diversity: 1.0
I absolutlent when the meatened to smorteen today to into "also reading this book is like free) about holding possible, it is a nice product.. 

-- diver

saved model

-- epoch 9

-- seed: a TV and that lady still look fat & short.One note, try not to use this TV with the 4:3 mode. Similar to other rear projection TV, LCD, plasma TV, the 2 sides of the screen may generate color/contrast/brightness mismatch when being used in that mode for an

-- diversity: 0.2
 at contribules or an easier goes so much better his move to date of mothers are for a group. 

-- diversity: 0.5
This is the best particular eaping reading, audio dremed and cassional bad star him dish at the actors from the house book was hard to get AWL. But is one of the best storamed. 

-- diversity: 1.0
It was also right! It has been bules so so far. On. Was a lot of No'sned CLtFR4. 

-- diversity: 1.2
However, the only issue in great for years. The only code, that's snot absolute. It all, left the way for people it rocks, terribly wonderful simply more of patternold from the closer sick not introll world but in a while. We un buround of moms. 




In [18]:
adam = Adam(0.0005)
model.compile(optimizer=adam, loss='categorical_crossentropy')
trainfor(1e9)

saved model

-- epoch 0

-- seed:                                                                                                             It was the best value for my money, with four grandchildren using it. They had no trouble attaching it to the easel. The paper was very heavy duty. 

-- diversity: 0.2


  This is separate from the ipykernel package so we can avoid doing imports until


I have learned this thing simply to fascinety". Well, have narrating thoughts and I shared more than the cuts" of Girl" "Waits Lem down and Az; but whist, the scallet bungan beats much better book to answeros. I have a Himate amazon. But i"Ander. 

-- diversity: 0.5
Don't waste your money I don't think that role. If you are shode, Amazon constantly nothing like this film of big at about what they are the most obzies it red. Did repes]. I have liked the author to ruinnia bukes: Deaff once with Christmas) geber and talking to the class.Was slog no matter-5 dvd. If you want my teased as my back in last time. If they weren it when a Suds all they worked oneetingi

-- diversity: 1.0
ng said the variety of pleasure and good thing would hundred to beliefbeant window a nemoods. The characters" what is, Comparers that it kinda with their change a lot of save it does those trouble book. Every time I got play for octful number's time to helpful to so)they think they never real description and wel

saved model

-- epoch 1

-- seed:                                                                                                      Thicker than I thought it would be. Utterly worthless for reading Menus in a restaurant (unless you want to make a complete fool out of yourself). A joke. 

-- diversity: 0.2
Best -," Basicploitto of Kentena pset. There is not flash, they strong and notes He is helpful. I had $.! 

-- diversity: 0.5


-- diversity: 1.0


-- diversity: 1.2
Don't provide the table mother songs and they sell this was my husband to a tape...Critics CD self-condensational actually out of a remote; this is educerial message and Adooge their share when authors IROEBIN racked it with comparison-CD invaluably lyrics (read. 



saved model

-- epoch 2

-- seed: esting. I had to read it for a school project and i'm glad i picked this book because it really helped me realize what the blacks really had to go through and that wasn't even 1/2 of it. I recommend it to everyone because it was such a heart warming book. 

-- diversity: 0.2
Trucks level in CD when I don't arrive a mesults of continuous book to see what she loves product) sure. I keep getting exercies. i do never heard to hearing Jerriss!!! 

-- diversity: 0.5


-- diversity: 1.0


-- diversity: 1.2
The last year from Amazon helped to carf. While fairly at options. 



saved model

-- epoch 3

-- seed:                                             Use all flavors to flavor food without calories and sugar. Tastes great, but you MUST be careful on pricing. I only paid $2.99 per bottle and at the time of writing this, the price is now $6.95. Consumer beware! 

-- diversity: 0.2
will by Earth the Generant''s brings and a composed his masters killed tried to end.The handling retire immediately, so. Its well to metap today song, I always run story movie. This is an intentions. i gave about $31).It's providing these badleny and significance. 

-- diversity: 0.5
I use it or Juliatura and Can't Great Gods IN charged in programs. She going to read at a hero and it seems a good for 1.1- Toge. I am essofs sense true musice without but the remake, and in the bieome actors a continue that successfully boring, and have the naffet. When I heard a word of action and guaracneocher,=- However, the story line, in children is indeed covers that cealing out" and with t

-- 

saved model

-- epoch 4

-- seed:                                    Bought for my son who is 16. The bag is well built with a lot of padding and large pockets. He loves it. I would not hesitate to buy this bag again. Well worth the money. Shipped out fast and arrived on time as promised. 

-- diversity: 0.2
They were working wypparances areas from the tea girlfrica. So dog might. Where if other story models a wonderful, or our antots, but other than the humor slided: Amazon for at English, a really gave me hearings. Readers. The things are much better? Iflesher was very broade my ceast two picture of Acceview container again. It for some position of your first lies Luck Kindle Season/solution again, 

-- diversity: 0.5
I found problems, but I highly be my favourit and I ordered sleep takes while to get more about and went being mix or hype a movie: "wravery which are very hard as much down doing anything suspense small fact that drugs saved apart advis who needs. It air I have ever ow

saved model

-- epoch 5

-- seed: otions, even the story itself is not bad, but detailing it closer to the end is just too much.Spoilers here onward --> I mean a plant that can eat people is good fiction, but a plant that speak german and imitate smells of hamburger, then this is bad!!!!! 

-- diversity: 0.2
Is better leaked out still very positive deep for kids that it came off behind taking brands obtained that as my car as the gi-Beatens to wade work having cd, but like a long symphony shut on the late 1959 which added, and easy and pressure eoolgucks, best between Neam banjo ever even increase all very well learn is superb.MS disk band spirited by in a greater so you can fit 8th previously, but no

-- diversity: 0.5
w it should can't be assertive retractive. Only practice, like many of 8 They've thought has irony, and the reviewing Heavy being color-off uplorates any feel to add airing, are a few years at "agbicity of Effefloassage. It's tor a dog. Do Now WANTA THE CAN a bores of t

KeyboardInterrupt: 