### Colab

#### git clone

In [1]:
#!git clone https://github.com/<your name>/<your project>.git
#pip install -r requirements.txt

### Importamos las librerías y el dataset

In [2]:
import numpy as np
import os
import shutil
import datetime

In [3]:
from keras.models import Sequential
from keras.preprocessing import sequence
from keras.layers import Embedding, LSTM, Dense
from keras.callbacks import TensorBoard

Using TensorFlow backend.


In [4]:
from libs.workshop_libs.tensorboard import TrainValTensorBoard

### Importar el dataset

In [5]:
from keras.datasets import imdb

In [6]:
# Dataset settings, passed to imdb.load_data and to the padding/model helpers.
top_words = 5000           # vocabulary size (num_words)
max_review_length = 500    # sequence length limit (maxlen)

# Reserved token ids; real words are indexed from `index_from` upwards.
pad_char, start_char, oov_char = 0, 1, 2
index_from = 3

# NOTE(review): not referenced by any other cell visible in this notebook.
test_split = 0.2

In [7]:
# Load the IMDB reviews as integer-encoded sequences, using the vocabulary
# size, length limit and reserved token ids from the configuration cell.
imdb_options = dict(
    seed=1,
    num_words=top_words,
    maxlen=max_review_length,
    start_char=start_char,
    oov_char=oov_char,
    index_from=index_from,
)
(x_train, y_train), (x_dev, y_dev) = imdb.load_data(**imdb_options)

In [9]:
x_train.shape

(25000,)

In [10]:
len(x_train)

25000

In [11]:
len(x_train[0])

284

In [13]:
x_train[0][:10]

[1, 13, 28, 1039, 7, 14, 23, 1856, 13, 104]

In [None]:
x_dev[0][:10]

In [None]:
# Keep at most the first `max_data` examples of each split to shorten training.
max_data = 10000
x_train, y_train = x_train[:max_data], y_train[:max_data]
x_dev, y_dev = x_dev[:max_data], y_dev[:max_data]

### Revisemos el dataset

In [None]:
# Build the word -> id vocabulary that matches the encoded reviews.
# get_word_index() returns the raw word ranks; load_data was called with
# `index_from`, so the same offset is applied here before adding the
# reserved padding / start / out-of-vocabulary ids.
wordIDX = imdb.get_word_index()
wordIDX = {word: rank + index_from for word, rank in wordIDX.items()}
wordIDX["<>"] = pad_char
wordIDX["<INIT>"] = start_char
wordIDX["<?>"] = oov_char

# Reverse mapping (id -> word) used to decode a review back into text.
IDXword = {idx: word for word, idx in wordIDX.items()}

# Keras IMDB labels are 0 = negative review, 1 = positive review.
# (The previous mapping had the two names swapped.)
IDXsentiment = {0: "negative", 1: "positive"}

In [None]:
def printExample(x,y,i,IDXword,IDXsentiment):
    """Print example number `i` as decoded text followed by its sentiment.

    Args:
        x: indexable collection of token-id sequences.
        y: indexable collection of labels, aligned with `x`.
        i: position of the example to print.
        IDXword: mapping from token id to word.
        IDXsentiment: mapping from label to a sentiment name.
    """
    rule_width = 10
    print("=" * rule_width)
    # A distinct loop name avoids shadowing the example index `i`.
    decoded_words = [IDXword[token_id] for token_id in x[i]]
    print(' '.join(decoded_words))
    print('-' * rule_width)
    sentiment_name = IDXsentiment[y[i]]
    print('Sentiment: {}'.format(sentiment_name))

In [None]:
printExample(x_train,y_train,0,IDXword,IDXsentiment)
printExample(x_train,y_train,1,IDXword,IDXsentiment)
printExample(x_dev,y_dev,1,IDXword,IDXsentiment)

In [None]:
x_dev_list=x_dev.tolist()

In [None]:
x_train[100] in x_dev_list

In [None]:
## Cargamos tensorboard

In [None]:
%load_ext tensorboard
#%reload_ext tensorboard

In [None]:
#!kill 4430

In [None]:
#!ps aux | grep -i tb

In [None]:
# Start every session from an empty TensorBoard log directory.
logs_base_dir = "./logs"
# Only remove the directory when it exists: on a fresh checkout there is
# nothing to delete and an unguarded rmtree raises FileNotFoundError, which
# would stop a Restart & Run All at this cell.
if os.path.isdir(logs_base_dir):
    shutil.rmtree(logs_base_dir)

In [None]:
os.makedirs(logs_base_dir, exist_ok=True)
%tensorboard --logdir {logs_base_dir}

In [None]:
### Uniformicemos los datos: truncado y padding

In [None]:
def prepare_data(x_train,x_dev,max_review_length):
    """Pad or truncate every review in both splits to a common length.

    Args:
        x_train: training sequences of token ids.
        x_dev: development sequences of token ids.
        max_review_length: target length passed to pad_sequences as `maxlen`.

    Returns:
        Tuple `(x_train, x_dev)` with the results of pad_sequences for each split.
    """
    padded_train, padded_dev = (
        sequence.pad_sequences(split, maxlen=max_review_length)
        for split in (x_train, x_dev)
    )
    return padded_train, padded_dev

In [None]:
def create_model(top_words,max_review_length,embedding_vector_length=32,lstm_units=100):
    """Build the (uncompiled) Embedding -> LSTM -> sigmoid classifier.

    Args:
        top_words: vocabulary size, used as the Embedding input dimension.
        max_review_length: length of each padded input sequence.
        embedding_vector_length: size of each word embedding (default 32).
        lstm_units: number of units in the LSTM layer (default 100).

    Returns:
        The Sequential model; its summary is printed as a side effect.
    """
    model = Sequential()
    model.add(Embedding(top_words, embedding_vector_length, input_length=max_review_length))
    model.add(LSTM(lstm_units))
    # One sigmoid unit: the output is a single value in (0, 1) per review.
    model.add(Dense(1, activation='sigmoid'))
    # summary() prints the table itself and returns None, so wrapping it in
    # print() only added a stray "None" line to the output.
    model.summary()
    return model

In [None]:
def train_model(model,x_train,y_train,x_dev,y_dev,epochs=10,batch_size=128):
    """Compile `model` and fit it, logging to a timestamped TensorBoard run.

    Reads the module-level `logs_base_dir` to decide where the run directory
    is created.

    Args:
        model: Keras model to compile and train (modified in place).
        x_train: padded training sequences.
        y_train: training labels.
        x_dev: padded validation sequences.
        y_dev: validation labels.
        epochs: number of passes over the training data (default 10).
        batch_size: examples per gradient update (default 128).
    """
    # One sub-directory per run so successive trainings do not overwrite each other.
    run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    logdir = os.path.join(logs_base_dir, run_stamp)

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    tensorboard_callback = TrainValTensorBoard(logdir,
                                               histogram_freq=1,
                                               write_graph=True)
    # `epochs` replaces the Keras 1 keyword `nb_epoch`, which Keras 2 only
    # accepted as a deprecated alias and newer releases reject.
    model.fit(x_train,
              y_train,
              batch_size=batch_size,
              validation_data=(x_dev, y_dev),
              epochs=epochs,
              callbacks=[tensorboard_callback])

In [None]:
x_train,x_dev = prepare_data(x_train,x_dev,max_review_length)

In [None]:
print(x_train.shape)
print(x_dev.shape)

In [None]:
model=create_model(top_words,max_review_length)

In [None]:
train_model(model,x_train,y_train,x_dev,y_dev)

### Evaluamos el modelo

In [None]:
# Evaluate on the dev split; scores[1] is the second value returned by
# evaluate(), reported below as the accuracy percentage.
scores = model.evaluate(x_dev, y_dev, verbose=0)
accuracy_pct = scores[1] * 100
print("Accuracy: %.2f%%" % accuracy_pct)

### Guardamos el modelo

In [None]:
outpath='models/'
os.makedirs(outpath, exist_ok=True)

In [None]:
model.save(outpath+"model_other.h5")

In [None]:
# Write the JSON produced by model.to_json() next to the saved .h5 file.
model_json = model.to_json()
json_path = outpath + "model_other.json"
with open(json_path, "w") as json_file:
    json_file.write(model_json)

### ADDENDUM: One hot encoder

In [None]:
from keras.preprocessing.text import one_hot

In [None]:
texto=["Esta es una frase","Otra frase mas larga", "todo esto viene de nuestro dataset de interes"]

In [None]:
# Encode each sentence as a list of integer word ids via Keras' one_hot.
# NOTE(review): one_hot is presumably hash-based, so with a vocabulary this
# small different words may share an id — confirm against the Keras docs.
vocab_size = 50
palabras_vectorizadas = []
for frase in texto:
    palabras_vectorizadas.append(one_hot(frase, vocab_size))
print(palabras_vectorizadas)

### Problemas con numpy nuevos y keras

In [None]:
# Keep a handle on the genuine np.load. If this cell has already been run,
# np.load is our wrapper, so recover the original it carries instead of
# wrapping the wrapper (which made every later call fail with
# "got multiple values for keyword argument 'allow_pickle'").
np_load_old = getattr(np.load, "_np_load_original", np.load)


def _np_load_allow_pickle(*args, **kwargs):
    """Call the original np.load with allow_pickle defaulting to True."""
    # setdefault keeps an explicit allow_pickle from the caller working.
    kwargs.setdefault("allow_pickle", True)
    return np_load_old(*args, **kwargs)


_np_load_allow_pickle._np_load_original = np_load_old

# SECURITY: allow_pickle=True unpickles file contents, which can execute
# arbitrary code. Only load trusted files while this patch is active, and
# restore np.load afterwards.
np.load = _np_load_allow_pickle

# call load_data with allow_pickle implicitly set to true
#(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=10000)

In [None]:
# restore np.load for future normal usage
np.load = np_load_old

In [None]:
### Otra solucion

In [None]:
#!pip uninstall numpy

In [None]:
#!pip install numpy==1.16.1