# Keras
[Keras](https://keras.io/) es una librería de alto nivel que facilita la experimentación rápida con Deep Learning. Soporta diversos backends, como TensorFlow, Theano y CNTK.

In [None]:
%matplotlib inline
from matplotlib import pyplot as plt
from keras.datasets.mnist import load_data
from keras.layers import Dense, Input, Conv2D, Flatten
from keras.models import Model
from keras.utils import to_categorical
import numpy as np

def _to_model_input(images):
    """Append a trailing channel axis and rescale values with x / 127.5 - 1.

    With this formula 0 maps to -1 and 255 maps to 1.
    """
    return np.expand_dims(images, axis=-1) / 127.5 - 1


# Load MNIST, then put images and labels in the form the network consumes.
(x_train, y_train), (x_test, y_test) = load_data()
x_train, x_test = _to_model_input(x_train), _to_model_input(x_test)
# One-hot encode the labels (the model is compiled with categorical cross-entropy).
y_train, y_test = to_categorical(y_train), to_categorical(y_test)

# Small convolutional classifier built with the Keras functional API.
i = Input(shape=(28, 28, 1))
# Hidden layers need an explicit non-linearity: Keras applies no activation
# unless one is given, which would leave a stack of purely linear layers
# in front of the softmax.
d = Conv2D(10, (3, 3), activation='relu')(i)
d = Conv2D(10, (5, 5), strides=(2, 2), activation='relu')(d)
d = Flatten()(d)
d = Dense(100, activation='relu')(d)
# Output layer: 10 units with softmax, matching the one-hot encoded labels.
d = Dense(10, activation='softmax')(d)

model = Model(inputs=i, outputs=d)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['categorical_accuracy'])

# The test split doubles as validation data so both curves can be plotted afterwards.
history = model.fit(x_train, y_train, epochs=10, batch_size=1000, verbose=1, validation_data=(x_test, y_test))
# Plot the training and validation curves recorded in `history`, one figure
# per metric: (training key, validation key, figure title, y-axis label).
curves = (
    ('categorical_accuracy', 'val_categorical_accuracy', 'Model accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Model loss', 'Loss'),
)
for train_key, val_key, title, y_label in curves:
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc='upper left')
    plt.show()

# Word Embeddings
Word Embeddings es una técnica que consiste en asignarle a cada palabra un vector de características que representa el concepto asociado a la palabra. En el siguiente ejemplo utilizaremos un modelo preentrenado con la técnica Word2Vec [1] sobre artículos de [Google News](https://code.google.com/archive/p/word2vec/).

[1] Tomas Mikolov, Kai Chen, Greg Corrado, and Jeffrey Dean. Efficient Estimation of Word Representations in Vector Space. In Proceedings of Workshop at ICLR, 2013.


In [None]:
%matplotlib inline
import pickle
import numpy as np
from matplotlib import pyplot as plt
import gensim

# Raw string: keeps the backslash of the Windows path literal instead of
# relying on an unrecognized escape sequence ('\G'), which Python warns about.
model_wv = gensim.models.KeyedVectors.load_word2vec_format(
    r'd:\GoogleNews-vectors-negative300.bin', binary=True)

In [None]:
words = ['king', 'queen', 'man', 'woman']
print('Vector king: {}'.format(model_wv['king']))

# One row per word; the array is sized from `words` and from the vectors
# themselves, so the word list can be edited without touching this line.
vec = np.array([model_wv[w] for w in words], dtype=np.float64)

from sklearn.manifold import TSNE

# t-SNE requires perplexity < n_samples. The default (30) is too large for a
# handful of words, so derive it from the number of words being projected.
x = TSNE(perplexity=len(words) - 1).fit_transform(vec)
fig, ax = plt.subplots()
ax.scatter(x[:, 0], x[:, 1])
# Label every projected point with its word.
for w, (px, py) in zip(words, x):
    ax.annotate(w, (px, py))

plt.show()

In [None]:
# Vector arithmetic query: words closest to (king + woman - man).
analogy = model_wv.most_similar(positive=['king', 'woman'], negative=['man'])
print(analogy)

# Reinforcement learning
OpenAI Gym example:

In [None]:
import gym
import time
# Run 100 steps of a random policy, rendering every frame.
env = gym.make('Breakout-v0')
status_fmt = 'Observation: {}\nReward: {}\nDone: {}'
try:
    env.reset()
    for i in range(100):
        env.render()
        time.sleep(0.05)  # slow the loop down so the rendering can be followed
        observation, reward, done, info = env.step(env.action_space.sample())
        if i % 10 == 0:
            print(status_fmt.format(observation.shape, reward, done))
        if done:
            # Episode finished: report the final step and start a new episode.
            print('Reseting...')
            print(status_fmt.format(observation.shape, reward, done))
            env.reset()
finally:
    # Always release the environment, even if a step or render call fails.
    env.close()

Cargando el modelo:

In [None]:
from PIL import Image
import numpy as np
import gym

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, Convolution2D, Permute
from keras.optimizers import Adam
import keras.backend as K

from rl.agents.dqn import DQNAgent
from rl.policy import LinearAnnealedPolicy, BoltzmannQPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory
from rl.core import Processor
from rl.callbacks import FileLogger, ModelIntervalCheckpoint


class AtariProcessor(Processor):
    """Processor that adapts raw observations, state batches and rewards for the agent."""

    def process_observation(self, observation):
        """Resize a 3-D observation to ``INPUT_SHAPE`` and convert it to grayscale.

        Returns the processed frame as a ``uint8`` array of shape ``INPUT_SHAPE``.
        """
        assert observation.ndim == 3  # (height, width, channel)
        # Resize first, then convert to single-channel ('L') grayscale.
        grayscale = Image.fromarray(observation).resize(INPUT_SHAPE).convert('L')
        frame = np.array(grayscale)
        assert frame.shape == INPUT_SHAPE
        # uint8 keeps the stored observations small in the experience memory.
        return frame.astype('uint8')

    def process_state_batch(self, batch):
        """Convert a batch to ``float32`` and divide it by 255.

        The scaling is done here rather than in `process_observation` so that
        observations can be stored as ``uint8``; a ``float32`` array would take
        4x the memory, which matters when storing 1M observations.
        """
        return batch.astype('float32') / 255.

    def process_reward(self, reward):
        """Clip the reward to the range [-1, 1]."""
        return np.clip(reward, -1., 1.)

# Size of each processed frame and number of observations per input window.
INPUT_SHAPE = (84, 84)
WINDOW_LENGTH = 4

# The environment is opened here only to read the size of its action space,
# then closed again right away.
env = gym.make('BreakoutDeterministic-v4')
nb_actions = env.action_space.n
env.close()

# Network input: WINDOW_LENGTH stacked frames of size INPUT_SHAPE.
input_shape = (WINDOW_LENGTH,) + INPUT_SHAPE
model = Sequential()
# `K.image_data_format()` is the Keras 2 replacement for the deprecated
# `K.image_dim_ordering()` ('tf' -> 'channels_last', 'th' -> 'channels_first').
data_format = K.image_data_format()
if data_format == 'channels_last':
    # (width, height, channels)
    model.add(Permute((2, 3, 1), input_shape=input_shape))
elif data_format == 'channels_first':
    # (channels, width, height)
    model.add(Permute((1, 2, 3), input_shape=input_shape))
else:
    raise RuntimeError('Unknown image_data_format.')
# Three convolutional stages given as (filters, kernel size, stride), each followed by ReLU.
for filters, kernel, stride in ((32, 8, 4), (64, 4, 2), (64, 3, 1)):
    model.add(Convolution2D(filters, (kernel, kernel), strides=(stride, stride)))
    model.add(Activation('relu'))
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
# Linear output layer with one unit per action.
model.add(Dense(nb_actions))
model.add(Activation('linear'))
# `summary()` prints the table itself and returns None, so it is not wrapped in print().
model.summary()

# Experience memory limited to 1M entries, using the same window length as the model input.
memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)
processor = AtariProcessor()

# Epsilon-greedy policy wrapped so that its `eps` attribute goes from 1.0 down
# to 0.1 over 1M steps; 0.05 is the value configured for testing.
policy = LinearAnnealedPolicy(
    EpsGreedyQPolicy(),
    attr='eps',
    value_max=1.,
    value_min=.1,
    value_test=.05,
    nb_steps=1000000,
)

agent_options = dict(
    model=model,
    nb_actions=nb_actions,
    policy=policy,
    memory=memory,
    processor=processor,
    nb_steps_warmup=50000,
    gamma=.99,
    target_model_update=10000,
    train_interval=4,
    delta_clip=1.,
)
dqn = DQNAgent(**agent_options)
dqn.compile(Adam(lr=.00025), metrics=['mae'])

# Load previously saved weights instead of training in this notebook.
dqn.load_weights('dqn_BreakoutDeterministic-v4_weights_1750000.h5f')

In [None]:
# Run the agent for 5 rendered episodes.
env = gym.make('BreakoutDeterministic-v4')
try:
    dqn.test(env, nb_episodes=5, visualize=True)
finally:
    # Close the environment even if the test run raises.
    env.close()

# Image Captioning
La red fue entrenada utilizando [IAPR TC-12](https://www.imageclef.org/photodata).
* Para utilizar el modelo preentrenado, descargarlo de [aquí](https://mega.nz/#!4spxAQCL!KqQPiPYoK3xkwvccbhVVewQojt7WiSuDJ4x92sGaQvc).
* Para realizar el entrenamiento bajar el dataset y descomprimirlo en la carpeta dataset. Luego ejecutar:

```python preprocess.py```
Para entrenar: 
```python train.py```

Prediciendo captions:

In [None]:
#Cargando modelos
%matplotlib inline
from matplotlib import pyplot as plt
from keras.models import load_model
from keras.applications import InceptionV3
from preprocess import preprocess_image
from model import ImageGen, start_token, end_token
import cv2
import numpy as np
import random

print('Loading Models')
# InceptionV3 without its top layers; it is passed to `preprocess_image` later on.
img_model = InceptionV3(include_top=False)
# Raw strings keep the Windows backslashes literal instead of relying on
# unrecognized escape sequences ('\P', '\c', '\w', '\i'), which Python warns about.
model = load_model(r'D:\PycharmProjects\caption2\weights.1600-1.74.hdf5')
data_gen = ImageGen(r'D:\PycharmProjects\caption2\images_features',
                    r'D:\PycharmProjects\caption2\images_captions',
                    caption_max_len=30, min_reps=2)
print('Models Load')

In [None]:
def to_sentence(x):
    """Collapse runs of consecutive equal items in *x* into a single item.

    Only adjacent duplicates are removed; an item that reappears later is
    kept. Accepts any iterable and returns a new list.

    >>> to_sentence(['a', 'a', 'b', 'a'])
    ['a', 'b', 'a']
    """
    from itertools import groupby

    # groupby yields one key per run of equal neighbours. No sentinel value
    # is needed, so leading None items are preserved as well.
    return [item for item, _ in groupby(x)]

def text_gen(img_features, model, words_ids, ids_words, caption_len):
    """Generate a caption for one image, one position at a time.

    Args:
        img_features: features of a single image; a batch axis is added here.
        model: object whose ``predict([captions, images])`` output is indexed
            as ``[0, position, :]`` (assumed to be per-word scores — confirm).
        words_ids: mapping from word to id; must contain the start and end tokens.
        ids_words: mapping from id back to word.
        caption_len: number of positions in the caption buffer.

    Returns:
        The predicted words, starting with the start token, with consecutive
        repeats collapsed by `to_sentence`.
    """
    start_id = words_ids[start_token]
    end_id = words_ids[end_token]

    x_img = np.expand_dims(img_features, axis=0)
    x_cap = np.zeros((1, caption_len, len(words_ids)), dtype=np.float32)
    x_cap[0, 0, start_id] = 1  # one-hot start token at position 0

    predicted = [start_id]
    for step in range(caption_len - 1):
        step_scores = model.predict([x_cap, x_img])[0, step, :]
        # The raw output row (not a one-hot vector) is fed back as the next input position.
        x_cap[0, step + 1, :] = step_scores
        best_id = np.argmax(step_scores)
        predicted.append(best_id)
        if best_id == end_id:
            break
    return to_sentence([ids_words[word_id] for word_id in predicted])

In [None]:
# Sample images from the dataset. The last entry is the one used below;
# change the index to run the notebook on a different sample.
sample_images = (
    'D:/PycharmProjects/caption2/dataset/iaprtc12/images/00/116.jpg',
    'D:/PycharmProjects/caption2/dataset/iaprtc12/images/18/18101.jpg',
)
img_path = sample_images[-1]
img = preprocess_image(img_model, img_path)
%matplotlib inline
from matplotlib import pyplot as plt
import cv2
# OpenCV loads images as BGR; convert to RGB before handing them to matplotlib.
bgr_image = cv2.imread(img_path)
rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
plt.imshow(rgb_image)
plt.show()
# Also print the value returned by `preprocess_image` for this image.
print(img)

In [None]:
# Generate and print the caption for the selected image (caption buffer of 30 positions).
caption = text_gen(img, model, data_gen.word_id, data_gen.id_words, 30)
print(caption)