<a href="https://colab.research.google.com/github/saulolks/CDSentimentAnalysis-Project/blob/master/cnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Colab-only helper used to attach Google Drive to the runtime's filesystem.
from google.colab import drive

# Mounting triggers an interactive authorization prompt.
drive.mount(mountpoint='/content/drive/')



---



In [0]:
import numpy as np

from keras.layers import Dense, Input, Flatten, Reshape, concatenate, Dropout
from keras.layers import Conv1D, Conv2D, MaxPooling1D, MaxPooling2D, Embedding
from keras.layers import LSTM, Bidirectional
from keras.models import Model
from keras import optimizers
from keras import regularizers

## Modelo Yoon Kim

Modelo original de **Yoon Kim**: [Convolutional Neural Networks for Sentence Classification](https://arxiv.org/abs/1408.5882)

Implementação: [Jverma/cnn-text-classification-keras](https://github.com/Jverma/cnn-text-classification-keras)

*   **EMBEDDING_DIM**: Dimension of the embedding space.
*   **MAX_SEQUENCE_LENGTH**: Maximum length of a sentence.
*   **MAX_NB_WORDS**: Maximum number of words in the vocabulary.
*   **embeddings_index**: A dict mapping words to their embedding vectors.
*   **word_index**: A dict mapping words to their integer indices.
*   **labels_index**: A dict mapping labels to their integer indices.
*   **fname**: Path to the Google word2vec embeddings (currently set to a Google Drive folder).

In [0]:
# --- Hyperparameters and paths used by the cells below ---

# Number of components in each word-embedding vector.
EMBEDDING_DIM = 300

# Sequence length given to the model's input layer and to the embedding layer.
MAX_SEQUENCE_LENGTH = 50

# Upper bound applied when computing the embedding vocabulary size.
MAX_NB_WORDS = 10000

# Not referenced by the visible cells; presumably the validation fraction -- TODO confirm.
VALIDATION_SPLIT = 0.2

# Relative path into the mounted Google Drive (resolved against the working directory).
fname = 'drive/My Drive/UFRPE/'

In [0]:
# TODO: implement word_index (it must be defined before this cell runs).
#
# Number of rows of the embedding table (the Embedding layer's input_dim).
# The "+ 1" accounts for index 0: assuming word_index follows the Keras
# Tokenizer convention (word ids start at 1, 0 is reserved for padding --
# TODO confirm once word_index is implemented), the largest id is
# len(word_index), and input_dim must be the largest id + 1. Without it, a
# vocabulary smaller than MAX_NB_WORDS leaves its last word out of range.
# NOTE(review): embedding_matrix must be built with exactly num_words rows.
num_words = min(MAX_NB_WORDS, len(word_index) + 1)

### Modelo

In [0]:
# Trainable embedding lookup whose initial weights are taken from embedding_matrix.
embedding_layer = Embedding(
    input_dim=num_words,
    output_dim=EMBEDDING_DIM,
    weights=[embedding_matrix],
    input_length=MAX_SEQUENCE_LENGTH,
    trainable=True,
)

In [0]:
def _conv_pool_branch(inputs, window, filters=100):
    """Build one convolution + max-pooling branch on top of ``inputs``.

    The kernel covers ``window`` rows and all EMBEDDING_DIM columns. The pool
    height, MAX_SEQUENCE_LENGTH - window + 1, is the number of rows left by a
    convolution with Keras' default 'valid' padding, so pooling keeps a single
    value per filter.
    """
    convolved = Conv2D(filters, (window, EMBEDDING_DIM), activation='relu')(inputs)
    return MaxPooling2D((MAX_SEQUENCE_LENGTH - window + 1, 1))(convolved)


# Integer token ids, one row of MAX_SEQUENCE_LENGTH entries per sample.
sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')

# Look up the embeddings, then append a trailing axis of size 1 so the result
# can be fed to Conv2D.
embedded_sequences = Reshape((MAX_SEQUENCE_LENGTH, EMBEDDING_DIM, 1))(
    embedding_layer(sequence_input)
)

# Three parallel branches with windows of 5, 4 and 3 rows, built in that order.
x, y, z = (_conv_pool_branch(embedded_sequences, window) for window in (5, 4, 3))

# Concatenate the pooled branch outputs, flatten them, then apply dropout.
alpha = concatenate([x, y, z])
alpha = Flatten()(alpha)
alpha = Dropout(0.5)(alpha)

# Softmax output with one unit per entry in labels_index.
preds = Dense(len(labels_index), activation='softmax')(alpha)

# Assemble the end-to-end model.
model = Model(sequence_input, preds)

In [0]:
# Adadelta optimizer with the library's default hyperparameters.
adadelta = optimizers.Adadelta()

# Configure training: categorical cross-entropy loss, accuracy reported as 'acc'.
model.compile(
    optimizer=adadelta,
    loss='categorical_crossentropy',
    metrics=['acc'],
)

### Treinamento

In [0]:
# Train for 25 epochs with mini-batches of 50 samples; (x_val, y_val) is passed
# as validation data.
model.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_val, y_val),
    epochs=25,
    batch_size=50,
)