# DATA CLEANSING
---
##   PART1: DATASET HANDLING

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
import os

#Importing nltk library for preprocessing
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords, wordnet

# Fetch the NLTK resources used for preprocessing. Each call reports
# "already up-to-date" and skips the transfer when the package is present locally.
nltk.download("omw-1.4")
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')
# NOTE(review): 'all' requests the complete NLTK collection, which makes the four
# targeted downloads above redundant and is very large on a fresh machine.
# Presumably only 'stopwords' is needed by the cells visible here -- confirm and
# consider dropping this call.
# It is the last expression in the cell, so its return value is what the cell displays.
nltk.download('all')

# jupyter run .\DatasetHandling.ipynb

[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\sglbl\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\sglbl\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\sglbl\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\sglbl\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading collection 'all'
[nltk_data]    | 
[nltk_data]    | Downloading package abc to
[nltk_data]    |     C:\Users\sglbl\AppData\Roaming\nltk_data...
[nltk_data]    |   Package abc is already up-to-date!
[nltk_data]    | Downloading package alpino to
[nltk_data]    |     C:\Users\sglbl\AppData\Ro

True

In [2]:
# Read the Amazon reviews CSV and keep a handle on the free-text review column
reviews_csv_path = 'amazon_reviews.csv'
dataset = pd.read_csv(reviews_csv_path)
reviewText = dataset['reviewText']

# Display the columns available in the file
dataset.columns

Index(['Unnamed: 0', 'reviewerName', 'overall', 'reviewText', 'reviewTime',
       'day_diff', 'helpful_yes', 'helpful_no', 'total_vote',
       'score_pos_neg_diff', 'score_average_rating', 'wilson_lower_bound'],
      dtype='object')

In [3]:

# Drop the reviews that have no text at all
reviewText = reviewText.dropna()

# English stopword set with "no" deliberately taken out of it.
# NOTE(review): stop_words is only built here; nothing in this cell applies it
# to the reviews.
stop_words = set(stopwords.words('english'))
stop_words.remove('no')

# Characters to strip: everything in string.punctuation except the marks
# removed from the set in the loop below.
from string import punctuation
punctuations = set(punctuation)
for kept_mark in ('.', ',', '!', '?', '-', '_', '"'):
    punctuations.remove(kept_mark)

# Rebuild every review character by character, skipping the unwanted punctuation
reviewText = reviewText.apply(
    lambda review: ''.join(char for char in review if char not in punctuations)
)
# NOTE(review): the assignment aligns on the index, so the rows removed by
# dropna() above stay NaN inside `dataset`.
dataset['reviewText'] = reviewText


In [7]:
'''***Spacy Lemmatizer***'''
'''run the pip command on terminal or here'''
# One-off install command for the English model, kept for reference:
# %pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz --user
import spacy
# Load the 'en_core_web_sm' pipeline with its 'parser' and 'ner' components
# disabled; `lem` is the object handled_text() below calls on each review.
lem = spacy.load('en_core_web_sm', disable=['parser', 'ner'])
def handled_text(text):
    """Return ``text`` lower-cased and rewritten as space-separated lemmas.

    The value is coerced with ``str()``, lower-cased, passed through the
    notebook-level spaCy pipeline ``lem``, and the ``lemma_`` of every
    resulting token is joined with single spaces.
    """
    doc = lem(str(text).lower())
    lemmas = [token.lemma_ for token in doc]
    return " ".join(lemmas)

In [8]:
# ************************** FULLY PRINTING **************************
# How many reviews to lemmatise; -1 (or None) means "all reviews".
NUMBER_OF_REVIEWS_TO_PRINT = -1

# Turn the "all reviews" sentinel into an open-ended slice bound. Using -1
# directly as the bound (reviewText[:-1]) silently dropped the last review.
if NUMBER_OF_REVIEWS_TO_PRINT in (None, -1):
    review_limit = None
else:
    review_limit = NUMBER_OF_REVIEWS_TO_PRINT

print("\nORIGINAL FIRST 5 REVIEWS")
# Positional access: after the earlier dropna() the index labels 0..4 are not
# guaranteed to exist, so label lookups such as reviewText[0] may raise KeyError.
print(*reviewText.iloc[:5], sep=" \n ")

# Lemmatise the selected reviews with handled_text().
# NOTE(review): the result is kept in `reviewText` only; it is not written back
# to dataset['reviewText'], which is the column getReviews() reads. Re-running
# this cell also lemmatises already-lemmatised text.
reviewText = reviewText.iloc[:review_limit].apply(handled_text)

print("\nCLEANSED FIRST 5 REVIEWS")
print(*reviewText.iloc[:5], sep=" \n ")


ORIGINAL FIRST 5 REVIEWS
no issue . 
 purchase this for my device , it work as advertise . you can never have too much phone memory , since I download a lot of stuff this be a no brainer for I . 
 it work as expect . I should have spring for the high capacity .   I think its make a bit cheesy than the early version the paint look not as clean as before 
 this think have work out great.had a diff . bran 64 gb card and if go south after 3 months.this one have hold up pretty well since I have my s3 , now on my note3 . update 32114ive have this for a few month and have have zero issue since it be transfer from my s3 to my note3 and into a note2 . this card be reliable and solid!cheer ! 
 buy it with retail packaging , arrive legit , in a orange envelope , english version not asian like the picture show . arrive quickly , buy a 32 and 16 both retail packaging for my htc one sv and lg optimus , both card in working order , probably good price you ll get for a nice sd card

CLEANSED FIRST 5 

In [13]:
def getReviews(rating, data=None): # Overall ratings, star from 1 to 5
    """Return the review texts whose 'overall' rating equals ``rating``.

    Parameters
    ----------
    rating : float
        Star rating to select; one of 1.0, 2.0, 3.0, 4.0 or 5.0.
    data : pandas.DataFrame, optional
        Frame with 'overall' and 'reviewText' columns. Defaults to the
        notebook-level ``dataset``.

    Returns
    -------
    pandas.Series
        The 'reviewText' values of the matching rows, original index kept.

    Raises
    ------
    ValueError
        If ``rating`` is not a whole star value from 1 to 5. The previous
        behaviour of returning an error string only postponed the failure to
        the caller, which treats the result as a Series.
    """
    if data is None:
        data = dataset
    if rating not in (1.0, 2.0, 3.0, 4.0, 5.0):
        raise ValueError("Error in getReviews(): unsupported rating %r" % (rating,))
    # One boolean mask covers every rating, replacing the per-rating if-chain
    return data.loc[data['overall'] == rating, 'reviewText']

def getReviewSentenceList(rating):
    """Return the reviews of one rating as a single flat list of strings.

    Every review returned by ``getReviews(rating)`` is converted with
    ``str()`` and split on newline characters; the pieces of all reviews are
    concatenated in order. Despite the name, the split is on "\\n" only, not
    on sentence punctuation.
    """
    pieces_per_review = getReviews(rating).map(lambda review: str(review).split("\n"))

    flattened = []
    for pieces in pieces_per_review:
        flattened.extend(pieces)
    return flattened
# Preview the two rating groups: 5-star first, then 1-star
five_star_reviews = getReviews(5.0)
print("5 star reviews:\n", five_star_reviews)
one_star_reviews = getReviews(1.0)
print("1 star reviews:\n", one_star_reviews)

5 star reviews:
 1       Purchased this for my device, it worked as adv...
3       This think has worked out great.Had a diff. br...
4       Bought it with Retail Packaging, arrived legit...
5       Its mini storage.  It doesnt do anything else ...
6       I have it in my phone and it never skips a bea...
                              ...                        
4909    I bought this to stretch the capability of my ...
4911    Used this for extending the capabilities of my...
4912    Great card that is very fast and reliable. It ...
4913    Good amount of space for the stuff I want to d...
4914    Ive heard bad things about this 64gb Micro SD ...
Name: reviewText, Length: 3922, dtype: object
1 star reviews:
 32      I bought 2 of those SanDisk 32 GB microSD , us...
38      I bougth this micro SD card after some trubles...
43      Ordered this for a Galaxy S3.  Lasted a few mo...
143     I have an old SanDisk SD card that still works...
174     I bought this on August of 2013.  As of Ap

---
## PART2: PREPARE DEEP LEARNING

In [14]:
import tensorflow as tf
# Use the public tf.keras namespace throughout. `tensorflow.python.*` is a
# private implementation path, and mixing it with the public
# `tensorflow.keras` preprocessing imports below is fragile across TF releases.
from tensorflow.keras.layers import Embedding, Dense, SpatialDropout1D, Dropout, Convolution1D, Flatten, LSTM, GlobalMaxPooling1D
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [15]:
# Build the flat text lists for the two ratings used in the cells below
# (ratings 2-4 are not used in the cells visible here).
reviews1, reviews5 = (getReviewSentenceList(stars) for stars in (1.0, 5.0))

rating1_count = len(reviews1)
rating5_count = len(reviews5)
print("Rating 1 reviews: ", rating1_count, "rating 5 reviews: ", rating5_count)


Rating 1 reviews:  244 rating 5 reviews:  3922


In [16]:
# Balance the two classes by truncating both lists to the size of the smaller one
totalReviewsForEachRating = min(len(reviews1), len(reviews5))
reviews1 = reviews1[:totalReviewsForEachRating]
reviews5 = reviews5[:totalReviewsForEachRating]

# The last 10% of each class (rounded down) is held out for testing
testRating1 = int(totalReviewsForEachRating * 0.1)
testRating5 = int(totalReviewsForEachRating * 0.1)
print(f"Total test reviews for each rating: ({totalReviewsForEachRating}/10) ", testRating1)

# Split on explicit non-negative indices. The former negative slices
# (reviews[:-n] / reviews[-n:]) break when n == 0: [:-0] is empty and [-0:] is
# the whole list, which left the documents out of step with the labels.
trainCount1 = totalReviewsForEachRating - testRating1
trainCount5 = totalReviewsForEachRating - testRating5

# Train and test docs: 1-star documents first, then 5-star documents
train_docs = reviews1[:trainCount1] + reviews5[:trainCount5]
test_docs = reviews1[trainCount1:] + reviews5[trainCount5:]
print("Length of train_docs: ", len(train_docs), " Length of test_docs: ", len(test_docs))

# Labels follow the same order as the documents: 1 for 1-star, 0 for 5-star
trainLabels = np.concatenate((np.ones(trainCount1), np.zeros(trainCount5)), axis=0)
testLabels = np.concatenate((np.ones(testRating1), np.zeros(testRating5)), axis=0)

# Guard against documents and labels drifting apart
assert len(train_docs) == len(trainLabels)
assert len(test_docs) == len(testLabels)

print("Length of trainLabels: ", len(trainLabels), " Length of testLabels: ", len(testLabels))

Total test reviews for each rating: (244/10)  24
Length of train_docs:  440  Length of test_docs:  48
Length of trainLabels:  440  Length of testLabels:  48


In [17]:
# Print both label vectors, training first
for heading, labels in (("trainLabels: \n", trainLabels), ("testLabels: \n", testLabels)):
    print(heading, labels)

trainLabels: 
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.

In [18]:
# Single corpus holding the training documents followed by the test documents
allDocsOfTrainAndTest = [*train_docs, *test_docs]

train_doc_count = len(train_docs)
test_doc_count = len(test_docs)
print("Length of allDocsOfTrainAndTest: ", len(allDocsOfTrainAndTest))
print("Length of train_docs: ", train_doc_count, " Length of test_docs: ", test_doc_count)
print("allDocsOfTrainAndTest[First 4 sentences]: ", allDocsOfTrainAndTest[:4])

Length of allDocsOfTrainAndTest:  488
Length of train_docs:  440  Length of test_docs:  48
allDocsOfTrainAndTest[First 4 sentences]:  ['I bought 2 of those SanDisk 32 GB microSD , used them on my Galaxy Note and Galaxy S4First one , my phone started saying it was removed , then recognize it again  then diedI thought its just a luck , plugged in the 2nd one  stayed for about 2 months and died suddenly ! and lost everythingnever buying from SanDisk again .. ever', 'I bougth this micro SD card after some trubles with other that i got from Kingston, i can tell that SanDisk is other quality, this is the best, my i9300 phone can read a 64GB very fast, the dta transfer speed from my pc to my is amazing, if you are looking for additional storage to you phone, for music, pictures and other type of files, this is for you.Pros-Very good quality materials of construction-Amazing speedUpdate after one year this memory is damaged I can not delete or put new files on it, I can not format this memory,

In [19]:
# Tokenize the corpus reviews: the tokenizer is fitted on the training and test
# documents together
tokenizer = Tokenizer()
tokenizer.fit_on_texts(allDocsOfTrainAndTest)

document_count = tokenizer.document_count # total number of documents the tokenizer has seen
vocab_size = len(tokenizer.word_index) # total number of words the tokenizer has seen

# Encode data sentences into sequences
allDocsOfTrainAndTest_sequences = tokenizer.texts_to_sequences(allDocsOfTrainAndTest)

# Fixed length used by the pad_sequences() calls in the padding cells that follow.
# A per-sequence pad/trim loop used to sit here, but it only rebound its own loop
# variable and never changed allDocsOfTrainAndTest_sequences, so it was removed.
max_sequence_length = 50

data_word_index = tokenizer.word_index
print("document_count: ", document_count)
print("vocab_size: ", vocab_size)
print("max_sequence_length: ", max_sequence_length)
print("data_word_index: ", data_word_index)

document_count:  488
vocab_size:  3445
max_sequence_length:  50


### PADDING TRAINING AND TEST SEQUENCES

In [20]:
# Convert every training document into its sequence of tokenizer indices
train_sequences = tokenizer.texts_to_sequences(train_docs)

# Bring every sequence to max_sequence_length, padding and truncating at the end ('post')
train_padded = pad_sequences(
    train_sequences,
    maxlen=max_sequence_length,
    padding='post',
    truncating='post',
)

# Report a sample of the raw and padded sequences plus their container types
print("Train Doc Summary")
print("\nTraining sequences: (First 3)\n", train_sequences[:3])
print("\nPadded training sequences: (first 3)\n", train_padded[:3])
print("\nPadded training shape:", train_padded.shape)
print("Training sequences data type:", type(train_sequences))
print("Padded Training sequences data type:", type(train_padded))

Train Doc Summary

Training sequences: (First 3)
 [[2, 45, 74, 10, 310, 20, 215, 143, 130, 113, 70, 15, 8, 42, 111, 4, 42, 1761, 30, 8, 21, 207, 450, 3, 18, 311, 64, 248, 3, 140, 64, 1762, 232, 43, 44, 6, 286, 451, 11, 1, 1302, 30, 1303, 12, 56, 74, 60, 4, 176, 689, 4, 141, 1763, 198, 31, 20, 140, 275], [2, 1764, 9, 105, 29, 7, 38, 96, 1765, 14, 98, 16, 2, 58, 31, 566, 2, 52, 418, 16, 20, 13, 98, 170, 9, 13, 1, 312, 8, 1766, 21, 52, 67, 6, 117, 79, 86, 1, 1767, 177, 106, 31, 8, 183, 5, 8, 13, 567, 33, 23, 65, 488, 12, 615, 88, 5, 23, 21, 12, 122, 89, 4, 98, 616, 10, 108, 9, 13, 12, 23, 1304, 79, 80, 170, 1768, 10, 1769, 567, 1770, 38, 30, 287, 9, 35, 13, 690, 2, 52, 17, 617, 25, 135, 118, 108, 15, 3, 2, 52, 17, 73, 9, 35, 2, 97, 5, 73, 14, 8, 21, 14, 8, 183, 14, 6, 115, 4, 288, 59, 9, 13, 119, 6, 117, 1771, 1, 68, 11, 1, 35, 13, 61, 366, 5, 67, 22, 2, 52, 17, 489, 25, 617, 3, 2, 45, 9, 35, 5, 28, 113, 5, 419, 36, 8, 68, 10, 8, 152, 125, 22, 59, 13, 61, 6, 891, 35, 2, 52, 17, 99, 68, 15

### PADDING TEST SEQUENCES

In [21]:
# Convert every test document into its sequence of tokenizer indices
test_sequences = tokenizer.texts_to_sequences(test_docs)

# Same fixed length and 'post' padding/truncation as the training sequences
test_padded = pad_sequences(
    test_sequences,
    maxlen=max_sequence_length,
    padding='post',
    truncating='post',
)

# Report a sample of the raw and padded sequences plus their container types
print("Test Doc Summary")
print("\nTest sequences: (First 5)\n", test_sequences[:5])
print("\nPadded test sequences: (First 5)\n", test_padded[:5])
print("\nPadded test shape:", test_padded.shape)
print("Test sequences data type:", type(test_sequences))
print("Padded Test sequences data type:", type(test_padded))

Test Doc Summary

Test sequences: (First 5)
 [[3247, 277, 3, 4, 58, 429, 562, 74, 58, 3, 11, 77, 197, 77, 544, 3, 53, 10, 1, 732, 128, 544, 3, 11, 1, 1650, 200, 540, 3248, 203, 414, 14, 6, 345, 3249, 74, 72, 242, 3250, 63, 77, 313, 63, 227, 343, 5, 675, 1, 1105, 18, 2, 464, 128, 313, 227, 343, 5, 55, 3, 53, 10, 1, 732, 95, 303, 3251, 2, 19, 5, 1422, 9, 319, 407, 5, 95, 268, 8, 117, 29, 7, 44, 1495, 53, 109, 263, 1, 3252, 1325, 4, 2, 173, 73, 3, 25, 358, 108, 5, 3, 2, 462, 855, 264, 4, 288, 50, 2, 103, 165, 33, 2, 37, 28, 198, 71, 83, 29, 32, 140, 31, 70, 879, 2, 18, 229, 5, 467, 36, 8, 108, 107, 3253, 9, 62, 4, 9, 13, 6, 586, 62, 14, 92, 32], [2, 367, 23, 19, 323, 286, 3, 57, 1517, 6, 160, 10, 48, 206, 22, 124, 1, 3254, 3255, 1, 1582, 15, 1, 84, 1355, 250, 64, 1, 7, 176, 509, 826, 43, 85, 3256, 3257, 22, 109, 425, 6, 161, 121, 11, 344, 43, 6, 159, 388], [81, 127, 12, 6, 233, 4, 64, 38, 56, 6, 233, 1, 21, 179, 147, 1, 7, 71, 83, 97, 5, 67, 3, 15, 226, 619, 4, 710, 10, 70, 748, 1, 7, 524

---
## PART3: DEEP LEARNING MODEL

In [22]:
# 0 EMBEDDING + LSTM
# Embedding table size: one row more than the vocabulary
# (presumably so index 0 stays free for padding -- TODO confirm).
input_dim = vocab_size+1
output_dim = 50

# define the model: embedding -> two stacked LSTMs -> sigmoid output
model3 = Sequential()
model3.add(Embedding(input_dim, output_dim, input_length=max_sequence_length, name= 'embeded'))
model3.add(SpatialDropout1D(0.25))
# The first LSTM returns the full sequence so the second LSTM can consume it
model3.add(LSTM(16, return_sequences=True))
model3.add(LSTM(8))
model3.add(Dropout(0.25))
model3.add(Dense(1, activation='sigmoid'))

# compile the model
model3.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# summarize the model; summary() prints the table itself and returns None,
# so wrapping it in print() added a stray "None" line to the output
model3.summary()

# fit the model
model3.fit(train_padded, trainLabels, epochs=100, verbose=0)

# evaluate the model on the held-out test sequences
loss, accuracy = model3.evaluate(test_padded, testLabels, verbose=0)
print('Accuracy: %f' % (accuracy*100))

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embeded (Embedding)          (None, 50, 50)            172300    
_________________________________________________________________
spatial_dropout1d (SpatialDr (None, 50, 50)            0         
_________________________________________________________________
lstm (LSTM)                  (None, 50, 16)            4288      
_________________________________________________________________
lstm_1 (LSTM)                (None, 8)                 800       
_________________________________________________________________
dropout (Dropout)            (None, 8)                 0         
_________________________________________________________________
dense (Dense)                (None, 1)                 9         
Total params: 177,397
Trainable params: 177,397
Non-trainable params: 0
__________________________________________________

In [23]:
# 1 DENSE
# Fully connected network applied directly to the padded index sequences
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(max_sequence_length,)))
model.add(Dropout(0.2))
model.add(Dense(16, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(8, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() added a stray "None" line to the output
model.summary()

# Training settings; `epochs` is also read by the model cells that follow
epochs = 6
batch_size = 32

# 10% of the training data is set aside by Keras for validation during fit
history = model.fit(train_padded, trainLabels, epochs=epochs, batch_size=batch_size,
                    verbose=1, validation_split=0.1)

# evaluate() returns [loss, accuracy] for the metrics compiled above
score = model.evaluate(test_padded, testLabels, batch_size=batch_size, verbose=1)

print('Test score:', score[0])
print('Test accuracy:', score[1])

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 64)                3264      
_________________________________________________________________
dropout_1 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 16)                1040      
_________________________________________________________________
dropout_2 (Dropout)          (None, 16)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 8)                 136       
_________________________________________________________________
dropout_3 (Dropout)          (None, 8)                 0         
_________________________________________________________________
dense_4 (Dense)              (None, 1)                

In [24]:
# 2 WORD EMBEDDING + Deep NN
# Embedding table size: one row more than the vocabulary
# (presumably so index 0 stays free for padding -- TODO confirm).
input_dim = vocab_size+1
output_dim = 50

# define the model: embedding -> flatten -> dense -> sigmoid output
model2 = Sequential()
model2.add(Embedding(input_dim, output_dim, input_length=max_sequence_length, name= 'embeded'))
model2.add(Flatten())
model2.add(Dense(32, activation='relu'))
model2.add(Dense(1, activation='sigmoid'))

# compile the model
model2.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# summarize the model; summary() prints the table itself and returns None,
# so wrapping it in print() added a stray "None" line to the output
model2.summary()

# fit the model; `epochs` is the value set in the dense-model cell above
model2.fit(train_padded, trainLabels, epochs=epochs, verbose=0)

# evaluate the model on the held-out test sequences
loss, accuracy = model2.evaluate(test_padded, testLabels, verbose=0)
print('Accuracy: %f' % (accuracy*100))

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embeded (Embedding)          (None, 50, 50)            172300    
_________________________________________________________________
flatten (Flatten)            (None, 2500)              0         
_________________________________________________________________
dense_5 (Dense)              (None, 32)                80032     
_________________________________________________________________
dense_6 (Dense)              (None, 1)                 33        
Total params: 252,365
Trainable params: 252,365
Non-trainable params: 0
_________________________________________________________________
None
Accuracy: 79.166669


In [25]:
# 3 Word Embedding + CNN
# Embedding table size: one row more than the vocabulary
# (presumably so index 0 stays free for padding -- TODO confirm).
input_dim = vocab_size+1
output_dim = 50

# define the model: embedding -> two 1D convolutions -> global max pooling -> dense head
model4 = Sequential()
model4.add(Embedding(input_dim, output_dim, input_length=max_sequence_length, name= 'embeded'))
model4.add(Dropout(0.50))
# NOTE(review): both convolutions are created without an activation argument --
# confirm that this is intended.
model4.add(Convolution1D(16,3))
model4.add(Convolution1D(16,5))
model4.add(GlobalMaxPooling1D())
model4.add(Dropout(0.50))
model4.add(Dense(16, activation='relu'))
model4.add(Dense(1, activation='sigmoid'))

# compile the model
model4.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# summarize the model; summary() prints the table itself and returns None,
# so wrapping it in print() added a stray "None" line to the output
model4.summary()

# fit the model; `epochs` is the value set in the dense-model cell above
model4.fit(train_padded, trainLabels, epochs=epochs, verbose=0)

# evaluate the model on the held-out test sequences
loss, accuracy = model4.evaluate(test_padded, testLabels, verbose=0)
print('Accuracy: %f' % (accuracy*100))

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embeded (Embedding)          (None, 50, 50)            172300    
_________________________________________________________________
dropout_4 (Dropout)          (None, 50, 50)            0         
_________________________________________________________________
conv1d (Conv1D)              (None, 48, 16)            2416      
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 44, 16)            1296      
_________________________________________________________________
global_max_pooling1d (Global (None, 16)                0         
_________________________________________________________________
dropout_5 (Dropout)          (None, 16)                0         
_________________________________________________________________
dense_7 (Dense)              (None, 16)               