In [58]:
import pandas as pd
import numpy as np
import random as rn
from nltk.corpus import stopwords
import re, unicodedata, zipfile, os, nltk
np.random.seed(1337)
rn.seed(12345)
PYTHONHASHSEED=0
os.environ['KERAS_BACKEND'] = 'theano'
from sklearn.feature_extraction.text import CountVectorizer
from nltk import WordNetLemmatizer
from sklearn import model_selection, metrics
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils
from keras.models import Sequential
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras import layers
from keras.utils import to_categorical
import plotly.plotly as py
import plotly.graph_objs as go
import matplotlib.pyplot as plt
from plotly import tools, offline
offline.init_notebook_mode(connected=True)
import warnings
warnings.filterwarnings('ignore')

### Loading data and preprocessing

In [3]:
def read_files(path, folder):
    files = os.listdir(path+'/%s' % folder)    
    
    theme = []
    for i in range(len(files)):
        zfile = zipfile.ZipFile(path+'/{0}/{1}'.format(folder, files[i]))
        for finfo in zfile.infolist():
            ifile = zfile.open(finfo)
            text = ifile.readlines()
            theme.append(text)
    return theme

In [4]:
def generate_dataframe(path):
    data = pd.DataFrame()
    folder_files = os.listdir(path)
    for folder in folder_files:
        data = data.append(pd.DataFrame({'text': read_files(path, folder), 'target':folder}), ignore_index=True)
    return data

def delete_stop_words(row):
    stop = stopwords.words('english')
    new_row = [word for word in row if word not in stop]
    return new_row

def clean_text(text):
    new_text = re.sub(r"[^a-z ]", " ", text.lower())
    return new_text

def clean(text):
    cleaned_1 = [row.decode("utf-8", errors='ignore').splitlines() for row in text]
    cleaned_2 = [[' '.join(item.split())] for row in cleaned_1 for item in row]
    cleaned_3 = [row for row in cleaned_2 if row != ['']]
    return cleaned_3

def lemmatization(row):
    lemmatizer = WordNetLemmatizer()
    new_row = [lemmatizer.lemmatize(word, pos='v') for word in row]
    return new_row

def stack(text):
    new_text = [' '.join(row) for row in text]
    return ' '.join(new_text)

def remove_non_ascii(row):
    new_row = [unicodedata.normalize('NFKD', word).encode('ascii', 'ignore').decode('utf-8', 'ignore') for word in row]
    return new_row

def preproc_text(df):
    df['text'] = df['text'].apply(lambda x: clean(x))
    data['text'] = data['text'].apply(lambda x: [clean_text(row[0]) for row in x])
    data['text'] = data['text'].apply(lambda x: [delete_stop_words(list(row.split())) for row in x])
    data['text'] = data['text'].apply(lambda x: [remove_non_ascii(row) for row in x])
    data['text'] = data['text'].apply(lambda x: [lemmatization(row) for row in x])
    data['text'] = data['text'].apply(lambda x: stack(x))

In [12]:
data = generate_dataframe('data')
preproc_text(data)
data = data.sample(frac=1).reset_index(drop=True)

In [13]:
sentences = data['text']
y = data['target']

In [59]:
distribution = y.value_counts()
picture = [go.Bar(
            x=distribution.index,
            y=distribution
    )]
layout = go.Layout(
    title='Plot Title',
    yaxis=dict(
        title='Number of classes',
        titlefont=dict(
            family='Courier New, monospace',
            size=18,
            color='#7f7f7f'
        )
    )
)
fig = go.Figure(data=picture, layout=layout)
offline.iplot(fig)

The sample is not balanced. I'm going to write function which split the sample in equal proportions between all classes.

In [15]:
def train_test_split(X, y, test_size, random_state=42, shuffle=False):
    test_indexes, train_indexes = [], []
    np.random.seed(random_state)
    for class_ in np.unique(y):    
        N = y[y == class_].value_counts().values[0]
        part = int(round(N * test_size))

        full_indexes = np.array(y[y == class_].index)
        low = full_indexes.min()
        high = full_indexes.max()

        temp = np.random.randint(low, high, part)
        test_indexes.extend(temp)

        temp = np.delete(full_indexes, test_indexes)
        train_indexes.extend(temp)
        
    if shuffle is True:
        np.random.shuffle(train_indexes)
        np.random.shuffle(test_indexes)
        
    X_train = X.iloc[train_indexes]
    y_train = y.iloc[train_indexes]
    X_test = X.iloc[test_indexes]
    y_test = y.iloc[test_indexes]
    return X_train, X_test, y_train, y_test

I splited data into train for gridsearch and test, which i will use when the best model is selected.

In [16]:
X_train, X_test, y_train, y_test = train_test_split(sentences, y, test_size=0.2, random_state=15, shuffle=True)

In [17]:
encoder = LabelEncoder()
encoder.fit(y)
encoded_Y_train = encoder.transform(y_train)
encoded_Y_test = encoder.transform(y_test)
# convert integers to dummy variables (i.e. one hot encoded)
y_train = np_utils.to_categorical(encoded_Y_train)
y_test = np_utils.to_categorical(encoded_Y_test)

In [26]:
def multilayer_perceptron_one_layer(input_dim, units):
    model = Sequential()
    
    model.add(layers.Dense(units=units, activation='relu', input_dim=input_dim))
    model.add(layers.Dense(units=5, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [18]:
vectorizer = CountVectorizer()

vectorizer.fit(sentences)
X_train = vectorizer.transform(X_train)
X_test  = vectorizer.transform(X_test)

In [19]:
cv = model_selection.StratifiedShuffleSplit(n_splits=3, random_state=42)

In [21]:
np.random.seed(42)
param_grid_multilayer_perceptron_one_layer = dict(epochs=[5,10,20,30], batch_size=[10,32,64],
                                                 units=[100,200,300,400])
model = KerasClassifier(build_fn=multilayer_perceptron_one_layer, input_dim=X_train.shape[1])
grid = GridSearchCV(estimator=model, param_grid=param_grid_multilayer_perceptron_one_layer,
                              cv=cv, verbose=1)
grid_result_multilayer_perceptron_one_layer = grid.fit(X_train, y_train)

Fitting 3 folds for each of 48 candidates, totalling 144 fits
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch

Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20

Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 1

Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 1

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 2

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10


Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
E

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Epoch 1/30


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 

Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 1

Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10


Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
E

Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoc

Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 2

Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 2

[Parallel(n_jobs=1)]: Done 144 out of 144 | elapsed: 77.0min finished


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [20]:
def embedding_multilayer_perceptron_one_layer(input_dim, units, maxlen):
    model = Sequential()
    model.add(layers.Embedding(input_dim=input_dim, 
                               output_dim=200, 
                               input_length=maxlen))
    model.add(layers.GlobalMaxPool1D())
    model.add(layers.Dense(units, activation='relu'))
    model.add(layers.Dense(5, activation='softmax'))
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    return model

In [21]:
sentences_train, sentences_test, _, _ = train_test_split(sentences, y, test_size=0.2, random_state=15, shuffle=True)

In [22]:
tokenizer = Tokenizer(num_words=70000)
tokenizer.fit_on_texts(sentences)

X_train_embedding = tokenizer.texts_to_sequences(sentences_train)
X_test_embedding = tokenizer.texts_to_sequences(sentences_test)

vocab_size = len(tokenizer.word_index) + 1  # Adding 1 because of reserved 0 index


In [23]:
maxlen = 5000
X_train_embedding = pad_sequences(X_train_embedding, padding='post', maxlen=maxlen)
X_test_embedding = pad_sequences(X_test_embedding, padding='post', maxlen=maxlen)

In [31]:
np.random.seed(42)
param_grid_embedding_multilayer_perceptron_one_layer = dict(epochs=[5,10,20], batch_size=[10,32,64],
                                                 units=[100,200,300])
model = KerasClassifier(build_fn=embedding_multilayer_perceptron_one_layer, input_dim=vocab_size,
                        maxlen=maxlen)
grid = GridSearchCV(estimator=model, param_grid=param_grid_embedding_multilayer_perceptron_one_layer,
                              cv=cv, verbose=1)
grid_result_embedding_multilayer_perceptron_one_layer = grid.fit(X_train_embedding, y_train)

Fitting 3 folds for each of 27 candidates, totalling 81 fits
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_2 (Glob (None, 100)               0         
_________________________________________________________________
dense_293 (Dense)            (None, 100)               10100     
_________________________________________________________________
dense_294 (Dense)            (None, 5)                 505       
Total params: 6,366,705
Trainable params: 6,366,705
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embeddin

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_8 (Embedding)      (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_8 (Glob (None, 100)               0         
_________________________________________________________________
dense_305 (Dense)            (None, 300)               30300     
_________________________________________________________________
dense_306 (Dense)            (None, 5)                 1505      
Total params: 6,387,905
Trainable params: 6,387,905
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_9 (Embedding)      (None, 10000,

Epoch 10/10
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_17 (Embedding)     (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_17 (Glo (None, 100)               0         
_________________________________________________________________
dense_323 (Dense)            (None, 300)               30300     
_________________________________________________________________
dense_324 (Dense)            (None, 5)                 1505      
Total params: 6,387,905
Trainable params: 6,387,905
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding

Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_24 (Embedding)     (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_24 (Glo (None, 100)               0         
_________________________________________________________________
dense_337 (Dense)            (None, 200)               20200     
_________________________________________________________________
dense_338 (Dense)            (None, 5)                 1005      
Total params: 6,377,305
Trainable params: 6,377,305
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_30 (Embedding)     (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_30 (Glo (None, 100)               0         
_________________________________________________________________
dense_349 (Dense)            (None, 100)               10100     
_________________________________________________________________
dense_350 (Dense)            (None, 5)                 505       
Total params: 6,366,705
Trainable params: 6,366,705
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_31 (Embedding)     (No

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_36 (Embedding)     (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_36 (Glo (None, 100)               0         
_________________________________________________________________
dense_361 (Dense)            (None, 300)               30300     
_________________________________________________________________
dense_362 (Dense)            (None, 5)                 1505      
Total params: 6,387,905
Trainable params: 6,387,905
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_37 (Embedding)     (No

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_41 (Embedding)     (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_41 (Glo (None, 100)               0         
_________________________________________________________________
dense_371 (Dense)            (None, 200)               20200     
_________________________________________________________________
dense_372 (Dense)            (None, 5)                 1005      
Total params: 6,377,305
Trainable params: 6,377,305
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
__________________________________________

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_49 (Embedding)     (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_49 (Glo (None, 100)               0         
_________________________________________________________________
dense_387 (Dense)            (None, 100)               10100     
_________________________________________________________________
dense_388 (Dense)            (None, 5)                 505       
Total params: 6,366,705
Trainable params: 6,366,705
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_52 (Embedding)     (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_52 (Glo (None, 100)               0         
_________________________________________________________________
dense_393 (Dense)            (None, 200)               20200     
_________________________________________________________________
dense_394 (Dense)            (None, 5)                 1005      
Total params: 6,377,305
Trainable params: 6,377,305
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_55 (Embedding)     (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_55 (Glo (None, 100)               0         
_________________________________________________________________
dense_399 (Dense)            (None, 300)               30300     
_________________________________________________________________
dense_400 (Dense)            (None, 5)                 1505      
Total params: 6,387,905
Trainable params: 6,387,905
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_59 (Embedding)     (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_59 (Glo (None, 100)               0         
_________________________________________________________________
dense_407 (Dense)            (None, 200)               20200     
_________________________________________________________________
dense_408 (Dense)            (None, 5)                 1005      
Total params: 6,377,305
Trainable params: 6,377,305
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_60 (Embedding)     (No

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_65 (Embedding)     (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_65 (Glo (None, 100)               0         
_________________________________________________________________
dense_419 (Dense)            (None, 100)               10100     
_________________________________________________________________
dense_420 (Dense)            (None, 5)                 505       
Total params: 6,366,705
Trainable params: 6,366,705
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
_________________________________________________________________
Layer (type)                 Output S

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_74 (Embedding)     (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_74 (Glo (None, 100)               0         
_________________________________________________________________
dense_437 (Dense)            (None, 100)               10100     
_________________________________________________________________
dense_438 (Dense)            (None, 5)                 505       
Total params: 6,366,705
Trainable params: 6,366,705
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_80 (Embedding)     (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_80 (Glo (None, 100)               0         
_________________________________________________________________
dense_449 (Dense)            (None, 300)               30300     
_________________________________________________________________
dense_450 (Dense)            (None, 5)                 1505      
Total params: 6,387,905
Trainable params: 6,387,905
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


[Parallel(n_jobs=1)]: Done  81 out of  81 | elapsed: 42.5min finished


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_83 (Embedding)     (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_83 (Glo (None, 100)               0         
_________________________________________________________________
dense_455 (Dense)            (None, 300)               30300     
_________________________________________________________________
dense_456 (Dense)            (None, 5)                 1505      
Total params: 6,387,905
Trainable params: 6,387,905
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [39]:
def pretrained_embedding_multilayer_perceptron_one_layer(input_dim, embedding_matrix, embedding_dim, units, maxlen, trainable):
    model = Sequential()
    model.add(layers.Embedding(input_dim=input_dim, 
                               weights=[embedding_matrix],
                               output_dim=embedding_dim, 
                                input_length=maxlen,
                                trainable=trainable))
    model.add(layers.GlobalMaxPool1D())
    model.add(layers.Dense(units, activation='relu'))
    model.add(layers.Dense(5, activation='softmax'))
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    return model

In [25]:
def create_embedding_matrix(filepath, word_index, embedding_dim):
    vocab_size = len(word_index) + 1  # Adding again 1 because of reserved 0 index
    embedding_matrix = np.zeros((vocab_size, embedding_dim))

    with open(filepath) as f:
        for line in f:
            word, *vector = line.split()
            if word in word_index:
                idx = word_index[word] 
                embedding_matrix[idx] = np.array(
                    vector, dtype=np.float32)[:embedding_dim]

    return embedding_matrix

In [26]:
embedding_dim = 200
embedding_matrix = create_embedding_matrix('glove.6B.200d.txt', tokenizer.word_index, embedding_dim)

In [42]:
np.random.seed(42)
param_grid_pretrained_embedding_multilayer_perceptron_one_layer = dict(epochs=[5,10,20], batch_size=[10,32,64],
                                                 units=[100,200,300], trainable=[False, True])
model = KerasClassifier(build_fn=pretrained_embedding_multilayer_perceptron_one_layer, input_dim=vocab_size,
                        maxlen=maxlen, embedding_dim=embedding_dim, embedding_matrix=embedding_matrix)
grid = GridSearchCV(estimator=model, param_grid=param_grid_pretrained_embedding_multilayer_perceptron_one_layer,
                              cv=cv, verbose=1)
grid_result_pretrained_embedding_multilayer_perceptron_one_layer = grid.fit(X_train_embedding, y_train)

Fitting 3 folds for each of 54 candidates, totalling 162 fits
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_84 (Embedding)     (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_84 (Glo (None, 100)               0         
_________________________________________________________________
dense_457 (Dense)            (None, 100)               10100     
_________________________________________________________________
dense_458 (Dense)            (None, 5)                 505       
Total params: 6,366,705
Trainable params: 10,605
Non-trainable params: 6,356,100
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_85 (E

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_90 (Embedding)     (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_90 (Glo (None, 100)               0         
_________________________________________________________________
dense_469 (Dense)            (None, 300)               30300     
_________________________________________________________________
dense_470 (Dense)            (None, 5)                 1505      
Total params: 6,387,905
Trainable params: 31,805
Non-trainable params: 6,356,100
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_91 (Embedding)   

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_96 (Embedding)     (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_96 (Glo (None, 100)               0         
_________________________________________________________________
dense_481 (Dense)            (None, 200)               20200     
_________________________________________________________________
dense_482 (Dense)            (None, 5)                 1005      
Total params: 6,377,305
Trainable params: 6,377,305
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_97 (Embedding)     (No

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_102 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_102 (Gl (None, 100)               0         
_________________________________________________________________
dense_493 (Dense)            (None, 100)               10100     
_________________________________________________________________
dense_494 (Dense)            (None, 5)                 505       
Total params: 6,366,705
Trainable params: 10,605
Non-trainable params: 6,356,100
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
_________________________________________________________________
Layer (type)                 Out

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_111 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_111 (Gl (None, 100)               0         
_________________________________________________________________
dense_511 (Dense)            (None, 100)               10100     
_________________________________________________________________
dense_512 (Dense)            (None, 5)                 505       
Total params: 6,366,705
Trainable params: 6,366,705
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
__________________________________________

Epoch 10/10
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_119 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_119 (Gl (None, 100)               0         
_________________________________________________________________
dense_527 (Dense)            (None, 300)               30300     
_________________________________________________________________
dense_528 (Dense)            (None, 5)                 1505      
Total params: 6,387,905
Trainable params: 6,387,905
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_123 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_123 (Gl (None, 100)               0         
_________________________________________________________________
dense_535 (Dense)            (None, 200)               20200     
_________________________________________________________________
dense_536 (Dense)            (None, 5)                 1005      
Total params: 6,377,305
Trainable params: 21,205
Non-trainable params: 6,356,100
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_126 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_126 (Gl (None, 100)               0         
_________________________________________________________________
dense_541 (Dense)            (None, 300)               30300     
_________________________________________________________________
dense_542 (Dense)            (None, 5)                 1505      
Total params: 6,387,905
Trainable params: 31,805
Non-trainable params: 6,356,100
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_129 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_129 (Gl (None, 100)               0         
_________________________________________________________________
dense_547 (Dense)            (None, 100)               10100     
_________________________________________________________________
dense_548 (Dense)            (None, 5)                 505       
Total params: 6,366,705
Trainable params: 6,366,705
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_132 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_132 (Gl (None, 100)               0         
_________________________________________________________________
dense_553 (Dense)            (None, 200)               20200     
_________________________________________________________________
dense_554 (Dense)            (None, 5)                 1005      
Total params: 6,377,305
Trainable params: 6,377,305
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_135 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_135 (Gl (None, 100)               0         
_________________________________________________________________
dense_559 (Dense)            (None, 300)               30300     
_________________________________________________________________
dense_560 (Dense)            (None, 5)                 1505      
Total params: 6,387,905
Trainable params: 6,387,905
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_138 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_138 (Gl (None, 100)               0         
_________________________________________________________________
dense_565 (Dense)            (None, 100)               10100     
_________________________________________________________________
dense_566 (Dense)            (None, 5)                 505       
Total params: 6,366,705
Trainable params: 10,605
Non-trainable params: 6,356,100
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_143 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_143 (Gl (None, 100)               0         
_________________________________________________________________
dense_575 (Dense)            (None, 200)               20200     
_________________________________________________________________
dense_576 (Dense)            (None, 5)                 1005      
Total params: 6,377,305
Trainable params: 21,205
Non-trainable params: 6,356,100
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_144 (Embedding)  

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_149 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_149 (Gl (None, 100)               0         
_________________________________________________________________
dense_587 (Dense)            (None, 100)               10100     
_________________________________________________________________
dense_588 (Dense)            (None, 5)                 505       
Total params: 6,366,705
Trainable params: 6,366,705
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_150 (Embedding)    (No

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_155 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_155 (Gl (None, 100)               0         
_________________________________________________________________
dense_599 (Dense)            (None, 300)               30300     
_________________________________________________________________
dense_600 (Dense)            (None, 5)                 1505      
Total params: 6,387,905
Trainable params: 6,387,905
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_156 (Embedding)    (No

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_160 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_160 (Gl (None, 100)               0         
_________________________________________________________________
dense_609 (Dense)            (None, 200)               20200     
_________________________________________________________________
dense_610 (Dense)            (None, 5)                 1005      
Total params: 6,377,305
Trainable params: 21,205
Non-trainable params: 6,356,100
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
_____________________________________

Epoch 10/10
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_168 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_168 (Gl (None, 100)               0         
_________________________________________________________________
dense_625 (Dense)            (None, 200)               20200     
_________________________________________________________________
dense_626 (Dense)            (None, 5)                 1005      
Total params: 6,377,305
Trainable params: 6,377,305
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_173 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_173 (Gl (None, 100)               0         
_________________________________________________________________
dense_635 (Dense)            (None, 300)               30300     
_________________________________________________________________
dense_636 (Dense)            (None, 5)                 1505      
Total params: 6,387,905
Trainable params: 6,387,905
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
__________________________________________

Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_179 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_179 (Gl (None, 100)               0         
_________________________________________________________________
dense_647 (Dense)            (None, 200)               20200     
_________________________________________________________________
dense_648 (Dense)            (None, 5)                 1005      
Total params: 6,377,305
Trainable params: 21,205
Non-trainable params: 6,356,100
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoc

Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_182 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_182 (Gl (None, 100)               0         
_________________________________________________________________
dense_653 (Dense)            (None, 300)               30300     
_________________________________________________________________
dense_654 (Dense)            (None, 5)                 1505      
Total params: 6,387,905
Trainable params: 31,805
Non-trainable params: 6,356,100
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_185 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_185 (Gl (None, 100)               0         
_________________________________________________________________
dense_659 (Dense)            (None, 100)               10100     
_________________________________________________________________
dense_660 (Dense)            (None, 5)                 505       
Total params: 6,366,705
Trainable params: 6,366,705
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_188 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_188 (Gl (None, 100)               0         
_________________________________________________________________
dense_665 (Dense)            (None, 200)               20200     
_________________________________________________________________
dense_666 (Dense)            (None, 5)                 1005      
Total params: 6,377,305
Trainable params: 6,377,305
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_191 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_191 (Gl (None, 100)               0         
_________________________________________________________________
dense_671 (Dense)            (None, 300)               30300     
_________________________________________________________________
dense_672 (Dense)            (None, 5)                 1505      
Total params: 6,387,905
Trainable params: 6,387,905
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_195 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_195 (Gl (None, 100)               0         
_________________________________________________________________
dense_679 (Dense)            (None, 200)               20200     
_________________________________________________________________
dense_680 (Dense)            (None, 5)                 1005      
Total params: 6,377,305
Trainable params: 21,205
Non-trainable params: 6,356,100
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_196 (Embedding)  

Total params: 6,387,905
Trainable params: 31,805
Non-trainable params: 6,356,100
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_201 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_201 (Gl (None, 100)               0         
_________________________________________________________________
dense_691 (Dense)            (None, 100)               10100     
_________________________________________________________________
dense_692 (Dense)            (None, 5)                 505       
Total params: 6,366,705
Trainable params: 6,366,705
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
_________________

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_207 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_207 (Gl (None, 100)               0         
_________________________________________________________________
dense_703 (Dense)            (None, 300)               30300     
_________________________________________________________________
dense_704 (Dense)            (None, 5)                 1505      
Total params: 6,387,905
Trainable params: 6,387,905
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_208 (Embedding)    (No

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_216 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_216 (Gl (None, 100)               0         
_________________________________________________________________
dense_721 (Dense)            (None, 300)               30300     
_________________________________________________________________
dense_722 (Dense)            (None, 5)                 1505      
Total params: 6,387,905
Trainable params: 31,805
Non-trainable params: 6,356,100
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_217 (E

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_221 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_221 (Gl (None, 100)               0         
_________________________________________________________________
dense_731 (Dense)            (None, 100)               10100     
_________________________________________________________________
dense_732 (Dense)            (None, 5)                 505       
Total params: 6,366,705
Trainable params: 6,366,705
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
__________________________________________

Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_229 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_229 (Gl (None, 100)               0         
_________________________________________________________________
dense_747 (Dense)            (None, 100)               10100     
_________________________________________________________________
dense_748 (Dense)            (None, 5)                 505       
Total params: 6,366,705
Trainable params: 10,605
Non-trainable params: 6,356,100
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoc

Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_232 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_232 (Gl (None, 100)               0         
_________________________________________________________________
dense_753 (Dense)            (None, 200)               20200     
_________________________________________________________________
dense_754 (Dense)            (None, 5)                 1005      
Total params: 6,377,305
Trainable params: 21,205
Non-trainable params: 6,356,100
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_235 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_235 (Gl (None, 100)               0         
_________________________________________________________________
dense_759 (Dense)            (None, 300)               30300     
_________________________________________________________________
dense_760 (Dense)            (None, 5)                 1505      
Total params: 6,387,905
Trainable params: 31,805
Non-trainable params: 6,356,100
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_238 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_238 (Gl (None, 100)               0         
_________________________________________________________________
dense_765 (Dense)            (None, 100)               10100     
_________________________________________________________________
dense_766 (Dense)            (None, 5)                 505       
Total params: 6,366,705
Trainable params: 6,366,705
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_241 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_241 (Gl (None, 100)               0         
_________________________________________________________________
dense_771 (Dense)            (None, 200)               20200     
_________________________________________________________________
dense_772 (Dense)            (None, 5)                 1005      
Total params: 6,377,305
Trainable params: 6,377,305
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_244 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_244 (Gl (None, 100)               0         
_________________________________________________________________
dense_777 (Dense)            (None, 300)               30300     
_________________________________________________________________
dense_778 (Dense)            (None, 5)                 1505      
Total params: 6,387,905
Trainable params: 6,387,905
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20


[Parallel(n_jobs=1)]: Done 162 out of 162 | elapsed: 58.7min finished


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_246 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
global_max_pooling1d_246 (Gl (None, 100)               0         
_________________________________________________________________
dense_781 (Dense)            (None, 300)               30300     
_________________________________________________________________
dense_782 (Dense)            (None, 5)                 1505      
Total params: 6,387,905
Trainable params: 6,387,905
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [47]:
def pretrained_embedding_CNN(input_dim, embedding_matrix, embedding_dim, units, maxlen, trainable):
    model = Sequential()
    model.add(layers.Embedding(input_dim=input_dim, 
                               weights=[embedding_matrix],
                               output_dim=embedding_dim, 
                               input_length=maxlen,
                               trainable=trainable))
    model.add(layers.Conv1D(128, 5, activation='relu'))
    model.add(layers.GlobalMaxPool1D())
    model.add(layers.Dense(units, activation='relu'))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(5, activation='softmax'))
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    return model

In [48]:
np.random.seed(42)
param_grid_pretrained_embedding_CNN = dict(epochs=[5,7],  batch_size=[10,32],
                                                 units=[100,200,300], trainable=[False, True])
model = KerasClassifier(build_fn=pretrained_embedding_CNN, input_dim=vocab_size,
                        maxlen=maxlen, embedding_dim=embedding_dim, embedding_matrix=embedding_matrix)
grid = GridSearchCV(estimator=model, param_grid=param_grid_pretrained_embedding_CNN,
                              cv=cv, verbose=1)
grid_result_pretrained_embedding_CNN = grid.fit(X_train_embedding, y_train)

Fitting 3 folds for each of 24 candidates, totalling 72 fits
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_344 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
conv1d_98 (Conv1D)           (None, 9996, 128)         64128     
_________________________________________________________________
global_max_pooling1d_344 (Gl (None, 128)               0         
_________________________________________________________________
dense_977 (Dense)            (None, 100)               12900     
_________________________________________________________________
dropout_1 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_978 (Dense)            (None, 5)                 505       
Total params: 6,433,633
Trainable params: 77,533
Non-trainable params: 6,356,100


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_354 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
conv1d_108 (Conv1D)          (None, 9996, 128)         64128     
_________________________________________________________________
global_max_pooling1d_354 (Gl (None, 128)               0         
_________________________________________________________________
dense_997 (Dense)            (None, 100)               12900     
_________________________________________________________________
dropout_11 (Dropout)         (None, 100)               0         
_________________________________________________________________
dense_998 (Dense)            (None, 5)                 505       
Total params: 6,433,633
Trainable params: 6,433,633
Non-trainable params: 0
________________

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_359 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
conv1d_113 (Conv1D)          (None, 9996, 128)         64128     
_________________________________________________________________
global_max_pooling1d_359 (Gl (None, 128)               0         
_________________________________________________________________
dense_1007 (Dense)           (None, 300)               38700     
_________________________________________________________________
dropout_16 (Dropout)         (None, 300)               0         
_________________________________________________________________
dense_1008 (Dense)           (None, 5)                 1505      
Total params: 6,460,433
Trainable params: 6,460,433
Non-trainable params: 0
________________

Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_364 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
conv1d_118 (Conv1D)          (None, 9996, 128)         64128     
_________________________________________________________________
global_max_pooling1d_364 (Gl (None, 128)               0         
_________________________________________________________________
dense_1017 (Dense)           (None, 100)               12900     
_________________________________________________________________
dropout_21 (Dropout)         (None, 100)               0         
_________________________________________________________________
dense_1018 (Dense)           (None, 5)                 505       
Total params: 6,433,633
Trainable params: 77,533
Non-trainable params: 6

Epoch 7/7
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_368 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
conv1d_122 (Conv1D)          (None, 9996, 128)         64128     
_________________________________________________________________
global_max_pooling1d_368 (Gl (None, 128)               0         
_________________________________________________________________
dense_1025 (Dense)           (None, 300)               38700     
_________________________________________________________________
dropout_25 (Dropout)         (None, 300)               0         
_________________________________________________________________
dense_1026 (Dense)           (None, 5)                 1505      
Total params: 6,460,433
Trainable params: 104,333
Non-trainable params: 6,356,100
__________________________________________________

Total params: 6,433,633
Trainable params: 6,433,633
Non-trainable params: 0
_________________________________________________________________
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_373 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
conv1d_127 (Conv1D)          (None, 9996, 128)         64128     
_________________________________________________________________
global_max_pooling1d_373 (Gl (None, 128)               0         
_________________________________________________________________
dense_1035 (Dense)           (None, 100)               12900     
_________________________________________________________________
dropout_30 (Dropout)         (None, 100)               0         
______________________________________________________________

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_382 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
conv1d_136 (Conv1D)          (None, 9996, 128)         64128     
_________________________________________________________________
global_max_pooling1d_382 (Gl (None, 128)               0         
_________________________________________________________________
dense_1053 (Dense)           (None, 100)               12900     
_________________________________________________________________
dropout_39 (Dropout)         (None, 100)               0         
_________________________________________________________________
dense_1054 (Dense)           (None, 5)                 505       
Total params: 6,433,633
Trainable params: 77,533
Non-trainable params: 6,356,100
___________

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_387 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
conv1d_141 (Conv1D)          (None, 9996, 128)         64128     
_________________________________________________________________
global_max_pooling1d_387 (Gl (None, 128)               0         
_________________________________________________________________
dense_1063 (Dense)           (None, 300)               38700     
_________________________________________________________________
dropout_44 (Dropout)         (None, 300)               0         
_________________________________________________________________
dense_1064 (Dense)           (None, 5)                 1505      
Total params: 6,460,433
Trainable params: 104,333
Non-trainable params: 6,356,100
__________

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_392 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
conv1d_146 (Conv1D)          (None, 9996, 128)         64128     
_________________________________________________________________
global_max_pooling1d_392 (Gl (None, 128)               0         
_________________________________________________________________
dense_1073 (Dense)           (None, 200)               25800     
_________________________________________________________________
dropout_49 (Dropout)         (None, 200)               0         
_________________________________________________________________
dense_1074 (Dense)           (None, 5)                 1005      
Total params: 6,447,033
Trainable params: 6,447,033
Non-trainable params: 0
________________

Total params: 6,460,433
Trainable params: 6,460,433
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_397 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
conv1d_151 (Conv1D)          (None, 9996, 128)         64128     
_________________________________________________________________
global_max_pooling1d_397 (Gl (None, 128)               0         
_________________________________________________________________
dense_1083 (Dense)           (None, 300)               38700     
_________________________________________________________________
dropout_54 (Dropout)         (None, 300)               0         
_________________________________________________________________
dense_1084 (Dens

Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_406 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
conv1d_160 (Conv1D)          (None, 9996, 128)         64128     
_________________________________________________________________
global_max_pooling1d_406 (Gl (None, 128)               0         
_________________________________________________________________
dense_1101 (Dense)           (None, 300)               38700     
_________________________________________________________________
dropout_63 (Dropout)         (None, 300)               0         
_________________________________________________________________
dense_1102 (Dense)           (None, 5)                 1505      
Total params: 6,460,433
Trainable params: 104,333
Non-trainable params: 

Total params: 6,460,433
Trainable params: 6,460,433
Non-trainable params: 0
_________________________________________________________________
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_415 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
conv1d_169 (Conv1D)          (None, 9996, 128)         64128     
_________________________________________________________________
global_max_pooling1d_415 (Gl (None, 128)               0         
_________________________________________________________________
dense_1119 (Dense)           (None, 300)               38700     
_________________________________________________________________
dropout_72 (Dropout)         (None, 300)               0         
______________________________________________________________

[Parallel(n_jobs=1)]: Done  72 out of  72 | elapsed: 202.1min finished


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_416 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
conv1d_170 (Conv1D)          (None, 9996, 128)         64128     
_________________________________________________________________
global_max_pooling1d_416 (Gl (None, 128)               0         
_________________________________________________________________
dense_1121 (Dense)           (None, 300)               38700     
_________________________________________________________________
dropout_73 (Dropout)         (None, 300)               0         
_________________________________________________________________
dense_1122 (Dense)           (None, 5)                 1505      
Total params: 6,460,433
Trainable params: 6,460,433
Non-trainable params: 0
_________________________________________________________________


In [52]:
def pretrained_embedding_LSTM(input_dim, embedding_matrix, embedding_dim, units, maxlen, trainable):
    model = Sequential()
    model.add(layers.Embedding(input_dim=input_dim, 
                               weights=[embedding_matrix],
                               output_dim=embedding_dim, 
                               input_length=maxlen,
                               trainable=trainable))
    model.add(layers.LSTM(64))
    model.add(layers.Dense(units, activation='relu'))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(5, activation='softmax'))
    model.compile(optimizer='adam',
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
    model.summary()
    return model

In [55]:
np.random.seed(42)
param_grid_pretrained_embedding_LSTM = dict(epochs=[7], batch_size=[64],
                                                 units=[200], trainable=[True])
model = KerasClassifier(build_fn=pretrained_embedding_LSTM, input_dim=vocab_size,
                        maxlen=maxlen, embedding_dim=embedding_dim, embedding_matrix=embedding_matrix)
grid = GridSearchCV(estimator=model, param_grid=param_grid_pretrained_embedding_LSTM,
                              cv=cv, verbose=1)
grid_result_pretrained_embedding_LSTM = grid.fit(X_train_embedding, y_train)

Fitting 3 folds for each of 1 candidates, totalling 3 fits
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_430 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
lstm_14 (LSTM)               (None, 64)                42240     
_________________________________________________________________
dense_1149 (Dense)           (None, 200)               13000     
_________________________________________________________________
dropout_87 (Dropout)         (None, 200)               0         
_________________________________________________________________
dense_1150 (Dense)           (None, 5)                 1005      
Total params: 6,412,345
Trainable params: 6,412,345
Non-trainable params: 0
_________________________________________________________________
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
___

[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed: 83.8min finished


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_433 (Embedding)    (None, 10000, 100)        6356100   
_________________________________________________________________
lstm_17 (LSTM)               (None, 64)                42240     
_________________________________________________________________
dense_1155 (Dense)           (None, 200)               13000     
_________________________________________________________________
dropout_90 (Dropout)         (None, 200)               0         
_________________________________________________________________
dense_1156 (Dense)           (None, 5)                 1005      
Total params: 6,412,345
Trainable params: 6,412,345
Non-trainable params: 0
_________________________________________________________________
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


After one day of training all these models i have some results. Let's look into it:

- the fastest and the most accuracy model is simple perceptron.
- neural networks like CNN or LSTM are too slowly but can show better quality. 
- the model with bag of words works better than with embedding.

I think that CNN or LSTM will work better at all, but i can't afford them.
As we can see below models like simple perceptron, CNN, and model with embedding the most accurate.
I'm going to continue to search the best parameters for model with embedding.

In [136]:
def result_df(params, grid, name):
    temp = pd.concat([pd.DataFrame(params),
              pd.DataFrame(grid.best_params_, index=['best params'])])
    temp.index.name = name
    return temp

In [113]:
best_scores = {'best_score':[]}
best_scores['best_score'].append(grid_result_embedding_multilayer_perceptron_one_layer.best_score_)
best_scores['best_score'].append(grid_result_multilayer_perceptron_one_layer.best_score_)
best_scores['best_score'].append(grid_result_pretrained_embedding_CNN.best_score_)
best_scores['best_score'].append(grid_result_pretrained_embedding_LSTM.best_score_)
best_scores['best_score'].append(grid_result_pretrained_embedding_multilayer_perceptron_one_layer.best_score_)

In [128]:
pd.DataFrame(best_scores, index=['embedding', 'simple perceptron', 'CNN', 'LSTM', 'pretrained embedding'])

Unnamed: 0,best_score
embedding,0.916667
simple perceptron,0.916667
CNN,0.861111
LSTM,0.5
pretrained embedding,0.805556


In [138]:
result_df(param_grid_embedding_multilayer_perceptron_one_layer, 
          grid_result_embedding_multilayer_perceptron_one_layer, 'embedding')

Unnamed: 0_level_0,batch_size,epochs,units
embedding,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,10,5,100
1,32,10,200
2,64,20,300
best params,10,20,300


In [139]:
result_df(param_grid_multilayer_perceptron_one_layer, 
          grid_result_multilayer_perceptron_one_layer, 'simple perceptron')

Unnamed: 0_level_0,batch_size,epochs,units
simple perceptron,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,10.0,5,100
1,32.0,10,200
2,64.0,20,300
3,,30,400
best params,32.0,5,100


In [140]:
result_df(param_grid_pretrained_embedding_CNN, 
          grid_result_pretrained_embedding_CNN, 'CNN')

Unnamed: 0_level_0,batch_size,epochs,trainable,units
CNN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,10.0,5.0,False,100
1,32.0,7.0,True,200
2,,,,300
best params,10.0,5.0,True,300


In [141]:
result_df(param_grid_pretrained_embedding_LSTM, 
          grid_result_pretrained_embedding_LSTM, 'LSTM')

Unnamed: 0_level_0,batch_size,epochs,trainable,units
LSTM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,64,7,True,200
best params,64,7,True,200


In [137]:
result_df(param_grid_pretrained_embedding_multilayer_perceptron_one_layer, 
          grid_result_pretrained_embedding_multilayer_perceptron_one_layer, 'pretrained embedding')

Unnamed: 0_level_0,batch_size,epochs,trainable,units
pretrained embedding,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,10,5,False,100
1,32,10,True,200
2,64,20,,300
best params,10,20,True,300


In [27]:
np.random.seed(42)
param_grid_embedding_multilayer_perceptron_one_layer_2 = dict(epochs=[20], batch_size=[10],
                                                 units=[200])
model = KerasClassifier(build_fn=embedding_multilayer_perceptron_one_layer, input_dim=vocab_size,
                        maxlen=maxlen)
grid = GridSearchCV(estimator=model, param_grid=param_grid_embedding_multilayer_perceptron_one_layer_2,
                              cv=cv, verbose=1)
grid_result_embedding_multilayer_perceptron_one_layer_2 = grid.fit(X_train_embedding, y_train)

Fitting 3 folds for each of 1 candidates, totalling 3 fits
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 5000, 200)         12712200  
_________________________________________________________________
global_max_pooling1d_1 (Glob (None, 200)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 200)               40200     
_________________________________________________________________
dense_2 (Dense)              (None, 5)                 1005      
Total params: 12,753,405
Trainable params: 12,753,405
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18

Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:  5.7min finished


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_4 (Embedding)      (None, 5000, 200)         12712200  
_________________________________________________________________
global_max_pooling1d_4 (Glob (None, 200)               0         
_________________________________________________________________
dense_7 (Dense)              (None, 200)               40200     
_________________________________________________________________
dense_8 (Dense)              (None, 5)                 1005      
Total params: 12,753,405
Trainable params: 12,753,405
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [45]:
def get_report(y_true, y_predicted):
    """Calcucate metrics such as: Precision, recall, f1 and accuracy"""
    
    N = np.unique(y_true).shape[0]
    confusion_matrix = np.zeros((N, N), dtype=int)
    
    for index in range(y_true.shape[0]):
        confusion_matrix[y_predicted[index]][y_true[index]] += 1
        
    recall = np.array([confusion_matrix[i,i]/sum(confusion_matrix[:,i]) for i in range(N)])
    precision = np.array([confusion_matrix[i,i]/sum(confusion_matrix[i, :]) for i in range(N)])
    f1 = 2 * precision * recall / (precision + recall)
    
    correct_answers = 0
    for i in range(len(y_predicted)):
        if y_true[i] == y_predicted[i]:
            correct_answers += 1
    
    return confusion_matrix, precision, recall, f1, correct_answers/y_true.shape[0]

In [47]:
def to_dataframe_confussion_matrix(confussion_matrix, names):
    """Function to visualizate confussion_matrix"""
    
    actual = lambda: ['Actual ' + i for i in names]
    predicted = lambda: ['Predicted ' + i for i in names]
    
    df = pd.DataFrame(confussion_matrix, index=predicted(), columns=actual())
    
    return df

In [48]:
def PRA(precision, recall, f1, accuracity, names):
    """Function to visualizate metrics"""
    
    name_class = lambda: [i for i in names]
    
    df_1 = pd.DataFrame(precision, index=name_class(), columns=['Precision'])
    df_2 = pd.DataFrame(recall, index=name_class(), columns=['Recall'])
    df_3 = pd.DataFrame(f1, index=name_class(), columns=['F1'])
    df_4 = pd.DataFrame(accuracity, index=[names[0]], columns=['Accuracity'])
    df = pd.concat((df_1, df_2, df_3, df_4), axis=1, sort=False)
    
    df.fillna('', inplace=True)
    return df

In [35]:
predicted = grid_result_embedding_multilayer_perceptron_one_layer_2.predict(X_test_embedding)

In [46]:
confusion_matrix, precision, recall, f1, acc = get_report(np.argmax(y_test, axis=1), predicted)

In [50]:
PRA(precision, recall, f1, acc, np.unique(data['target'].values))

Unnamed: 0,Precision,Recall,F1,Accuracity
DRAMA,1.0,1.0,1.0,1.0
EROTIC,1.0,1.0,1.0,
RELIGION,1.0,1.0,1.0,
SONGS,1.0,1.0,1.0,
TEACHER,1.0,1.0,1.0,


In [51]:
to_dataframe_confussion_matrix(confusion_matrix,np.unique(data['target'].values))

Unnamed: 0,Actual DRAMA,Actual EROTIC,Actual RELIGION,Actual SONGS,Actual TEACHER
Predicted DRAMA,13,0,0,0,0
Predicted EROTIC,0,8,0,0,0
Predicted RELIGION,0,0,1,0,0
Predicted SONGS,0,0,0,2,0
Predicted TEACHER,0,0,0,0,6


The results for test is perfect. The main purpose of ML is to predict unseen data. 
The most likely this problem is exclusion from rules, rarely happens that metrics are so good.

In [52]:
predicted = grid_result_embedding_multilayer_perceptron_one_layer_2.predict(X_train_embedding)

In [53]:
confusion_matrix, precision, recall, f1, acc = get_report(np.argmax(y_train, axis=1), predicted)

In [54]:
PRA(precision, recall, f1, acc, np.unique(data['target'].values))

Unnamed: 0,Precision,Recall,F1,Accuracity
DRAMA,1.0,1.0,1.0,1.0
EROTIC,1.0,1.0,1.0,
RELIGION,1.0,1.0,1.0,
SONGS,1.0,1.0,1.0,
TEACHER,1.0,1.0,1.0,


In [56]:
to_dataframe_confussion_matrix(confusion_matrix,np.unique(data['target'].values))

Unnamed: 0,Actual DRAMA,Actual EROTIC,Actual RELIGION,Actual SONGS,Actual TEACHER
Predicted DRAMA,63,0,0,0,0
Predicted EROTIC,0,29,0,0,0
Predicted RELIGION,0,0,3,0,0
Predicted SONGS,0,0,0,16,0
Predicted TEACHER,0,0,0,0,16


The same we see with train data, i can't say that model overfitted(am i wrong?) because metrics on test data is good. Maybe if i had more data on test, the results would be another