In [1]:
# import packages

# Hyperas/TensorFlow
# the __future__ import command must be in the beginning of the notebook
from __future__ import print_function

from hyperopt import Trials, STATUS_OK, tpe
from tensorflow.python.keras.layers.core import Dense, Dropout, Activation, Flatten
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.utils import np_utils

from hyperas import optim
from hyperas.distributions import choice, uniform

import tensorflow as tf
from tensorflow.keras.layers import LSTM, BatchNormalization
from tensorflow.keras.layers import Conv1D
from tensorflow.keras.layers import MaxPooling1D
from tensorflow.keras.layers import GlobalAveragePooling1D
from tensorflow.keras.optimizers import SGD, Adam, RMSprop
from tensorflow.keras.callbacks import EarlyStopping

# Basics
import pandas as pd
pd.set_option("display.max_columns", None)
import numpy as np

# Filter warnings
import warnings
warnings.filterwarnings("ignore")

# Preprocessing; model selection and evaluation
from sklearn.model_selection import train_test_split

# text handling
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer

# for custom countvectorizer with SpaCy lemmatization
import spacy
from sklearn.feature_extraction.text import CountVectorizer, VectorizerMixin
from sklearn.base import TransformerMixin, BaseEstimator
from scipy.sparse import csr_matrix

# WordCloud
from os import path
from PIL import Image
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS

Using TensorFlow backend.


------
If we need to move virtual ENV to use Tensorflow we will need to install:

1. spacy

<code> conda install -c spacy spacy </code>

2. 'en_core_web_md'

<code> python -m spacy download en_core_web_md </code>

3. wordcloud

<code> conda install -c conda-forge wordcloud </code>

------

In [2]:
# import packages
data = pd.read_csv("saved_csv/df.csv")
data.drop(columns = "Unnamed: 0",inplace=True)

df = data.copy()

### Creating a model to predict comfort level using text responses
------

In [3]:
# Grabbing the responses as independent variables
corpus = df.iloc[:,-9]

# grabbing the dependent variables
dependent_class = pd.read_csv("saved_csv/q1_dependent_alt.csv")
dependent_class.drop('Unnamed: 0',axis=1,inplace=True)

dependent_class_alt = dependent_class.copy()

dependent_class_alt[dependent_class_alt < 2] = 0
dependent_class_alt[dependent_class_alt >= 2] = 1

In [4]:
# Creating a table with both independent and dependent variables
table = pd.concat([corpus,dependent_class_alt],axis=1)

# dropping columns that did not answer the question
index = table[table.iloc[:,0]=="Did not answer"].index

table.drop(index,axis=0,inplace=True)

In [5]:
# Text processing to prepare data for RNN

# Lemmatization using SpaCy
nlp = spacy.load('en_core_web_md')

sentences = []

for num in range(len(table)):
    doc = nlp(table.iloc[num,0])

    sentence = []
    for token in doc:
        sentence.append(token.lemma_)

    sentences.append(" ".join(sentence))

# Processing text with TfidfVectorizer
tf_model = TfidfVectorizer(stop_words=STOPWORDS,ngram_range=(1,3), min_df=3)
tf_vectors = tf_model.fit_transform(sentences); tf_vectors

<789x1127 sparse matrix of type '<class 'numpy.float64'>'
	with 11866 stored elements in Compressed Sparse Row format>

In [6]:
# saving files to be loaded in Hyperas functions
np.save("saved_csv/tf_vectors.npy", tf_vectors.toarray(), allow_pickle=True, fix_imports=True)

table.to_csv("saved_csv/table.csv")

In [19]:
# Tuning hyperparameter with Hyperas
# Code source: https://github.com/maxpumperla/hyperas

# for RNN

def data():
    """
    Data providing function:

    This function is separated from create_model() so that hyperopt
    won't reload data for each evaluation run.
    """
    tf_vectors = np.load("saved_csv/tf_vectors.npy")
    
    table = pd.read_csv("saved_csv/table.csv")
    table.drop(columns = "Unnamed: 0",inplace=True)
    
    x_train, x_test, y_train, y_test = train_test_split(tf_vectors,table.iloc[:,1].values,test_size = 0.2)

    x_train = x_train.reshape(631,1127,1)
    y_train = y_train.reshape(631,1)
    x_test = x_test.reshape(158,1127,1)
    y_test = y_test.reshape(158,1)
    
    return x_train, y_train, x_test, y_test


def create_model(x_train, y_train, x_test, y_test):
    """
    Model providing function:

    Create Keras model with double curly brackets dropped-in as needed.
    Return value has to be a valid python dictionary with two customary keys:
        - loss: Specify a numeric evaluation metric to be minimized
        - status: Just use STATUS_OK and see hyperopt documentation if not feasible
    The last one is optional, though recommended, namely:
        - model: specify the model just created so that we can later use it again.
    """
    model = Sequential()

    model.add(LSTM({{choice([32,64,96,128])}},activation={{choice(["relu","elu"])}}, 
                   input_shape = (x_train.shape[1:]), return_sequences=True, dropout={{uniform(0,1)}}))
    model.add(BatchNormalization())

    model.add(LSTM({{choice([32,64,96,128])}}, activation={{choice(["relu","elu"])}}, dropout={{uniform(0,1)}}))
    model.add(BatchNormalization())

    model.add(Dense({{choice([32,64,96,128])}}, activation={{choice(["relu","elu"])}}))
    model.add(Dropout({{uniform(0,1)}}))

    model.add(Dense(2, activation={{choice(["softmax","sigmoid"])}}))

    # setting up optimizer hyperparameters
    sgd = SGD(lr={{uniform(0,0.01)}},decay=0.0, momentum = 0.0, nesterov=False, clipnorm=2.0)

    # compile model
    model.compile(loss="sparse_categorical_crossentropy", optimizer = sgd, metrics = ["accuracy"])

    es = EarlyStopping(monitor='val_loss', mode="min", patience=2, verbose=1)

    result = model.fit(x_train,y_train, batch_size = {{choice([16, 32, 64])}}, epochs = {{choice([5, 10, 15])}}, 
                       callbacks = [es], validation_split=0.2)

    validation_acc = np.amax(result.history['val_acc']) 
    print('Best validation acc of epoch:', validation_acc)
    return {'loss': -validation_acc, 'status': STATUS_OK, 'model': model}


if __name__ == '__main__':
    best_run, best_model = optim.minimize(model=create_model,data=data,algo=tpe.suggest,max_evals=5,trials=Trials(),
                                          notebook_name='Capstone modelling stage v.3-RNN')
    X_train, Y_train, X_test, Y_test = data()
    print("Evaluation of best performing model:")
    print(best_model.evaluate(X_test, Y_test))
    print("Best performing model chosen hyper-parameters:")
    print(best_run)

>>> Imports:
#coding=utf-8

from __future__ import print_function

try:
    from hyperopt import Trials, STATUS_OK, tpe
except:
    pass

try:
    from tensorflow.python.keras.layers.core import Dense, Dropout, Activation, Flatten
except:
    pass

try:
    from tensorflow.python.keras.models import Sequential
except:
    pass

try:
    from tensorflow.python.keras.utils import np_utils
except:
    pass

try:
    from hyperas import optim
except:
    pass

try:
    from hyperas.distributions import choice, uniform
except:
    pass

try:
    import tensorflow as tf
except:
    pass

try:
    from tensorflow.keras.layers import LSTM, BatchNormalization
except:
    pass

try:
    from tensorflow.keras.layers import Conv1D
except:
    pass

try:
    from tensorflow.keras.layers import MaxPooling1D
except:
    pass

try:
    from tensorflow.keras.layers import GlobalAveragePooling1D
except:
    pass

try:
    from tensorflow.keras.optimizers import SGD, Adam, RMSprop
except:
    pass

try:


 64/504 [==>...........................]           
 - ETA: 28s - loss: 0.6450 - acc: 0.6250           
                                                  
 - ETA: 24s - loss: 0.8242 - acc: 0.5625           
                                                  
 - ETA: 19s - loss: 0.8061 - acc: 0.5365           
                                                  
 - ETA: 15s - loss: 0.7884 - acc: 0.5273           
                                                  
 - ETA: 11s - loss: 0.7880 - acc: 0.5125           
                                                  
 - ETA: 7s - loss: 0.8047 - acc: 0.5078            
                                                   
 - ETA: 3s - loss: 0.7957 - acc: 0.5000            
                                                   
 - 32s 63ms/step - loss: 0.7996 - acc: 0.4861 - val_loss: 0.6936 - val_acc: 0.5039

Epoch 00003: early stopping                        
Best validation acc of epoch:                      
0.5039370104553192                   

                                                                            
 - ETA: 17s - loss: 0.7289 - acc: 0.4766                                     
                                                                            
 - ETA: 12s - loss: 0.7287 - acc: 0.4808                                     
                                                                            
 - ETA: 7s - loss: 0.7298 - acc: 0.4732                                      
                                                                             
 - ETA: 3s - loss: 0.7249 - acc: 0.4896                                      
                                                                             
 - 76s 151ms/step - loss: 0.7238 - acc: 0.4940 - val_loss: 0.6931 - val_acc: 0.5039

Epoch 2/15                                                                   
 32/504 [>.............................]                                     
 - ETA: 59s - loss: 0.7788 - acc: 0.5938                    

                                                                             
 - ETA: 3s - loss: 0.7177 - acc: 0.5271                                      
                                                                             
 - 77s 154ms/step - loss: 0.7175 - acc: 0.5258 - val_loss: 0.6931 - val_acc: 0.5039

Epoch 00003: early stopping                                                  
Best validation acc of epoch:                                                
0.5039370048233843                                                           
Train on 504 samples, validate on 127 samples                                 
Epoch 1/10                                                                    
 16/504 [..............................]                                      
 - ETA: 4:46 - loss: 0.6953 - acc: 0.6250                                     
                                                                              
 32/504 [>.............................]            

                                                                              
 - 129s 255ms/step - loss: 0.6953 - acc: 0.5119 - val_loss: 0.6932 - val_acc: 0.5039

Epoch 2/10                                                                    
 16/504 [..............................]                                      
 - ETA: 2:03 - loss: 0.6625 - acc: 0.8125                                     
                                                                              
 32/504 [>.............................]                                      
 - ETA: 1:56 - loss: 0.6472 - acc: 0.7188                                     
                                                                              
 48/504 [=>............................]                                      
 - ETA: 1:54 - loss: 0.6517 - acc: 0.7083                                     
                                                                              
 64/504 [==>...........................]     

 32/504 [>.............................]                                      
 - ETA: 1:56 - loss: 0.7190 - acc: 0.5000                                     
                                                                              
 48/504 [=>............................]                                      
 - ETA: 1:49 - loss: 0.7025 - acc: 0.5000                                     
                                                                              
 64/504 [==>...........................]                                      
 - ETA: 1:49 - loss: 0.7034 - acc: 0.4688                                     
                                                                              
 80/504 [===>..........................]                                      
 - ETA: 1:44 - loss: 0.7165 - acc: 0.4375                                     
                                                                              
 96/504 [====>.........................]            

 - ETA: 1:34 - loss: 0.6794 - acc: 0.4844                                     
                                                                              
 80/504 [===>..........................]                                      
 - ETA: 1:32 - loss: 0.6811 - acc: 0.5000                                     
                                                                              
 96/504 [====>.........................]                                      
 - ETA: 1:29 - loss: 0.6992 - acc: 0.4688                                     
                                                                              
112/504 [=====>........................]                                      
 - ETA: 1:27 - loss: 0.6964 - acc: 0.4911                                     
                                                                              
 - ETA: 1:24 - loss: 0.6947 - acc: 0.5000                                     
                                                    

                                                                              
112/504 [=====>........................]                                      
 - ETA: 1:21 - loss: 0.7003 - acc: 0.5268                                     
                                                                              
 - ETA: 1:18 - loss: 0.6982 - acc: 0.5469                                     
                                                                              
 - ETA: 1:15 - loss: 0.7188 - acc: 0.5347                                     
                                                                              
 - ETA: 1:12 - loss: 0.7229 - acc: 0.5312                                     
                                                                              
 - ETA: 1:08 - loss: 0.7165 - acc: 0.5568                                     
                                                                              
 - ETA: 1:05 - loss: 0.7166 - acc: 0.5417           

 - 42s 83ms/step - loss: 0.7243 - acc: 0.5099 - val_loss: 0.6932 - val_acc: 0.5039

Epoch 2/15                                                                  
 64/504 [==>...........................]                                    
 - ETA: 28s - loss: 0.7263 - acc: 0.5000                                    
                                                                           
 - ETA: 24s - loss: 0.7274 - acc: 0.5391                                    
                                                                           
 - ETA: 19s - loss: 0.7071 - acc: 0.5521                                    
                                                                           
 - ETA: 15s - loss: 0.7191 - acc: 0.5391                                    
                                                                           
 - ETA: 11s - loss: 0.7084 - acc: 0.5469                                    
                                                                         

In [20]:
# Using results from Hyperas to create the model

def RNN_model(X_train,y_train):
    model = Sequential()

    model.add(LSTM(64,activation="elu", input_shape = (X_train.shape[1:]), return_sequences=True, dropout=0.11729755246044238))
    model.add(BatchNormalization())

    model.add(LSTM(128, activation="relu", dropout=0.8444244099007299))
    model.add(BatchNormalization())

    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5350807190884803))

    model.add(Dense(2, activation="softmax"))

    # setting up SGD (optimizer) hyperparameters
    sgd = SGD(lr=0.00026079803111884515,decay=0.0, momentum = 0.0, nesterov=False, clipnorm=2.0)

    # compile model
    model.compile(loss="sparse_categorical_crossentropy", optimizer = sgd, metrics = ["accuracy"])

    es = EarlyStopping(monitor='val_loss', mode='min', patience=2, verbose=1)

    result = model.fit(X_train,y_train, batch_size = 64, epochs = 10, callbacks = [es], validation_split=0.2)
    
    return model, result

In [21]:
X_train, X_test, y_train, y_test = train_test_split(tf_vectors,table.iloc[:,1].values,test_size = 0.2)

X_train = X_train.toarray().reshape(631,1127,1)
y_train = y_train.reshape(631,1)
X_test = X_test.toarray().reshape(158,1127,1)
y_test = y_test.reshape(158,1)

model, result = RNN_model(X_train,y_train)

Train on 504 samples, validate on 127 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 00010: early stopping


In [22]:
# checking test accuracy
_, test_acc = model.evaluate(X_test, y_test, verbose=0)
test_acc

0.4810126586050927

In [28]:
# for CNN
def data():
    """
    Data providing function:

    This function is separated from create_model() so that hyperopt
    won't reload data for each evaluation run.
    """
    tf_vectors = np.load("saved_csv/tf_vectors.npy")
    
    table = pd.read_csv("saved_csv/table.csv")
    table.drop(columns = "Unnamed: 0",inplace=True)
    
    x_train, x_test, y_train, y_test = train_test_split(tf_vectors,table.iloc[:,1].values,test_size = 0.2)

    x_train = x_train.reshape(631,1127,1)
    y_train = y_train.reshape(631,1)
    x_test = x_test.reshape(158,1127,1)
    y_test = y_test.reshape(158,1)
    
    return x_train, y_train, x_test, y_test


def create_model(x_train, y_train, x_test, y_test):
    """
    Model providing function:

    Create Keras model with double curly brackets dropped-in as needed.
    Return value has to be a valid python dictionary with two customary keys:
        - loss: Specify a numeric evaluation metric to be minimized
        - status: Just use STATUS_OK and see hyperopt documentation if not feasible
    The last one is optional, though recommended, namely:
        - model: specify the model just created so that we can later use it again.
    """
    model = Sequential()

    model.add(Conv1D({{choice([32,64,96,128])}},{{choice([5,10,15,20])}},activation={{choice(["relu","elu"])}}, 
                   input_shape = (x_train.shape[1:])))
    model.add(Conv1D({{choice([32,64,96,128])}},{{choice([5,10,15,20])}},activation={{choice(["relu","elu"])}}))
    model.add(MaxPooling1D({{choice([1,2,3,4,5,6])}}))

    model.add(Conv1D({{choice([32,64,96,128])}},{{choice([5,10,15,20])}},activation={{choice(["relu","elu"])}}))
    model.add(Conv1D({{choice([32,64,96,128])}},{{choice([5,10,15,20])}},activation={{choice(["relu","elu"])}}))
    model.add(GlobalAveragePooling1D())
              
    model.add(Flatten())
    model.add(Dense({{choice([32,64,96,128])}},activation={{choice(["relu","elu"])}}))
    model.add(Dropout({{uniform(0,1)}}))
    model.add(Dense(2, activation='softmax'))
    
    sgd = SGD(lr={{uniform(0,0.01)}},decay=0.0, momentum = 0.0, nesterov=False, clipnorm=2.0)
              
    model.compile(loss="sparse_categorical_crossentropy",optimizer=sgd, metrics=['accuracy'])

    early_stop = EarlyStopping(monitor='val_loss', min_delta=0.0001, mode='min', patience=2, verbose=1)

    result = model.fit(x_train,y_train, batch_size = {{choice([16, 32, 64])}}, epochs = {{choice([5, 10, 15])}}, 
                       validation_split = 0.2, callbacks=[early_stop])

    validation_acc = np.amax(result.history['val_acc']) 
    print('Best validation acc of epoch:', validation_acc)
    return {'loss': -validation_acc, 'status': STATUS_OK, 'model': model}


if __name__ == '__main__':
    best_run, best_model = optim.minimize(model=create_model,data=data,algo=tpe.suggest,max_evals=5,trials=Trials(),
                                          notebook_name='Capstone modelling stage v.3-RNN')
    X_train, Y_train, X_test, Y_test = data()
    print("Evaluation of best performing model:")
    print(best_model.evaluate(X_test, Y_test))
    print("Best performing model chosen hyper-parameters:")
    print(best_run)

>>> Imports:
#coding=utf-8

from __future__ import print_function

try:
    from hyperopt import Trials, STATUS_OK, tpe
except:
    pass

try:
    from tensorflow.python.keras.layers.core import Dense, Dropout, Activation, Flatten
except:
    pass

try:
    from tensorflow.python.keras.models import Sequential
except:
    pass

try:
    from tensorflow.python.keras.utils import np_utils
except:
    pass

try:
    from hyperas import optim
except:
    pass

try:
    from hyperas.distributions import choice, uniform
except:
    pass

try:
    import tensorflow as tf
except:
    pass

try:
    from tensorflow.keras.layers import LSTM, BatchNormalization
except:
    pass

try:
    from tensorflow.keras.layers import Conv1D
except:
    pass

try:
    from tensorflow.keras.layers import MaxPooling1D
except:
    pass

try:
    from tensorflow.keras.layers import GlobalAveragePooling1D
except:
    pass

try:
    from tensorflow.keras.optimizers import SGD, Adam, RMSprop
except:
    pass

try:


                                                   
 - ETA: 1s - loss: 0.6929 - acc: 0.5260            
                                                   
 - ETA: 0s - loss: 0.6927 - acc: 0.5312            
                                                   
 - 9s 18ms/step - loss: 0.6926 - acc: 0.5317 - val_loss: 0.6935 - val_acc: 0.4961

Epoch 3/5                                          
 64/504 [==>...........................]           
 - ETA: 6s - loss: 0.6915 - acc: 0.5469            
                                                   
 - ETA: 5s - loss: 0.6893 - acc: 0.6094            
                                                   
 - ETA: 4s - loss: 0.6919 - acc: 0.5521            
                                                   
 - ETA: 3s - loss: 0.6919 - acc: 0.5469            
                                                   
 - ETA: 2s - loss: 0.6920 - acc: 0.5437            
                                                   
 - ETA: 1s - loss: 0.6922 - acc: 

 - ETA: 2s - loss: 0.6932 - acc: 0.4861                                      
                                                                             
 - ETA: 1s - loss: 0.6931 - acc: 0.4955                                      
                                                                             
 - ETA: 1s - loss: 0.6931 - acc: 0.5000                                      
                                                                             
 - ETA: 0s - loss: 0.6931 - acc: 0.5042                                      
                                                                             
 - ETA: 0s - loss: 0.6930 - acc: 0.5060                                      
                                                                             
 - 18s 35ms/step - loss: 0.6931 - acc: 0.5040 - val_loss: 0.6932 - val_acc: 0.4961

Epoch 2/10                                                                   
 16/504 [..............................]                  

                                                                             
 - ETA: 0s - loss: 0.6929 - acc: 0.5292                                      
                                                                             
 - ETA: 0s - loss: 0.6929 - acc: 0.5302                                      
                                                                             
 - 10s 21ms/step - loss: 0.6928 - acc: 0.5317 - val_loss: 0.6933 - val_acc: 0.4961

Epoch 3/10                                                                   
 16/504 [..............................]                                     
 - ETA: 8s - loss: 0.6903 - acc: 0.6250                                      
                                                                             
 32/504 [>.............................]                                     
 - ETA: 8s - loss: 0.6917 - acc: 0.5625                                      
                                                          

 - 10s 20ms/step - loss: 0.6924 - acc: 0.5317 - val_loss: 0.6934 - val_acc: 0.4961

Epoch 00003: early stopping                                                  
Best validation acc of epoch:                                                
0.4960629949419517                                                           
Train on 504 samples, validate on 127 samples                                
Epoch 1/15                                                                   
 16/504 [..............................]                                     
 - ETA: 2:48 - loss: 0.6934 - acc: 0.3750                                    
                                                                             
 32/504 [>.............................]                                     
 - ETA: 1:26 - loss: 0.6933 - acc: 0.4375                                    
                                                                             
 48/504 [=>............................]                  

 32/504 [>.............................]                                     
 - ETA: 9s - loss: 0.6968 - acc: 0.3750                                      
                                                                             
 48/504 [=>............................]                                     
 - ETA: 9s - loss: 0.6959 - acc: 0.4375                                      
                                                                             
 64/504 [==>...........................]                                     
 - ETA: 9s - loss: 0.6948 - acc: 0.4844                                      
                                                                             
 80/504 [===>..........................]                                     
 - ETA: 9s - loss: 0.6948 - acc: 0.4625                                      
                                                                             
 96/504 [====>.........................]                        

                                                                             
 80/504 [===>..........................]                                     
 - ETA: 9s - loss: 0.6856 - acc: 0.6000                                      
                                                                             
 96/504 [====>.........................]                                     
 - ETA: 8s - loss: 0.6891 - acc: 0.5625                                      
                                                                             
112/504 [=====>........................]                                     
 - ETA: 8s - loss: 0.6886 - acc: 0.5625                                      
                                                                             
 - ETA: 8s - loss: 0.6910 - acc: 0.5391                                      
                                                                             
 - ETA: 7s - loss: 0.6917 - acc: 0.5347                         

 - ETA: 24s - loss: 0.6940 - acc: 0.5573                                     
                                                                            
 - ETA: 20s - loss: 0.6937 - acc: 0.5402                                     
                                                                            
 - ETA: 17s - loss: 0.6937 - acc: 0.5469                                     
                                                                            
 - ETA: 14s - loss: 0.6939 - acc: 0.5417                                     
                                                                            
 - ETA: 12s - loss: 0.6942 - acc: 0.5344                                     
                                                                            
 - ETA: 9s - loss: 0.6941 - acc: 0.5341                                      
                                                                             
 - ETA: 7s - loss: 0.6938 - acc: 0.5365                              

 - ETA: 9s - loss: 0.6924 - acc: 0.5278                                      
                                                                             
 - ETA: 8s - loss: 0.6925 - acc: 0.5281                                      
                                                                             
 - ETA: 6s - loss: 0.6930 - acc: 0.5284                                      
                                                                             
 - ETA: 5s - loss: 0.6930 - acc: 0.5260                                      
                                                                             
 - ETA: 3s - loss: 0.6932 - acc: 0.5240                                      
                                                                             
 - ETA: 2s - loss: 0.6936 - acc: 0.5268                                      
                                                                             
 - ETA: 1s - loss: 0.6937 - acc: 0.5188                         

In [34]:
# Using results from Hyperas to create the model
def CNN_model(x_train,y_train):
    CNN_model = Sequential()

    CNN_model.add(Conv1D(64,20,activation="elu",input_shape = (x_train.shape[1:])))
    CNN_model.add(Conv1D(64,20,activation="relu"))
    CNN_model.add(MaxPooling1D(4))

    CNN_model.add(Conv1D(64,10,activation="elu"))
    CNN_model.add(Conv1D(96,5,activation="elu"))
    CNN_model.add(GlobalAveragePooling1D())

    CNN_model.add(Flatten())
    CNN_model.add(Dense(128,activation="elu"))
    CNN_model.add(Dropout(0.9912013870496312))
    CNN_model.add(Dense(10, activation='softmax'))

    sgd = SGD(lr=0.00026079803111884515,decay=0.0, momentum = 0.0, nesterov=False, clipnorm=2.0)

    CNN_model.compile(loss="sparse_categorical_crossentropy",optimizer=sgd, metrics=['accuracy'])

    early_stop = EarlyStopping(monitor='val_loss', min_delta=0.0001, mode='min', patience=2, verbose=1)

    results = CNN_model.fit(x_train,y_train, batch_size=16, epochs=10, validation_split = 0.2, callbacks=[early_stop])
    
    return CNN_model, results

In [35]:
x_train, x_test, y_train, y_test = train_test_split(tf_vectors,table.iloc[:,1].values,test_size = 0.2)

x_train = x_train.toarray().reshape(631,1127,1)
y_train = y_train.reshape(631,1)
x_test = x_test.toarray().reshape(158,1127,1)
y_test = y_test.reshape(158,1)

CNN_model, results = CNN_model(x_train,y_train)

Train on 504 samples, validate on 127 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [36]:
# checking test accuracy
_, test_acc = CNN_model.evaluate(x_test, y_test, verbose=0)
test_acc

0.5316455749016774