In [2]:
import pandas as pd  
import numpy as np 
import io
from google.colab import files
from sklearn import model_selection, preprocessing, metrics
import re
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences


Using TensorFlow backend.


In [0]:
# Load the USPTO grant CSV from the Colab Drive path, keeping only the
# abstract, title and ipc columns.
# NOTE(review): no Drive-mount step is visible in this notebook; the path must already exist.
data = pd.read_csv("/content/drive/My Drive/Colab Notebooks/USPTO_grant.csv",usecols =["abstract","title","ipc"])

USPTO patent dataset.

In [4]:
# Inspect the last rows of the loaded frame.
data.tail()

Unnamed: 0,ipc,title,abstract
36514,h05k007/20,fan tray perforation pattern,an apparatus is provided in one example embodi...
36515,h05k009/00,electronic device and method for producing same,the present invention provides an electronic d...
36516,h05k013/02,tape feeder,"a tape feeder includes a reel holding section,..."
36517,h05k013/04,cut-and-clinch apparatus and board work machine,a cut-and-clinch apparatus that cuts and bends...
36518,b23p019/00,"board work system, and method for managing mou...",a board work system which optimizes amounting ...


The International Patent Classification (**IPC**) is a hierarchical patent classification system used in over 100 countries to classify the content of patents in a uniform manner.

In [6]:
# Non-null count per column; differing values reveal missing entries.
data.count()

ipc         33672
title       36424
abstract    33672
dtype: int64

In [7]:
# Report how many rows lack a title or an abstract, then keep only the rows
# in which every column is populated.
missing_titles = data['title'].isna().sum()
print("{} documents with null title".format(missing_titles))
missing_abstracts = data['abstract'].isna().sum()
print("{} documents with null abstract".format(missing_abstracts))
data_clean = data.dropna(axis=0, how='any')

95 documents with null title
2847 documents with null abstract


In [8]:
# Confirm every column now has the same number of non-null entries.
data_clean.count()

ipc         33577
title       33577
abstract    33577
dtype: int64

In [9]:
# Rows and columns remaining after dropping incomplete rows.
print(data_clean.shape)

(33577, 3)


In [10]:
# Preview the first cleaned rows (the original row labels are kept).
data_clean.head()

Unnamed: 0,ipc,title,abstract
600,a01h005/08,apple tree named ‘zouk 16’,a new and distinctive variety of a malus dome...
601,a01h005/08,apple tree named ‘duke fuji’,‘duke fuji’ apple tree is a naturally occurrin...
602,a01h005/08,sweet cherry tree named ‘ifg cher-five’,this invention is a new and distinct sweet che...
603,a01h005/08,mandarin tree named ‘rubygs’,‘rubygs’ is a new and distinct mandarin tree n...
604,a01h005/08,grapevine plant named ‘ifg thirty-seven’,this invention is a new and distinct grapevine...


In [0]:
data_ipc_list = pd.DataFrame()

# Title and abstract are joined into one text field; the raw IPC string is
# carried over unchanged and also split on commas into a list of codes.
title_and_abstract = data_clean['title'] + " " + data_clean['abstract']
data_full = pd.DataFrame({
    'full_text': title_and_abstract,
    'ipc': data_clean['ipc'],
})
data_full['ipc_list'] = data_clean['ipc'].apply(lambda codes: codes.split(','))


In [12]:
# Preview the combined text, the raw IPC string and the split IPC list.
data_full.head(10)

Unnamed: 0,full_text,ipc,ipc_list
600,apple tree named ‘zouk 16’ a new and distincti...,a01h005/08,[a01h005/08]
601,apple tree named ‘duke fuji’ ‘duke fuji’ apple...,a01h005/08,[a01h005/08]
602,sweet cherry tree named ‘ifg cher-five’ this i...,a01h005/08,[a01h005/08]
603,mandarin tree named ‘rubygs’ ‘rubygs’ is a new...,a01h005/08,[a01h005/08]
604,grapevine plant named ‘ifg thirty-seven’ this ...,a01h005/08,[a01h005/08]
605,grapevine plant named ‘sugrafiftytwo’ a new an...,a01h005/08,[a01h005/08]
607,gregg ash tree named ‘libby davidson’ a new an...,a01h005/02,[a01h005/02]
620,poinsettia plant named ‘q103’ a new and distin...,a01h005/02,[a01h005/02]
630,new guinea a new and distinct cultivar of imp...,a01h005/02,[a01h005/02]
631,new guinea a new and distinct cultivar of imp...,a01h005/02,[a01h005/02]


In [13]:
# Non-null count per column of data_full.
data_full.count()

full_text    33577
ipc          33577
ipc_list     33577
dtype: int64

In [14]:
# Check the per-document lists of IPC codes.
data_full['ipc_list'].head(10)

600    [a01h005/08]
601    [a01h005/08]
602    [a01h005/08]
603    [a01h005/08]
604    [a01h005/08]
605    [a01h005/08]
607    [a01h005/02]
620    [a01h005/02]
630    [a01h005/02]
631    [a01h005/02]
Name: ipc_list, dtype: object

In [15]:
# Shape of data_full as (rows, columns).
print(data_full.shape)

(33577, 3)


In [0]:
# Binarize the per-document IPC code lists into a multi-label indicator matrix.
mlb = preprocessing.MultiLabelBinarizer()
ipc_l = mlb.fit_transform(data_full['ipc_list'])

# Class labels learned by the binarizer; they are used below as the column
# names of the indicator matrix.
y_classes = mlb.classes_

In [17]:
# Show the IPC class labels learned by the binarizer.
print(y_classes)

['a01b019/00' 'a01b023/06' 'a01b037/00' ... 'h05k013/02' 'h05k013/04'
 'h05k013/08']


In [18]:
# One indicator column per IPC class, one row per document.
# NOTE(review): this frame gets a fresh 0..N-1 index while data_full keeps the
# original CSV row labels (600, 601, ...); any index-aligned operation between
# the two would misalign rows — confirm it is only ever used positionally.
ipc_df = pd.DataFrame(ipc_l, columns=mlb.classes_)
ipc_df.head()

Unnamed: 0,a01b019/00,a01b023/06,a01b037/00,a01b059/06,a01b061/04,a01b063/111,a01b063/24,a01b063/32,a01b069/00,a01b071/06,a01b079/00,a01c001/02,a01c001/06,a01c005/06,a01c007/02,a01c007/04,a01c007/06,a01c007/08,a01c007/10,a01c011/02,a01c015/04,a01c023/00,a01c023/04,a01d027/00,a01d034/00,a01d034/135,a01d034/18,a01d034/416,a01d034/47,a01d034/73,a01d034/78,a01d034/81,a01d041/12,a01d041/127,a01d041/14,a01d045/10,a01d045/30,a01d046/26,a01d047/00,a01d057/02,...,h05h007/02,h05h007/04,h05h009/02,h05k001/00,h05k001/02,h05k001/03,h05k001/09,h05k001/11,h05k001/14,h05k001/16,h05k001/18,h05k003/00,h05k003/02,h05k003/10,h05k003/24,h05k003/28,h05k003/30,h05k003/32,h05k003/34,h05k003/36,h05k003/38,h05k003/40,h05k003/42,h05k003/46,h05k005/00,h05k005/02,h05k005/03,h05k005/06,h05k007/00,h05k007/02,h05k007/04,h05k007/10,h05k007/14,h05k007/18,h05k007/20,h05k009/00,h05k013/00,h05k013/02,h05k013/04,h05k013/08
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [19]:
# Shape of the label matrix as (documents, IPC classes).
ipc_df.shape

(33577, 11074)

In [20]:
# Report how many distinct IPC classes the binarizer found.
print("Overall {} patent classes".format(len(mlb.classes_)))

Overall 11074 patent classes


In [21]:
# Raw combined text, before any preprocessing.
data_full['full_text'].head(10)

600    apple tree named ‘zouk 16’ a new and distincti...
601    apple tree named ‘duke fuji’ ‘duke fuji’ apple...
602    sweet cherry tree named ‘ifg cher-five’ this i...
603    mandarin tree named ‘rubygs’ ‘rubygs’ is a new...
604    grapevine plant named ‘ifg thirty-seven’ this ...
605    grapevine plant named ‘sugrafiftytwo’ a new an...
607    gregg ash tree named ‘libby davidson’ a new an...
620    poinsettia plant named ‘q103’ a new and distin...
630    new guinea a new and distinct cultivar of  imp...
631    new guinea a new and distinct cultivar of  imp...
Name: full_text, dtype: object

In [22]:
# Remove punctuation: drop every character that is neither a word character
# nor whitespace (so a hyphenated term such as "cher-five" becomes "cherfive").
# regex=True is passed explicitly because pandas >= 2.0 treats the pattern as
# a literal string by default, which would silently remove nothing; the raw
# string avoids the invalid "\w" / "\s" escape warnings.
data_full['full_text_proc'] = data_full['full_text'].str.replace(r'[^\w\s]', '', regex=True)
data_full['full_text_proc'].head(10)

600    apple tree named zouk 16 a new and distinctive...
601    apple tree named duke fuji duke fuji apple tre...
602    sweet cherry tree named ifg cherfive this inve...
603    mandarin tree named rubygs rubygs is a new and...
604    grapevine plant named ifg thirtyseven this inv...
605    grapevine plant named sugrafiftytwo a new and ...
607    gregg ash tree named libby davidson a new and ...
620    poinsettia plant named q103 a new and distinct...
630    new guinea a new and distinct cultivar of  imp...
631    new guinea a new and distinct cultivar of  imp...
Name: full_text_proc, dtype: object

In [23]:
# Lower-case every whitespace-separated token and re-join the tokens with
# single spaces.
data_full['full_text_proc'] = data_full['full_text_proc'].apply(
    lambda text: " ".join([token.lower() for token in text.split()])
)
data_full['full_text_proc'].head(10)

600    apple tree named zouk 16 a new and distinctive...
601    apple tree named duke fuji duke fuji apple tre...
602    sweet cherry tree named ifg cherfive this inve...
603    mandarin tree named rubygs rubygs is a new and...
604    grapevine plant named ifg thirtyseven this inv...
605    grapevine plant named sugrafiftytwo a new and ...
607    gregg ash tree named libby davidson a new and ...
620    poinsettia plant named q103 a new and distinct...
630    new guinea a new and distinct cultivar of impa...
631    new guinea a new and distinct cultivar of impa...
Name: full_text_proc, dtype: object

In [24]:
# Ten most frequent tokens across the whole corpus.
corpus_tokens = ' '.join(data_full['full_text_proc']).split()
freq_words = pd.Series(corpus_tokens).value_counts().head(10)
freq_words

the      325344
a        253613
of       144560
and      131401
to       104683
in        55880
is        54825
an        53839
for       52904
first     39488
dtype: int64

In [25]:
# Ten least frequent tokens across the whole corpus.
corpus_tokens = ' '.join(data_full['full_text_proc']).split()
rare_words = pd.Series(corpus_tokens).value_counts().tail(10)
rare_words

prediluted         1
localities         1
finalize           1
gateside           1
teratomas          1
powerelectronic    1
moderatelylow      1
yx                 1
desoldering        1
boast              1
dtype: int64

In [26]:
# Remove digits.
# The pattern is a raw string: "\d" in a plain string literal is an invalid
# escape sequence that newer Python versions warn about.
# Removing a standalone number leaves a doubled space behind; those runs are
# collapsed later, in the stop-word removal step.
data_full['full_text_proc'] = data_full['full_text_proc'].apply(lambda x : re.sub(r"\d+", "", x))
data_full['full_text_proc'].head(10)

600    apple tree named zouk  a new and distinctive v...
601    apple tree named duke fuji duke fuji apple tre...
602    sweet cherry tree named ifg cherfive this inve...
603    mandarin tree named rubygs rubygs is a new and...
604    grapevine plant named ifg thirtyseven this inv...
605    grapevine plant named sugrafiftytwo a new and ...
607    gregg ash tree named libby davidson a new and ...
620    poinsettia plant named q a new and distinct cu...
630    new guinea a new and distinct cultivar of impa...
631    new guinea a new and distinct cultivar of impa...
Name: full_text_proc, dtype: object

In [27]:
# Removing stop words (the,a,with,of,and...)

nltk.download('stopwords')
stop = nltk.corpus.stopwords.words('english')
# Build one whole-word alternation over all stop words; each word is escaped
# so it is always matched literally.
pattern_stop = r'\b(?:{})\b'.format('|'.join(re.escape(stop_word) for stop_word in stop))
# regex=True is passed explicitly: pandas >= 2.0 defaults str.replace to
# literal matching, under which neither pattern below would ever match.
data_full['full_text_proc_no_stop'] = data_full['full_text_proc'].str.replace(pattern_stop, '', regex=True)
# Collapse the whitespace runs left behind by the removals.
data_full['full_text_proc_no_stop'] = data_full['full_text_proc_no_stop'].str.replace(r'\s+', ' ', regex=True)
data_full['full_text_proc'] = data_full['full_text_proc_no_stop']
data_full['full_text_proc'].head(10)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


600    apple tree named zouk new distinctive variety ...
601    apple tree named duke fuji duke fuji apple tre...
602    sweet cherry tree named ifg cherfive invention...
603    mandarin tree named rubygs rubygs new distinct...
604    grapevine plant named ifg thirtyseven inventio...
605    grapevine plant named sugrafiftytwo new distin...
607    gregg ash tree named libby davidson new distin...
620    poinsettia plant named q new distinct cultivar...
630    new guinea new distinct cultivar impatiens pla...
631    new guinea new distinct cultivar impatiens pla...
Name: full_text_proc, dtype: object

In [28]:
# Re-check the most frequent tokens now that stop words are gone; change the
# argument of head() to list more or fewer of them.
corpus_tokens = ' '.join(data_full['full_text_proc']).split()
freq_words = pd.Series(corpus_tokens).value_counts().head(10)
freq_words

first       39488
device      28886
second      28579
one         24689
method      23921
includes    22748
system      21464
data        18387
least       16077
may         14616
dtype: int64

In [29]:
# Lemmatization: replace every token with the lemma returned by TextBlob.
nltk.download('wordnet')

from textblob import Word
data_full['full_text_proc'] = data_full['full_text_proc'].apply(
    lambda text: " ".join(Word(token).lemmatize() for token in text.split())
)
data_full['full_text_proc'].head(10)

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.


600    apple tree named zouk new distinctive variety ...
601    apple tree named duke fuji duke fuji apple tre...
602    sweet cherry tree named ifg cherfive invention...
603    mandarin tree named rubygs rubygs new distinct...
604    grapevine plant named ifg thirtyseven inventio...
605    grapevine plant named sugrafiftytwo new distin...
607    gregg ash tree named libby davidson new distin...
620    poinsettia plant named q new distinct cultivar...
630    new guinea new distinct cultivar impatiens pla...
631    new guinea new distinct cultivar impatiens pla...
Name: full_text_proc, dtype: object

In [0]:
# Maximum number of top-ranked TF-IDF words kept per document.
max_length = 600
# Empty frame that collects one (row position, top words) record per document.
col_names =  ['i','top_words']
data_tfidf  = pd.DataFrame(columns = col_names)

In [31]:
# Text after the preprocessing steps applied so far.
data_full['full_text_proc'].head()

600    apple tree named zouk new distinctive variety ...
601    apple tree named duke fuji duke fuji apple tre...
602    sweet cherry tree named ifg cherfive invention...
603    mandarin tree named rubygs rubygs new distinct...
604    grapevine plant named ifg thirtyseven inventio...
Name: full_text_proc, dtype: object

In [0]:
# Word-level unigram TF-IDF with IDF smoothing, sublinear TF scaling and
# vector normalisation all switched off.
tfidf = TfidfVectorizer(lowercase=True, analyzer='word', smooth_idf=False, sublinear_tf=False, norm=None,
                        stop_words='english', ngram_range=(1,1)) 

# Fit on the preprocessed text and compute the document-term weight matrix.
text_transformed = tfidf.fit_transform(data_full['full_text_proc'])

In [33]:
import datetime
print(datetime.datetime.now())
# get_feature_names() was removed in scikit-learn 1.2; use its replacement
# when the installed version provides it and fall back to the old name otherwise.
_feature_name_getter = getattr(tfidf, "get_feature_names_out", None) or tfidf.get_feature_names
feature_names = np.asarray(_feature_name_getter())

# Collect one (row position, top words) record per document and build the
# frame once at the end. Appending with .loc row by row is quadratic, and it
# made a re-run of this cell pile duplicate rows onto data_tfidf.
top_word_records = []
for i in range(text_transformed.shape[0]):
    doc_row = text_transformed[i, :]
    # TfidfVectorizer returns a CSR matrix: for a single row, .indices holds
    # the vocabulary columns of the stored terms and .data their weights, in
    # matching order. Reading them directly avoids one sparse lookup per term.
    ranked = (-doc_row.data).argsort()[:max_length]
    # Words of this document ordered by descending TF-IDF weight.
    txt = " ".join(feature_names[doc_row.indices[ranked]])
    top_word_records.append((i, txt))

data_tfidf = pd.DataFrame(top_word_records, columns=['i', 'top_words'])
print(datetime.datetime.now())

2020-02-23 21:14:55.727238
2020-02-23 21:17:55.182046


In [0]:
# Attach the ranked-word strings to data_full. data_tfidf was filled in the
# row order of data_full, so its index is replaced with data_full's before
# the assignment.
data_full['full_text_proc_final'] = data_tfidf.set_index(data_full.index).top_words

In [35]:
# data_full now includes the TF-IDF-ranked text column.
data_full.head()

Unnamed: 0,full_text,ipc,ipc_list,full_text_proc,full_text_proc_no_stop,full_text_proc_final
600,apple tree named ‘zouk 16’ a new and distincti...,a01h005/08,[a01h005/08],apple tree named zouk new distinctive variety ...,apple tree named zouk new distinctive variety ...,apple zouk tree bright named wellfeathered lea...
601,apple tree named ‘duke fuji’ ‘duke fuji’ apple...,a01h005/08,[a01h005/08],apple tree named duke fuji duke fuji apple tre...,apple tree named duke fuji duke fuji apple tre...,fuji duke apple tree brak notable distinctive ...
602,sweet cherry tree named ‘ifg cher-five’ this i...,a01h005/08,[a01h005/08],sweet cherry tree named ifg cherfive invention...,sweet cherry tree named ifg cherfive invention...,cherry cherfive fruit ifg sweet tree induced r...
603,mandarin tree named ‘rubygs’ ‘rubygs’ is a new...,a01h005/08,[a01h005/08],mandarin tree named rubygs rubygs new distinct...,mandarin tree named rubygs rubygs new distinct...,mandarin rubygs fruit tree seed daisy notable ...
604,grapevine plant named ‘ifg thirty-seven’ this ...,a01h005/08,[a01h005/08],grapevine plant named ifg thirtyseven inventio...,grapevine plant named ifg thirtyseven inventio...,berry grapevine thirtyseven ifg red large vine...


**ELMo embeddings**

In [36]:
# Input to the ELMo model: the full_text_proc_final column of data_full.
data_full.head()

Unnamed: 0,full_text,ipc,ipc_list,full_text_proc,full_text_proc_no_stop,full_text_proc_final
600,apple tree named ‘zouk 16’ a new and distincti...,a01h005/08,[a01h005/08],apple tree named zouk new distinctive variety ...,apple tree named zouk new distinctive variety ...,apple zouk tree bright named wellfeathered lea...
601,apple tree named ‘duke fuji’ ‘duke fuji’ apple...,a01h005/08,[a01h005/08],apple tree named duke fuji duke fuji apple tre...,apple tree named duke fuji duke fuji apple tre...,fuji duke apple tree brak notable distinctive ...
602,sweet cherry tree named ‘ifg cher-five’ this i...,a01h005/08,[a01h005/08],sweet cherry tree named ifg cherfive invention...,sweet cherry tree named ifg cherfive invention...,cherry cherfive fruit ifg sweet tree induced r...
603,mandarin tree named ‘rubygs’ ‘rubygs’ is a new...,a01h005/08,[a01h005/08],mandarin tree named rubygs rubygs new distinct...,mandarin tree named rubygs rubygs new distinct...,mandarin rubygs fruit tree seed daisy notable ...
604,grapevine plant named ‘ifg thirty-seven’ this ...,a01h005/08,[a01h005/08],grapevine plant named ifg thirtyseven inventio...,grapevine plant named ifg thirtyseven inventio...,berry grapevine thirtyseven ifg red large vine...


In [0]:
import tensorflow as tf
import tensorflow_hub as hub
import pandas as pd
from sklearn import preprocessing
import keras
import numpy as np

In [0]:
# Load the ELMo module (version 3) from TensorFlow Hub.
url = "https://tfhub.dev/google/elmo/3"
embed = hub.Module(url)

In [39]:
# Hold out 10% of the documents as a test set. A fixed random_state makes the
# partition reproducible across kernel restarts.
X_train_elmo, X_test_elmo, Y_train_elmo, Y_test_elmo = model_selection.train_test_split(
    data_full['full_text_proc_final'], ipc_df, test_size=0.1, random_state=42)

print(X_train_elmo.shape)
print(X_test_elmo.shape)
print(Y_train_elmo.shape)
print(Y_test_elmo.shape)

(30219,)
(3358,)
(30219, 11074)
(3358, 11074)


In [40]:
from keras.layers import LSTM, GRU, Dense, Activation, Dropout, Input, Embedding, Bidirectional, Reshape, Flatten, Conv1D, MaxPooling1D, MaxPool1D, GlobalMaxPool1D, SpatialDropout1D, Lambda
from keras.models import Model
import keras.backend as K
from keras import optimizers, models


def ELMoEmbedding(x):
    """Embed a batch of raw text strings with the pre-trained ELMo hub module.

    Args:
        x: Tensor coming from the string ``Input(shape=(1,))`` layer, i.e. one
            text per row with a trailing axis of size 1.

    Returns:
        The ``"default"`` output of the module's ``"default"`` signature (the
        wrapping Lambda layer declares it as 1024 values per text).
    """
    # Squeeze only the trailing size-1 axis. A bare tf.squeeze() would also
    # collapse the batch axis whenever a batch holds a single sample, handing
    # the module a scalar instead of a 1-D batch of strings.
    return embed(tf.squeeze(tf.cast(x, tf.string), axis=1), signature="default", as_dict=True)["default"]

# One raw text string per sample.
input_text = Input(shape=(1,), dtype=tf.string)
# Frozen ELMo embedding, declared as 1024 values per text.
embedding = Lambda(ELMoEmbedding, trainable=False, output_shape=(1024,))(input_text)
# Two dense ReLU layers (100 and 50 units), each followed by dropout.
dense = Dense(100, activation='relu')(embedding)
dense = Dropout(0.25)(dense)
dense2 = Dense(50, activation='relu')(dense)
dense2 = Dropout(0.05)(dense2)
# One sigmoid output per IPC class (multi-label prediction).
pred = Dense(len(mlb.classes_), activation='sigmoid')(dense2)
model = Model(inputs=[input_text], outputs=pred)
model.compile(loss='binary_crossentropy', optimizer=optimizers.Adam(lr=1e-6), metrics=['accuracy'])













INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore














Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.














Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [41]:
# Train the ELMo classifier inside an explicit TF session: register the
# session with Keras, initialise variables and lookup tables, then fit for
# 5 epochs with 10% of the training rows held out for validation.
# NOTE(review): the session is closed when the `with` block ends — confirm
# nothing later needs the trained weights (no evaluation on X_test_elmo is
# visible in this notebook).
with tf.Session() as session:
    K.set_session(session)
    session.run(tf.global_variables_initializer())  
    session.run(tf.tables_initializer())
    history = model.fit(X_train_elmo, Y_train_elmo, epochs=5, batch_size=100, verbose=1,
                    validation_split=0.1)














Train on 27197 samples, validate on 3022 samples
Epoch 1/5
























Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


**GloVe embeddings**

In [0]:
from numpy import array
from numpy import asarray
from numpy import zeros

# Length every token sequence is brought to, and width of the GloVe vectors.
sequence_length = 600
embedding_dim = 100

# Maps each GloVe token to its float32 vector.
embeddings_dictionary = dict()

# The context manager closes the file even if a line fails to parse.
with open('/content/drive/My Drive/Colab Notebooks/glove.6B.100d.txt', encoding="utf8") as glove_file:
    for line in glove_file:
        records = line.split()
        if not records:
            # Skip blank lines instead of failing on records[0].
            continue
        # First field is the token, the remaining fields are its vector components.
        word = records[0]
        vector_dimensions = asarray(records[1:], dtype='float32')
        embeddings_dictionary[word] = vector_dimensions

In [0]:
# Fit a Keras tokenizer on the TF-IDF-ranked texts; '.' is the only character
# passed as a filter.
token = Tokenizer(filters='.') 
token.fit_on_texts(data_full['full_text_proc_final']) 
# Word -> integer index mapping learned by the tokenizer.
word_index = token.word_index

In [45]:
# Hold out 10% of the documents as a test set. A fixed random_state makes the
# partition reproducible across kernel restarts.
X_train_glove, X_test_glove, Y_train_glove, Y_test_glove = model_selection.train_test_split(
    data_full['full_text_proc_final'], ipc_df, test_size=0.1, random_state=42)

print(X_train_glove.shape)
print(X_test_glove.shape)
print(Y_train_glove.shape)
print(Y_test_glove.shape)

(30219,)
(3358,)
(30219, 11074)
(3358, 11074)


In [0]:
# Convert each text to its sequence of word indices and bring every sequence
# to exactly sequence_length entries.
X_train_seq = pad_sequences(token.texts_to_sequences(X_train_glove), maxlen=sequence_length)
X_test_seq = pad_sequences(token.texts_to_sequences(X_test_glove), maxlen=sequence_length)

In [47]:
# Shapes of the padded matrices as (documents, sequence_length).
print(X_train_seq.shape)
print(X_test_seq.shape)

(30219, 600)
(3358, 600)


In [0]:
# Row i holds the GloVe vector of the word whose tokenizer index is i; words
# missing from GloVe keep an all-zero row. The width comes from embedding_dim
# rather than a repeated literal, so it cannot drift from the Embedding layer.
embedding_matrix = np.zeros((len(word_index) + 1, embedding_dim))
for word, i in word_index.items():
    embedding_vector = embeddings_dictionary.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

In [49]:
# Shape as (len(word_index) + 1, embedding width).
embedding_matrix.shape

(43974, 100)

In [50]:
# Number of label columns, i.e. output classes.
ipc_df.shape[1] 

11074

In [0]:
from keras.layers import LSTM, GRU, Dense, Activation, Dropout, Input, Embedding, Bidirectional, Reshape, Flatten, Conv1D, MaxPooling1D, MaxPool1D, GlobalMaxPool1D, SpatialDropout1D
from keras import optimizers, models

# Hyper-parameters read by CNN_model(). sequence_length and embedding_dim are
# used as defined in the GloVe loading cell; the former self-assignments
# (`x = x`) were no-ops and have been dropped.
vocabulary_size = len(word_index) + 1
num_filters = 100
kernel_size = 3

# One output unit per label column.
classes = ipc_df.shape[1]

learning_rate = 1e-6

In [0]:
def CNN_model():
    """Build and compile the 1-D CNN classifier over frozen pre-trained embeddings.

    Reads the notebook-level settings ``sequence_length``, ``vocabulary_size``,
    ``embedding_dim``, ``embedding_matrix``, ``num_filters``, ``kernel_size``,
    ``classes`` and ``learning_rate``.

    Returns:
        A compiled Keras model whose last layer is a sigmoid with ``classes`` units.
    """
    tokens_in = Input(shape=(sequence_length,), dtype='int32')

    # Non-trainable embedding lookup initialised from embedding_matrix.
    embedder = Embedding(
        input_dim=vocabulary_size,
        output_dim=embedding_dim,
        input_length=sequence_length,
        weights=[embedding_matrix],
        trainable=False,
    )
    features = SpatialDropout1D(0.2)(embedder(tokens_in))

    # Convolution followed by global max pooling over the sequence axis.
    features = Conv1D(num_filters, kernel_size=kernel_size, activation="relu")(features)
    features = GlobalMaxPool1D()(features)

    # Small dense head with dropout, then one sigmoid unit per class.
    features = Dense(50, activation="relu")(features)
    features = Dropout(0.25)(features)
    probabilities = Dense(units=classes, activation="sigmoid")(features)

    cnn = models.Model(inputs=tokens_in, outputs=probabilities)
    cnn.compile(
        optimizer=optimizers.Adam(lr=learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return cnn

In [54]:

# Training settings for the CNN run.
batch_size = 100
epochs = 5

# Build and train the CNN inside a dedicated TF session; 10% of the training
# rows are held out for validation.
# NOTE(review): the session is closed when the `with` block ends — confirm
# nothing later needs the trained weights (no evaluation on X_test_seq is
# visible in this notebook).
with tf.Session() as session2:
  model = CNN_model()
  history = model.fit(X_train_seq, Y_train_glove,
                      batch_size=batch_size,
                      epochs=epochs,
                      verbose=1,
                      validation_split=0.1)

Train on 27197 samples, validate on 3022 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
