# Prepare Dataset

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.utils.multiclass import unique_labels
from keras.utils import to_categorical

filepath = 'Sample_data\sample.tsv'
df = pd.read_csv(filepath, sep='\t')
sentences = df['Sentence']
target = df['RE_Type']
noRE_labels = [l for l in unique_labels(target.tolist()) if l!="NoRE"]

Using TensorFlow backend.


In [2]:
def splitIndex(index):
    return index.split('|') 
#splitIndex(df.loc[0]['Gene1_Index(start|end)'])
splitIndex('225|228')

['225', '228']

In [3]:
df

Unnamed: 0,PMID,Sentence_ID,Sentence,Gene1|Gene1_ID,Gene1_Index(start|end),Gene2|Gene2_ID,Gene2_Index(start|end),RE_Type
0,24003223,S1,A brain-specific Grb2-associated regulator of ...,Erk|5594,225|228,GAREM2|150946,147|153,Neutral_Regulation
1,24003223,S2,Grb2-associated regulator of Erk/MAPK1 (GAREM)...,Grb2-associated regulator of Erk/MAPK1 (GAREM)...,0|46,EGF|1950,77|80,NoRE
2,24003223,S5,"Therefore, previously identified GAREM is name...",GAREM|64762,34|39,GAREM1|64762,49|55,NoRE
3,24003223,S6,"Here we characterized a new subtype of GAREM, ...",GAREM|381126,40|45,GAREM2|242915,47|53,NoRE
4,24003223,S7,"Three GAREM2 tyrosines (Tyr-102, Tyr-429, and ...",Grb2|2885,128|132,GAREM2|150946,6|12,Binding
5,24003223,S7,"Three GAREM2 tyrosines (Tyr-102, Tyr-429, and ...",EGF|1950,79|82,GAREM2|150946,6|12,Modification
6,24003223,S8,"Furthermore, GAREM2 and Shp2 regulate Erk acti...",Erk|5594,38|41,GAREM2|150946,13|19,Neutral_Regulation
7,24003223,S8,"Furthermore, GAREM2 and Shp2 regulate Erk acti...",Erk|5594,38|41,Shp2|5781,24|28,Neutral_Regulation
8,24003223,S10,GAREM2 is expressed in some neuroblastoma cell...,Grb2|2885,102|106,GAREM2|150946,0|6,Binding
9,24003223,S11,"Eventually, GAREM2 regulates Erk activation in...",GAREM2|150946,12|18,Erk|5594,29|32,Positive_Regulation


In [4]:
print(df.loc[0].Sentence)
def Replace2Gene(start, end, text, symbol):
    return text[0:int(start)] + symbol + text[int(end):]
Replace2Gene(225, 228, df.Sentence[0], "__GENE1__")

A brain-specific Grb2-associated regulator of extracellular signal-regulated kinase (Erk)/mitogen-activated protein kinase (MAPK) (GAREM) subtype, GAREM2, contributes to neurite outgrowth of neuroblastoma cells by regulating Erk signaling.


'A brain-specific Grb2-associated regulator of extracellular signal-regulated kinase (Erk)/mitogen-activated protein kinase (MAPK) (GAREM) subtype, GAREM2, contributes to neurite outgrowth of neuroblastoma cells by regulating __GENE1__ signaling.'

In [5]:
df['Gene2_Index(start|end)'][0]

'147|153'

In [6]:
s=Replace2Gene(225, 228, df.Sentence[0], "__GENE1__")
s=Replace2Gene(147, 153, s, "__GENE2__")
s

'A brain-specific Grb2-associated regulator of extracellular signal-regulated kinase (Erk)/mitogen-activated protein kinase (MAPK) (GAREM) subtype, __GENE2__, contributes to neurite outgrowth of neuroblastoma cells by regulating __GENE1__ signaling.'

In [7]:
Replace2Gene(147, 153, Replace2Gene(225, 228, df.Sentence[0], "__GENE1__"), "__GENE2__")

'A brain-specific Grb2-associated regulator of extracellular signal-regulated kinase (Erk)/mitogen-activated protein kinase (MAPK) (GAREM) subtype, __GENE2__, contributes to neurite outgrowth of neuroblastoma cells by regulating __GENE1__ signaling.'

In [8]:
def Replace(row, gene1, gene2):
    s1 = splitIndex(row['Gene1_Index(start|end)'])[0]
    e1 = splitIndex(row['Gene1_Index(start|end)'])[1]
    s2 = splitIndex(row['Gene2_Index(start|end)'])[0]
    e2 = splitIndex(row['Gene2_Index(start|end)'])[1]
    if s1>s2:
        return Replace2Gene(s2, e2, Replace2Gene(s1, e1, row.Sentence, gene1), gene2)
    else:
        return Replace2Gene(s1, e1, Replace2Gene(s2, e2, row.Sentence, gene2), gene1)
df['Preprocessed']=df.apply(Replace, args=("__GENE1__", "__GENE2__"), axis=1)

In [9]:
df

Unnamed: 0,PMID,Sentence_ID,Sentence,Gene1|Gene1_ID,Gene1_Index(start|end),Gene2|Gene2_ID,Gene2_Index(start|end),RE_Type,Preprocessed
0,24003223,S1,A brain-specific Grb2-associated regulator of ...,Erk|5594,225|228,GAREM2|150946,147|153,Neutral_Regulation,A brain-specific Grb2-associated regulator of ...
1,24003223,S2,Grb2-associated regulator of Erk/MAPK1 (GAREM)...,Grb2-associated regulator of Erk/MAPK1 (GAREM)...,0|46,EGF|1950,77|80,NoRE,__GENE1__ is an adaptor molecule in the __GENE...
2,24003223,S5,"Therefore, previously identified GAREM is name...",GAREM|64762,34|39,GAREM1|64762,49|55,NoRE,"Therefore, previously identified G__GENE1__is ..."
3,24003223,S6,"Here we characterized a new subtype of GAREM, ...",GAREM|381126,40|45,GAREM2|242915,47|53,NoRE,Here we characterized a new subtype of G__GENE...
4,24003223,S7,"Three GAREM2 tyrosines (Tyr-102, Tyr-429, and ...",Grb2|2885,128|132,GAREM2|150946,6|12,Binding,"Three __GENE2__ tyrosines (Tyr-102, Tyr-429, a..."
5,24003223,S7,"Three GAREM2 tyrosines (Tyr-102, Tyr-429, and ...",EGF|1950,79|82,GAREM2|150946,6|12,Modification,"Three __GENE2__ tyrosines (Tyr-102, Tyr-429, a..."
6,24003223,S8,"Furthermore, GAREM2 and Shp2 regulate Erk acti...",Erk|5594,38|41,GAREM2|150946,13|19,Neutral_Regulation,"Furthermore, __GENE2__ and Shp2 regulate __GEN..."
7,24003223,S8,"Furthermore, GAREM2 and Shp2 regulate Erk acti...",Erk|5594,38|41,Shp2|5781,24|28,Neutral_Regulation,"Furthermore, GAREM2 and __GENE2__ regulate __G..."
8,24003223,S10,GAREM2 is expressed in some neuroblastoma cell...,Grb2|2885,102|106,GAREM2|150946,0|6,Binding,__GENE2__ is expressed in some neuroblastoma c...
9,24003223,S11,"Eventually, GAREM2 regulates Erk activation in...",GAREM2|150946,12|18,Erk|5594,29|32,Positive_Regulation,"Eventually, __GENE1__ regulates __GENE2__ acti..."


In [10]:
print(df['Sentence'][1000])
print(df['Preprocessed'][1000])

Of these, IL-8, CCL2, TNC, Gal-1 and PTX3 were validated as upregulated and SERPINE1, STC2, CTGF and COL4A2 were validated as downregulated factors by immunochemical methods.
Of these, IL-8, CCL2, TNC, G__GENE1__and P__GENE2__were validated as upregulated and SERPINE1, STC2, CTGF and COL4A2 were validated as downregulated factors by immunochemical methods.


In [11]:
sentences = df['Preprocessed']
sentences

0       A brain-specific Grb2-associated regulator of ...
1       __GENE1__ is an adaptor molecule in the __GENE...
2       Therefore, previously identified G__GENE1__is ...
3       Here we characterized a new subtype of G__GENE...
4       Three __GENE2__ tyrosines (Tyr-102, Tyr-429, a...
5       Three __GENE2__ tyrosines (Tyr-102, Tyr-429, a...
6       Furthermore, __GENE2__ and Shp2 regulate __GEN...
7       Furthermore, GAREM2 and __GENE2__ regulate __G...
8       __GENE2__ is expressed in some neuroblastoma c...
9       Eventually, __GENE1__ regulates __GENE2__ acti...
10      __GENE1__ also regulates __GENE2__-induced neu...
11      Although the structure and function of both G_...
12      Although the structure and function of both G_...
13      Although the structure and function of both GA...
14      Nuclear localization of G__GENE1__might be con...
15      Nuclear localization of G__GENE1__might be con...
16      Nuclear localization of GAREM1 might be contro...
17      The N-

# Text Classification with Convolutional Neural Network (CNN)

In [12]:
from keras.preprocessing.text import Tokenizer
tokenizer = Tokenizer(num_words=20000)
tokenizer.fit_on_texts(sentences)
word_index = tokenizer.word_index
print('Number of Unique Tokens',len(word_index))

Number of Unique Tokens 4544


In [13]:
word_index

{'and': 1,
 'the': 2,
 'of': 3,
 'gene2': 4,
 'gene1': 5,
 'in': 6,
 'a': 7,
 'with': 8,
 'to': 9,
 'were': 10,
 'expression': 11,
 'by': 12,
 'that': 13,
 'we': 14,
 '1': 15,
 'i': 16,
 'c': 17,
 'genes': 18,
 'protein': 19,
 'as': 20,
 'for': 21,
 'mir': 22,
 's': 23,
 'complex': 24,
 'hsa': 25,
 't': 26,
 'is': 27,
 '2': 28,
 'il': 29,
 'cells': 30,
 'p': 31,
 '3': 32,
 'was': 33,
 'm': 34,
 'induced': 35,
 'gene': 36,
 'but': 37,
 'on': 38,
 'proteins': 39,
 'or': 40,
 '5': 41,
 'cell': 42,
 'factor': 43,
 'binding': 44,
 'not': 45,
 'involved': 46,
 'b': 47,
 'these': 48,
 'also': 49,
 'time': 50,
 'signaling': 51,
 'this': 52,
 'including': 53,
 'levels': 54,
 'data': 55,
 'which': 56,
 'pcr': 57,
 'stimulated': 58,
 'associated': 59,
 '4': 60,
 'response': 61,
 'are': 62,
 'real': 63,
 'g': 64,
 'pathway': 65,
 'regulated': 66,
 'confirmed': 67,
 'mediated': 68,
 'study': 69,
 'kinase': 70,
 'formation': 71,
 'identified': 72,
 'dependent': 73,
 'h': 74,
 'role': 75,
 'immune': 

In [14]:
sentences[1]

'__GENE1__ is an adaptor molecule in the __GENE2__-mediated signaling pathway.'

In [15]:
sequences = tokenizer.texts_to_sequences(sentences)
sequences

[[7,
  329,
  123,
  548,
  59,
  843,
  3,
  448,
  1302,
  66,
  70,
  1638,
  2307,
  191,
  19,
  70,
  717,
  3063,
  3064,
  4,
  1364,
  9,
  844,
  916,
  3,
  2554,
  30,
  12,
  1173,
  5,
  51],
 [5, 27, 80, 740, 1873, 6, 2, 4, 68, 51, 65],
 [886, 449, 72, 64, 5, 27, 3481, 64, 4],
 [93,
  14,
  741,
  7,
  690,
  3064,
  3,
  64,
  5,
  64,
  4,
  13,
  27,
  1174,
  691,
  6,
  2,
  549,
  604,
  1,
  139,
  329],
 [211,
  4,
  3065,
  953,
  3066,
  953,
  3067,
  1,
  953,
  3068,
  62,
  670,
  1228,
  1874,
  389,
  1,
  62,
  1229,
  21,
  44,
  5,
  3482],
 [211,
  4,
  3065,
  953,
  3066,
  953,
  3067,
  1,
  953,
  3068,
  62,
  670,
  1228,
  5,
  389,
  1,
  62,
  1229,
  21,
  44,
  9,
  548],
 [248, 4, 1, 917, 359, 5, 97, 6, 1874, 58, 30],
 [248, 3069, 1, 4, 359, 5, 97, 6, 1874, 58, 30],
 [4,
  27,
  691,
  6,
  1875,
  2554,
  42,
  485,
  1,
  27,
  49,
  718,
  670,
  1,
  692,
  9,
  5,
  95,
  299,
  8,
  1874],
 [3483, 5, 330, 4, 96, 6, 2, 458, 3, 1874, 

In [16]:
print(sentences[0])
print(" ".join([t for t in (tokenizer.index_word[x] for x in sequences[0])]))

A brain-specific Grb2-associated regulator of extracellular signal-regulated kinase (Erk)/mitogen-activated protein kinase (MAPK) (GAREM) subtype, __GENE2__, contributes to neurite outgrowth of neuroblastoma cells by regulating __GENE1__ signaling.
a brain specific grb2 associated regulator of extracellular signal regulated kinase erk mitogen activated protein kinase mapk garem subtype gene2 contributes to neurite outgrowth of neuroblastoma cells by regulating gene1 signaling


In [17]:
df['Preprocessed']=df.apply(Replace, args=("GENE1GENE", "GENE2GENE"), axis=1)
sentences = df['Preprocessed']
tokenizer.fit_on_texts(sentences)
word_index = tokenizer.word_index
print('Number of Unique Tokens',len(word_index))
sequences = tokenizer.texts_to_sequences(sentences)
print(sentences[0])
print(" ".join([t for t in (tokenizer.index_word[x] for x in sequences[0])]))

Number of Unique Tokens 5774
A brain-specific Grb2-associated regulator of extracellular signal-regulated kinase (Erk)/mitogen-activated protein kinase (MAPK) (GAREM) subtype, GENE2GENE, contributes to neurite outgrowth of neuroblastoma cells by regulating GENE1GENE signaling.
a brain specific grb2 associated regulator of extracellular signal regulated kinase erk mitogen activated protein kinase mapk garem subtype gene2gene contributes to neurite outgrowth of neuroblastoma cells by regulating gene1gene signaling


In [18]:
from keras.preprocessing.sequence import pad_sequences
data = pad_sequences(sequences, maxlen=40)
data[0]
#len(data[0])

array([   0,    0,    0,    0,    0,    0,    0,    0,    0,    7,  341,
        125,  563,   59,  892,    3,  462, 1323,   65,   70, 1669, 2351,
        189,   18,   70,  745, 3134, 3135,   17, 1389,    9,  856,  929,
          3, 2609,   29,   12, 1189,   16,   51])

In [19]:
data[1]

array([   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,   16,   27,   86,  746,
       1911,    4,    2,   17,   69,   51,   67])

In [20]:
print(" ".join([t for t in (tokenizer.index_word.get(x, '[X]') for x in data[0])]))
print(" ".join([t for t in (tokenizer.index_word.get(x, '[X]') for x in data[1])]))

[X] [X] [X] [X] [X] [X] [X] [X] [X] a brain specific grb2 associated regulator of extracellular signal regulated kinase erk mitogen activated protein kinase mapk garem subtype gene2gene contributes to neurite outgrowth of neuroblastoma cells by regulating gene1gene signaling
[X] [X] [X] [X] [X] [X] [X] [X] [X] [X] [X] [X] [X] [X] [X] [X] [X] [X] [X] [X] [X] [X] [X] [X] [X] [X] [X] [X] [X] gene1gene is an adaptor molecule in the gene2gene mediated signaling pathway


In [21]:
data = pad_sequences(sequences, maxlen=1000)
print('Shape of Data Tensor:', data.shape)
data.shape

Shape of Data Tensor: (3456, 1000)


(3456, 1000)

Label encoding

In [22]:
label_dict = dict()
label_set = []
for l in target:
    if(l in label_set):
        continue
    label_set.append(l)
    
for ll in label_set:
    label_dict[ll] = len(label_dict)

label_dict

{'Neutral_Regulation': 0,
 'NoRE': 1,
 'Binding': 2,
 'Modification': 3,
 'Positive_Regulation': 4,
 'Association': 5,
 'Interaction_Agent': 6,
 'Translocation': 7,
 'Negative_Agent': 8,
 'Complex': 9,
 'Negative_Regulation': 10,
 'Negation_Complex': 11,
 'Transformation': 12,
 'Negation_Binding': 13,
 'Negation_Translocation': 14,
 'Positive_Agent': 15,
 'Negation_Neutral_Regulation': 16,
 'Negation_Modification': 17,
 'Negation_Association': 18,
 'Negation_Negative_Regulation': 19,
 'Negation_Positive_Regulation': 20,
 'Negation_Interaction_Agent': 21}

In [23]:
Y_int = [label_dict[ll] for ll in target]
Y_int

[0,
 1,
 1,
 1,
 2,
 3,
 0,
 0,
 2,
 4,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 5,
 6,
 7,
 1,
 8,
 0,
 1,
 1,
 1,
 6,
 4,
 7,
 1,
 9,
 9,
 9,
 7,
 6,
 6,
 6,
 0,
 0,
 10,
 2,
 2,
 5,
 9,
 9,
 9,
 7,
 9,
 9,
 9,
 3,
 4,
 1,
 0,
 6,
 6,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 10,
 10,
 4,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 11,
 9,
 9,
 9,
 1,
 1,
 9,
 5,
 5,
 5,
 5,
 1,
 2,
 5,
 9,
 1,
 1,
 1,
 9,
 9,
 9,
 12,
 12,
 12,
 1,
 2,
 9,
 2,
 13,
 13,
 2,
 2,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 5,
 5,
 5,
 1,
 1,
 1,
 2,
 1,
 1,
 1,
 5,
 5,
 5,
 5,
 5,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 5,
 5,
 5,
 5,
 5,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 7,
 7,
 14,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 7,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 5,
 5,
 9,
 9,
 5,
 5,
 9,
 9,
 6,
 6,
 6,
 6,
 0,
 5,
 9,
 9,
 9,
 6,
 2,
 5,
 10,
 10,
 10,

In [24]:
label_dict

{'Neutral_Regulation': 0,
 'NoRE': 1,
 'Binding': 2,
 'Modification': 3,
 'Positive_Regulation': 4,
 'Association': 5,
 'Interaction_Agent': 6,
 'Translocation': 7,
 'Negative_Agent': 8,
 'Complex': 9,
 'Negative_Regulation': 10,
 'Negation_Complex': 11,
 'Transformation': 12,
 'Negation_Binding': 13,
 'Negation_Translocation': 14,
 'Positive_Agent': 15,
 'Negation_Neutral_Regulation': 16,
 'Negation_Modification': 17,
 'Negation_Association': 18,
 'Negation_Negative_Regulation': 19,
 'Negation_Positive_Regulation': 20,
 'Negation_Interaction_Agent': 21}

In [25]:
import numpy as np
labels = to_categorical(Y_int)
print(labels)
print('Shape of Label Tensor:', labels.shape)

[[1. 0. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 ...
 [0. 1. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]]
Shape of Label Tensor: (3456, 22)


In [26]:
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.33, random_state=1000)
X_train

array([[   0,    0,    0, ...,   24,    1, 1515],
       [   0,    0,    0, ...,    4, 5502,  775],
       [   0,    0,    0, ...,    4, 1067,  133],
       ...,
       [   0,    0,    0, ..., 4611,  332, 1301],
       [   0,    0,    0, ...,  944,   63,   42],
       [   0,    0,    0, ...,    3,    2, 1080]])

In [27]:
print(data.shape) # Number of training instances, padded sequence
print(len(sentences)) # Number of training instances

from keras.layers import Input
sequence_input = Input(shape=(1000,), dtype='int32')

(3456, 1000)
3456


# Embedding Layer

In [28]:
from keras.layers import Embedding
from keras.initializers import RandomUniform
embedding_layer = Embedding(len(word_index) + 1,
                            100, 
                            embeddings_initializer=RandomUniform(),
                            input_length=1000, trainable=True)
embedded_sequences = embedding_layer(sequence_input)
embedding_layer.get_weights()[0].shape

Instructions for updating:
Colocations handled automatically by placer.


(5775, 100)

In [29]:
embedding_layer.output.shape

TensorShape([Dimension(None), Dimension(1000), Dimension(100)])

# Convolution Layer

In [30]:
from keras.layers import Conv1D

l_cov1= Conv1D(2, 4, activation='relu')(embedded_sequences)
l_cov1.shape

TensorShape([Dimension(None), Dimension(997), Dimension(2)])

# Max-pooling Layer

In [31]:
from keras.layers import MaxPooling1D
l_pool1 = MaxPooling1D(997)(l_cov1)
l_pool1.shape

TensorShape([Dimension(None), Dimension(1), Dimension(2)])

In [32]:
l_cov2= Conv1D(2, 3, activation='relu')(embedded_sequences)
l_pool2 = MaxPooling1D(998)(l_cov2)
print(l_pool2.shape)

l_cov3= Conv1D(2, 2, activation='relu')(embedded_sequences)
l_pool3 = MaxPooling1D(999)(l_cov3)
print(l_pool3.shape)

(?, 1, 2)
(?, 1, 2)


In [33]:
from keras.layers.merge import concatenate
l_concatenated = concatenate([l_pool1, l_pool2, l_pool3])
l_concatenated

<tf.Tensor 'concatenate_1/concat:0' shape=(?, 1, 6) dtype=float32>

In [34]:
from keras.layers import Flatten, Dense
l_flat = Flatten()(l_concatenated)
output = Dense(labels.shape[1], activation='softmax')(l_flat)
output

<tf.Tensor 'dense_1/Softmax:0' shape=(?, 22) dtype=float32>

In [35]:
from keras.models import Model
model = Model(sequence_input, output)
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 1000)         0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 1000, 100)    577500      input_1[0][0]                    
__________________________________________________________________________________________________
conv1d_1 (Conv1D)               (None, 997, 2)       802         embedding_1[0][0]                
__________________________________________________________________________________________________
conv1d_2 (Conv1D)               (None, 998, 2)       602         embedding_1[0][0]                
__________________________________________________________________________________________________
conv1d_3 (

In [36]:
#from keras.callbacks import ModelCheckpoint
#cp=ModelCheckpoint('cnn.model',monitor='val_acc',verbose=1,save_best_only=True)
history=model.fit(X_train, y_train, epochs=100, batch_size=320)

Instructions for updating:
Use tf.cast instead.
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100

In [40]:
y_pred = model.predict(X_test)
y_pred

array([[2.76340893e-06, 5.06842106e-08, 3.69440354e-02, ...,
        3.13660653e-10, 4.86026140e-04, 5.59384041e-08],
       [2.02462441e-04, 1.50618260e-04, 9.49539337e-03, ...,
        3.21468769e-06, 2.60973210e-03, 7.39944198e-06],
       [1.56552234e-12, 9.99996781e-01, 2.90140434e-07, ...,
        2.27003390e-14, 1.72655334e-09, 1.94956759e-10],
       ...,
       [1.15359285e-10, 9.99987841e-01, 1.23914276e-07, ...,
        1.74983088e-13, 1.38403031e-08, 3.77941116e-11],
       [1.35553800e-13, 9.99965787e-01, 8.07707409e-07, ...,
        4.99652335e-14, 5.37581846e-09, 3.29469313e-10],
       [5.69785479e-04, 9.86049592e-01, 5.83956262e-06, ...,
        4.91818064e-05, 8.12988310e-06, 1.13598946e-06]], dtype=float32)

In [41]:
label_set

['Neutral_Regulation',
 'NoRE',
 'Binding',
 'Modification',
 'Positive_Regulation',
 'Association',
 'Interaction_Agent',
 'Translocation',
 'Negative_Agent',
 'Complex',
 'Negative_Regulation',
 'Negation_Complex',
 'Transformation',
 'Negation_Binding',
 'Negation_Translocation',
 'Positive_Agent',
 'Negation_Neutral_Regulation',
 'Negation_Modification',
 'Negation_Association',
 'Negation_Negative_Regulation',
 'Negation_Positive_Regulation',
 'Negation_Interaction_Agent']

In [37]:
y_pred = model.predict(X_test)
y_predict_c = [label_set[p] for p in y_pred.argmax(axis=-1)]
print(y_predict_c)

['Modification', 'Association', 'NoRE', 'NoRE', 'NoRE', 'NoRE', 'NoRE', 'NoRE', 'NoRE', 'NoRE', 'NoRE', 'NoRE', 'NoRE', 'NoRE', 'NoRE', 'Negative_Regulation', 'Association', 'NoRE', 'NoRE', 'NoRE', 'NoRE', 'Neutral_Regulation', 'NoRE', 'NoRE', 'Interaction_Agent', 'Interaction_Agent', 'NoRE', 'NoRE', 'Negative_Regulation', 'NoRE', 'Association', 'NoRE', 'NoRE', 'Complex', 'NoRE', 'NoRE', 'NoRE', 'Positive_Regulation', 'Interaction_Agent', 'Complex', 'Modification', 'Negative_Regulation', 'Interaction_Agent', 'Association', 'NoRE', 'NoRE', 'Negative_Regulation', 'Translocation', 'Complex', 'NoRE', 'NoRE', 'NoRE', 'NoRE', 'NoRE', 'Modification', 'Association', 'NoRE', 'NoRE', 'NoRE', 'NoRE', 'Association', 'NoRE', 'Interaction_Agent', 'NoRE', 'NoRE', 'Modification', 'Binding', 'NoRE', 'Neutral_Regulation', 'NoRE', 'Binding', 'NoRE', 'NoRE', 'NoRE', 'Association', 'NoRE', 'Modification', 'Translocation', 'NoRE', 'NoRE', 'Association', 'NoRE', 'Interaction_Agent', 'Negative_Regulation', 'A

In [38]:
from sklearn.metrics import classification_report
y_test_c = [label_set[p] for p in y_test.argmax(axis=-1)]
report = classification_report(y_test_c, y_predict_c, labels=noRE_labels)
print(report)

                              precision    recall  f1-score   support

                 Association       0.58      0.54      0.56        96
                     Binding       0.29      0.37      0.33        27
                     Complex       0.54      0.60      0.57        45
           Interaction_Agent       0.35      0.80      0.49        25
                Modification       0.30      0.27      0.28        30
        Negation_Association       0.20      0.33      0.25         6
            Negation_Binding       0.00      0.00      0.00         1
            Negation_Complex       0.00      0.00      0.00         0
  Negation_Interaction_Agent       0.00      0.00      0.00         1
       Negation_Modification       0.00      0.00      0.00         5
Negation_Negative_Regulation       0.00      0.00      0.00         0
 Negation_Neutral_Regulation       0.00      0.00      0.00         1
Negation_Positive_Regulation       0.00      0.00      0.00         2
      Negation_Tran

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


In [42]:
from keras import backend as K 
K.clear_session()