# models.py
from __future__ import print_function, division

from keras import backend as K
from keras.layers import (LSTM, Activation, Bidirectional, Conv1D, Dense,
                          Dropout, Embedding, Flatten, Input, Lambda, Permute,
                          RepeatVector, multiply)
from keras.models import Model
from keras.regularizers import l2


class SarcasmModel:
    """Builds one of five sarcasm-classification models, selected by model_type."""

    def __init__(self, model_type, hidden_units, embedding_dim, vocab_size,
                 max_len, pre_trained_embedding=False, embedding_weights=None):
        self.model_type = model_type
        self.hidden_units = hidden_units
        self.embedding_dim = embedding_dim
        self.vocab_size = vocab_size
        self.max_len = max_len
        self.pre_trained_embedding = pre_trained_embedding  # whether to use pre-trained word embeddings
        self.embedding_weights = embedding_weights  # embedding matrix; required when pre_trained_embedding is True
    def loadModel(self):
        """Build and return the Keras model selected by self.model_type."""
        # Embedding layer: optionally initialised with pre-trained weights
        # (frozen so the pre-trained vectors are not updated during training).
        input = Input(shape=(self.max_len,))
        if self.pre_trained_embedding:
            embeddings = Embedding(self.vocab_size, self.embedding_dim, input_length=self.max_len,
                                   weights=[self.embedding_weights], trainable=False)(input)
        else:
            embeddings = Embedding(self.vocab_size, self.embedding_dim, input_length=self.max_len)(input)
# Selecting the model based on the type
if self.model_type == 1: # basic neural network
print('-' * 100)
print("Model Selected: Basic neural network")
print('-' * 100)
            flat_output = Flatten()(embeddings)
            final_output = Dense(1, activation='sigmoid')(flat_output)
elif self.model_type == 2: # LSTM based network
print('-' * 100)
print("Model Selected: LSTM based network")
print('-' * 100)
lstm_output = LSTM(self.hidden_units)(embeddings)
            lstm_output = Dense(256, activation='relu', kernel_initializer='he_normal', kernel_regularizer=l2(0.001))(lstm_output)
            lstm_output = Dropout(0.3)(lstm_output)
            lstm_output = Dense(128, activation='relu', kernel_initializer='he_normal', kernel_regularizer=l2(0.001))(lstm_output)
            lstm_output = Dropout(0.3)(lstm_output)
final_output = Dense(1, activation='sigmoid')(lstm_output)
elif self.model_type == 3: # Bidirectional LSTM without attention
print('-' * 100)
print("Model Selected: Bidirectional LSTM without attention")
print('-' * 100)
lstm_output = Bidirectional(LSTM(self.hidden_units))(embeddings)
            lstm_output = Dense(256, activation='relu', kernel_initializer='he_normal', kernel_regularizer=l2(0.001))(lstm_output)
            lstm_output = Dropout(0.3)(lstm_output)
            lstm_output = Dense(128, activation='relu', kernel_initializer='he_normal', kernel_regularizer=l2(0.001))(lstm_output)
            lstm_output = Dropout(0.3)(lstm_output)
final_output = Dense(1, activation='sigmoid')(lstm_output)
elif self.model_type == 4: # Bidirectional LSTM with attention
print('-' * 100)
print("Model Selected: Bidirectional LSTM with attention")
print('-' * 100)
lstm_output = Bidirectional(LSTM(self.hidden_units, return_sequences=True), merge_mode='ave')(embeddings)
# calculating the attention coefficient for each hidden state
attention_vector = Dense(1, activation='tanh')(lstm_output)
attention_vector = Flatten()(attention_vector)
attention_vector = Activation('softmax')(attention_vector)
attention_vector = RepeatVector(self.hidden_units)(attention_vector)
attention_vector = Permute([2, 1])(attention_vector)
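            # Shape check (merge_mode='ave' keeps hidden_units dimensions):
            # lstm_output is (batch, max_len, hidden_units); the weights go
            # (batch, max_len, 1) -> Flatten -> (batch, max_len) -> softmax
            # over time -> RepeatVector -> (batch, hidden_units, max_len) ->
            # Permute -> (batch, max_len, hidden_units), aligned with the
            # hidden states for the element-wise multiplication below.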
# Multiplying the hidden states with the attention coefficients and
# finding the weighted average
final_output = multiply([lstm_output, attention_vector])
final_output = Lambda(lambda xin: K.sum(
xin, axis=-2), output_shape=(self.hidden_units,))(final_output)
            # Pass the attention-weighted sentence vector through dense
            # layers for classification.
            final_output = Dropout(0.5)(final_output)
            final_output = Dense(256, activation='relu', kernel_initializer='he_normal', kernel_regularizer=l2(0.001))(final_output)
            final_output = Dropout(0.3)(final_output)
            final_output = Dense(128, activation='relu', kernel_initializer='he_normal', kernel_regularizer=l2(0.001))(final_output)
final_output = Dense(1, activation='sigmoid')(final_output)
elif self.model_type == 5: # CNN-Bidirectional LSTM with attention
print('-' * 100)
print("Model Selected: CNN-Bidirectional LSTM with attention")
print('-' * 100)
            # Hyperparameters for the 1D convolution layer
filters = 32
kernel_size = 5
embeddings = Dropout(0.3)(embeddings)
conv_output = Conv1D(filters, kernel_size, activation='relu')(embeddings)
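            # Conv1D uses 'valid' padding by default, so the time dimension
            # shrinks from max_len to max_len - kernel_size + 1; the attention
            # below therefore runs over the convolved time steps.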
lstm_output = Bidirectional(LSTM(self.hidden_units, return_sequences=True), merge_mode='ave')(conv_output)
# calculating the attention coefficient for each hidden state
attention_vector = Dense(1, activation='tanh')(lstm_output)
attention_vector = Flatten()(attention_vector)
attention_vector = Activation('softmax')(attention_vector)
attention_vector = RepeatVector(self.hidden_units)(attention_vector)
attention_vector = Permute([2, 1])(attention_vector)
# Multiplying the hidden states with the attention coefficients and
# finding the weighted average
final_output = multiply([lstm_output, attention_vector])
final_output = Lambda(lambda xin: K.sum(
xin, axis=-2), output_shape=(self.hidden_units,))(final_output)
            # Pass the attention-weighted sentence vector through dense
            # layers for classification.
            final_output = Dropout(0.5)(final_output)
            final_output = Dense(256, activation='relu', kernel_initializer='he_normal', kernel_regularizer=l2(0.001))(final_output)
            final_output = Dropout(0.3)(final_output)
            final_output = Dense(128, activation='relu', kernel_initializer='he_normal', kernel_regularizer=l2(0.001))(final_output)
final_output = Dense(1, activation='sigmoid')(final_output)
        else:
            raise ValueError('Unknown model_type: {}'.format(self.model_type))
        model = Model(inputs=input, outputs=final_output)
        return model
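

# Minimal usage sketch. The hyperparameter values below are illustrative
# assumptions, not settings taken from this project; vocab_size and max_len
# must match the tokenised corpus fed to the model.
if __name__ == '__main__':
    sarcasm = SarcasmModel(model_type=4, hidden_units=128, embedding_dim=100,
                           vocab_size=20000, max_len=100)
    model = sarcasm.loadModel()
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    model.summary()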