Next: https://github.com/arunarn2/HierarchicalAttentionNetworks/blob/master/HierarchicalAttn.py

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import numpy as np

In [3]:
import tensorflow as tf
import tensorflow_hub as hub

In [4]:
from keras import backend as K
from keras.models import Model, Input, load_model
from keras.layers import LSTM, Dense, TimeDistributed, Bidirectional, Lambda, \
                         Activation, CuDNNLSTM, SpatialDropout1D, Dropout, BatchNormalization,\
                         GlobalAveragePooling1D, GlobalMaxPooling1D
from keras.optimizers import RMSprop, Adam, Adamax, SGD
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
from keras.regularizers import l2
from keras.layers.merge import add
from keras.utils import to_categorical

Using TensorFlow backend.


In [5]:
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score as scikit_f1_score

#### Custom Functions

In [6]:
from src.callbacks import PlotCurves
from src.eval_metrics import f1_macro, f1_micro # Many-to-one does not return a sequential y_preds.
from src.load_data import load_data

### Load Data

In [7]:
train_data, valid_data, test_data, metadata = load_data()

### Prepare data

In [9]:
ww = 1
batch_size = 32

In [10]:
def get_input(data_, ww, batch_size, one_hot=False, limit=None):

    def normalize(text):
        return text.replace('\n', '').strip()

    padding_sent = {
        'sentence': 'ENDPAD',
        'label': 0
    }

    X = []
    y = []

    for article in data_:
        sent_objs = article['sentences']

        for si, sentence in enumerate(sent_objs):
            sequence = []

            # Prev
            for i in reversed(range(ww)):
                sequence.append(normalize(sent_objs[si-i-1]['sentence'])
                                if si-i-1 >= 0
                                else padding_sent['sentence'])

            # Curr
            sequence.append(normalize(sent_objs[si]['sentence']))

            # Next
            for i in range(ww):
                sequence.append(normalize(sent_objs[si+i+1]['sentence'] )
                                if si+i+1 < len(article['sentences'])
                                else padding_sent['sentence'])

            X.append(sequence)

            if one_hot:
                label_ = to_categorical(sent_objs[si]['label'], num_classes=2)
            else:
                label_ = sent_objs[si]['label']

            y.append(label_)

    # limit data if not an even number when batch_size=2
#     if not limit:
#         limit = len(X) if len(X)%batch_size == 0 else len(X)-len(X)%batch_size
#     X = X[:limit]
#     y = y[:limit]

    return np.array(X), np.array(y)

In [11]:
X_tra, y_tra = get_input(train_data, ww, batch_size, one_hot=True, limit=None)
X_val, y_val = get_input(valid_data, ww, batch_size, one_hot=True, limit=None)
X_test, y_test = get_input(test_data, ww, batch_size, one_hot=False, limit=None)

In [12]:
X_tra.shape, X_val.shape, X_test.shape

((3582, 3), (399, 3), (441, 3))

In [13]:
y_tra.shape, y_val.shape, y_test.shape

((3582, 2), (399, 2), (441,))

### Load ELMo

In [14]:
sess = tf.compat.v1.Session()
K.set_session(sess)

In [15]:
os.environ["TFHUB_CACHE_DIR"] = '/tmp/tfhub'

In [16]:
elmo = hub.Module("https://tfhub.dev/google/elmo/3", trainable=True)

## Build Model

In [17]:
def ELMoEmbeddingStack(x):
    """
    ELMo takes list of sentences (as strings) and returns list of vectors.
    Thus when an article is given to elmo(), it returns a vector for each sentence.

    >> elmo(['I saw a cat.', 'There was also a dog.'])
    [<1024>, <1024>]
    """
    embeds = []
    for art in tf.unstack(tf.transpose(x, (1, 0))):
        embeds.append(elmo(tf.squeeze(tf.cast(art, tf.string)), signature='default', as_dict=True)['default'])
    return tf.stack(embeds, 1)

In [18]:
def build_model_0(ww):

    inp_size = 2 * ww + 1
    input_text = Input(shape=(inp_size,), dtype='string')

    embedding = Lambda(ELMoEmbeddingStack, output_shape=(None, None, inp_size, 1024))(input_text)

    x = Bidirectional(CuDNNLSTM(units=256, return_sequences=True))(embedding)

    pred = LSTM(2, activation='softmax')(x)

    return Model(inputs=[input_text], outputs=pred)

In [19]:
learningrate=0.01
optimizer = Adam(lr=learningrate)
optimizer_str = 'adam'
loss = 'binary_crossentropy'
metrics = ['acc', f1_macro, f1_micro]

In [20]:
model = build_model_0(ww)
model.summary()

model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

W1226 00:51:59.198355 140324051894400 deprecation_wrapper.py:119] From /home/aorus/workspaces/simge/Master_Thesis/.env/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W1226 00:51:59.199070 140324051894400 deprecation_wrapper.py:119] From /home/aorus/workspaces/simge/Master_Thesis/.env/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W1226 00:52:00.201127 140324051894400 deprecation_wrapper.py:119] From /home/aorus/workspaces/simge/Master_Thesis/.env/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W1226 00:52:00.537301 140324051894400 deprecation_wrapper.py:119] From /home/aorus/workspaces/simge/Master_Thesis/.env/lib/python3.6/site-packages/keras/optimizers.py:790: T

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 3)                 0         
_________________________________________________________________
lambda_1 (Lambda)            (None, None, None, 3, 102 0         
_________________________________________________________________
bidirectional_1 (Bidirection (None, None, 512)         2625536   
_________________________________________________________________
lstm_1 (LSTM)                (None, 2)                 4120      
Total params: 2,629,656
Trainable params: 2,629,656
Non-trainable params: 0
_________________________________________________________________


In [21]:
model_name = 'RQ2_test_elmo_many_to_one_model_0' + \
             '_ww_' + str(ww) + \
             '_' + optimizer_str +  \
             '_lr_' + str(learningrate) +  \
             '_lrreduction' + \
             '_loss_' + loss + \
             '_onehot' + \
             '_softmax'

model_dir = './Model/' + model_name.split('model')[0] + 'model/' + model_name
results_file = os.path.join(model_dir, 'model_results_file.txt')

### Train Model

In [22]:
model.fit(X_tra, y_tra,
          epochs=50,
          batch_size=batch_size,
          validation_data=(X_val, y_val),
          callbacks=[
              PlotCurves(model_name=model_name, model_dir=model_dir,
                         plt_show=False, jnote=False),
              ReduceLROnPlateau(monitor='val_f1_macro', patience=3,
                                factor=0.1, min_lr=0.00001),
              EarlyStopping(monitor='val_f1_macro', min_delta=0,
                            patience=10, mode='max')
          ])

W1226 00:52:03.305498 140324051894400 deprecation_wrapper.py:119] From /home/aorus/workspaces/simge/Master_Thesis/.env/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:986: The name tf.assign_add is deprecated. Please use tf.compat.v1.assign_add instead.



Train on 3582 samples, validate on 399 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
 128/3582 [>.............................] - ETA: 31s - loss: 0.4590 - acc: 0.8281 - f1_macro: 0.4958 - f1_micro: 0.7188

KeyboardInterrupt: 

### Load the best Model

In [38]:
model_name

'RQ2_test_elmo_many_to_one_model_0_ww_1_adam_lr_0.001_lrreduction_loss_binary_crossentropy_onehot_softmax'

In [39]:
best_model = load_model(os.path.join(model_dir, model_name + '_best_f1_macro_model.h5'), 
                        custom_objects={'elmo':elmo, 'tf':tf, 'f1_macro':f1_macro, 'f1_micro':f1_micro})

### Evaluation

In [40]:
def get_scores(model, data_, batch_size, ww, results_file, print_out=False):

    X, y_true = get_input(data_, ww, batch_size, one_hot=False, limit=None)

    y_preds = model.predict(X, batch_size=batch_size)
    y_preds = np.argmax(y_preds, axis=1)

    clsrpt = classification_report(y_true, y_preds)
    sf1 = scikit_f1_score(y_true, y_preds)
    sfm = scikit_f1_score(y_true, y_preds, average='macro')

    if print_out:
        print(clsrpt)
        print('\nScikit_F1_Macro:', sfm)
        print('\nScikit_F1_1:', sf1)

    if results_file:
        with open(results_file, 'a') as f:
            f.write('\n' + clsrpt + '\nF1_Macro: ' + str(sfm) + '\nF1_1: ' + str(sf1) + '\n\n')

    return sfm

#### Validation Set

In [41]:
with open(results_file, 'w') as f:
    f.write('\n---------------- Validation ----------------\n')
val_f1 = get_scores(best_model, valid_data, batch_size, ww, results_file)

#### Test Set

In [42]:
with open(results_file, 'a') as f:
    f.write('\n---------------- Test ----------------\n')
test_f1 = get_scores(best_model, test_data, batch_size, ww, results_file)