In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import numpy as np

In [3]:
import tensorflow as tf
import tensorflow_hub as hub

In [4]:
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Dense, Lambda, Activation, Conv1D, \
                                    MaxPooling1D, Flatten, Reshape, Bidirectional, \
                                    BatchNormalization, Dropout, add, LSTM, \
                                    TimeDistributed
from tensorflow.keras.optimizers import RMSprop, Adam, Adamax, SGD
from tensorflow.keras.regularizers import l2

In [5]:
from src.keras_bert import convert_single_example, \
                           convert_text_to_examples, \
                           create_tokenizer_from_hub_module, \
                           convert_examples_to_features, \
                           InputExample, \
                           initialize_vars, \
                           BertLayer

In [6]:
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score as scikit_f1_score

In [7]:
# Initialize session
sess = tf.compat.v1.Session()

In [8]:
with tf.device("gpu:0"):
    print("GPU enabled")

GPU enabled


#### Custom Functions

In [9]:
from src.callbacks import PlotCurvesTF as PlotCurves
from src.eval_metrics_seq import f1_macro, f1_micro 
from src.load_data import load_data

Using TensorFlow backend.


### Load Data

In [10]:
train_data, valid_data, test_data, _ = load_data()
train_data = [art for art in train_data if len(art['sentences']) > 1]

### Load BERT

In [11]:
os.environ["TFHUB_CACHE_DIR"] = '/tmp/tfhub'

In [12]:
bert_path = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"

In [13]:
bert = hub.Module(bert_path, trainable=True)

### Prepare data

In [14]:
batch_size = 2
max_len = 58
max_seq_length = 512
if max_seq_length > 512:
    print('!!!!!!! WARNING: BERT does not accept length > 512')
    max_seq_length = 512

In [15]:
def get_padding_sentence(max_seq_length, tokenizer, padding_text='ENDPAD'):

    example_sent = InputExample(guid=None, text_a=" ".join(padding_text), text_b=None, label=0)

    (input_ids, input_mask, segment_ids, label) = \
        convert_single_example(tokenizer, example_sent, max_seq_length=max_seq_length)

    return {"input_ids": input_ids, "input_mask": input_mask, "segment_ids": segment_ids, "label": 0}

In [16]:
def get_input(data_, max_len, max_seq_length):

    tokenizer = create_tokenizer_from_hub_module(bert_path)

    padding_sent = get_padding_sentence(max_seq_length, tokenizer)

    X = []
    y = []
    for article in data_:

        input_ids_seq = []
        input_mask_seq = []
        segment_ids_seq = []
        y_seq = []

        X_art = np.array([[" ".join(sentence['sentence'].replace('\n', ' ').strip().split()[0:max_seq_length])]
                          for sentence in article['sentences']], dtype=object)

        y_art = [sentence['label'] for sentence in article['sentences']]

        examples_ = convert_text_to_examples(X_art, y_art)

        (input_ids, input_masks, segment_ids, labels_) = \
            convert_examples_to_features(tokenizer, examples_, max_seq_length=max_seq_length)

        for i in range(max_len):

            if i < len(article['sentences']):
                input_ids_seq.append(input_ids[i])
                input_mask_seq.append(input_masks[i])
                segment_ids_seq.append(segment_ids[i])
                y_seq.append(labels_[i])

            else:
                input_ids_seq.append(padding_sent['input_ids'])
                input_mask_seq.append(padding_sent['input_mask'])
                segment_ids_seq.append(padding_sent['segment_ids'])
                y_seq.append([0])

        X_seq = (np.array(input_ids_seq),
                 np.array(input_mask_seq),
                 np.array(segment_ids_seq))

        X.append(X_seq)
        y.append(y_seq)

    return np.array(X), np.array(y), padding_sent

In [17]:
X_tra, y_tra, _ = get_input(train_data, max_len, max_seq_length)
X_val, y_val, _ = get_input(valid_data, max_len, max_seq_length)

Converting examples to features: 100%|██████████| 10/10 [00:00<00:00, 2092.97it/s]
Converting examples to features: 100%|██████████| 16/16 [00:00<00:00, 2149.20it/s]
Converting examples to features: 100%|██████████| 14/14 [00:00<00:00, 2171.61it/s]
Converting examples to features: 100%|██████████| 13/13 [00:00<00:00, 2083.77it/s]
Converting examples to features: 100%|██████████| 33/33 [00:00<00:00, 2628.71it/s]
Converting examples to features: 100%|██████████| 11/11 [00:00<00:00, 1235.60it/s]
Converting examples to features: 100%|██████████| 5/5 [00:00<00:00, 2976.37it/s]
Converting examples to features: 100%|██████████| 12/12 [00:00<00:00, 612.33it/s]
Converting examples to features: 100%|██████████| 17/17 [00:00<00:00, 1789.96it/s]
Converting examples to features: 100%|██████████| 5/5 [00:00<00:00, 2395.10it/s]
Converting examples to features: 100%|██████████| 22/22 [00:00<00:00, 1882.04it/s]
Converting examples to features: 100%|██████████| 18/18 [00:00<00:00, 1863.17it/s]
Convertin

Converting examples to features: 100%|██████████| 14/14 [00:00<00:00, 2550.84it/s]
Converting examples to features: 100%|██████████| 9/9 [00:00<00:00, 2109.22it/s]
Converting examples to features: 100%|██████████| 11/11 [00:00<00:00, 2696.83it/s]
Converting examples to features: 100%|██████████| 14/14 [00:00<00:00, 2457.02it/s]
Converting examples to features: 100%|██████████| 8/8 [00:00<00:00, 2416.77it/s]
Converting examples to features: 100%|██████████| 20/20 [00:00<00:00, 2540.08it/s]
Converting examples to features: 100%|██████████| 14/14 [00:00<00:00, 2729.40it/s]
Converting examples to features: 100%|██████████| 6/6 [00:00<00:00, 2216.28it/s]
Converting examples to features: 100%|██████████| 7/7 [00:00<00:00, 2109.20it/s]
Converting examples to features: 100%|██████████| 53/53 [00:00<00:00, 2891.57it/s]
Converting examples to features: 100%|██████████| 11/11 [00:00<00:00, 2211.02it/s]
Converting examples to features: 100%|██████████| 15/15 [00:00<00:00, 2111.23it/s]
Converting e

Converting examples to features: 100%|██████████| 8/8 [00:00<00:00, 2085.81it/s]
Converting examples to features: 100%|██████████| 20/20 [00:00<00:00, 2365.59it/s]
Converting examples to features: 100%|██████████| 19/19 [00:00<00:00, 2882.27it/s]
Converting examples to features: 100%|██████████| 8/8 [00:00<00:00, 3026.74it/s]
Converting examples to features: 100%|██████████| 14/14 [00:00<00:00, 2413.39it/s]
Converting examples to features: 100%|██████████| 8/8 [00:00<00:00, 2373.18it/s]
Converting examples to features: 100%|██████████| 8/8 [00:00<00:00, 1711.70it/s]
Converting examples to features: 100%|██████████| 17/17 [00:00<00:00, 2409.79it/s]
Converting examples to features: 100%|██████████| 12/12 [00:00<00:00, 2706.15it/s]
Converting examples to features: 100%|██████████| 9/9 [00:00<00:00, 2538.75it/s]
Converting examples to features: 100%|██████████| 5/5 [00:00<00:00, 2298.00it/s]
Converting examples to features: 100%|██████████| 10/10 [00:00<00:00, 2052.61it/s]
Converting examp

In [18]:
X_tra.shape, X_val.shape#, X_test.shape

((250, 3, 58, 512), (32, 3, 58, 512))

In [19]:
y_tra.shape, y_val.shape#, y_test.shape

((250, 58, 1), (32, 58, 1))

## Build Model

In [20]:
def BERTEmbeddingStack(x):
    embeds = []
    for art in tf.unstack(tf.reshape(x, (batch_size, 3, max_len, 512))):
        art = tf.cast(art, dtype="int32")
        # Below does not change the shape of segment_ids etc.
        # Only puts them into a dictionary
        bert_inputs = dict(
            input_ids=art[0],
            input_mask=art[1],
            segment_ids=art[2]
        )
        # Pooling
        result = bert(bert_inputs, signature="tokens", as_dict=True)["sequence_output"]
        mul_mask = lambda x, m: x * tf.expand_dims(m, axis=-1)
        masked_reduce_mean = lambda x, m: tf.reduce_sum(mul_mask(x, m), axis=1) / (
                             tf.reduce_sum(m, axis=1, keepdims=True) + 1e-10)
        input_mask = tf.cast(art[1], tf.float32)
        pooled = masked_reduce_mean(result, input_mask)
        embeds.append(pooled)
    # print(tf.stack(embeds, 0))
    return tf.stack(embeds, 0)

In [21]:
def build_model_0(max_len, max_seq_length):

    input_text = Input(shape=(3, max_len, max_seq_length))

    bert_output = Lambda(BERTEmbeddingStack, output_shape=(None, None, max_len, 768))(input_text)

    x = Bidirectional(LSTM(units=128, return_sequences=True))(bert_output)

    pred = TimeDistributed(Dense(1, activation='softmax'))(x)

    return Model(inputs=[input_text], outputs=pred)

In [22]:
learningrate = 0.0001
optimizer = Adam(lr=learningrate)
optimizer_str = 'adam'
loss = 'binary_crossentropy'
metrics = ['acc', f1_macro, f1_micro]

In [23]:
model = build_model_0(max_len, max_seq_length)
model.summary()

model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

W1231 00:10:50.081682 139742757937280 deprecation.py:506] From /home/aorus/workspaces/simge/Master_Thesis/.env/lib/python3.6/site-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W1231 00:10:50.084985 139742757937280 deprecation.py:506] From /home/aorus/workspaces/simge/Master_Thesis/.env/lib/python3.6/site-packages/tensorflow/python/ops/init_ops.py:97: calling GlorotUniform.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W1231 00:10:50.085531 139742757937280 deprecation.py:506] From /home/aorus/workspaces/simge/Master_Thesis/.env/lib/python3.6/site-packag

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 3, 58, 512)]      0         
_________________________________________________________________
lambda (Lambda)              (2, 58, 768)              0         
_________________________________________________________________
bidirectional (Bidirectional (2, 58, 256)              918528    
_________________________________________________________________
time_distributed (TimeDistri (2, 58, 1)                257       
Total params: 918,785
Trainable params: 918,785
Non-trainable params: 0
_________________________________________________________________


In [24]:
model_name = 'RQ2_test_bert_model_0' + \
             '_maxlen_' + str(max_len) + \
             '_' + optimizer_str + \
             '_lr_' + str(learningrate) + \
             '_lrreduction' + \
             '_loss_' + loss

model_dir = './Model/' + model_name.split('model')[0] + 'model/' + model_name
results_file = os.path.join(model_dir, 'model_results_file.txt')

### Train Model

In [25]:
# Instantiate variables
initialize_vars(sess)

In [27]:
model.fit(X_tra, y_tra,
          epochs=1,
          batch_size=batch_size,
          validation_data=(X_val, y_val),
          callbacks=[
              PlotCurves(model_name=model_name, model_dir=model_dir,
                         plt_show=False, jnote=False, save_best=False),
          ])

Train on 250 samples, validate on 32 samples


<tensorflow.python.keras.callbacks.History at 0x7f16c0b92208>

<Figure size 720x360 with 0 Axes>

### Load the best Model

In [28]:
best_model = model

### Evaluation

In [29]:
def get_scores(model, data_, batch_size, max_len, max_seq_length,
               results_file=None, print_out=False):
    
    def unpad(X, y_preds, padding_sent):
        y_unpad = []
        for ai, art in enumerate(X):
            for si, sen_inp_ids in enumerate(art[0]):
                if list(sen_inp_ids) != list(padding_sent['input_ids']):
                    y_unpad.append(y_preds[ai][si])
        return y_unpad
    
    X, y_true, padding_sent = get_input(data_, max_len, max_seq_length)
    y_true = unpad(X, y_true, padding_sent)
    y_true = [y[0] for y in y_true]
    
    y_preds = model.predict(X, batch_size=batch_size)
    y_preds = unpad(X, y_preds, padding_sent)
    y_preds = [0 if y[0] < 0.5 else 1 for y in y_preds]
    
    clsrpt = classification_report(y_true, y_preds)
    sf1 = scikit_f1_score(y_true, y_preds)
    sfm = scikit_f1_score(y_true, y_preds, average='macro')

    if print_out:
        print(clsrpt)
        print('\nScikit_F1_Macro:', sfm)
        print('\nScikit_F1_1:', sf1)

    if results_file:
        with open(results_file, 'a') as f:
            f.write('\n' + clsrpt + '\nF1_Macro: ' + str(sfm) + '\nF1_1: ' + str(sf1) + '\n\n')
    return sfm

#### Validation Set

In [30]:
# with open(results_file, 'w') as f:
#     f.write('\n---------------- Validation ----------------\n')
val_f1 = get_scores(best_model, valid_data, batch_size, max_len, max_seq_length, 
                    results_file, print_out=True)

Converting examples to features: 100%|██████████| 4/4 [00:00<00:00, 1327.10it/s]
Converting examples to features: 100%|██████████| 20/20 [00:00<00:00, 1520.39it/s]
Converting examples to features: 100%|██████████| 5/5 [00:00<00:00, 2132.12it/s]
Converting examples to features: 100%|██████████| 14/14 [00:00<00:00, 2240.12it/s]
Converting examples to features: 100%|██████████| 16/16 [00:00<00:00, 2615.21it/s]
Converting examples to features: 100%|██████████| 18/18 [00:00<00:00, 2504.06it/s]
Converting examples to features: 100%|██████████| 8/8 [00:00<00:00, 2381.27it/s]
Converting examples to features: 100%|██████████| 15/15 [00:00<00:00, 1754.25it/s]
Converting examples to features: 100%|██████████| 9/9 [00:00<00:00, 1522.19it/s]
Converting examples to features: 100%|██████████| 12/12 [00:00<00:00, 2537.01it/s]
Converting examples to features: 100%|██████████| 12/12 [00:00<00:00, 2415.38it/s]
Converting examples to features: 100%|██████████| 7/7 [00:00<00:00, 2688.66it/s]
Converting exa

              precision    recall  f1-score   support

           0       0.00      0.00      0.00       269
           1       0.33      1.00      0.49       130

   micro avg       0.33      0.33      0.33       399
   macro avg       0.16      0.50      0.25       399
weighted avg       0.11      0.33      0.16       399


Scikit_F1_Macro: 0.2457466918714556

Scikit_F1_1: 0.4914933837429112


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


#### Test Set

In [31]:
with open(results_file, 'a') as f:
    f.write('\n---------------- Test ----------------\n')
test_f1 = get_scores(best_model, test_data, batch_size, max_len, max_seq_length,
                     results_file, print_out=True)

Converting examples to features: 100%|██████████| 23/23 [00:00<00:00, 2811.03it/s]
Converting examples to features: 100%|██████████| 14/14 [00:00<00:00, 1890.48it/s]
Converting examples to features: 100%|██████████| 14/14 [00:00<00:00, 2396.94it/s]
Converting examples to features: 100%|██████████| 32/32 [00:00<00:00, 2438.28it/s]
Converting examples to features: 100%|██████████| 23/23 [00:00<00:00, 2355.60it/s]
Converting examples to features: 100%|██████████| 12/12 [00:00<00:00, 3331.24it/s]
Converting examples to features: 100%|██████████| 4/4 [00:00<00:00, 1684.80it/s]
Converting examples to features: 100%|██████████| 9/9 [00:00<00:00, 2037.39it/s]
Converting examples to features: 100%|██████████| 5/5 [00:00<00:00, 2196.43it/s]
Converting examples to features: 100%|██████████| 34/34 [00:00<00:00, 3194.30it/s]
Converting examples to features: 100%|██████████| 10/10 [00:00<00:00, 2799.19it/s]
Converting examples to features: 100%|██████████| 10/10 [00:00<00:00, 1853.10it/s]
Converting

              precision    recall  f1-score   support

           0       0.00      0.00      0.00       325
           1       0.26      1.00      0.42       116

   micro avg       0.26      0.26      0.26       441
   macro avg       0.13      0.50      0.21       441
weighted avg       0.07      0.26      0.11       441


Scikit_F1_Macro: 0.20825852782764812

Scikit_F1_1: 0.41651705565529623
