In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np

In [3]:
import tensorflow as tf
import tensorflow_hub as hub

In [4]:
from keras import backend as K

from keras.models import Model, Input
from keras.layers import Embedding, Dense, Lambda
from keras.regularizers import l2

from keras.utils import to_categorical

Using TensorFlow backend.


In [5]:
from keras.callbacks import ReduceLROnPlateau
from keras.optimizers import RMSprop, Adam, SGD
from keras.models import load_model

In [6]:
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score as scikit_f1_score

#### Custom Functions

In [7]:
from src.callbacks import PlotCurves
from src.custom_functions import f1_macro, f1_micro 
from src.load_data import load_data

### Load Data

In [8]:
# Presumably the number of label classes (reports below show classes 0 and 1).
# NOTE(review): n_tags is not referenced by any other visible cell — confirm it is still needed.
n_tags = 2

In [9]:
# load_data() yields the three data splits plus a metadata object.
# Each split is consumed below as an iterable of articles whose 'sentences'
# entries carry a 'sentence' text and a 'label'; metadata is unused in the visible cells.
train_data, valid_data, test_data, metadata = load_data()

### Prepare data

In [10]:
def split_data(data_, to_categorize):
    """Flatten articles into parallel arrays of sentence texts and labels.

    Args:
        data_: sequence of articles; each article is a mapping with a
            'sentences' entry holding mappings with 'sentence' and 'label'.
        to_categorize: when truthy, the labels are passed through
            ``to_categorical`` before being returned.

    Returns:
        Tuple ``(X, y)``. ``X`` is an array of the sentence strings with
        newline characters removed and surrounding whitespace stripped.
        ``y`` is the array of labels, converted by ``to_categorical`` when
        ``to_categorize`` is truthy.
    """
    # Walk the article -> sentence nesting once and reuse the flat list.
    flat_sentences = [entry
                      for article in data_
                      for entry in article['sentences']]

    texts = [entry['sentence'].replace('\n', '').strip()
             for entry in flat_sentences]
    labels = [entry['label'] for entry in flat_sentences]

    X = np.array(texts)
    y = np.array(labels)

    return X, (to_categorical(y) if to_categorize else y)

In [11]:
# Train/validation labels go through to_categorical (to_categorize=True);
# test labels are kept as raw label values for the scikit-learn metrics below.
X_tra, y_tra = split_data(train_data, to_categorize=True)
X_val, y_val = split_data(valid_data, to_categorize=True)
X_test, y_test = split_data(test_data, to_categorize=False)

### Load ELMo

In [39]:
# Create a TensorFlow session and register it as the session used by the Keras backend.
# NOTE(review): no variable/table initializer is run in the visible cells —
# confirm the ELMo module's state is initialized wherever it needs to be.
sess = tf.Session()
K.set_session(sess)

In [40]:
# Instantiate the ELMo v2 module from TF-Hub; it is handed to load_model below
# through custom_objects under the name 'elmo'.
# NOTE(review): trainable=True is passed although the visible cells only call
# predict — presumably kept to mirror the training-time setup; confirm.
elmo = hub.Module("https://tfhub.dev/google/elmo/2", trainable=True)

In [115]:
# Configuration of the run being evaluated. These values are also baked into
# model_name, which determines the directory/file the saved model is read from.
learningrate = 0.005
optimizer_str = 'SGD'
optimizer = SGD(lr=learningrate)
loss = 'binary_crossentropy'
metrics = ['acc', f1_macro, f1_micro]

# Concatenate the fixed prefix with optimizer, learning rate and loss.
model_name = ''.join([
    'RQ1_elmo_flat_model_0_',
    optimizer_str,
    '_lr_',
    str(learningrate),
    '_loss_',
    loss,
])  # + '_with_learnin_rate_reduction'


## Load the best Model

In [116]:
# Restore the checkpoint stored under Model/<model_name>/. custom_objects maps
# the names 'elmo', 'tf', 'f1_macro' and 'f1_micro' to the objects in this kernel.
best_model_path = 'Model/' + model_name + '/' + model_name + '_best_f1_macro_model.h5'
best_model = load_model(
    best_model_path,
    custom_objects={
        'elmo': elmo,
        'tf': tf,
        'f1_macro': f1_macro,
        'f1_micro': f1_micro,
    },
)

### Validation Set


In [117]:
# os is used in the next cell to build the results-file path.
# NOTE(review): this import sits mid-notebook; consider moving it to the import cells at the top.
import os
# Echo the model name so the results below can be matched to the run.
model_name

'RQ1_elmo_flat_model_0_SGD_lr_0.005_loss_binary_crossentropy'

In [118]:
# Start a fresh results file in the model's directory; mode 'w' discards any
# previous content, and later cells append to it.
model_dir = 'Model/' + model_name
results_file = os.path.join(model_dir, 'model_results_file.txt')

with open(results_file, mode='w') as results_out:
    results_out.write('\n----------------\n')

In [119]:
# Raw validation labels (y_val was passed through to_categorical earlier), obtained
# via the same helper that built X_val so texts and labels stay aligned.
y_val_true = split_data(valid_data, False)[1]

# Model outputs per sentence, reduced to the arg-max class index.
y_val_preds = np.argmax(best_model.predict(X_val, batch_size=32), axis=1)

# Build the report once, then both display it and append it to the results file.
val_report = classification_report(y_val_true, y_val_preds)
print(val_report)
with open(results_file, 'a') as results_out:
    results_out.write(val_report + '\n')

              precision    recall  f1-score   support

           0       0.85      0.90      0.87       269
           1       0.77      0.66      0.71       130

   micro avg       0.82      0.82      0.82       399
   macro avg       0.81      0.78      0.79       399
weighted avg       0.82      0.82      0.82       399



In [120]:
# Macro-averaged F1 on the validation set, shown as the cell output.
scikit_f1_score(y_val_true, y_val_preds,average='macro')

0.7924222605386765

In [121]:
# Append the validation macro-F1 on its own line of the results file.
val_f1_macro = scikit_f1_score(y_val_true, y_val_preds, average='macro')
with open(results_file, 'a') as results_out:
    results_out.write(str(val_f1_macro) + '\n')

### Test Set

In [122]:
# Run the model on the test sentences and keep the arg-max class index per row.
test_scores = best_model.predict(X_test, batch_size=32)
y_preds = test_scores.argmax(axis=1)

In [123]:
# Test-set classification report: displayed and appended to the results file.
# NOTE(review): this cell was labelled "build_flat_model_1 results" while
# model_name refers to "flat_model_0" — confirm which model these numbers belong to.
test_report = classification_report(y_test, y_preds)
print(test_report)
with open(results_file, 'a') as results_out:
    results_out.write(test_report + '\n')

              precision    recall  f1-score   support

           0       0.83      0.97      0.89       325
           1       0.83      0.46      0.59       116

   micro avg       0.83      0.83      0.83       441
   macro avg       0.83      0.71      0.74       441
weighted avg       0.83      0.83      0.81       441



In [124]:
# Macro-averaged F1 on the test set, shown as the cell output.
scikit_f1_score(y_test, y_preds,average='macro')

0.7417378917378918

In [125]:
# Append the test macro-F1 on its own line of the results file.
test_f1_macro = scikit_f1_score(y_test, y_preds, average='macro')
with open(results_file, 'a') as results_out:
    results_out.write(str(test_f1_macro) + '\n')

In [126]:
# Echo the model name once more so the figures above can be tied to this run.
model_name

'RQ1_elmo_flat_model_0_SGD_lr_0.005_loss_binary_crossentropy'