# TODO:
* Adapt PlotCurves() to work with tensorflow functions/methods

Following: https://github.com/strongio/keras-bert/blob/master/keras-bert.ipynb

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import RMSprop, Adam, SGD

In [None]:
from keras.utils import to_categorical

In [None]:
from src.keras_bert import convert_text_to_examples, \
                           create_tokenizer_from_hub_module, \
                           convert_examples_to_features, \
                           initialize_vars, \
                           build_model, \
                           BertLayer

In [None]:
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score as scikit_f1_score

In [None]:
# Initialize the TensorFlow session. It is passed to initialize_vars(sess)
# further down, before model.fit() is called.
sess = tf.Session()

#### Custom Functions

In [None]:
from src.callbacks import PlotCurvesTF as PlotCurves
from src.eval_metrics import f1_micro, f1_macro
from src.load_data import load_data

### Load Data

In [None]:
# Load the train/validation/test splits plus metadata via the project loader.
# Each split is iterated below as a collection of articles, where every article
# has a 'sentences' list whose items carry a 'sentence' text and a 'label'.
train_data, valid_data, test_data, metadata = load_data()

### Prepare data

In [None]:
# Per-sentence sequence length handed to the BERT preprocessing and the model inputs.
max_seq_length = 512
# Clamp over-long settings back to 512, the limit named in the warning text.
if 512 < max_seq_length:
    print('!!!!!!! WARNING: BERT does not accept lenght > 512')
    max_seq_length = 512

In [None]:
# TF-Hub URL of the BERT module; used to build the tokenizer in split_data()
# and passed to BertLayer in build_flat_model_0().
bert_path = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"

In [None]:
def split_data(data_, max_seq_length, bert_path, to_categorize):
    """Convert article records into BERT input features and labels.

    Args:
        data_: Re-iterable collection of articles (it is traversed twice).
            Each article provides a 'sentences' list whose items have a
            'sentence' text and a 'label'.
        max_seq_length: Used both as the cap on whitespace-separated words
            kept per sentence and as the ``max_seq_length`` forwarded to
            ``convert_examples_to_features``.
        bert_path: Forwarded to ``create_tokenizer_from_hub_module``.
        to_categorize: When truthy, the labels are passed through
            ``to_categorical`` before being returned.

    Returns:
        A pair ``([input_ids, input_masks, segment_ids], labels_)``.
    """
    bert_tokenizer = create_tokenizer_from_hub_module(bert_path)

    # !!! For BERT input, each sentence should be in an array, hence the
    # single-element list wrapped around every cleaned sentence.
    rows = []
    for article in data_:
        for entry in article['sentences']:
            # NOTE(review): newlines are removed rather than replaced by a
            # space, so two words separated only by '\n' end up joined.
            words = entry['sentence'].replace('\n', '').strip().lower().split()
            rows.append([" ".join(words[:max_seq_length])])
    X = np.array(rows, dtype=object)

    # Labels are collected in the same article/sentence order as the texts.
    y = []
    for article in data_:
        for entry in article['sentences']:
            y.append(entry['label'])

    examples_ = convert_text_to_examples(X, y)
    features = convert_examples_to_features(
        bert_tokenizer, examples_, max_seq_length=max_seq_length)
    input_ids, input_masks, segment_ids, labels_ = features

    labels_ = to_categorical(labels_) if to_categorize else labels_

    return [input_ids, input_masks, segment_ids], labels_

In [None]:
# Training and validation labels go through to_categorical before model.fit().
X_tra, y_tra = split_data(train_data, max_seq_length, bert_path, to_categorize=True)
X_val, y_val = split_data(valid_data, max_seq_length, bert_path, to_categorize=True)
# Test labels are left as returned by the feature conversion because they are
# compared directly against argmax predictions in the evaluation cells.
X_test, y_test = split_data(test_data, max_seq_length, bert_path, to_categorize=False)

## Build Model

In [None]:
def build_flat_model_0(max_seq_length):
    """Build the flat classifier: BertLayer followed by a small dense head.

    The three inputs ("input_ids", "input_masks", "segment_ids") each have
    shape ``(max_seq_length,)``. They feed a ``BertLayer`` with
    ``n_fine_tune_layers=3``, followed by ReLU dense layers of 512 and 256
    units and a 2-unit sigmoid output layer.

    Note: the hub module location is read from the module-level ``bert_path``
    variable, not from a parameter.

    Args:
        max_seq_length: Length of each of the three input sequences.

    Returns:
        An uncompiled ``Model`` mapping the three inputs to the 2-unit output.
    """
    input_names = ("input_ids", "input_masks", "segment_ids")
    bert_inputs = [Input(shape=(max_seq_length,), name=input_name)
                   for input_name in input_names]

    hidden = BertLayer(n_fine_tune_layers=3, bert_path=bert_path)(bert_inputs)

    # Dense head, applied in order: 512 -> 256 units, both with ReLU.
    for units in (512, 256):
        hidden = Dense(units, activation="relu")(hidden)

    pred = Dense(2, activation="sigmoid")(hidden)

    return Model(inputs=bert_inputs, outputs=pred)

In [None]:
# Training configuration consumed by the model-name and compile cell below.
learningrate=0.001
optimizer = Adam(lr=learningrate)
# Text tag for the optimizer; used when building model_name.
optimizer_str = 'Adam'
loss = 'binary_crossentropy'
# Accuracy plus the project's macro/micro F1 metrics from src.eval_metrics.
metrics = ['acc', f1_macro, f1_micro]

In [None]:
# Run identifier built from the optimizer tag, learning rate and loss; it is
# used further down as the sub-folder name for the results file.
# (A '_with_learnin_rate_reduction' suffix is currently left off.)
model_name = (
    'RQ1_test_bert_old_model_0_'
    + optimizer_str
    + '_lr_' + str(learningrate)
    + '_loss_' + loss
)

# Build the network, print its layer overview, then attach loss/optimizer/metrics.
model = build_flat_model_0(max_seq_length)
model.summary()
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

### Train

In [None]:
# Instantiate variables in the session created earlier, using the project
# helper from src.keras_bert. This runs after compile() and before fit().
initialize_vars(sess)

In [None]:
## Flat Model Fit
# Train for 20 epochs with batches of 32, using (X_val, y_val) as validation data.
model.fit(X_tra, y_tra, epochs=20, batch_size=32, validation_data=(X_val, y_val))
# The PlotCurves callback is disabled here (see the TODO at the top of the
# notebook about adapting PlotCurves to TensorFlow):
#           callbacks=[PlotCurves(model_name=model_name, jnote=True)])

### Validation Set


In [19]:
import os
model_name

'RQ1_test_bert_old_model_0_Adam_lr_0.001_loss_binary_crossentropy'

In [25]:
# Folder that holds the results of this model run.
results_dir = 'Model/' + model_name
# open(..., 'w') does not create missing parent directories, so make sure the
# folder exists first; exist_ok=True keeps re-running this cell harmless.
os.makedirs(results_dir, exist_ok=True)
results_file = os.path.join(results_dir, 'model_results_file.txt')

# Start a fresh results file ('w' truncates any previous content) with a
# separator line; the evaluation cells append their reports to it afterwards.
with open(results_file, 'w') as f:
    f.write('\n----------------\n')

In [21]:
# Validation labels flattened over all articles (y_val itself went through
# to_categorical for training, so the labels are re-read from valid_data).
y_val_true = np.array([
    sentence['label']
    for article in valid_data
    for sentence in article['sentences']
])

# Predicted class = index of the largest of the two output units.
y_val_preds = np.argmax(model.predict(X_val, batch_size=32), axis=1)

# The report is only printed here; appending it to results_file is left disabled.
print(classification_report(y_val_true, y_val_preds))

              precision    recall  f1-score   support

           0       0.82      0.94      0.88       269
           1       0.82      0.58      0.68       130

   micro avg       0.82      0.82      0.82       399
   macro avg       0.82      0.76      0.78       399
weighted avg       0.82      0.82      0.81       399



### Test

In [22]:
# Predict on the test inputs and keep, per sentence, the index of the larger output unit.
y_preds = np.argmax(model.predict(X_test, batch_size=32), axis=1)

In [23]:
### Test-set classification report
# NOTE(review): this cell was labelled "build_flat_model_1 results", but the
# model built in this notebook is build_flat_model_0 - confirm which is meant.
print(classification_report(y_test, y_preds))
# Appending the report to results_file is disabled in this cell:
# with open(results_file, 'a') as f:
#     f.write(classification_report(y_test, y_preds))
#     f.write('\n')

              precision    recall  f1-score   support

           0       0.83      0.96      0.89       325
           1       0.81      0.44      0.57       116

   micro avg       0.83      0.83      0.83       441
   macro avg       0.82      0.70      0.73       441
weighted avg       0.82      0.83      0.81       441



In [31]:
# Macro-averaged F1 on the test set, computed with scikit-learn's f1_score.
scikit_f1_score(y_test, y_preds,average='macro')

0.8059931779782589

# Oylesine birden alinan sonuclar (EN: results obtained offhand, all of a sudden)

##### Validation

In [41]:
# Validation labels flattened over all articles, in article/sentence order.
y_val_true = np.array([
    sentence['label']
    for article in valid_data
    for sentence in article['sentences']
])

# Predicted class = index of the largest of the two output units.
y_val_preds = np.argmax(model.predict(X_val, batch_size=32), axis=1)

# Build the report once, show it, then append the same text to the results file.
val_report = classification_report(y_val_true, y_val_preds)
print(val_report)
with open(results_file, 'a') as f:
    f.write(val_report + '\n')

              precision    recall  f1-score   support

           0       0.93      0.85      0.89       269
           1       0.73      0.87      0.80       130

   micro avg       0.85      0.85      0.85       399
   macro avg       0.83      0.86      0.84       399
weighted avg       0.87      0.85      0.86       399



In [44]:
# Macro-averaged F1 on the validation set, computed with scikit-learn's f1_score.
scikit_f1_score(y_val_true, y_val_preds,average='macro')

0.841467090480627

##### Test

In [48]:
# Predict on the test inputs and keep, per sentence, the index of the larger output unit.
y_preds = np.argmax(model.predict(X_test, batch_size=32), axis=1)

In [49]:
### Test-set classification report
# NOTE(review): this cell was labelled "build_flat_model_1 results", but the
# model built in this notebook is build_flat_model_0 - confirm which is meant.
# The report is built once, printed, and the same text is appended to the results file.
test_report = classification_report(y_test, y_preds)
print(test_report)
with open(results_file, 'a') as f:
    f.write(test_report + '\n')

              precision    recall  f1-score   support

           0       0.92      0.90      0.91       325
           1       0.75      0.78      0.76       116

   micro avg       0.87      0.87      0.87       441
   macro avg       0.83      0.84      0.84       441
weighted avg       0.88      0.87      0.87       441



In [50]:
# Macro-averaged F1 on the test set, computed with scikit-learn's f1_score.
scikit_f1_score(y_test, y_preds, average='macro')

0.8388746803069054