# TODO:
* Butun makale yerine sliding window seklinde ver cumleleri (ekstra deney olarak) 

Next: https://github.com/arunarn2/HierarchicalAttentionNetworks/blob/master/HierarchicalAttn.py

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np

In [3]:
import matplotlib.pyplot as plt
# from IPython import display

In [4]:
from sklearn.metrics import classification_report

In [5]:
import tensorflow as tf
import tensorflow_hub as hub

In [6]:
from keras import backend as K

from keras.models import Model, Input
from keras.layers.merge import add
from keras.layers import LSTM, Embedding, Dense, TimeDistributed, Bidirectional, Lambda
from keras.regularizers import l2

from keras.utils import to_categorical
from keras.preprocessing.sequence import pad_sequences

Using TensorFlow backend.


#### Custom Functions

In [7]:
from src.callbacks import PlotCurves
from src.custom_functions import f1_macro, f1_micro 
from src.load_data import load_data

### Load Data

In [8]:
train_data, valid_data, test_data, metadata = load_data()

### Prepare data

In [11]:
max_len = 60
n_tags = 2

In [12]:
def split_data(data_, max_len, n_tags, is_test=False):
    
    X = []
    for article in data_:
        new_seq = []
        for i in range(max_len):
            try:
                new_seq.append(article['sentences'][i]['sentence'])
            except:
                new_seq.append("ENDPAD")
        X.append(new_seq)
    X = np.array(X)
    
    if not is_test: 
        y = [[sent['label'] for sent in article['sentences']] for article in data_]
        y = pad_sequences(maxlen=max_len, sequences=y, padding="post", value=0)
        y = np.array([[to_categorical(lab, num_classes=n_tags) for lab in sent] for sent in y])
    else:
        y = np.array([sent['label'] for article in data_ for sent in article['sentences']])
    
    return X, y

In [15]:
X_tra, y_tra = split_data(train_data, max_len, n_tags, False)
X_val, y_val = split_data(valid_data, max_len, n_tags, False)
X_test, y_test = split_data(test_data, max_len, n_tags, True)

In [16]:
y_tra.shape, y_val.shape, y_test.shape

((251, 60, 2), (32, 60, 2), (441,))

#### Limit Data

In [17]:
X_tra = X_tra[:250]
y_tra = y_tra[:250]

In [18]:
X_tra.shape, type(X_tra)

((250, 60), numpy.ndarray)

In [19]:
X_val = X_val[:32]
y_val = y_val[:32]

In [20]:
X_val.shape, y_val.shape, type(X_val)

((32, 60), (32, 60, 2), numpy.ndarray)

### Load ELMo

In [21]:
sess = tf.Session()
K.set_session(sess)

In [22]:
elmo = hub.Module("https://tfhub.dev/google/elmo/2", trainable=True)

## Build Model

In [23]:
def ELMoEmbedding(x):
    embeds = []
    for art in tf.unstack(tf.transpose(x, (1, 0))):
        embeds.append(elmo(tf.squeeze(tf.cast(art, tf.string)), signature="default", as_dict=True)["default"])
    return tf.stack(embeds, 1)

In [24]:
def build_lstm_model(max_len, n_tags):
    
    input_text = Input(shape=(max_len,), dtype="string")
    
    embedding = Lambda(ELMoEmbedding, output_shape=(None, None, max_len, 1024))(input_text)
    
    dns = Dense(512, activation='relu')(embedding)
    
    dns = Dense(256, activation='relu')(dns)
    
    x = Bidirectional(LSTM(units=128, return_sequences=True,
                           recurrent_dropout=0.2, dropout=0.2))(dns)

    x_rnn = Bidirectional(LSTM(units=128, return_sequences=True,
                               recurrent_dropout=0.2, dropout=0.2))(x)

    x = add([x, x_rnn])  # residual connection to the first biLSTM

    out = TimeDistributed(Dense(n_tags, activation="softmax"))(x)
    
    return Model(input_text, outputs=out)

In [26]:
model = build_lstm_model(max_len, n_tags)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 60)           0                                            
__________________________________________________________________________________________________
lambda_2 (Lambda)               (None, None, None, 6 0           input_2[0][0]                    
__________________________________________________________________________________________________
dense_4 (Dense)                 (None, None, None, 6 524800      lambda_2[0][0]                   
__________________________________________________________________________________________________
dense_5 (Dense)                 (None, None, None, 6 131328      dense_4[0][0]                    
__________________________________________________________________________________________________
bidirectio

In [28]:
model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

### Train Model

In [43]:
## Sequential Model
model.fit(X_tra, y_tra, epochs=20, batch_size=2, validation_data=(X_val, y_val)) 
#            callbacks=[PlotCurves(model_name='elmo_sentence_sequence')])

Train on 250 samples, validate on 32 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f665fc6f748>

### Test

In [48]:
model_preds = model.predict(X_test, batch_size=2)

In [49]:
y_preds = [[np.argmax(lab) for lab in art] for art in model_preds]

In [50]:
y_preds_unpad = []
for ai, art in enumerate(X_test):
    for si, sent in enumerate(art):
        if sent != 'ENDPAD':
            y_preds_unpad.append(y_preds[ai][si])

In [51]:
### Sequential Model results
print(classification_report(y_test, y_preds_unpad))

              precision    recall  f1-score   support

           0       0.90      0.78      0.84       325
           1       0.55      0.75      0.64       116

   micro avg       0.77      0.77      0.77       441
   macro avg       0.72      0.77      0.74       441
weighted avg       0.81      0.77      0.78       441

