In [3]:
# load library
from sklearn.metrics import f1_score
import numpy as np
import os
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM
from keras.layers import TimeDistributed, Masking
from keras import optimizers
from keras import regularizers
from keras.utils.np_utils import to_categorical
from keras.layers import Embedding, Input
from keras.models import Model
from keras_contrib.layers import CRF
from sklearn.model_selection import StratifiedKFold

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [87]:
from hyperopt import fmin, tpe, hp, Trials

source: https://github.com/keras-team/keras-contrib/commit/d1f4b6ba7506462a638d3a1f2ab16e2a2e7dc883

In [4]:
# load the data
cwd = os.getcwd()
x_train = np.load(os.path.join(cwd,'saved_dataRumEval2019_npy_files/train/train_array.npy'))
y_train = np.load(os.path.join(cwd,'saved_dataRumEval2019_npy_files/train/fold_stance_labels.npy'))
y_train_cat =[]
for i in range(len(y_train)):
        y_train_cat.append(to_categorical(y_train[i], num_classes=4))
y_train_cat = np.asarray(y_train_cat)

In [6]:
x_test = np.load(os.path.join(cwd,'saved_dataRumEval2019_npy_files/dev/train_array.npy'))
y_test = np.load(os.path.join(cwd,'saved_dataRumEval2019_npy_files/dev/fold_stance_labels.npy'))

In [5]:
ids_test = np.load(os.path.join(cwd,'saved_dataRumEval2019_npy_files/dev/tweet_ids.npy'))
ids_train = np.load(os.path.join(cwd,'saved_dataRumEval2019_npy_files/train/tweet_ids.npy'))

In [14]:
# prepare the data
from sklearn.model_selection import train_test_split
x1,x2,y1,y2,id1,id2 =train_test_split(x_train,y_train_cat,ids_train, test_size=0.3,random_state=4)

search_space = {'num_dense_layers': hp.choice('nlayers', [1, 2]),
                    'num_dense_units': hp.choice('num_dense', [200, 300,
                                                               400, 500]),
                    'num_epochs': hp.choice('num_epochs',  [100, 50]),
                    'num_lstm_units': hp.choice('num_lstm_units', [100, 200,
                                                                   300]),
                    'num_lstm_layers': hp.choice('num_lstm_layers', [1, 2]),
                    'learn_rate': hp.choice('learn_rate', [1e-4, 3e-4, 1e-3]),
                    'mb_size': hp.choice('mb_size', [32, 64]),
                    'l2reg': hp.choice('l2reg', [0.0, 1e-4, 3e-4, 1e-3]),
                    'rng_seed': hp.choice('rng_seed', [364])
                    }

In [147]:

# parameters
num_lstm_units = 200
num_lstm_layers = 2
num_dense_layers = 1
num_dense_units = 300
num_epochs = 100
learn_rate = 1e-4
mb_size = 64
l2reg = 1e-3
num_features = x_train.shape[2]
stances = 4

In [152]:
# models
def mod(x,y):
    model = Sequential()
    model.add(Masking(mask_value=0., input_shape=(None, num_features)))
    for nl in range(num_lstm_layers-1):
        model.add(LSTM(num_lstm_units, kernel_initializer='glorot_normal',
                       dropout=0.2, recurrent_dropout=0.2,
                       return_sequences=True))
    model.add(LSTM(num_lstm_units, kernel_initializer='glorot_normal',
                   dropout=0.2, recurrent_dropout=0.2, return_sequences=True))
    model.add(TimeDistributed(Dense(num_dense_units, activation='relu')))
    for nl in range(num_dense_layers-1):
        model.add(TimeDistributed(Dense(num_dense_units, activation='relu')))
    model.add(Dropout(0.5))
    crf = CRF(stances, sparse_target=True)
    model.add(crf)
    adam = optimizers.Adam(lr=learn_rate, beta_1=0.9, beta_2=0.999,
                           epsilon=1e-08, decay=0.0)
    model.compile(adam, loss=crf.loss_function, metrics=[crf.accuracy])
    model.fit(x, y,shuffle=True, epochs=num_epochs,class_weight=None, batch_size=mb_size)
    return model

In [94]:
def calculate(tx,ty,tid,model):
    pred_prob = model.predict_classes(tx)
    #confidence = np.max(pred_prob,axis=2)
    y_pred = model.predict_classes(tx)
    fids_test = []
    for i in tid:
        fids_test.extend(i)
    fy_pred = y_pred.flatten()
    fy_test = ty.flatten()
    uniqtwid, uindices2 = np.unique(fids_test, return_index=True)
    uniqtwid = uniqtwid.tolist()
    uindices2 = uindices2.tolist()
    uniq_dev_prediction = [fy_pred[i] for i in uindices2]
    uniq_dev_label = [fy_test[i] for i in uindices2]
    mactest_F1 = f1_score(uniq_dev_prediction, uniq_dev_label, average='macro')
    mactest_F2 = f1_score(uniq_dev_prediction, uniq_dev_label, average= None)
    acc = sum(np.array(uniq_dev_prediction) == np.array(uniq_dev_label))/len(uniq_dev_prediction)
    return mactest_F1,mactest_F2, acc, uniq_dev_label
    

In [153]:
# train_test split
#prepare the data
from sklearn.model_selection import train_test_split
x1,x2,y1,y2,id1,id2 =train_test_split(x_train,y_train_cat,ids_train, test_size=0.3,random_state=4)
md = mod(x1,y1)
print(calculate(x2,y2,id2,md))
print(calculate(x_test,y_test,ids_test,md))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
(0.4477117691312838, array([0.84204276, 0.05338078]), 0.7292620865139949, [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 

(0.22174239579421703, array([0.88696958, 0.        , 0.        , 0.        ]), 0.7952861952861953, [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 2.0, 3.0, 3.0, 3.0, 0.0, 0.0, 2.0, 2.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 3.0, 1.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 3.0, 3.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 

  'recall', 'true', average, warn_for)


In [168]:
y_test

array([[0., 1., 1., ..., 0., 0., 0.],
       [0., 1., 1., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       ...,
       [3., 1., 1., ..., 0., 0., 0.],
       [3., 0., 0., ..., 0., 0., 0.],
       [3., 1., 1., ..., 0., 0., 0.]], dtype=float32)

In [160]:

fids_test = []
for i in ids_test:
    fids_test.extend(i)
#fy_pred = y_pred.flatten()
fy_test = y_test.flatten()
uniqtwid, uindices2 = np.unique(fids_test, return_index=True)
uniqtwid = uniqtwid.tolist()
uindices2 = uindices2.tolist()
#uniq_dev_prediction = [fy_pred[i] for i in uindices2]
uniq_dev_label = [fy_test[i] for i in uindices2]

In [167]:
for i in range(len(uindices2)):
    print(uniq_dev_label[i])
    print(uniqtwid[i])
    print(uindices2[i])
    print(fy_test[uindices2[i]])

0.0
1jvbd8
2382
0.0
1.0
31xv6u
2353
1.0
0.0
498293668655423488
171
0.0
0.0
498293763387568128
172
0.0
0.0
498293880820076544
174
0.0
0.0
498293953582866432
176
0.0
0.0
498293963183652864
178
0.0
0.0
498293964748124161
180
0.0
0.0
498294148550885376
182
0.0
0.0
498294365630910466
184
0.0
0.0
498294492743483395
186
0.0
0.0
498294531625091072
188
0.0
0.0
498294603544801281
190
0.0
0.0
498294706355601409
192
0.0
0.0
498294714026983424
194
0.0
0.0
498295324029747200
196
0.0
1.0
498295399682441217
198
1.0
0.0
498297185834786816
200
0.0
0.0
498297997118435329
202
0.0
0.0
498298019045847040
204
0.0
0.0
498298371615244289
206
0.0
0.0
498298907311767553
208
0.0
0.0
498299535702368256
210
0.0
2.0
498299548625018880
212
2.0
0.0
498299653935603713
214
0.0
0.0
498300617392406528
216
0.0
0.0
498303898646347776
218
0.0
0.0
498304971671343107
220
0.0
0.0
498305308754591744
222
0.0
0.0
498307471610744832
224
0.0
1.0
498309467889729536
226
1.0
0.0
498312078243819520
228
0.0
0.0
498313345129529344
230
0.0

448
0.0
1.0
581154613254086656
449
1.0
0.0
581154859715530752
451
0.0
0.0
581154930511073280
454
0.0
0.0
581154995640221699
456
0.0
0.0
581155260195958784
463
0.0
0.0
581155288516005888
465
0.0
0.0
581155895477825536
467
0.0
0.0
581156001535139840
469
0.0
0.0
581156407774429184
457
0.0
0.0
581157068419125249
471
0.0
0.0
581157167220277248
473
0.0
0.0
581157421273522177
475
0.0
0.0
581157526391119872
477
0.0
1.0
581158014507462657
479
1.0
1.0
581159283175399424
481
1.0
1.0
581161281199161346
483
1.0
0.0
581162102414557184
452
0.0
0.0
581162461807685632
485
0.0
0.0
581169458040934400
487
0.0
0.0
581190518044000256
458
0.0
0.0
581202636965355521
459
0.0
0.0
581290271997968384
488
0.0
0.0
581290541645635584
489
0.0
0.0
581290764547805184
501
0.0
0.0
581292874328764417
503
0.0
1.0
581294132758487040
505
1.0
0.0
581294446261583872
508
0.0
0.0
581296168711356416
514
0.0
0.0
581296428045078528
515
0.0
0.0
581296614570008576
519
0.0
0.0
581296722783010816
530
0.0
0.0
581297818528141313
522
0.0


765035397059530755
968
0.0
0.0
765042054141468672
970
0.0
0.0
765054620204830720
913
0.0
0.0
765075088437764097
972
0.0
0.0
765076985311748096
974
0.0
0.0
765171900683161601
976
0.0
0.0
765209981872250880
978
0.0
0.0
765252851782254592
980
0.0
0.0
765325586218659840
895
0.0
0.0
765336147539202048
982
0.0
0.0
765612220860370944
857
0.0
0.0
766059906558464000
859
0.0
0.0
767725956706414592
983
0.0
0.0
767726293479723008
984
0.0
0.0
767726308856127488
988
0.0
0.0
767726387415293952
990
0.0
0.0
767726431640092672
992
0.0
1.0
767726553308491776
995
1.0
0.0
767726588746145793
997
0.0
0.0
767726757369802752
999
0.0
0.0
767726943810756608
1001
0.0
0.0
767727125608660992
1013
0.0
0.0
767727188800000000
1015
0.0
0.0
767727257171464192
1018
0.0
0.0
767727443398451200
1020
0.0
0.0
767727622046511104
1022
0.0
0.0
767727718142087168
1024
0.0
0.0
767728005976326144
1026
0.0
0.0
767728097928052736
1028
0.0
0.0
767728740486950912
1030
0.0
0.0
767728776667140096
1033
0.0
0.0
767728937719898112
1031
0.0


2368
0.0
0.0
cq6fj7f
2362
0.0
0.0
cq6s9ur
2366
0.0
0.0
cq72mqx
2370
0.0
1.0
cq9capm
2355
1.0
1.0
cq9mbsa
2356
1.0
0.0
cq9sgm3
2357
0.0
0.0
cqa0rkx
2358
0.0
0.0
cqjckbb
2371
0.0
0.0
cqk0i0a
2374
0.0
0.0
cqk0k41
2375
0.0
0.0
cqk0plz
2376
0.0
0.0
d1qj24g
2760
0.0
0.0
d1qk6s6
2765
0.0
0.0
d1ql4bd
2767
0.0
0.0
d1qpgbl
2774
0.0
0.0
d1r2vji
2768
0.0
0.0
d1r5goy
2769
0.0
0.0
d1rfaf8
2771
0.0
0.0
d1rli1y
2772
0.0
0.0
d1uyinv
2761
0.0
0.0
d1v1nv5
2762
0.0
0.0
d1v23gd
2763
0.0
0.0
d23k9ry
2776
0.0
0.0
dd3dvp6
2413
0.0
0.0
dd3dx94
2415
0.0
0.0
dd3e1x7
2417
0.0
0.0
dd3e2ma
2411
0.0
0.0
dgmdk4o
2716
0.0
1.0
dgmfqgc
2717
1.0
0.0
dgmg44w
2718
0.0
0.0
dgmgdpz
2719
0.0
0.0
dgmh7xj
2720
0.0
0.0
dgmj3qu
2721
0.0
0.0
dgmlbfb
2722
0.0
0.0
dgmlvtq
2738
0.0
0.0
dgmmpk2
2758
0.0
0.0
dgmod3j
2739
0.0
0.0
dgmqhwg
2730
0.0
0.0
dgms2wz
2740
0.0
0.0
dgmubp1
2750
0.0
0.0
dgnizga
2754
0.0
0.0
dh6cnp1
2756
0.0
0.0
diwxy5w
2780
0.0
0.0
diwxz3b
2785
0.0
0.0
diwy18u
2788
0.0
0.0
diwy1gr
2781
0.0
0.0
diwy1rp
2802
0.0
0.0


In [102]:
#1027,4700,460,515


In [10]:
# 5-fold cross validation
from sklearn.model_selection import KFold
kf= KFold(n_splits=5)
cvscores = []
for train_index, dev_index in kf.split(x_train):
    x1,x2 = x_train[train_index],x_train[dev_index]
    y1_cat,y2_cat = y_train_cat[train_index],y_train_cat[dev_index]
    y1,y2 = y_train[train_index],y_train[dev_index]
    ind1,ind2 = ids_train[train_index],ids_train[dev_index]
    md = mod(x1,y1_cat)
    res = calculate(x2,y2,id2,md)
    cvscores.append(res)
    print(res)

In [125]:
fids_test = []    
for i in ids_test:
    fids_test.extend(i)
fy_test = y_test.flatten()
uniqtwid, uindices2 = np.unique(fids_test, return_index=True)
uniqtwid = uniqtwid.tolist()
uindices2 = uindices2.tolist()


In [129]:
uniq_dev_label = [fy_test[i] for i in uindices2] 

In [130]:
len(uniq_dev_label)

1485

In [134]:
uniq_dev_label.count(3)

30

In [144]:
x1.shape

(2492, 25, 314)

In [146]:
y1.shape

(2492, 25, 4)

In [37]:
'''
# model evaluations
y2_pred_prob = md.predict(x2)
y2_confidence = np.max(pred_prob,axis=2)
y2_pred = md.predict_classes(x2)

fids_dev = []
for i in id2:
        fids_dev.extend(i)
fy_pred = y2_pred.flatten()
fy_test = y2.flatten()
uniqtwid, uindices2 = np.unique(fids_dev, return_index=True)
uniqtwid = uniqtwid.tolist()
uindices2 = uindices2.tolist()
uniq_dev_prediction = [fy_pred[i] for i in uindices2]
uniq_dev_label = [fy_test[i] for i in uindices2]
mactest_F = f1_score(uniq_dev_prediction, uniq_dev_label, average='macro')
'''

  'recall', 'true', average, warn_for)
