In [1]:
import numpy as np
import pandas as pd
import re, os
import datetime
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler,normalize
from keras.preprocessing.text import Tokenizer, text_to_word_sequence
from keras.utils.np_utils import to_categorical
from keras.layers import Dense, Input, Concatenate, BatchNormalization, Activation,LayerNormalization
from keras.layers import Lambda, Embedding, GRU, Bidirectional, TimeDistributed, concatenate,Flatten,GlobalAveragePooling1D
from keras.models import Model
from keras import optimizers
from keras import backend as K
from keras.layers import Layer
from keras import initializers
from word2vecReader import Word2Vec
from keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.metrics import precision_recall_fscore_support, roc_auc_score
import pickle
# from tensorflow.python.keras.optimizers import adam_v2
# from tensorflow.python.keras.optimizers import rmsprop_v2
import tensorflow as tf
from tensorflow import keras
from  keras import layers
from time import process_time

In [2]:
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [3]:
K.set_learning_phase(1)
np.random.seed(0)
MAX_SENT_LENGTH = 20  #number of words in a sentence
MAX_NB_WORDS = 20000
POST_DIM = 400
INFO_DIM = 30
VALIDATION_SPLIT = 0.2



In [4]:
##slice tensor function in keras
def crop(dimension, start, end):
    # Crops (or slices) a Tensor on a given dimension from start to end
    # example : to crop tensor x[:, :, 5:10]
    # call slice(2, 5, 10) as you want to crop on the second dimension
    def func(x):
        if dimension == 0:
            return x[start: end]
        if dimension == 1:
            return x[:, start: end]
        if dimension == 2:
            return x[:, :, start: end]
        if dimension == 3:
            return x[:, :, :, start: end]
        if dimension == 4:
            return x[:, :, :, :, start: end]

    return Lambda(func)


def myFunc(x):
    if "empety" in x:
        return False
    else:
        return True


def clean_str(string):
    """
    Tokenization/string cleaning for dataset
    Every dataset is lower cased except
    """
    string = re.sub(r"\\", "", string)
    string = re.sub(r"\'", "", string)
    string = re.sub(r"\"", "", string)
    string = string.strip().lower()
    word_tokens = word_tokenize(string)
    filtered_words = [word for word in word_tokens if word not in stopwords.words('english')]
    return filtered_words


def find_str(s, char):
    index = 0

    if char in s:
        c = char[0]
        for ch in s:
            if ch == c:
                if s[index:index + len(char)] == char:
                    return index

            index += 1


class AttLayer(Layer):
    def __init__(self, attention_dim):
        self.init = initializers.get('normal')
        self.supports_masking = True
        self.attention_dim = attention_dim
        super(AttLayer, self).__init__()

    def build(self, input_shape):
        assert len(input_shape) == 3
        self.W = K.variable(self.init((input_shape[-1], self.attention_dim)))
        self.b = K.variable(self.init((self.attention_dim,)))
        self.u = K.variable(self.init((self.attention_dim, 1)))
        self.trainable_weightss = [self.W, self.b, self.u]
        super(AttLayer, self).build(input_shape)

    def compute_mask(self, inputs, mask=None):
        return mask

    def call(self, x, mask=None):
        # size of x :[batch_size, sel_len, attention_dim]
        # size of u :[batch_size, attention_dim]
        # uit = tanh(xW+b)
        uit = K.tanh(K.bias_add(K.dot(x, self.W), self.b))
        ait = K.dot(uit, self.u)
        ait = K.squeeze(ait, -1)

        ait = K.exp(ait)

        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            ait *= K.cast(mask, K.floatx())
        ait /= K.cast(K.sum(ait, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        ait = K.expand_dims(ait)
        weighted_input = x * ait
        output = K.sum(weighted_input, axis=1)

        return output

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[-1])

    
class MultiHeadSelfAttention(layers.Layer):
    def __init__(self, embed_dim, num_heads=8):
        super(MultiHeadSelfAttention, self).__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        if embed_dim % num_heads != 0:
            raise ValueError(
                f"embedding dimension = {embed_dim} should be divisible by number of heads = {num_heads}"
            )
        self.projection_dim = embed_dim // num_heads
        self.query_dense = layers.Dense(embed_dim)
        self.key_dense = layers.Dense(embed_dim)
        self.value_dense = layers.Dense(embed_dim)
        self.combine_heads = layers.Dense(embed_dim)

    def attention(self, query, key, value):
        score = tf.matmul(query, key, transpose_b=True)
        dim_key = tf.cast(tf.shape(key)[-1], tf.float32)
        scaled_score = score / tf.math.sqrt(dim_key)
        weights = tf.nn.softmax(scaled_score, axis=-1)
        output = tf.matmul(weights, value)
        return output, weights

    def separate_heads(self, x, batch_size):
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.projection_dim))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, inputs):
        # x.shape = [batch_size, seq_len, embedding_dim]
        batch_size = tf.shape(inputs)[0]
        query = self.query_dense(inputs)  # (batch_size, seq_len, embed_dim)
        key = self.key_dense(inputs)  # (batch_size, seq_len, embed_dim)
        value = self.value_dense(inputs)  # (batch_size, seq_len, embed_dim)
        query = self.separate_heads(
            query, batch_size
        )  # (batch_size, num_heads, seq_len, projection_dim)
        key = self.separate_heads(
            key, batch_size
        )  # (batch_size, num_heads, seq_len, projection_dim)
        value = self.separate_heads(
            value, batch_size
        )  # (batch_size, num_heads, seq_len, projection_dim)
        attention, weights = self.attention(query, key, value)
        attention = tf.transpose(
            attention, perm=[0, 2, 1, 3]
        )  # (batch_size, seq_len, num_heads, projection_dim)
        concat_attention = tf.reshape(
            attention, (batch_size, -1, self.embed_dim)
        )  # (batch_size, seq_len, embed_dim)
        output = self.combine_heads(
            concat_attention
        )  # (batch_size, seq_len, embed_dim)
        return output


'''Transformer的Encoder部分'''


class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super(TransformerBlock, self).__init__()
        self.att = MultiHeadSelfAttention(embed_dim, num_heads)
        self.ffn = keras.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim), ]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training):
        attn_output = self.att(inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)


'''Transformer输入的编码层'''


class TokenAndPositionEmbedding(layers.Layer):
    def __init__(self, maxlen, vocab_size, embed_dim):
        super(TokenAndPositionEmbedding, self).__init__()
        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        maxlen = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=maxlen, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions


In [5]:
with open('instagram.pickle', 'rb') as handle:
    dictionary = pickle.load(handle)
texts = dictionary['text']
texts = texts.fillna("")
# texts=[text.encode('ascii') for text in texts]
#print(texts)
comments = dictionary['comments']
#print(comments)
timeInfo = dictionary['time']
#print(timeInfo)
postInfo = dictionary['post']
#print(postInfo)

#print(postInfo)
labels = dictionary['labels']
#print(labels)
b = np.zeros([len(timeInfo), len(max(timeInfo, key=lambda x: len(x)))])
for i, j in enumerate(timeInfo):
    b[i][0:len(j)] = j
timeInfo = b
#print(b)
time_size = len(np.unique(timeInfo))
MAX_SENTS = len(timeInfo[0])  ####number of sentences

c = np.zeros([len(postInfo), len(max(postInfo, key=lambda x: len(x)))])
for i, j in enumerate(postInfo):
    c[i][0:len(j)] = j
median_value = np.median(c)
c = np.where(c > 10000000,median_value , c)
postInfo = c
#print(postInfo)
post_size = len(np.unique(postInfo))
tokenizer = Tokenizer(num_words=MAX_NB_WORDS)
tokenizer.fit_on_texts(texts)
#print(MAX_SENTS)
data = np.zeros((len(texts), MAX_SENTS, MAX_SENT_LENGTH+1), dtype='int32')
#print(tokenizer.word_index['club'])

for i, sentences in enumerate(comments):
    for j, sent in enumerate(sentences):
        if j < MAX_SENTS:
            wordTokens = text_to_word_sequence(sent)
            k = 0
            for word in wordTokens:
                #print(type(wordTokens[0]))
                #print("'{}'".format(word))
                if k < MAX_SENT_LENGTH and word in tokenizer.word_index:
                    data[i, j, k] = tokenizer.word_index[word]
                    k = k + 1
#print(data)

word_index = tokenizer.word_index
print('Total %s unique tokens.' % len(word_index))
single_label = np.asarray(labels)
labels = to_categorical(np.asarray(labels))
print('Shape of data tensor:', data.shape)
print('Shape of label tensor:', labels.shape)

###standardize
transfer = StandardScaler()
timeInfo = transfer.fit_transform(timeInfo)
postInfo = transfer.fit_transform(postInfo)
HAN_pre = []
HAN_reca = []
HAN_f1 = []
HAN_AUC = []
HAN_TIME = []
embeddings_index = Word2Vec.load_word2vec_format("word2vec_twitter_model.bin", binary=True, )  #

# print('Total %s word vectors.' % len(embeddings_index))
embedding_matrix = np.random.random((len(word_index) + 1, POST_DIM))
outword_dic = dict()
for word, i in word_index.items():
    if word in embeddings_index.vocab:
        embedding_vector = embeddings_index[word]
        embedding_matrix[i] = embedding_vector
    else:
        new_vector = np.random.rand(POST_DIM, )
        outword_dic.setdefault(word, new_vector)
        embedding_matrix[i] = outword_dic[word]

Total 8009 unique tokens.
Shape of data tensor: (2121, 192, 21)
Shape of label tensor: (2121, 2)


In [6]:
for j in range(10):
    start_time = process_time()
    indices = np.arange(data.shape[0])
    #print(indices)
    np.random.shuffle(indices)
    data1 = data[indices]
    #print(data1.shape)
    labels1 = labels[indices]
    single_label1 = single_label[indices]
    timeInfo1 = timeInfo[indices]
    #print(timeInfo1.shape)
    timeInfo1 = timeInfo1.reshape((2121, MAX_SENTS, 1))
    print(timeInfo1.shape)
    data1 = np.dstack((data1, timeInfo1))
    postInfo1=postInfo[indices]
    #print(postInfo1.shape)
    nb_validation_samples = int(VALIDATION_SPLIT * data1.shape[0])
    zeros = np.zeros(2121)
    zeros = zeros.reshape((2121, 1, 1))

    x_train = data1[:-nb_validation_samples]
    m=crop(1, 0, MAX_SENT_LENGTH)(x_train)
    # print(m)
    y_train = labels1[:-nb_validation_samples]
    #print(y_train.shape)
    zeros_train = zeros[:-nb_validation_samples]
    time_train = timeInfo1[:-nb_validation_samples]
    post_train = postInfo1[:-nb_validation_samples]
    median_value1 = np.median(post_train)
    post_train = np.where(np.isnan(post_train), 0, post_train)
    x_val = data1[-nb_validation_samples:]
    y_val = labels1[-nb_validation_samples:]
    zeros_test = zeros[-nb_validation_samples:]
    time_test = timeInfo1[-nb_validation_samples:]
    post_test = postInfo1[-nb_validation_samples:]
    median_value2 = np.median(post_test)
    post_test = np.where(np.isnan(post_test), 0, post_test)
    y_single = single_label1[-nb_validation_samples:]

    print('Number of positive and negative posts in training and test set')
    print(y_train.sum(axis=0))
    print(y_val.sum(axis=0))

    # building Hierachical Attention network

    embedding_layer = Embedding(len(word_index) + 1,
                                POST_DIM,
                                weights=[embedding_matrix],
                                input_length=MAX_SENT_LENGTH,
                                trainable=True,
                                mask_zero=True)

    all_input = Input(shape=(MAX_SENT_LENGTH + 2,))
    sentence_input = crop(1, 0, MAX_SENT_LENGTH)(all_input)  ##slice
    time_input = crop(1, MAX_SENT_LENGTH, MAX_SENT_LENGTH + 2)(all_input)  ##slice
    embed_dim = 200  # Embedding size for each token
    num_heads = 2  # Number of attention heads
    ff_dim = 200  # Hidden layer size in feed forward network inside transformer
    vocab_size = 20000
    embedding_layer1 = TokenAndPositionEmbedding(MAX_SENT_LENGTH, vocab_size, embed_dim)
    x = embedding_layer1(sentence_input)
    transformer_block1 = TransformerBlock(embed_dim, num_heads, ff_dim)
    l_trans = transformer_block1(x)
    l_att = GlobalAveragePooling1D()(l_trans)
    l_att = Dense(200, activation='sigmoid')(l_att)  ####(?,200)
    # time_embedding=Dense(TIME_DIM,activation='sigmoid')(time_input)
    merged_output = Concatenate()([l_att, time_input])  ###text+time information
    sentEncoder = Model(all_input, merged_output)

    review_input = Input(shape=(MAX_SENTS, MAX_SENT_LENGTH+2))
    review_encoder = TimeDistributed(sentEncoder)(review_input)
    transformer_block2 = TransformerBlock(202, num_heads, ff_dim)
    l_lstm_sent = transformer_block2(review_encoder)
    # pred_time=Dense(1,activation='relu')(l_lstm_sent)
    fully_sent = Dense(1, use_bias=False)(l_lstm_sent)
    norm_fullysent = BatchNormalization()(fully_sent)
    pred_time = Activation(activation='linear')(norm_fullysent)

    zero_input = Input(shape=(1, 1))
    shift_predtime = Concatenate(axis=1)([zero_input, pred_time])
    shift_predtime = crop(1, 0, MAX_SENTS)(shift_predtime)
    l_att_sent = GlobalAveragePooling1D()(l_lstm_sent)
    l_att_sent = Dense(200, activation='sigmoid')(l_att_sent)

    ###embed the #likes, shares
    post_input = Input(shape=(4,))
    #print(post_input)
    # post_embedding = Dense(INFO_DIM, activation='sigmoid')(post_input)
    fully_post = Dense(INFO_DIM, use_bias=False)(post_input)
    norm_fullypost = BatchNormalization()(fully_post)
    post_embedding = Activation(activation='relu')(norm_fullypost)
    x = concatenate([l_att_sent,
                     post_embedding])  ###merge the document level vectro with the additional embedded features such as #likes
    fully_review = Dense(2, use_bias=False)(x)
    norm_fullyreview = BatchNormalization()(fully_review)
    preds = Activation(activation='softmax')(norm_fullyreview)

    rmsprop = optimizers.Adam(learning_rate=0.001, decay=0.99)
    model = Model(inputs=[review_input, post_input, zero_input], outputs=[preds, shift_predtime])
    # print(model.summary())
    model.compile(loss=['binary_crossentropy', 'mse'], loss_weights=[1, 0.00002],
                  optimizer=rmsprop)
    # filepath = "weights/weights-improvement-{epoch:02d}-{loss:.2f}.hdf5"
    # checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
    # callbacks_list = [EarlyStopping(monitor='loss', patience=1,mode='min'),checkpoint]

    print("model fitting - Hierachical attention network for cyberbullying detection")

    model.fit([x_train, post_train, zeros_train], [y_train, time_train], batch_size=64,
              epochs=25, verbose=1)
    yp = model.predict([x_val, post_test, zeros_test], verbose=1)
    ypreds = yp[0]
    ypreds = np.argmax(ypreds, axis=1)
 #print y_single
    #print ypred
    f1=precision_recall_fscore_support(y_single.astype(int), ypreds)   # <==
    auc=roc_auc_score(y_single.astype('int'), ypreds)  #<==
    f1 = precision_recall_fscore_support(y_single.astype(int), ypreds)  # <==
    auc = roc_auc_score(y_single.astype('int'), ypreds)  # <== category
    end_time = process_time()
    cpu_time = end_time - start_time
    print("cpu_time:")
    print(cpu_time)
    print("f1:")
    print(f1)
    print("auc:")
    print(auc)
    HAN_TIME.append(cpu_time)
    HAN_AUC.append(auc)
    HAN_f1.append(f1[2][1])
    HAN_reca.append(f1[1][1])
    HAN_pre.append(f1[0][1])

    #for t-sne visualization
    # if j==0:
    #     a=model.layers
    #     get_representations_test = K.function([model.layers[0].input,model.layers[1].input,model.layers[12].input], [model.layers[6].output])
    #     representations_test = get_representations_test([x_val,post_test,zeros_test])[0]
    #     representation_dict = {
    #         'representations': representations_test,
    #         'labels': y_single
    #     }
    #
    #     with open('HANCD_Tem_results.pickle', 'wb') as handle:
    #         pickle.dump(representation_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
    # K.clear_session()

print(HAN_AUC)
print(HAN_f1)
print(HAN_pre)
print(HAN_reca)
print(HAN_TIME)
print ("TIME",np.mean(HAN_TIME), np.std(HAN_TIME))
print ("AUC",np.mean(HAN_AUC), np.std(HAN_AUC))
print ("f1", np.mean(HAN_f1), np.std(HAN_f1))
print ("precision",np.mean(HAN_pre), np.std(HAN_pre))
print ("recall", np.mean(HAN_reca), np.std(HAN_reca))


(2121, 192, 1)
Number of positive and negative posts in training and test set
[1170.  527.]
[292. 132.]
model fitting - Hierachical attention network for cyberbullying detection
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
cpu_time:
32127.421875
f1:
(array([0.75722543, 0.61538462]), array([0.89726027, 0.36363636]), array([0.82131661, 0.45714286]), array([292, 132], dtype=int64))
auc:
0.6304483188044833
(2121, 192, 1)
Number of positive and negative posts in training and test set
[1166.  531.]
[296. 128.]
model fitting - Hierachical attention network for cyberbullying detection
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25

Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
cpu_time:
33965.90625
f1:
(array([0.76231884, 0.44303797]), array([0.85667752, 0.2991453 ]), array([0.80674847, 0.35714286]), array([307, 117], dtype=int64))
auc:
0.5779114117876333
(2121, 192, 1)
Number of positive and negative posts in training and test set
[1175.  522.]
[287. 137.]
model fitting - Hierachical attention network for cyberbullying detection
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
cpu_time:
32804.515625
f1:
(array([0.7254902 , 0.33850932]), array([0.25783972, 0.79562044]), array([0.38046272, 

Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
cpu_time:
32779.296875
f1:
(array([0.74635569, 0.65432099]), array([0.90140845, 0.37857143]), array([0.81658692, 0.47963801]), array([284, 140], dtype=int64))
auc:
0.639989939637827
(2121, 192, 1)
Number of positive and negative posts in training and test set
[1162.  535.]
[300. 124.]
model fitting - Hierachical attention network for cyberbullying detection
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
cpu_time:
32287.875
f1:
(array([0.75354108, 0.52112676]), array([0.88666667, 0.2983871 ]), array([0.81470138, 0.37948718]), array([300, 124], dtype=int64))
a

Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
cpu_time:
32533.640625
f1:
(array([0.75949367, 0.58333333]), array([0.84210526, 0.45323741]), array([0.79866889, 0.51012146]), array([285, 139], dtype=int64))
auc:
0.6476713366149185
(2121, 192, 1)
Number of positive and negative posts in training and test set
[1164.  533.]
[298. 126.]
model fitting - Hierachical attention network for cyberbullying detection
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
cpu_time:
32736.109375
f1:
(array([0.76315789, 0.45      ]), array([0.77852349, 0.42857143]), array([0.77076412, 0.43902439]), array([298, 126], dtype=int64))
auc:
0.6035474592521572
(2121, 192, 1)
Number

Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
cpu_time:
32467.546875
f1:
(array([0.76282051, 0.54464286]), array([0.82352941, 0.45185185]), array([0.79201331, 0.49392713]), array([289, 135], dtype=int64))
auc:
0.6376906318082788
(2121, 192, 1)
Number of positive and negative posts in training and test set
[1170.  527.]
[292. 132.]
model fitting - Hierachical attention network for cyberbullying detection
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
cpu_time:
34795.546875
f1:
(array([0.74695122, 0.51041667]), array([0.8390411 , 0.37121212]), array([0.79032258, 0.42982456]), array([292, 132], dtype=int64))
auc:
0.605126608551266
[0.6304483188044833, 0.6238386824324325, 0.5779114117876333, 0.5267300

In [7]:
model.summary()

Model: "model_19"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_38 (InputLayer)          [(None, 192, 22)]    0           []                               
                                                                                                  
 time_distributed_9 (TimeDistri  (None, 192, 202)    4286200     ['input_38[0][0]']               
 buted)                                                                                           
                                                                                                  
 input_40 (InputLayer)          [(None, 4)]          0           []                               
                                                                                                  
 transformer_block_19 (Transfor  (None, 192, 202)    246034      ['time_distributed_9[0][0]

In [15]:
sentEncoder.summary()

Model: "model_11"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_23 (InputLayer)          [(None, 22)]         0           []                               
                                                                                                  
 lambda_24 (Lambda)             (None, 20)           0           ['input_23[0][0]']               
                                                                                                  
 token_and_position_embedding_6  (None, 20, 200)     4004000     ['lambda_24[0][0]']              
  (TokenAndPositionEmbedding)                                                                     
                                                                                                  
 transformer_block_12 (Transfor  (None, 20, 200)     242000      ['token_and_position_embed