# Import datasets and libraries

In [None]:
!gdown 1SaSq8kwvNmxq2HoQBenhXC3ejM8BU70d
!gdown 1uGv2afj67P9BGEMwFPyv_IopjMzaqMuG

In [None]:
import os
import random

import keras
import numpy as np
import pandas as pd
from keras.layers import Input, Dense, GRU, Embedding, Dropout, LSTM, Concatenate, SimpleRNN, Bidirectional
from keras.models import Model
from keras.preprocessing.text import Tokenizer
from keras.utils import pad_sequences
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

In [None]:
def str_to_npa(s):
  """Parse a whitespace-separated string of numbers into a 1-D float array.

  Args:
    s: text such as "0.1  0.2\n 0.3". Any run of whitespace (spaces, tabs,
      newlines) counts as a single separator; leading and trailing
      whitespace is ignored.

  Returns:
    A float64 NumPy array with one element per token; an empty array when
    `s` contains no tokens.

  Raises:
    ValueError: if a token cannot be converted by `float`.
  """
  # str.split() with no argument collapses consecutive whitespace and never
  # yields empty tokens, so no separate pass to strip '' entries is needed.
  return np.array([float(token) for token in s.split()], dtype=float)

# Numeric id -> architecture name. The training loops iterate over this table
# to decide which model builder to call.
model_enc = dict(enumerate(('RNN', 'BiRNN', 'GRU', 'BiGRU', 'LSTM', 'BiLSTM'), start=1))


def _parse_n2v_cell(raw):
  """Convert one stored 'n2v' cell (a bracketed, space-separated vector string) to an array."""
  return str_to_npa(raw.replace('[', '').replace(']', ''))


# d1 is read from the t15 CSV and d2 from the t16 CSV; in both, the 'n2v'
# column is replaced by parsed NumPy vectors.
d1 = pd.read_csv('t15_text_n2v.csv', encoding='utf-8')
d1['n2v'] = d1['n2v'].apply(_parse_n2v_cell)

d2 = pd.read_csv('t16_text_n2v.csv', encoding='utf-8')
d2['n2v'] = d2['n2v'].apply(_parse_n2v_cell)

# Twitter15

In [None]:
# Load the saved embedding array for Twitter15 and store it on d1, one entry
# per row (iterating the array yields its slices along the first axis).
bertweet_embeddings1 = np.load('t15_bertweet_emb.npy')
d1['bertweet_embeddings'] = [row_embedding for row_embedding in bertweet_embeddings1]

## Without Node Embeddings

In [None]:
from sklearn.model_selection import train_test_split
# Reference split on Twitter15 (d1). The builders below read the input shape
# from the global `train_embeddings` when they are called; the training loop
# in the following cell re-splits the data with its own random_state.
train_embeddings, test_embeddings, train_labels, test_labels = train_test_split(
    np.array(d1['bertweet_embeddings'].tolist()), d1['label'].values, test_size=0.2, random_state=42
)


def _text_only_classifier(recurrent_layer):
  """Build an uncompiled binary classifier that sees only the text embeddings.

  Pipeline: text input -> recurrent_layer -> Dropout(0.2) -> Dense(32, relu)
  -> Dense(1, sigmoid).

  Args:
    recurrent_layer: Keras layer instance applied directly to the text input.

  Returns:
    A keras Model with a single input named 'text_input'.
  """
  # The input shape comes from axes 1 and 2 of the global `train_embeddings`,
  # looked up at call time.
  text_input = Input(shape=(train_embeddings.shape[1], train_embeddings.shape[2]), name='text_input')
  rnn_output = recurrent_layer(text_input)
  rnn_output = Dropout(0.2)(rnn_output)
  hidden = Dense(32, activation='relu')(rnn_output)
  output = Dense(1, activation='sigmoid')(hidden)
  return Model(inputs=text_input, outputs=output)


def RNN_model():
  """Text-only classifier with a SimpleRNN(64) encoder."""
  return _text_only_classifier(SimpleRNN(64))


def BiRNN_model():
  """Text-only classifier with a Bidirectional(SimpleRNN(64)) encoder."""
  return _text_only_classifier(Bidirectional(SimpleRNN(64)))


def GRU_model():
  """Text-only classifier with a GRU(64) encoder."""
  return _text_only_classifier(GRU(64))


def BiGRU_model():
  """Text-only classifier with a Bidirectional(GRU(64)) encoder."""
  return _text_only_classifier(Bidirectional(GRU(64)))


def LSTM_model():
  """Text-only classifier with an LSTM(64) encoder."""
  return _text_only_classifier(LSTM(64))


def BiLSTM_model():
  """Text-only classifier with a Bidirectional(LSTM(64)) encoder."""
  return _text_only_classifier(Bidirectional(LSTM(64)))

In [None]:
# Text-only experiment on Twitter15 (d1): 10 runs, each on a fresh random
# 80/20 split. Every run trains each architecture listed in `model_enc` and
# appends its test accuracy and classification report to
# results/t15/bertweet/without/output<j>.txt.
model_builders = {
    'RNN': RNN_model,
    'BiRNN': BiRNN_model,
    'GRU': GRU_model,
    'BiGRU': BiGRU_model,
    'LSTM': LSTM_model,
    'BiLSTM': BiLSTM_model,
}

for j in range(1, 11):
  random_state = random.randint(1, 100)
  # The seed is drawn at random for every run; print it so a split can be reproduced.
  print(f"Run {j}: random_state={random_state}")
  train_embeddings, test_embeddings, train_labels, test_labels = train_test_split(
      np.array(d1['bertweet_embeddings'].tolist()), d1['label'].values, test_size=0.2, random_state=random_state
  )
  filename = "results/t15/bertweet/without/output" + str(j) + ".txt"
  # open(..., "a") creates the file but not its parent directories.
  os.makedirs(os.path.dirname(filename), exist_ok=True)
  # `with` closes the handle even if training raises part-way through a run.
  with open(filename, "a") as file:
    for i in model_enc.keys():
      # Release Keras global state left by the previous model so memory does
      # not keep growing while many models are built in this loop.
      keras.backend.clear_session()
      # Any name not in the table falls back to the BiLSTM builder.
      model = model_builders.get(model_enc[i], BiLSTM_model)()
      model.compile(loss='binary_crossentropy', optimizer='SGD', metrics=['accuracy'])
      print(f"Training Model {model_enc[i]}")
      model.fit(train_embeddings, train_labels, epochs=30, batch_size=32, validation_split=0.2)
      score = model.evaluate(test_embeddings, test_labels, verbose=0)
      file.write(f"Model {model_enc[i]} Test Accuracy: {score[1]}" + '\n')
      y_pred = model.predict(test_embeddings)
      # Threshold the predicted scores at 0.5 to get class labels.
      y_pred = [1.0 if p > 0.5 else 0 for p in y_pred]
      report = classification_report(test_labels, y_pred)
      file.write(report + '\n')

## With Node2Vec Node Embeddings

In [None]:
from sklearn.model_selection import train_test_split
# Reference split on Twitter15 (d1) with the 'n2v' vectors as a second
# feature set. The builders below read the text shape from the global
# `train_embeddings` and the graph-vector width from the global `train_n2v`
# when they are called.
train_embeddings, test_embeddings, train_n2v, test_n2v, train_labels, test_labels = train_test_split(
    np.array(d1['bertweet_embeddings'].tolist()), np.array(d1['n2v'].tolist()), d1['label'].values, test_size=0.2, random_state=42
)


def _text_n2v_classifier(recurrent_layer):
  """Build an uncompiled binary classifier over text embeddings plus 'n2v' vectors.

  Text branch: text input -> recurrent_layer -> Dropout(0.2).
  Graph branch: graph input -> Dense(32, relu).
  Head: Concatenate -> Dense(32, relu) -> Dense(1, sigmoid).

  Args:
    recurrent_layer: Keras layer instance applied directly to the text input.

  Returns:
    A keras Model taking [text_input, graph_input], in that order.
  """
  # Shapes are taken from the globals at call time.
  text_input = Input(shape=(train_embeddings.shape[1], train_embeddings.shape[2]), name='text_input')
  graph_input = Input(shape=(train_n2v.shape[1],), name='graph_input')
  rnn_output = recurrent_layer(text_input)
  rnn_output = Dropout(0.2)(rnn_output)
  graph_output = Dense(32, activation='relu')(graph_input)
  concatenated = Concatenate()([rnn_output, graph_output])
  hidden = Dense(32, activation='relu')(concatenated)
  output = Dense(1, activation='sigmoid')(hidden)
  return Model(inputs=[text_input, graph_input], outputs=output)


def RNN_model():
  """Text + n2v classifier with a SimpleRNN(64) text encoder."""
  return _text_n2v_classifier(SimpleRNN(64))


def BiRNN_model():
  """Text + n2v classifier with a Bidirectional(SimpleRNN(64)) text encoder."""
  return _text_n2v_classifier(Bidirectional(SimpleRNN(64)))


def GRU_model():
  """Text + n2v classifier with a GRU(64) text encoder."""
  return _text_n2v_classifier(GRU(64))


def BiGRU_model():
  """Text + n2v classifier with a Bidirectional(GRU(64)) text encoder."""
  return _text_n2v_classifier(Bidirectional(GRU(64)))


def LSTM_model():
  """Text + n2v classifier with an LSTM(64) text encoder."""
  return _text_n2v_classifier(LSTM(64))


def BiLSTM_model():
  """Text + n2v classifier with a Bidirectional(LSTM(64)) text encoder."""
  return _text_n2v_classifier(Bidirectional(LSTM(64)))

In [None]:
# Text + 'n2v' experiment on Twitter15 (d1): 10 runs, each on a fresh random
# 80/20 split. Every run trains each architecture listed in `model_enc` and
# appends its test accuracy and classification report to
# results/t15/bertweet/n2v/output<j>.txt.
model_builders = {
    'RNN': RNN_model,
    'BiRNN': BiRNN_model,
    'GRU': GRU_model,
    'BiGRU': BiGRU_model,
    'LSTM': LSTM_model,
    'BiLSTM': BiLSTM_model,
}

for j in range(1, 11):
  random_state = random.randint(1, 100)
  # The seed is drawn at random for every run; print it so a split can be reproduced.
  print(f"Run {j}: random_state={random_state}")
  # The graph features are stored as train_n2v/test_n2v, the names the n2v
  # model builders read their graph-input width from.
  train_embeddings, test_embeddings, train_n2v, test_n2v, train_labels, test_labels = train_test_split(
      np.array(d1['bertweet_embeddings'].tolist()), np.array(d1['n2v'].tolist()), d1['label'].values, test_size=0.2, random_state=random_state
  )
  filename = "results/t15/bertweet/n2v/output" + str(j) + ".txt"
  # open(..., "a") creates the file but not its parent directories.
  os.makedirs(os.path.dirname(filename), exist_ok=True)
  # `with` closes the handle even if training raises part-way through a run.
  with open(filename, "a") as file:
    for i in model_enc.keys():
      # Release Keras global state left by the previous model so memory does
      # not keep growing while many models are built in this loop.
      keras.backend.clear_session()
      # Any name not in the table falls back to the BiLSTM builder.
      model = model_builders.get(model_enc[i], BiLSTM_model)()
      model.compile(loss='binary_crossentropy', optimizer='SGD', metrics=['accuracy'])
      print(f"Training Model {model_enc[i]}")
      model.fit([train_embeddings, train_n2v], train_labels, epochs=30, batch_size=32, validation_split=0.2)
      score = model.evaluate([test_embeddings, test_n2v], test_labels, verbose=0)
      file.write(f"Model {model_enc[i]} Test Accuracy: {score[1]}" + '\n')
      y_pred = model.predict([test_embeddings, test_n2v])
      # Threshold the predicted scores at 0.5 to get class labels.
      y_pred = [1.0 if p > 0.5 else 0 for p in y_pred]
      report = classification_report(test_labels, y_pred)
      file.write(report + '\n')

## With DeepWalk Node Embeddings

In [None]:
# Load the saved node-embedding array for Twitter15 and store it on d1 as the
# 'dw' column, one entry per row.
node_emb = np.load('100d/t15_dw_emb.npy')
d1['dw'] = list(node_emb)

from sklearn.model_selection import train_test_split
# Reference split with the 'dw' vectors as a second feature set. The builders
# below read the text shape from the global `train_embeddings` and the
# graph-vector width from the global `train_dw` when they are called.
train_embeddings, test_embeddings, train_dw, test_dw, train_labels, test_labels = train_test_split(
    np.array(d1['bertweet_embeddings'].tolist()), np.array(d1['dw'].tolist()), d1['label'].values, test_size=0.2, random_state=42
)


def _text_dw_classifier(recurrent_layer):
  """Build an uncompiled binary classifier over text embeddings plus 'dw' vectors.

  Text branch: text input -> recurrent_layer -> Dropout(0.2).
  Graph branch: graph input -> Dense(32, relu).
  Head: Concatenate -> Dense(32, relu) -> Dense(1, sigmoid).

  Args:
    recurrent_layer: Keras layer instance applied directly to the text input.

  Returns:
    A keras Model taking [text_input, graph_input], in that order.
  """
  # Shapes are taken from the globals at call time.
  text_input = Input(shape=(train_embeddings.shape[1], train_embeddings.shape[2]), name='text_input')
  graph_input = Input(shape=(train_dw.shape[1],), name='graph_input')
  rnn_output = recurrent_layer(text_input)
  rnn_output = Dropout(0.2)(rnn_output)
  graph_output = Dense(32, activation='relu')(graph_input)
  concatenated = Concatenate()([rnn_output, graph_output])
  hidden = Dense(32, activation='relu')(concatenated)
  output = Dense(1, activation='sigmoid')(hidden)
  return Model(inputs=[text_input, graph_input], outputs=output)


def RNN_model():
  """Text + dw classifier with a SimpleRNN(64) text encoder."""
  return _text_dw_classifier(SimpleRNN(64))


def BiRNN_model():
  """Text + dw classifier with a Bidirectional(SimpleRNN(64)) text encoder."""
  return _text_dw_classifier(Bidirectional(SimpleRNN(64)))


def GRU_model():
  """Text + dw classifier with a GRU(64) text encoder."""
  return _text_dw_classifier(GRU(64))


def BiGRU_model():
  """Text + dw classifier with a Bidirectional(GRU(64)) text encoder."""
  return _text_dw_classifier(Bidirectional(GRU(64)))


def LSTM_model():
  """Text + dw classifier with an LSTM(64) text encoder."""
  return _text_dw_classifier(LSTM(64))


def BiLSTM_model():
  """Text + dw classifier with a Bidirectional(LSTM(64)) text encoder."""
  return _text_dw_classifier(Bidirectional(LSTM(64)))

In [None]:
# Text + 'dw' experiment on Twitter15 (d1): 10 runs, each on a fresh random
# 80/20 split. Every run trains each architecture listed in `model_enc` and
# appends its test accuracy and classification report to
# results/t15/bertweet/dw/output<j>.txt.
model_builders = {
    'RNN': RNN_model,
    'BiRNN': BiRNN_model,
    'GRU': GRU_model,
    'BiGRU': BiGRU_model,
    'LSTM': LSTM_model,
    'BiLSTM': BiLSTM_model,
}

for j in range(1, 11):
  random_state = random.randint(1, 100)
  # The seed is drawn at random for every run; print it so a split can be reproduced.
  print(f"Run {j}: random_state={random_state}")
  train_embeddings, test_embeddings, train_dw, test_dw, train_labels, test_labels = train_test_split(
      np.array(d1['bertweet_embeddings'].tolist()), np.array(d1['dw'].tolist()), d1['label'].values, test_size=0.2, random_state=random_state
  )
  filename = "results/t15/bertweet/dw/output" + str(j) + ".txt"
  # open(..., "a") creates the file but not its parent directories.
  os.makedirs(os.path.dirname(filename), exist_ok=True)
  # `with` closes the handle even if training raises part-way through a run.
  with open(filename, "a") as file:
    for i in model_enc.keys():
      # Release Keras global state left by the previous model so memory does
      # not keep growing while many models are built in this loop.
      keras.backend.clear_session()
      # Any name not in the table falls back to the BiLSTM builder.
      model = model_builders.get(model_enc[i], BiLSTM_model)()
      model.compile(loss='binary_crossentropy', optimizer='SGD', metrics=['accuracy'])
      print(f"Training Model {model_enc[i]}")
      model.fit([train_embeddings, train_dw], train_labels, epochs=30, batch_size=32, validation_split=0.2)
      score = model.evaluate([test_embeddings, test_dw], test_labels, verbose=0)
      file.write(f"Model {model_enc[i]} Test Accuracy: {score[1]}" + '\n')
      y_pred = model.predict([test_embeddings, test_dw])
      # Threshold the predicted scores at 0.5 to get class labels.
      y_pred = [1.0 if p > 0.5 else 0 for p in y_pred]
      report = classification_report(test_labels, y_pred)
      file.write(report + '\n')

# Twitter16

In [None]:
# Load the saved embedding array for Twitter16 and store it on d2, the frame
# read from the t16 CSV. Every Twitter16 cell below reads
# d2['bertweet_embeddings'], so the column must be attached to d2 (not d1).
bert_embeddings1 = np.load('t16_bertweet_emb.npy')
d2['bertweet_embeddings'] = list(bert_embeddings1)

## Without Node Embeddings

In [None]:
from sklearn.model_selection import train_test_split
# Reference split on Twitter16 (d2). The builders below read the input shape
# from the global `train_embeddings` when they are called; the training loop
# in the following cell re-splits the data with its own random_state.
train_embeddings, test_embeddings, train_labels, test_labels = train_test_split(
    np.array(d2['bertweet_embeddings'].tolist()), d2['label'].values, test_size=0.2, random_state=42
)


def _text_only_classifier(recurrent_layer):
  """Build an uncompiled binary classifier that sees only the text embeddings.

  Pipeline: text input -> recurrent_layer -> Dropout(0.2) -> Dense(32, relu)
  -> Dense(1, sigmoid).

  Args:
    recurrent_layer: Keras layer instance applied directly to the text input.

  Returns:
    A keras Model with a single input named 'text_input'.
  """
  # The input shape comes from axes 1 and 2 of the global `train_embeddings`,
  # looked up at call time.
  text_input = Input(shape=(train_embeddings.shape[1], train_embeddings.shape[2]), name='text_input')
  rnn_output = recurrent_layer(text_input)
  rnn_output = Dropout(0.2)(rnn_output)
  hidden = Dense(32, activation='relu')(rnn_output)
  output = Dense(1, activation='sigmoid')(hidden)
  return Model(inputs=text_input, outputs=output)


def RNN_model():
  """Text-only classifier with a SimpleRNN(64) encoder."""
  return _text_only_classifier(SimpleRNN(64))


def BiRNN_model():
  """Text-only classifier with a Bidirectional(SimpleRNN(64)) encoder."""
  return _text_only_classifier(Bidirectional(SimpleRNN(64)))


def GRU_model():
  """Text-only classifier with a GRU(64) encoder."""
  return _text_only_classifier(GRU(64))


def BiGRU_model():
  """Text-only classifier with a Bidirectional(GRU(64)) encoder."""
  return _text_only_classifier(Bidirectional(GRU(64)))


def LSTM_model():
  """Text-only classifier with an LSTM(64) encoder."""
  return _text_only_classifier(LSTM(64))


def BiLSTM_model():
  """Text-only classifier with a Bidirectional(LSTM(64)) encoder."""
  return _text_only_classifier(Bidirectional(LSTM(64)))

In [None]:
# Text-only experiment on Twitter16 (d2): 10 runs, each on a fresh random
# 80/20 split. Every run trains each architecture listed in `model_enc` and
# appends its test accuracy and classification report to
# results/t16/bertweet/without/output<j>.txt.
model_builders = {
    'RNN': RNN_model,
    'BiRNN': BiRNN_model,
    'GRU': GRU_model,
    'BiGRU': BiGRU_model,
    'LSTM': LSTM_model,
    'BiLSTM': BiLSTM_model,
}

for j in range(1, 11):
  random_state = random.randint(1, 100)
  # The seed is drawn at random for every run; print it so a split can be reproduced.
  print(f"Run {j}: random_state={random_state}")
  train_embeddings, test_embeddings, train_labels, test_labels = train_test_split(
      np.array(d2['bertweet_embeddings'].tolist()), d2['label'].values, test_size=0.2, random_state=random_state
  )
  filename = "results/t16/bertweet/without/output" + str(j) + ".txt"
  # open(..., "a") creates the file but not its parent directories.
  os.makedirs(os.path.dirname(filename), exist_ok=True)
  # `with` closes the handle even if training raises part-way through a run.
  with open(filename, "a") as file:
    for i in model_enc.keys():
      # Release Keras global state left by the previous model so memory does
      # not keep growing while many models are built in this loop.
      keras.backend.clear_session()
      # Any name not in the table falls back to the BiLSTM builder.
      model = model_builders.get(model_enc[i], BiLSTM_model)()
      model.compile(loss='binary_crossentropy', optimizer='SGD', metrics=['accuracy'])
      print(f"Training Model {model_enc[i]}")
      model.fit(train_embeddings, train_labels, epochs=30, batch_size=32, validation_split=0.2)
      score = model.evaluate(test_embeddings, test_labels, verbose=0)
      file.write(f"Model {model_enc[i]} Test Accuracy: {score[1]}" + '\n')
      y_pred = model.predict(test_embeddings)
      # Threshold the predicted scores at 0.5 to get class labels.
      y_pred = [1.0 if p > 0.5 else 0 for p in y_pred]
      report = classification_report(test_labels, y_pred)
      file.write(report + '\n')

## With Node2Vec Node Embeddings

In [None]:
from sklearn.model_selection import train_test_split
# Reference split on Twitter16 (d2) with the 'n2v' vectors as a second
# feature set. The builders below read the text shape from the global
# `train_embeddings` and the graph-vector width from the global `train_n2v`
# when they are called.
train_embeddings, test_embeddings, train_n2v, test_n2v, train_labels, test_labels = train_test_split(
    np.array(d2['bertweet_embeddings'].tolist()), np.array(d2['n2v'].tolist()), d2['label'].values, test_size=0.2, random_state=42
)


def _text_n2v_classifier(recurrent_layer):
  """Build an uncompiled binary classifier over text embeddings plus 'n2v' vectors.

  Text branch: text input -> recurrent_layer -> Dropout(0.2).
  Graph branch: graph input -> Dense(32, relu).
  Head: Concatenate -> Dense(32, relu) -> Dense(1, sigmoid).

  Args:
    recurrent_layer: Keras layer instance applied directly to the text input.

  Returns:
    A keras Model taking [text_input, graph_input], in that order.
  """
  # Shapes are taken from the globals at call time.
  text_input = Input(shape=(train_embeddings.shape[1], train_embeddings.shape[2]), name='text_input')
  graph_input = Input(shape=(train_n2v.shape[1],), name='graph_input')
  rnn_output = recurrent_layer(text_input)
  rnn_output = Dropout(0.2)(rnn_output)
  graph_output = Dense(32, activation='relu')(graph_input)
  concatenated = Concatenate()([rnn_output, graph_output])
  hidden = Dense(32, activation='relu')(concatenated)
  output = Dense(1, activation='sigmoid')(hidden)
  return Model(inputs=[text_input, graph_input], outputs=output)


def RNN_model():
  """Text + n2v classifier with a SimpleRNN(64) text encoder."""
  return _text_n2v_classifier(SimpleRNN(64))


def BiRNN_model():
  """Text + n2v classifier with a Bidirectional(SimpleRNN(64)) text encoder."""
  return _text_n2v_classifier(Bidirectional(SimpleRNN(64)))


def GRU_model():
  """Text + n2v classifier with a GRU(64) text encoder."""
  return _text_n2v_classifier(GRU(64))


def BiGRU_model():
  """Text + n2v classifier with a Bidirectional(GRU(64)) text encoder."""
  return _text_n2v_classifier(Bidirectional(GRU(64)))


def LSTM_model():
  """Text + n2v classifier with an LSTM(64) text encoder."""
  return _text_n2v_classifier(LSTM(64))


def BiLSTM_model():
  """Text + n2v classifier with a Bidirectional(LSTM(64)) text encoder."""
  return _text_n2v_classifier(Bidirectional(LSTM(64)))

In [None]:
# Text + 'n2v' experiment on Twitter16 (d2): 10 runs, each on a fresh random
# 80/20 split. Every run trains each architecture listed in `model_enc` and
# appends its test accuracy and classification report to
# results/t16/bertweet/n2v/output<j>.txt.
model_builders = {
    'RNN': RNN_model,
    'BiRNN': BiRNN_model,
    'GRU': GRU_model,
    'BiGRU': BiGRU_model,
    'LSTM': LSTM_model,
    'BiLSTM': BiLSTM_model,
}

for j in range(1, 11):
  random_state = random.randint(1, 100)
  # The seed is drawn at random for every run; print it so a split can be reproduced.
  print(f"Run {j}: random_state={random_state}")
  # The graph features are stored as train_n2v/test_n2v, the names the n2v
  # model builders read their graph-input width from.
  train_embeddings, test_embeddings, train_n2v, test_n2v, train_labels, test_labels = train_test_split(
      np.array(d2['bertweet_embeddings'].tolist()), np.array(d2['n2v'].tolist()), d2['label'].values, test_size=0.2, random_state=random_state
  )
  filename = "results/t16/bertweet/n2v/output" + str(j) + ".txt"
  # open(..., "a") creates the file but not its parent directories.
  os.makedirs(os.path.dirname(filename), exist_ok=True)
  # `with` closes the handle even if training raises part-way through a run.
  with open(filename, "a") as file:
    for i in model_enc.keys():
      # Release Keras global state left by the previous model so memory does
      # not keep growing while many models are built in this loop.
      keras.backend.clear_session()
      # Any name not in the table falls back to the BiLSTM builder.
      model = model_builders.get(model_enc[i], BiLSTM_model)()
      model.compile(loss='binary_crossentropy', optimizer='SGD', metrics=['accuracy'])
      print(f"Training Model {model_enc[i]}")
      model.fit([train_embeddings, train_n2v], train_labels, epochs=30, batch_size=32, validation_split=0.2)
      score = model.evaluate([test_embeddings, test_n2v], test_labels, verbose=0)
      file.write(f"Model {model_enc[i]} Test Accuracy: {score[1]}" + '\n')
      y_pred = model.predict([test_embeddings, test_n2v])
      # Threshold the predicted scores at 0.5 to get class labels.
      y_pred = [1.0 if p > 0.5 else 0 for p in y_pred]
      report = classification_report(test_labels, y_pred)
      file.write(report + '\n')

## With DeepWalk Node Embeddings

In [None]:
# Load the saved node-embedding array for Twitter16 and store it on d2 as the
# 'dw' column, one entry per row.
node_emb = np.load('100d/t16_dw_emb.npy')
d2['dw'] = list(node_emb)

from sklearn.model_selection import train_test_split
# Reference split with the 'dw' vectors as a second feature set. The builders
# below read the text shape from the global `train_embeddings` and the
# graph-vector width from the global `train_dw` when they are called.
train_embeddings, test_embeddings, train_dw, test_dw, train_labels, test_labels = train_test_split(
    np.array(d2['bertweet_embeddings'].tolist()), np.array(d2['dw'].tolist()), d2['label'].values, test_size=0.2, random_state=42
)


def _text_dw_classifier(recurrent_layer):
  """Build an uncompiled binary classifier over text embeddings plus 'dw' vectors.

  Text branch: text input -> recurrent_layer -> Dropout(0.2).
  Graph branch: graph input -> Dense(32, relu).
  Head: Concatenate -> Dense(32, relu) -> Dense(1, sigmoid).

  Args:
    recurrent_layer: Keras layer instance applied directly to the text input.

  Returns:
    A keras Model taking [text_input, graph_input], in that order.
  """
  # Shapes are taken from the globals at call time.
  text_input = Input(shape=(train_embeddings.shape[1], train_embeddings.shape[2]), name='text_input')
  graph_input = Input(shape=(train_dw.shape[1],), name='graph_input')
  rnn_output = recurrent_layer(text_input)
  rnn_output = Dropout(0.2)(rnn_output)
  graph_output = Dense(32, activation='relu')(graph_input)
  concatenated = Concatenate()([rnn_output, graph_output])
  hidden = Dense(32, activation='relu')(concatenated)
  output = Dense(1, activation='sigmoid')(hidden)
  return Model(inputs=[text_input, graph_input], outputs=output)


def RNN_model():
  """Text + dw classifier with a SimpleRNN(64) text encoder."""
  return _text_dw_classifier(SimpleRNN(64))


def BiRNN_model():
  """Text + dw classifier with a Bidirectional(SimpleRNN(64)) text encoder."""
  return _text_dw_classifier(Bidirectional(SimpleRNN(64)))


def GRU_model():
  """Text + dw classifier with a GRU(64) text encoder."""
  return _text_dw_classifier(GRU(64))


def BiGRU_model():
  """Text + dw classifier with a Bidirectional(GRU(64)) text encoder."""
  return _text_dw_classifier(Bidirectional(GRU(64)))


def LSTM_model():
  """Text + dw classifier with an LSTM(64) text encoder."""
  return _text_dw_classifier(LSTM(64))


def BiLSTM_model():
  """Text + dw classifier with a Bidirectional(LSTM(64)) text encoder."""
  return _text_dw_classifier(Bidirectional(LSTM(64)))

In [None]:
# Text + 'dw' experiment on Twitter16 (d2): 10 runs, each on a fresh random
# 80/20 split. Every run trains each architecture listed in `model_enc` and
# appends its test accuracy and classification report to
# results/t16/bertweet/dw/output<j>.txt.
model_builders = {
    'RNN': RNN_model,
    'BiRNN': BiRNN_model,
    'GRU': GRU_model,
    'BiGRU': BiGRU_model,
    'LSTM': LSTM_model,
    'BiLSTM': BiLSTM_model,
}

for j in range(1, 11):
  random_state = random.randint(1, 100)
  # The seed is drawn at random for every run; print it so a split can be reproduced.
  print(f"Run {j}: random_state={random_state}")
  train_embeddings, test_embeddings, train_dw, test_dw, train_labels, test_labels = train_test_split(
      np.array(d2['bertweet_embeddings'].tolist()), np.array(d2['dw'].tolist()), d2['label'].values, test_size=0.2, random_state=random_state
  )
  filename = "results/t16/bertweet/dw/output" + str(j) + ".txt"
  # open(..., "a") creates the file but not its parent directories.
  os.makedirs(os.path.dirname(filename), exist_ok=True)
  # `with` closes the handle even if training raises part-way through a run.
  with open(filename, "a") as file:
    for i in model_enc.keys():
      # Release Keras global state left by the previous model so memory does
      # not keep growing while many models are built in this loop.
      keras.backend.clear_session()
      # Any name not in the table falls back to the BiLSTM builder.
      model = model_builders.get(model_enc[i], BiLSTM_model)()
      model.compile(loss='binary_crossentropy', optimizer='SGD', metrics=['accuracy'])
      print(f"Training Model {model_enc[i]}")
      model.fit([train_embeddings, train_dw], train_labels, epochs=30, batch_size=32, validation_split=0.2)
      score = model.evaluate([test_embeddings, test_dw], test_labels, verbose=0)
      file.write(f"Model {model_enc[i]} Test Accuracy: {score[1]}" + '\n')
      y_pred = model.predict([test_embeddings, test_dw])
      # Threshold the predicted scores at 0.5 to get class labels.
      y_pred = [1.0 if p > 0.5 else 0 for p in y_pred]
      report = classification_report(test_labels, y_pred)
      file.write(report + '\n')