# Import datasets and libraries

In [None]:
!gdown 1SaSq8kwvNmxq2HoQBenhXC3ejM8BU70d
!gdown 1uGv2afj67P9BGEMwFPyv_IopjMzaqMuG

In [None]:
import os
import random

import numpy as np
import pandas as pd
import tensorflow as tf
from keras.layers import Input, Dense, GRU, Embedding, Dropout, LSTM, Concatenate, SimpleRNN, Bidirectional
from keras.models import Model
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

In [None]:
def list_str_to_npa(s):
  """Parse a whitespace-separated string of numbers into a 1-D float array.

  Args:
    s: Text such as "0.1 -0.2 3e-4". Tokens may be separated by any run of
      whitespace (spaces, tabs), and leading/trailing whitespace is ignored.

  Returns:
    A float64 NumPy array with one element per token; an empty array when
    `s` contains no tokens.

  Raises:
    ValueError: If a token cannot be converted to float.
  """
  # split() without an argument collapses repeated whitespace, so a double
  # space or tab does not yield an empty token that float() would reject.
  return np.array([float(token) for token in s.split()], dtype=float)

In [None]:
# Load the main table and attach one embedding array per row.
# (Presumably precomputed BERT embeddings, judging by the file name.)
d1 = pd.read_csv('t15_text_n2v.csv', encoding='utf-8')
bert_embeddings1 = np.load('t15_bert_emb.npy')
# list(...) splits the array along its first axis, so the .npy file must hold
# exactly as many entries as the CSV has rows for the assignment to succeed.
d1['bert_embeddings'] = list(bert_embeddings1)

In [None]:
from sklearn.model_selection import train_test_split
# Initial 80/20 split with a fixed seed. It defines train_embeddings, whose
# shape BiLSTM_model() reads when it builds the text input.
# NOTE(review): the graph features are bound to train_dw/test_dw here, while
# BiLSTM_model() reads train_n2v; that name is only assigned later, inside the
# experiment cells.
train_embeddings, test_embeddings, train_dw, test_dw, train_labels, test_labels = train_test_split(
    np.array(d1['bert_embeddings'].tolist()), np.array(d1['n2v'].tolist()), d1['label'].values, test_size=0.2, random_state=42
)

def BiLSTM_model(text_shape=None, graph_dim=None):
  """Build the two-input (text sequence + graph vector) binary classifier.

  The text branch is a bidirectional LSTM followed by dropout; the graph
  branch is a single dense layer. Both are concatenated and passed through a
  dense hidden layer to a sigmoid output unit. The model is returned
  uncompiled.

  Args:
    text_shape: Optional (timesteps, features) shape of one text sample.
      Defaults to the trailing two dimensions of the global
      `train_embeddings`, which is what the function always used before.
    graph_dim: Optional length of one graph-embedding vector. Defaults to
      `train_n2v.shape[1]` from the global scope.

  Returns:
    A keras `Model` with inputs `[text_input, graph_input]` and one output.

  Raises:
    NameError: If a default is needed but the corresponding global array has
      not been created yet.
  """
  # Fall back to the notebook globals so existing zero-argument calls keep
  # working; explicit arguments remove the hidden dependency on them.
  if text_shape is None:
    text_shape = (train_embeddings.shape[1], train_embeddings.shape[2])
  if graph_dim is None:
    graph_dim = train_n2v.shape[1]

  text_input = Input(shape=tuple(text_shape), name='text_input')
  graph_input = Input(shape=(graph_dim,), name='graph_input')

  # Text branch.
  rnn_output = Bidirectional(LSTM(64))(text_input)
  rnn_output = Dropout(0.2)(rnn_output)

  # Graph branch.
  graph_output = Dense(32, activation='relu')(graph_input)

  # Fuse both branches and classify.
  concatenated = Concatenate()([rnn_output, graph_output])
  hidden = Dense(32, activation='relu')(concatenated)
  output = Dense(1, activation='sigmoid')(hidden)
  model = Model(inputs=[text_input, graph_input], outputs=output)
  return model

# 32 dimensions

In [None]:
# Read the six 32-dimension embedding dumps. Each file becomes one list of
# whitespace-stripped lines, kept in file order (output_1 ... output_6).
emb_lists = []
for run_id in range(1, 7):
    with open(f"t15/32d/output_{run_id}.txt", "r") as src:
        emb_lists.append(list(map(str.strip, src)))

# One name per file so the experiment cells can pick their embedding set.
(n2v_1, n2v_2, n2v_3,
 n2v_4, n2v_5, n2v_6) = emb_lists

## (P, Q) = (1, 1)

In [None]:
# Experiment: 32-d embeddings, (P, Q) = (1, 1). Trains 10 models on random
# 80/20 splits and appends each classification report to t15/32d/11.txt.
d1 = d1.drop('n2v', axis=1)
with open('t15/32d/11.txt', 'a') as f:
  d1['n2v'] = n2v_1
  d1['n2v'] = d1['n2v'].apply(lambda x: list_str_to_npa(x))
  # Loop-invariant: stack the per-row arrays once instead of on every iteration.
  text_features = np.array(d1['bert_embeddings'].tolist())
  graph_features = np.array(d1['n2v'].tolist())
  labels = d1['label'].values
  for i in range(1, 11):
    random_state = random.randint(1, 100)
    f.write(f"iteration_{i}" + '\n')
    train_embeddings, test_embeddings, train_n2v, test_n2v, train_labels, test_labels = train_test_split(
      text_features, graph_features, labels, test_size=0.2, random_state=random_state
    )
    # Build the model only after the split: BiLSTM_model() sizes its inputs
    # from the globals train_embeddings/train_n2v, so they must already hold
    # this experiment's data (train_n2v does not exist before the first split).
    model = BiLSTM_model()
    model.compile(loss='binary_crossentropy',
                  optimizer='SGD',
                  metrics=['accuracy'])
    model.fit([train_embeddings, train_n2v], train_labels, epochs=30, batch_size=32, validation_split=0.2)
    y_pred = model.predict([test_embeddings, test_n2v])
    # Threshold the sigmoid outputs at 0.5 to get hard class labels.
    y_pred = [1.0 if p > 0.5 else 0 for p in y_pred]
    report = classification_report(test_labels, y_pred)
    f.write(report + '\n')

## (P, Q) = (0.5, 1)

In [None]:
# Experiment: 32-d embeddings, (P, Q) = (0.5, 1). Trains 10 models on random
# 80/20 splits and appends each classification report to t15/32d/051.txt.
d1 = d1.drop('n2v', axis=1)
with open('t15/32d/051.txt', 'a') as f:
  d1['n2v'] = n2v_2
  d1['n2v'] = d1['n2v'].apply(lambda x: list_str_to_npa(x))
  # Loop-invariant: stack the per-row arrays once instead of on every iteration.
  text_features = np.array(d1['bert_embeddings'].tolist())
  graph_features = np.array(d1['n2v'].tolist())
  labels = d1['label'].values
  for i in range(1, 11):
    random_state = random.randint(1, 100)
    f.write(f"iteration_{i}" + '\n')
    train_embeddings, test_embeddings, train_n2v, test_n2v, train_labels, test_labels = train_test_split(
      text_features, graph_features, labels, test_size=0.2, random_state=random_state
    )
    # Build the model only after the split: BiLSTM_model() sizes its inputs
    # from the globals train_embeddings/train_n2v, so they must already hold
    # this experiment's data.
    model = BiLSTM_model()
    model.compile(loss='binary_crossentropy',
                  optimizer='SGD',
                  metrics=['accuracy'])
    model.fit([train_embeddings, train_n2v], train_labels, epochs=30, batch_size=32, validation_split=0.2)
    y_pred = model.predict([test_embeddings, test_n2v])
    # Threshold the sigmoid outputs at 0.5 to get hard class labels.
    y_pred = [1.0 if p > 0.5 else 0 for p in y_pred]
    report = classification_report(test_labels, y_pred)
    f.write(report + '\n')

## (P, Q) = (1, 0.5)

In [None]:
# Experiment: 32-d embeddings, (P, Q) = (1, 0.5). Trains 10 models on random
# 80/20 splits and appends each classification report to t15/32d/105.txt.
d1 = d1.drop('n2v', axis=1)
with open('t15/32d/105.txt', 'a') as f:
  d1['n2v'] = n2v_3
  d1['n2v'] = d1['n2v'].apply(lambda x: list_str_to_npa(x))
  # Loop-invariant: stack the per-row arrays once instead of on every iteration.
  text_features = np.array(d1['bert_embeddings'].tolist())
  graph_features = np.array(d1['n2v'].tolist())
  labels = d1['label'].values
  for i in range(1, 11):
    random_state = random.randint(1, 100)
    f.write(f"iteration_{i}" + '\n')
    train_embeddings, test_embeddings, train_n2v, test_n2v, train_labels, test_labels = train_test_split(
      text_features, graph_features, labels, test_size=0.2, random_state=random_state
    )
    # Build the model only after the split: BiLSTM_model() sizes its inputs
    # from the globals train_embeddings/train_n2v, so they must already hold
    # this experiment's data.
    model = BiLSTM_model()
    model.compile(loss='binary_crossentropy',
                  optimizer='SGD',
                  metrics=['accuracy'])
    model.fit([train_embeddings, train_n2v], train_labels, epochs=30, batch_size=32, validation_split=0.2)
    y_pred = model.predict([test_embeddings, test_n2v])
    # Threshold the sigmoid outputs at 0.5 to get hard class labels.
    y_pred = [1.0 if p > 0.5 else 0 for p in y_pred]
    report = classification_report(test_labels, y_pred)
    f.write(report + '\n')

## (P, Q) = (0.5, 0.5)

In [None]:
# Experiment: 32-d embeddings, (P, Q) = (0.5, 0.5). Trains 10 models on random
# 80/20 splits and appends each classification report to t15/32d/0505.txt.
d1 = d1.drop('n2v', axis=1)
with open('t15/32d/0505.txt', 'a') as f:
  d1['n2v'] = n2v_4
  d1['n2v'] = d1['n2v'].apply(lambda x: list_str_to_npa(x))
  # Loop-invariant: stack the per-row arrays once instead of on every iteration.
  text_features = np.array(d1['bert_embeddings'].tolist())
  graph_features = np.array(d1['n2v'].tolist())
  labels = d1['label'].values
  for i in range(1, 11):
    random_state = random.randint(1, 100)
    f.write(f"iteration_{i}" + '\n')
    train_embeddings, test_embeddings, train_n2v, test_n2v, train_labels, test_labels = train_test_split(
      text_features, graph_features, labels, test_size=0.2, random_state=random_state
    )
    # Build the model only after the split: BiLSTM_model() sizes its inputs
    # from the globals train_embeddings/train_n2v, so they must already hold
    # this experiment's data.
    model = BiLSTM_model()
    model.compile(loss='binary_crossentropy',
                  optimizer='SGD',
                  metrics=['accuracy'])
    model.fit([train_embeddings, train_n2v], train_labels, epochs=30, batch_size=32, validation_split=0.2)
    y_pred = model.predict([test_embeddings, test_n2v])
    # Threshold the sigmoid outputs at 0.5 to get hard class labels.
    y_pred = [1.0 if p > 0.5 else 0 for p in y_pred]
    report = classification_report(test_labels, y_pred)
    f.write(report + '\n')

## (P, Q) = (2, 1)

In [None]:
# Experiment: 32-d embeddings, (P, Q) = (2, 1). Trains 10 models on random
# 80/20 splits and appends each classification report to t15/32d/21.txt.
d1 = d1.drop('n2v', axis=1)
with open('t15/32d/21.txt', 'a') as f:
  d1['n2v'] = n2v_5
  d1['n2v'] = d1['n2v'].apply(lambda x: list_str_to_npa(x))
  # Loop-invariant: stack the per-row arrays once instead of on every iteration.
  text_features = np.array(d1['bert_embeddings'].tolist())
  graph_features = np.array(d1['n2v'].tolist())
  labels = d1['label'].values
  for i in range(1, 11):
    random_state = random.randint(1, 100)
    f.write(f"iteration_{i}" + '\n')
    train_embeddings, test_embeddings, train_n2v, test_n2v, train_labels, test_labels = train_test_split(
      text_features, graph_features, labels, test_size=0.2, random_state=random_state
    )
    # Build the model only after the split: BiLSTM_model() sizes its inputs
    # from the globals train_embeddings/train_n2v, so they must already hold
    # this experiment's data.
    model = BiLSTM_model()
    model.compile(loss='binary_crossentropy',
                  optimizer='SGD',
                  metrics=['accuracy'])
    model.fit([train_embeddings, train_n2v], train_labels, epochs=30, batch_size=32, validation_split=0.2)
    y_pred = model.predict([test_embeddings, test_n2v])
    # Threshold the sigmoid outputs at 0.5 to get hard class labels.
    y_pred = [1.0 if p > 0.5 else 0 for p in y_pred]
    report = classification_report(test_labels, y_pred)
    f.write(report + '\n')

## (P, Q) = (1, 2)

In [None]:
# Experiment: 32-d embeddings, (P, Q) = (1, 2). Trains 10 models on random
# 80/20 splits and appends each classification report to t15/32d/12.txt.
d1 = d1.drop('n2v', axis=1)
# Output path is 12.txt: the cell previously appended to 21.txt, mixing these
# reports into the (P, Q) = (2, 1) results file.
with open('t15/32d/12.txt', 'a') as f:
  d1['n2v'] = n2v_6
  d1['n2v'] = d1['n2v'].apply(lambda x: list_str_to_npa(x))
  # Loop-invariant: stack the per-row arrays once instead of on every iteration.
  text_features = np.array(d1['bert_embeddings'].tolist())
  graph_features = np.array(d1['n2v'].tolist())
  labels = d1['label'].values
  for i in range(1, 11):
    random_state = random.randint(1, 100)
    f.write(f"iteration_{i}" + '\n')
    train_embeddings, test_embeddings, train_n2v, test_n2v, train_labels, test_labels = train_test_split(
      text_features, graph_features, labels, test_size=0.2, random_state=random_state
    )
    # Build the model only after the split: BiLSTM_model() sizes its inputs
    # from the globals train_embeddings/train_n2v, so they must already hold
    # this experiment's data.
    model = BiLSTM_model()
    model.compile(loss='binary_crossentropy',
                  optimizer='SGD',
                  metrics=['accuracy'])
    model.fit([train_embeddings, train_n2v], train_labels, epochs=30, batch_size=32, validation_split=0.2)
    y_pred = model.predict([test_embeddings, test_n2v])
    # Threshold the sigmoid outputs at 0.5 to get hard class labels.
    y_pred = [1.0 if p > 0.5 else 0 for p in y_pred]
    report = classification_report(test_labels, y_pred)
    f.write(report + '\n')

# 100 dimensions

In [None]:
# Read the six 100-dimension embedding dumps. Each file becomes one list of
# whitespace-stripped lines, kept in file order (output_1 ... output_6).
emb_lists = []
for run_id in range(1, 7):
    with open(f"t15/100d/output_{run_id}.txt", "r") as src:
        emb_lists.append(list(map(str.strip, src)))

# One name per file so the experiment cells can pick their embedding set.
(n2v_1, n2v_2, n2v_3,
 n2v_4, n2v_5, n2v_6) = emb_lists

## (P, Q) = (1, 1)

In [None]:
# Experiment: 100-d embeddings, (P, Q) = (1, 1). Trains 10 models on random
# 80/20 splits and appends each classification report to t15/100d/11.txt.
d1 = d1.drop('n2v', axis=1)
with open('t15/100d/11.txt', 'a') as f:
  d1['n2v'] = n2v_1
  d1['n2v'] = d1['n2v'].apply(lambda x: list_str_to_npa(x))
  # Loop-invariant: stack the per-row arrays once instead of on every iteration.
  text_features = np.array(d1['bert_embeddings'].tolist())
  graph_features = np.array(d1['n2v'].tolist())
  labels = d1['label'].values
  for i in range(1, 11):
    random_state = random.randint(1, 100)
    f.write(f"iteration_{i}" + '\n')
    train_embeddings, test_embeddings, train_n2v, test_n2v, train_labels, test_labels = train_test_split(
      text_features, graph_features, labels, test_size=0.2, random_state=random_state
    )
    # Build the model only after the split: BiLSTM_model() sizes its inputs
    # from the globals train_embeddings/train_n2v. Building first would reuse
    # the graph-vector width left over from the previously run experiment.
    model = BiLSTM_model()
    model.compile(loss='binary_crossentropy',
                  optimizer='SGD',
                  metrics=['accuracy'])
    model.fit([train_embeddings, train_n2v], train_labels, epochs=30, batch_size=32, validation_split=0.2)
    y_pred = model.predict([test_embeddings, test_n2v])
    # Threshold the sigmoid outputs at 0.5 to get hard class labels.
    y_pred = [1.0 if p > 0.5 else 0 for p in y_pred]
    report = classification_report(test_labels, y_pred)
    f.write(report + '\n')

## (P, Q) = (0.5, 1)

In [None]:
# Experiment: 100-d embeddings, (P, Q) = (0.5, 1). Trains 10 models on random
# 80/20 splits and appends each classification report to t15/100d/051.txt.
d1 = d1.drop('n2v', axis=1)
with open('t15/100d/051.txt', 'a') as f:
  d1['n2v'] = n2v_2
  d1['n2v'] = d1['n2v'].apply(lambda x: list_str_to_npa(x))
  # Loop-invariant: stack the per-row arrays once instead of on every iteration.
  text_features = np.array(d1['bert_embeddings'].tolist())
  graph_features = np.array(d1['n2v'].tolist())
  labels = d1['label'].values
  for i in range(1, 11):
    random_state = random.randint(1, 100)
    f.write(f"iteration_{i}" + '\n')
    train_embeddings, test_embeddings, train_n2v, test_n2v, train_labels, test_labels = train_test_split(
      text_features, graph_features, labels, test_size=0.2, random_state=random_state
    )
    # Build the model only after the split: BiLSTM_model() sizes its inputs
    # from the globals train_embeddings/train_n2v, so they must already hold
    # this experiment's data.
    model = BiLSTM_model()
    model.compile(loss='binary_crossentropy',
                  optimizer='SGD',
                  metrics=['accuracy'])
    model.fit([train_embeddings, train_n2v], train_labels, epochs=30, batch_size=32, validation_split=0.2)
    y_pred = model.predict([test_embeddings, test_n2v])
    # Threshold the sigmoid outputs at 0.5 to get hard class labels.
    y_pred = [1.0 if p > 0.5 else 0 for p in y_pred]
    report = classification_report(test_labels, y_pred)
    f.write(report + '\n')

## (P, Q) = (1, 0.5)

In [None]:
# Experiment: 100-d embeddings, (P, Q) = (1, 0.5). Trains 10 models on random
# 80/20 splits and appends each classification report to t15/100d/105.txt.
d1 = d1.drop('n2v', axis=1)
with open('t15/100d/105.txt', 'a') as f:
  d1['n2v'] = n2v_3
  d1['n2v'] = d1['n2v'].apply(lambda x: list_str_to_npa(x))
  # Loop-invariant: stack the per-row arrays once instead of on every iteration.
  text_features = np.array(d1['bert_embeddings'].tolist())
  graph_features = np.array(d1['n2v'].tolist())
  labels = d1['label'].values
  for i in range(1, 11):
    random_state = random.randint(1, 100)
    f.write(f"iteration_{i}" + '\n')
    train_embeddings, test_embeddings, train_n2v, test_n2v, train_labels, test_labels = train_test_split(
      text_features, graph_features, labels, test_size=0.2, random_state=random_state
    )
    # Build the model only after the split: BiLSTM_model() sizes its inputs
    # from the globals train_embeddings/train_n2v, so they must already hold
    # this experiment's data.
    model = BiLSTM_model()
    model.compile(loss='binary_crossentropy',
                  optimizer='SGD',
                  metrics=['accuracy'])
    model.fit([train_embeddings, train_n2v], train_labels, epochs=30, batch_size=32, validation_split=0.2)
    y_pred = model.predict([test_embeddings, test_n2v])
    # Threshold the sigmoid outputs at 0.5 to get hard class labels.
    y_pred = [1.0 if p > 0.5 else 0 for p in y_pred]
    report = classification_report(test_labels, y_pred)
    f.write(report + '\n')

## (P, Q) = (0.5, 0.5)

In [None]:
# Experiment: 100-d embeddings, (P, Q) = (0.5, 0.5). Trains 10 models on random
# 80/20 splits and appends each classification report to t15/100d/0505.txt.
d1 = d1.drop('n2v', axis=1)
with open('t15/100d/0505.txt', 'a') as f:
  d1['n2v'] = n2v_4
  d1['n2v'] = d1['n2v'].apply(lambda x: list_str_to_npa(x))
  # Loop-invariant: stack the per-row arrays once instead of on every iteration.
  text_features = np.array(d1['bert_embeddings'].tolist())
  graph_features = np.array(d1['n2v'].tolist())
  labels = d1['label'].values
  for i in range(1, 11):
    random_state = random.randint(1, 100)
    f.write(f"iteration_{i}" + '\n')
    train_embeddings, test_embeddings, train_n2v, test_n2v, train_labels, test_labels = train_test_split(
      text_features, graph_features, labels, test_size=0.2, random_state=random_state
    )
    # Build the model only after the split: BiLSTM_model() sizes its inputs
    # from the globals train_embeddings/train_n2v, so they must already hold
    # this experiment's data.
    model = BiLSTM_model()
    model.compile(loss='binary_crossentropy',
                  optimizer='SGD',
                  metrics=['accuracy'])
    model.fit([train_embeddings, train_n2v], train_labels, epochs=30, batch_size=32, validation_split=0.2)
    y_pred = model.predict([test_embeddings, test_n2v])
    # Threshold the sigmoid outputs at 0.5 to get hard class labels.
    y_pred = [1.0 if p > 0.5 else 0 for p in y_pred]
    report = classification_report(test_labels, y_pred)
    f.write(report + '\n')

## (P, Q) = (2, 1)

In [None]:
# Experiment: 100-d embeddings, (P, Q) = (2, 1). Trains 10 models on random
# 80/20 splits and appends each classification report to t15/100d/21.txt.
d1 = d1.drop('n2v', axis=1)
with open('t15/100d/21.txt', 'a') as f:
  d1['n2v'] = n2v_5
  d1['n2v'] = d1['n2v'].apply(lambda x: list_str_to_npa(x))
  # Loop-invariant: stack the per-row arrays once instead of on every iteration.
  text_features = np.array(d1['bert_embeddings'].tolist())
  graph_features = np.array(d1['n2v'].tolist())
  labels = d1['label'].values
  for i in range(1, 11):
    random_state = random.randint(1, 100)
    f.write(f"iteration_{i}" + '\n')
    train_embeddings, test_embeddings, train_n2v, test_n2v, train_labels, test_labels = train_test_split(
      text_features, graph_features, labels, test_size=0.2, random_state=random_state
    )
    # Build the model only after the split: BiLSTM_model() sizes its inputs
    # from the globals train_embeddings/train_n2v, so they must already hold
    # this experiment's data.
    model = BiLSTM_model()
    model.compile(loss='binary_crossentropy',
                  optimizer='SGD',
                  metrics=['accuracy'])
    model.fit([train_embeddings, train_n2v], train_labels, epochs=30, batch_size=32, validation_split=0.2)
    y_pred = model.predict([test_embeddings, test_n2v])
    # Threshold the sigmoid outputs at 0.5 to get hard class labels.
    y_pred = [1.0 if p > 0.5 else 0 for p in y_pred]
    report = classification_report(test_labels, y_pred)
    f.write(report + '\n')

## (P, Q) = (1, 2)

In [None]:
# Experiment: 100-d embeddings, (P, Q) = (1, 2). Trains 10 models on random
# 80/20 splits and appends each classification report to t15/100d/12.txt.
d1 = d1.drop('n2v', axis=1)
with open('t15/100d/12.txt', 'a') as f:
  d1['n2v'] = n2v_6
  d1['n2v'] = d1['n2v'].apply(lambda x: list_str_to_npa(x))
  # Loop-invariant: stack the per-row arrays once instead of on every iteration.
  text_features = np.array(d1['bert_embeddings'].tolist())
  graph_features = np.array(d1['n2v'].tolist())
  labels = d1['label'].values
  for i in range(1, 11):
    random_state = random.randint(1, 100)
    f.write(f"iteration_{i}" + '\n')
    train_embeddings, test_embeddings, train_n2v, test_n2v, train_labels, test_labels = train_test_split(
      text_features, graph_features, labels, test_size=0.2, random_state=random_state
    )
    # Build the model only after the split: BiLSTM_model() sizes its inputs
    # from the globals train_embeddings/train_n2v, so they must already hold
    # this experiment's data.
    model = BiLSTM_model()
    model.compile(loss='binary_crossentropy',
                  optimizer='SGD',
                  metrics=['accuracy'])
    model.fit([train_embeddings, train_n2v], train_labels, epochs=30, batch_size=32, validation_split=0.2)
    y_pred = model.predict([test_embeddings, test_n2v])
    # Threshold the sigmoid outputs at 0.5 to get hard class labels.
    y_pred = [1.0 if p > 0.5 else 0 for p in y_pred]
    report = classification_report(test_labels, y_pred)
    f.write(report + '\n')