# Import Datasets and libraries

In [None]:
# Downloading csv datasets for Twitter15 and Twitter16 with links
!gdown 1SaSq8kwvNmxq2HoQBenhXC3ejM8BU70d
!gdown 1uGv2afj67P9BGEMwFPyv_IopjMzaqMuG
!gdown 1jfWwc8g-rS0G3oS5oKsydq8QXU7vev72
!gdown 1z0vGTX5LGaMn-zjSpT9uIePXwi0qDBUz
!mkdir Twitter15
!mkdir Twitter16
!unzip tree15.zip -d 'Twitter15'
!unzip tree16.zip -d 'Twitter16'
!pip install node2vec

In [None]:
from node2vec import Node2Vec as n2v
import networkx as nx
import matplotlib.pyplot as plt
import os
import pandas as pd
import numpy as np

In [None]:
# Load the Twitter15 (d1) and Twitter16 (d2) tables from UTF-8 encoded CSV files.
# The embedding cells further down iterate over the 'tweet_id' column of each
# frame to locate the per-tweet tree file.
d1 = pd.read_csv('t15_text_n2v.csv', encoding='utf-8')
d2 = pd.read_csv('t16_text_n2v.csv', encoding='utf-8')

In [1]:
# functions for reading a graph from a tree_file, and for drawing

# Directory containing one "<tweet_id>.txt" tree file per tweet; the embedding
# cells pass it as the first argument of read_graph_from_file.
# NOTE(review): absolute path - presumably the Colab working directory; confirm
# before running elsewhere.
tree_dir = '/content/Twitter15/tree'
def read_graph_from_file(tree_dir, filename):
    """Build a directed graph from a text file of 'parent->child' lines.

    Args:
        tree_dir: Directory that contains the tree file.
        filename: Name of the tree file inside ``tree_dir``.

    Returns:
        An ``nx.DiGraph`` with one edge per line that contains '->'. Node
        labels are the raw text on either side of the arrow; only the line as
        a whole is stripped of surrounding whitespace.

    Raises:
        ValueError: If a line does not split into exactly two parts on '->'.
    """
    graph = nx.DiGraph()
    path = os.path.join(tree_dir, filename)
    with open(path, 'r') as handle:
        for raw_line in handle:
            # Lines without an arrow carry no edge information.
            if '->' not in raw_line:
                continue
            source, target = raw_line.strip().split('->')
            graph.add_edge(source, target)
    return graph
def draw_graph(G):
    """Plot graph ``G`` on a new 10x8 figure using a Kamada-Kawai layout.

    Nodes are drawn as small red markers and edges as thin, half-transparent
    black lines. The figure is shown immediately; nothing is returned.
    """
    plt.figure(figsize=(10, 8))
    layout = nx.kamada_kawai_layout(G)
    nx.draw_networkx_nodes(G, layout, node_color="red", node_size=30)
    nx.draw_networkx_edges(G, layout, width=.5, alpha=.5, edge_color="black")
    plt.show()

# 32-dimensional Node2Vec Embeddings for Twitter15 and Twitter16

Each cell declares its own parameters so that it can be run independently of the others.

In [None]:
# Twitter15 Node2Vec Embeddings (32-dimensional)
# For every (p, q) setting, fit Node2Vec on each tweet's tree and keep the
# embedding of the source-tweet node. One output file is written per setting,
# with one space-separated embedding per line in d1['tweet_id'] order.
emb_lists = []
dimensions = 32
walk_length = 10
num_walks = 10
window = 10
min_count = 1
batch_words = 4
pq_values = [(1, 1), (0.5, 1), (1, 0.5), (0.5, 0.5), (2, 1), (1, 2)]
output_dir = "32d"

# open(..., "w") does not create missing parent directories. Create the output
# directory up front so the first write cannot fail after a full pass of
# embedding computation.
os.makedirs(output_dir, exist_ok=True)

for i, (p, q) in enumerate(pq_values, start=1):
    emb_list = []

    for tweet_id in d1['tweet_id']:
        filename = str(tweet_id) + ".txt"
        G = read_graph_from_file(tree_dir, filename)
        model = n2v(G, dimensions=dimensions, walk_length=walk_length, num_walks=num_walks, p=p, q=q, workers=4)
        model = model.fit(window=window, min_count=min_count, batch_words=batch_words)
        # Node embedding of the source tweet -> node_0 = root, node_1 = source.
        # Relies on nodes being listed in insertion order and on the first
        # edge of the tree file being root -> source.
        node = list(G.nodes())[1]
        emb_list.append(model.wv.get_vector(node))

    emb_lists.append(emb_list)

    with open(f"{output_dir}/t15_output_{i}.txt", "w") as file:
        for emb in emb_list:
            file.write(' '.join(str(x) for x in emb) + '\n')

In [None]:
# Twitter16 Node2Vec Embeddings (32-dimensional)
# For every (p, q) setting, fit Node2Vec on each tweet's tree and keep the
# embedding of the source-tweet node. One output file is written per setting,
# with one space-separated embedding per line in d2['tweet_id'] order.
emb_lists = []
# This cell belongs to the 32-dimensional section and writes into "32d/", so
# the embedding size must be 32 (it was previously 100).
dimensions = 32
walk_length = 10
num_walks = 10
window = 10
min_count = 1
batch_words = 4
pq_values = [(1, 1), (0.5, 1), (1, 0.5), (0.5, 0.5), (2, 1), (1, 2)]
output_dir = "32d"
# Twitter16 trees are unpacked into the Twitter16 directory, not Twitter15.
# NOTE(review): assumes tree16.zip unpacks to Twitter16/tree, mirroring the
# Twitter15 layout - confirm.
t16_tree_dir = '/content/Twitter16/tree'

# open(..., "w") does not create missing parent directories. Create the output
# directory up front so the first write cannot fail after a full pass of
# embedding computation.
os.makedirs(output_dir, exist_ok=True)

for i, (p, q) in enumerate(pq_values, start=1):
    emb_list = []

    for tweet_id in d2['tweet_id']:
        filename = str(tweet_id) + ".txt"
        G = read_graph_from_file(t16_tree_dir, filename)
        model = n2v(G, dimensions=dimensions, walk_length=walk_length, num_walks=num_walks, p=p, q=q, workers=4)
        model = model.fit(window=window, min_count=min_count, batch_words=batch_words)
        # Node embedding of the source tweet -> node_0 = root, node_1 = source.
        # Relies on nodes being listed in insertion order and on the first
        # edge of the tree file being root -> source.
        node = list(G.nodes())[1]
        emb_list.append(model.wv.get_vector(node))

    emb_lists.append(emb_list)

    with open(f"{output_dir}/t16_output_{i}.txt", "w") as file:
        for emb in emb_list:
            file.write(' '.join(str(x) for x in emb) + '\n')

# 100-dimensional Node2Vec Embeddings for Twitter15 and Twitter16

In [None]:
# Twitter15 Node2Vec Embeddings (100-dimensional)
# For every (p, q) setting, fit Node2Vec on each tweet's tree and keep the
# embedding of the source-tweet node. One output file is written per setting,
# with one space-separated embedding per line in d1['tweet_id'] order.
emb_lists = []
dimensions = 100
walk_length = 10
num_walks = 10
window = 10
min_count = 1
batch_words = 4
pq_values = [(1, 1), (0.5, 1), (1, 0.5), (0.5, 0.5), (2, 1), (1, 2)]
output_dir = "100d"

# open(..., "w") does not create missing parent directories. Create the output
# directory up front so the first write cannot fail after a full pass of
# embedding computation.
os.makedirs(output_dir, exist_ok=True)

for i, (p, q) in enumerate(pq_values, start=1):
    emb_list = []

    for tweet_id in d1['tweet_id']:
        filename = str(tweet_id) + ".txt"
        G = read_graph_from_file(tree_dir, filename)
        model = n2v(G, dimensions=dimensions, walk_length=walk_length, num_walks=num_walks, p=p, q=q, workers=4)
        model = model.fit(window=window, min_count=min_count, batch_words=batch_words)
        # Node embedding of the source tweet -> node_0 = root, node_1 = source.
        # Relies on nodes being listed in insertion order and on the first
        # edge of the tree file being root -> source.
        node = list(G.nodes())[1]
        emb_list.append(model.wv.get_vector(node))

    emb_lists.append(emb_list)

    with open(f"{output_dir}/t15_output_{i}.txt", "w") as file:
        for emb in emb_list:
            file.write(' '.join(str(x) for x in emb) + '\n')

In [None]:
# Twitter16 Node2Vec Embeddings (100-dimensional)
# For every (p, q) setting, fit Node2Vec on each tweet's tree and keep the
# embedding of the source-tweet node. One output file is written per setting,
# with one space-separated embedding per line in d2['tweet_id'] order.
emb_lists = []
dimensions = 100
walk_length = 10
num_walks = 10
window = 10
min_count = 1
batch_words = 4
pq_values = [(1, 1), (0.5, 1), (1, 0.5), (0.5, 0.5), (2, 1), (1, 2)]
output_dir = "100d"
# Twitter16 trees are unpacked into the Twitter16 directory, not Twitter15.
# NOTE(review): assumes tree16.zip unpacks to Twitter16/tree, mirroring the
# Twitter15 layout - confirm.
t16_tree_dir = '/content/Twitter16/tree'

# open(..., "w") does not create missing parent directories. Create the output
# directory up front so the first write cannot fail after a full pass of
# embedding computation.
os.makedirs(output_dir, exist_ok=True)

for i, (p, q) in enumerate(pq_values, start=1):
    emb_list = []

    for tweet_id in d2['tweet_id']:
        filename = str(tweet_id) + ".txt"
        G = read_graph_from_file(t16_tree_dir, filename)
        model = n2v(G, dimensions=dimensions, walk_length=walk_length, num_walks=num_walks, p=p, q=q, workers=4)
        model = model.fit(window=window, min_count=min_count, batch_words=batch_words)
        # Node embedding of the source tweet -> node_0 = root, node_1 = source.
        # Relies on nodes being listed in insertion order and on the first
        # edge of the tree file being root -> source.
        node = list(G.nodes())[1]
        emb_list.append(model.wv.get_vector(node))

    emb_lists.append(emb_list)

    with open(f"{output_dir}/t16_output_{i}.txt", "w") as file:
        for emb in emb_list:
            file.write(' '.join(str(x) for x in emb) + '\n')