# Import Datasets and libraries


In [None]:
# Downloading csv datasets for Twitter15 and Twitter16 with links
!gdown 1SaSq8kwvNmxq2HoQBenhXC3ejM8BU70d
!gdown 1uGv2afj67P9BGEMwFPyv_IopjMzaqMuG
!gdown 1jfWwc8g-rS0G3oS5oKsydq8QXU7vev72
!gdown 1z0vGTX5LGaMn-zjSpT9uIePXwi0qDBUz
!mkdir Twitter15
!mkdir Twitter16
!unzip tree15.zip -d 'Twitter15'
!unzip tree16.zip -d 'Twitter16'
!pip install karateclub

In [None]:
from karateclub import DeepWalk
import networkx as nx
import matplotlib.pyplot as plt
import os
import pandas as pd
import numpy as np

In [None]:
# Load both tweet tables in one pass: d1 <- t15 file, d2 <- t16 file.
# Their 'tweet_id' column is what later cells iterate over.
d1, d2 = (
    pd.read_csv(csv_name, encoding='utf-8')
    for csv_name in ('t15_text_n2v.csv', 't16_text_n2v.csv')
)

In [None]:
# functions for reading a graph from a tree_file, and for drawing

def read_graph_from_file(tree_dir, filename):
    """Build a directed graph from the '->' edge lines of a tree file.

    Parameters
    ----------
    tree_dir : str
        Directory that contains the file.
    filename : str
        Name of the file inside ``tree_dir``.

    Returns
    -------
    nx.DiGraph
        One edge per line containing '->': the text left of the arrow is the
        parent node, the text right of it is the child node. Lines without
        '->' are ignored.
    """
    graph = nx.DiGraph()
    path = os.path.join(tree_dir, filename)
    with open(path, 'r') as handle:
        # Only lines that carry an arrow describe an edge.
        arrow_lines = (raw for raw in handle if '->' in raw)
        for raw in arrow_lines:
            source, target = raw.strip().split('->')
            graph.add_edge(source, target)
    return graph
def draw_graph(G):
    """Plot ``G`` using a Kamada-Kawai layout.

    Opens a new 10x8 figure, draws the nodes as small red markers and the
    edges as thin, half-transparent black lines, then shows the figure.
    Returns None.
    """
    plt.figure(figsize=(10, 8))
    layout = nx.kamada_kawai_layout(G)
    # Nodes first, then edges, both on the same layout.
    nx.draw_networkx_nodes(G, layout, node_color="red", node_size=30)
    nx.draw_networkx_edges(G, layout, width=.5, alpha=.5, edge_color="black")
    plt.show()

# 32-dimensional DeepWalk Embeddings for Twitter15 and Twitter16

In [None]:
# Directory of the Twitter15 tree files; passed to read_graph_from_file as tree_dir.
tree_dir = '/content/Twitter15/tree'

In [None]:
# One 32-dimensional DeepWalk vector per tweet id in d1.
t15_32d_emb_list = []
for tweet_id in d1['tweet_id']:
    # The tree of a tweet is stored as "<tweet_id>.txt" inside tree_dir.
    G = read_graph_from_file(tree_dir, f"{tweet_id}.txt")
    # Map the original node labels to 0..n-1 in the order G.nodes() yields them
    # (karateclub estimators expect consecutive integer node ids starting at 0).
    d = {node: idx for idx, node in enumerate(G.nodes())}
    H = nx.relabel_nodes(G, d)
    model = DeepWalk(walk_length=10, dimensions=32, window_size=5)
    model.fit(H)
    # Keep only row 1 of the embedding matrix, i.e. the second node added to G
    # (presumably the source tweet under the root - confirm against the tree files).
    emb = model.get_embedding()[1]
    t15_32d_emb_list.append(emb)

In [None]:
# The tweet ids in d2 belong to Twitter16, so their tree files must be read from
# the Twitter16 directory. Use a dedicated path here instead of the shared
# `tree_dir` variable, whose value depends on which cell ran last.
t16_tree_dir = '/content/Twitter16/tree'

# One 32-dimensional DeepWalk vector per tweet id in d2.
t16_32d_emb_list = []
for tweet_id in d2['tweet_id']:
    filename = str(tweet_id) + ".txt"
    G = read_graph_from_file(t16_tree_dir, filename)
    model = DeepWalk(walk_length=10, dimensions=32, window_size=5)
    # Relabel nodes to 0..n-1 in the order G.nodes() yields them
    # (karateclub estimators expect consecutive integer node ids starting at 0).
    d = {node: i for i, node in enumerate(G.nodes())}
    H = nx.relabel_nodes(G, d)
    model.fit(H)
    embs = model.get_embedding()
    # Row 1 is the second node added to the graph
    # (presumably the source tweet under the root - confirm against the tree files).
    emb = embs[1]
    t16_32d_emb_list.append(emb)

# 100-dimensional DeepWalk Embeddings for Twitter15 and Twitter16

In [None]:
# Directory of the Twitter16 tree files; passed to read_graph_from_file as tree_dir.
tree_dir = '/content/Twitter16/tree'

In [None]:
# The tweet ids in d1 belong to Twitter15, so their tree files must be read from
# the Twitter15 directory. Use a dedicated path here instead of the shared
# `tree_dir` variable, whose value depends on which cell ran last.
t15_tree_dir = '/content/Twitter15/tree'

# One 100-dimensional DeepWalk vector per tweet id in d1.
t15_100d_emb_list = []
for tweet_id in d1['tweet_id']:
    filename = str(tweet_id) + ".txt"
    G = read_graph_from_file(t15_tree_dir, filename)
    model = DeepWalk(walk_length=10, dimensions=100, window_size=5)
    # Relabel nodes to 0..n-1 in the order G.nodes() yields them
    # (karateclub estimators expect consecutive integer node ids starting at 0).
    d = {node: i for i, node in enumerate(G.nodes())}
    H = nx.relabel_nodes(G, d)
    model.fit(H)
    embs = model.get_embedding()
    # Row 1 is the second node added to the graph
    # (presumably the source tweet under the root - confirm against the tree files).
    emb = embs[1]
    t15_100d_emb_list.append(emb)

In [None]:
# One 100-dimensional DeepWalk vector per tweet id in d2.
t16_100d_emb_list = []
for tweet_id in d2['tweet_id']:
    # The tree of a tweet is stored as "<tweet_id>.txt" inside tree_dir.
    G = read_graph_from_file(tree_dir, f"{tweet_id}.txt")
    # Map the original node labels to 0..n-1 in the order G.nodes() yields them
    # (karateclub estimators expect consecutive integer node ids starting at 0).
    d = dict(zip(G.nodes(), range(G.number_of_nodes())))
    H = nx.relabel_nodes(G, d)
    model = DeepWalk(walk_length=10, dimensions=100, window_size=5)
    model.fit(H)
    # Keep only row 1 of the embedding matrix, i.e. the second node added to G
    # (presumably the source tweet under the root - confirm against the tree files).
    emb = model.get_embedding()[1]
    t16_100d_emb_list.append(emb)

# Saving Embeddings

In [None]:
# np.save does not create missing parent folders, so make sure they exist.
os.makedirs('32d', exist_ok=True)
os.makedirs('100d', exist_ok=True)

# Each list of per-tweet vectors is written as a single .npy array.
np.save('32d/t15_dw_emb.npy', t15_32d_emb_list)
np.save('32d/t16_dw_emb.npy', t16_32d_emb_list)
np.save('100d/t15_dw_emb.npy', t15_100d_emb_list)
# Twitter16 100-d embeddings get their own file; writing the Twitter15 list
# a second time here would leave t16_100d_emb_list unsaved.
np.save('100d/t16_dw_emb.npy', t16_100d_emb_list)