In [None]:
import sys
# Put the parent directory (relative to the current working directory) at the
# front of the module search path so the imports in the following cell can
# resolve packages that live one level up (presumably the project's `config`
# package -- verify against the repository layout).
sys.path.insert(0, '..')

In [None]:
from config.path_config import GraphPaths, PlotPaths, NodePairPaths
from config.constants import Constants

import networkx as nx
import csv
import os
import random
# Seed the module-level `random` generator. This affects the `random.*`
# functions only, not separately constructed `random.Random` instances.
random.seed(42)

In [None]:
def get_seed_nodes(path):
    """
    Read seed node identifiers from a text file, one identifier per line.

    Surrounding whitespace is stripped from every line. Lines that are empty
    after stripping (for example a trailing blank line at the end of the file)
    are skipped so they do not end up as empty-string node ids.

    :param path: path of the text file to read
    :return: list of node id strings, in file order
    """
    with open(path) as f:
        stripped_lines = (line.strip() for line in f)
        return [node_id for node_id in stripped_lines if node_id]

In [None]:
# Load the two raw edge lists (tw first, then fs) with networkx's default
# read_edgelist settings.
tw_real, fs_real = (
    nx.read_edgelist(edgelist_path)
    for edgelist_path in (GraphPaths.ione_raw_tw, GraphPaths.ione_raw_fs)
)

In [None]:
# Print "<node count> <edge count>" for the tw graph, then for the fs graph.
print(
    *(f'{graph.number_of_nodes()} {graph.number_of_edges()}' for graph in (tw_real, fs_real)),
    sep='\n',
)

In [None]:
# Give every edge of both raw graphs a 'weight' attribute equal to 1.
# A non-dict `values` is applied by networkx as a constant to every edge.
nx.set_edge_attributes(tw_real, values=1, name='weight')
nx.set_edge_attributes(fs_real, values=1, name='weight')

In [None]:
def u_suffix(label):
    """Return ``label`` converted to a string with '-u' appended."""
    return str(label) + '-u'


def v_suffix(label):
    """Return ``label`` converted to a string with '-v' appended."""
    return str(label) + '-v'

In [None]:
# Build relabelled copies of the raw graphs: tw labels go through u_suffix and
# fs labels through v_suffix. copy=True (the networkx default) leaves the
# original graphs untouched.
relabeled_tw_real = nx.relabel_nodes(tw_real, mapping=u_suffix, copy=True)
relabeled_fs_real = nx.relabel_nodes(fs_real, mapping=v_suffix, copy=True)

In [None]:
# Write the relabelled graphs as edge lists; data=['weight'] appends each
# edge's 'weight' value after the two node labels.
out_dir = os.path.dirname(GraphPaths.ione.format(nw='tw'))
if out_dir:
    # exist_ok=True replaces the racy exists()-then-makedirs() pair; the guard
    # skips creation when the path has no directory component (makedirs('')
    # would raise).
    os.makedirs(out_dir, exist_ok=True)
nx.write_edgelist(relabeled_tw_real, GraphPaths.ione.format(nw='tw'), data=['weight'])
nx.write_edgelist(relabeled_fs_real, GraphPaths.ione.format(nw='fs'), data=['weight'])

In [None]:
def common_neighborhood(graph_1, graph_2, pair):
    """
    Jaccard-style overlap of the neighbourhoods of a cross-graph node pair.

    The neighbours of ``pair[0]`` in ``graph_1`` and of ``pair[1]`` in
    ``graph_2`` are compared by label, and the result is
    ``len(intersection) / len(union)`` of the two neighbour sets.

    :param graph_1: networkx graph containing ``pair[0]``
    :param graph_2: networkx graph containing ``pair[1]``
    :param pair: expected a tuple for pair of nodes
    :return: float in [0, 1]; 0.0 when neither node has any neighbour
        (previously this case raised ZeroDivisionError)
    """
    neighbors_1 = set(nx.all_neighbors(graph_1, pair[0]))
    neighbors_2 = set(nx.all_neighbors(graph_2, pair[1]))
    union = neighbors_1 | neighbors_2
    if not union:
        # Two isolated nodes share nothing; avoid dividing by zero.
        return 0.0
    return len(neighbors_1 & neighbors_2) / len(union)

In [None]:
def random_walk(graph, path_length, alpha=0, rand=None, start=None, is_start_node_first_node=True):
    """
    Run one truncated random walk and pair every visited node with the start node.

    :param graph: graph-like object; ``graph[node]`` must give the neighbours of
        ``node``, and ``graph.nodes()`` all nodes (only used when ``start`` is None)
    :param path_length: maximum number of nodes on the walk, start node included
    :param alpha: probability of jumping back to the start node at each step
    :param rand: object providing ``choice`` and ``random``; when None the
        module-level ``random`` generator is used, so ``random.seed(...)`` makes
        the walks reproducible (the old default was an unseeded ``Random()``
        instance created once at definition time)
    :param start: node to start from; when None a start node is drawn uniformly
        from the nodes of ``graph``. Falsy node ids such as ``0`` are honoured.
    :param is_start_node_first_node: if True the result holds ``(start, visited)``
        tuples, otherwise ``(visited, start)`` tuples
    :return: list of 2-tuples, one per visited node; the first one pairs the
        start node with itself
    """
    if rand is None:
        rand = random
    if start is None:
        # Sampling is uniform w.r.t V, and not w.r.t E
        start = rand.choice(list(graph.nodes()))
    path = [start]
    while len(path) < path_length:
        neighbors = list(graph[path[-1]])
        if not neighbors:
            # Dead end: the walk stops early.
            break
        if rand.random() >= alpha:
            path.append(rand.choice(neighbors))
        else:
            # Restart: jump back to the start node.
            path.append(start)
    if is_start_node_first_node:
        return [(start, node) for node in path]
    return [(node, start) for node in path]

In [None]:
def write_sampled_nodepairs(graph1, graph2, random_walk_path_len, seed_nodes, path_seed, path_nonseed, path_combined,
                            u_suffix='-u', v_suffix='-v'):
    """
    Sample cross-graph node pairs around every seed node and write them to disk.

    For each seed node one walk is run on ``graph2`` (pairs are
    ``(seed, visited)``) and one on ``graph1`` (pairs are ``(visited, seed)``).
    Each pair is scored with ``common_neighborhood``; pairs with a score above
    zero are written as space-delimited rows ``<node1><u_suffix> <node2><v_suffix> <score>``.
    Rows with ``node1 == node2`` go to ``path_seed``, all others to
    ``path_nonseed``, and every written row also goes to ``path_combined``.
    The number of rows written to the combined file is printed at the end.

    :param graph1: graph providing the first node of every pair
    :param graph2: graph providing the second node of every pair
    :param random_walk_path_len: walk length passed to ``random_walk``
    :param seed_nodes: iterable of node ids expected to exist in both graphs
    :param path_seed: output path for pairs whose two node ids are equal
    :param path_nonseed: output path for all other pairs
    :param path_combined: output path receiving every written pair
    :param u_suffix: string appended to ``graph1`` node ids in the output
    :param v_suffix: string appended to ``graph2`` node ids in the output
    :raises AssertionError: if a seed node or sampled node is missing from its graph
    """
    # The suffixes are parameters rather than notebook globals so the function
    # does not depend on which cell last rebound those names.
    total_num_nodepairs = 0

    # newline='' is what the csv module requires of files handed to csv.writer;
    # without it platforms that translate '\n' emit an extra '\r' per row.
    with open(path_seed, 'w', newline='') as f_seed, \
            open(path_nonseed, 'w', newline='') as f_nonseed, \
            open(path_combined, 'w', newline='') as f_combined:
        writer_seed = csv.writer(f_seed, delimiter=' ')
        writer_nonseed = csv.writer(f_nonseed, delimiter=' ')
        writer_combined = csv.writer(f_combined, delimiter=' ')

        for node in seed_nodes:
            assert(node in graph1 and node in graph2)

            random_walk_pairs = random_walk(graph2, random_walk_path_len, start=node)
            random_walk_pairs.extend(
                random_walk(graph1, random_walk_path_len, start=node, is_start_node_first_node=False))

            for node1, node2 in random_walk_pairs:
                # Duplicates will need be handled when reading; networkx handles duplicate edges
                assert(node1 in graph1 and node2 in graph2)
                cn = common_neighborhood(graph1, graph2, (node1, node2))
                if cn > 0:
                    row = [f'{node1}{u_suffix}', f'{node2}{v_suffix}', cn]
                    if node1 == node2:
                        writer_seed.writerow(row)
                    else:
                        writer_nonseed.writerow(row)
                    writer_combined.writerow(row)
                    total_num_nodepairs += 1
    print(f'Wrote {total_num_nodepairs} node pairs combined')

In [None]:
# Load the seed node ids (presumably the ground-truth alignment, going by the
# `_gt` path name) and show how many were read.
seed_nodes = get_seed_nodes(GraphPaths.ione_raw_gt)
print(len(seed_nodes))
# NOTE: this rebinds u_suffix / v_suffix -- bound to label-mapping callables in
# an earlier cell -- to plain string suffixes. Re-running the relabelling cell
# after this one would hand it strings instead of callables.
u_suffix, v_suffix = '-u', '-v'

In [None]:
# Maximum number of nodes per random walk, start node included.
DEFAULT_WALK_LENGTH = 20
out_dir = os.path.dirname(NodePairPaths.ione_nodepairs.format(type='seed'))
if out_dir:
    # exist_ok=True replaces the racy exists()-then-makedirs() pair; the guard
    # skips creation when the path has no directory component (makedirs('')
    # would raise).
    os.makedirs(out_dir, exist_ok=True)
# Sample node pairs around every seed node and write the seed / nonseed /
# combined files.
write_sampled_nodepairs(tw_real, fs_real, DEFAULT_WALK_LENGTH, seed_nodes, 
                        NodePairPaths.ione_nodepairs.format(type='seed'),
                        NodePairPaths.ione_nodepairs.format(type='nonseed'),
                        NodePairPaths.ione_nodepairs.format(type='combined'))