# Homework 4

In [1]:
import networkx as nx

import random

## Visualize data

We chose the Google web graph at https://snap.stanford.edu/data/web-Google.html.

In [24]:
folder = 'data'
file = 'web-NotreDame.txt'

def read_graph(folder, file):
    with open(folder + '/' + file, 'r') as f:
        graph = nx.DiGraph()
        # skip first 4 lines
        for i in range(4):
            f.readline()
        # each line is an edge (pair of nodes)
        # counter = 0
        for line in f:
            # if counter == 1000:
            #     break
            edge = line.split()
            graph.add_edge(int(edge[0]), int(edge[1]))
            # counter += 1
    return graph

In [25]:
G = read_graph(folder, file)

print("Number of nodes: ", G.number_of_nodes())
print("Number of edges: ", G.number_of_edges())

Number of nodes:  325729
Number of edges:  1497134


In [None]:
# visualize the graph
# import matplotlib.pyplot as plt
# %matplotlib inline

# plt.figure(figsize=(10, 10))
# nx.draw(G, node_size=10)
# plt.show()

In [26]:
# convert G to undirected graph
G_undirected = G.to_undirected()

print("Number of nodes: ", G_undirected.number_of_nodes())
print("Number of edges: ", G_undirected.number_of_edges())

Number of nodes:  325729
Number of edges:  1117563


In [28]:
# calculate number of triangles
triangles = nx.triangles(G_undirected)

print("Number of triangles: ", sum(triangles.values()) / 3)

Number of triangles:  8910005.0


## Streaming graph algorithm

In [10]:
stream = []

edge_res = [] # list of edges (i, j)
wedge_res = [] # list of wedges (i, j, k)
isClosed = [] # boolean array
tot_wedges = 0 # this is the total number of wedges formed by edges in the current edge res

def closed_by(wedge, edge):
    if edge[0] == wedge[2] and edge[1] == wedge[0]:
        return True
    return False

def update_tot_wedges():
    global tot_wedges

    for i in range(len(edge_res)):
        for j in range(i + 1, len(edge_res)):
            intersection = set(edge_res[i]).intersection(set(edge_res[j]))
            if len(intersection) == 1 and (edge_res[i][1] == edge_res[j][0] or edge_res[i][0] == edge_res[j][1]):
                tot_wedges += 1

def calculate_N_t(edge_t):
    N_t_list = []

    for i in range(len(edge_res)):
        intersection = set(edge_t).intersection(set(edge_res[i]))
        if len(intersection) == 1 and (edge_res[i][1] == edge_t[0] or edge_res[i][0] == edge_t[1]):
            N_t_list.append(i)

    return N_t_list

def streaming_triangles(s_e, s_w):
    global edge_res, wedge_res

    k_t = []
    T_t = []

    # initialize edge_res, wedge_res with size s_e, s_w
    for i in range(s_e):
        edge_res.append((0, 0))
    for i in range(s_w):
        wedge_res.append((0, 0, 0))
        isClosed.append(False)

    for edge_t, t in zip(stream, range(1, len(stream) + 1)):
        update(edge_t, s_e, s_w, t)
        # count the fraction of entries in isClosed set to true
        p = sum(isClosed) / len(isClosed) if len(isClosed) != 0 else 0
        k_t.append(3 * p)
        T_t.append((p * t * t / s_e * (s_e - 1)) * tot_wedges)

    return T_t

def update(edge_t, s_e, s_w, t):
    global isClosed, tot_wedges, edge_res, wedge_res

    for i in range(s_w):
        if closed_by(wedge_res[i], edge_t):
            isClosed[i] = True

    edge_res_old = edge_res.copy()

    for i in range(s_e):
        # pick a random number from 0 to 1
        x = random.random()
        if x <= 1 / t:
            edge_res[i] = edge_t
        # if there were any updates of edge res
    if edge_res != edge_res_old:
        # update tot_wedges, the number of wedges formed by edge_res
        update_tot_wedges()
        N_t_list = calculate_N_t(edge_t)
        new_wedges = len(N_t_list)

        # print(edge_res)
        # print(wedge_res)

        for i in range(s_w):
            x = random.random()
            ratio = new_wedges / tot_wedges if tot_wedges != 0 else 0
            if x <= ratio:
                wedge_res[i] = (edge_t[0], edge_res[i][0], edge_res[i][1])
                wedge_res[i] = (edge_res[i][0], edge_res[i][1], edge_t[1])
                # pick uniform random wedge in N_t_list
                i = random.randint(0, len(N_t_list) - 1)
                index = N_t_list[i]
                wedge_res[index] = (edge_res[index][0], edge_t[0], edge_t[1]) # TODO: check if we save it in the right order
                isClosed[index] = False

In [12]:
stream = [(1, 2), (1, 3), (4, 1), (2, 3), (2, 4), (3, 4)]

print(streaming_triangles(6, 6))

[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]


## Final questions

1. What were the challenges you faced when implementing the algorithm?
2. Can the algorithm be easily parallelized? If yes, how? If not, why? Explain.
3. Does the algorithm work for unbounded graph streams? Explain.
4. Does the algorithm support edge deletions? If not, what modification would it need? Explain.