In [None]:
import snap
import random
import zlib
import csv
from scipy import stats # for scipy.stats.kstest
import statistics

# Graph Sampling

In [None]:


def get_color(vertex, k, perm):
    """Map a vertex id to one of ``k`` colors through a lookup table.

    The vertex id is reduced modulo the length of ``perm`` to select a slot
    in the table, and the value stored in that slot is reduced modulo ``k``.

    Args:
        vertex: Integer vertex id.
        k: Number of colors; must be a non-zero integer.
        perm: Non-empty sequence of integers used as the lookup table
            (``graph_sampling`` passes a shuffled ``range(1000)``).

    Returns:
        The integer ``perm[vertex % len(perm)] % k``.

    Raises:
        ZeroDivisionError: If ``k`` is 0 or ``perm`` is empty.
    """
    # Index by len(perm) instead of a hard-coded 1000 so that tables of any
    # size work; for a 1000-entry table the result is unchanged.
    return perm[vertex % len(perm)] % k

def graph_sampling(stream, k):
    """Build a sampled undirected graph from an edge stream by vertex coloring.

    Every vertex is given one of ``k`` colors through a freshly shuffled
    1000-entry table (see ``get_color``); an edge is kept only when both of
    its endpoints receive the same color.

    Args:
        stream: Iterable of rows. The first two fields of each row are
            converted with ``int`` and used as the edge endpoints. Empty
            rows and rows whose first field is a string starting with ``#``
            (comment/header lines of an edge-list file) are skipped.
        k: Number of colors.

    Returns:
        The graph created by ``snap.TUNGraph.New()``, holding every retained
        edge together with its endpoints.

    Raises:
        ValueError: If an endpoint field cannot be converted to ``int``.
        IndexError: If a non-empty, non-comment row has fewer than two fields.

    Note:
        The table is shuffled with the module-level ``random`` generator, so
        the sample changes between runs unless ``random.seed`` is called
        beforehand.
    """
    perm = list(range(1000))
    random.shuffle(perm)

    G = snap.TUNGraph.New()
    for edge in stream:
        # Blank lines and '#' comment/header lines would otherwise crash in
        # int(); rows that already hold integers pass straight through.
        if len(edge) == 0:
            continue
        head = edge[0]
        if isinstance(head, str) and head.lstrip().startswith('#'):
            continue

        u = int(edge[0])
        v = int(edge[1])
        if get_color(u, k, perm) != get_color(v, k, perm):
            continue

        # Nodes must exist before an edge between them can be added.
        if not G.IsNode(u):
            G.AddNode(u)
        if not G.IsNode(v):
            G.AddNode(v)
        G.AddEdge(u, v)
    return G

def graph_sampling_file(path, seperator, k):
    """Sample a graph from a delimited edge-list file.

    Args:
        path: Path of the edge-list file to read.
        seperator: Field delimiter handed to ``csv.reader``.
        k: Number of colors forwarded to ``graph_sampling``.

    Returns:
        Whatever ``graph_sampling`` returns for the rows of the file.
    """
    # The context manager closes the file once sampling finishes or fails.
    # graph_sampling consumes the whole stream before returning, so nothing
    # reads from the handle after it has been closed.
    with open(path, newline='') as f:
        stream = csv.reader(f, delimiter=seperator)
        return graph_sampling(stream, k)

# Triangle and Open-Wedge Heavy-Hitter

In [None]:
def triangle_hh(G, a, e):
    """Collect the ids of nodes whose triangle count passes the threshold.

    A node is reported when its per-node value from ``snap.GetNodeTriads`` is
    strictly greater than ``(a - e/2)`` times the graph-wide value returned
    by ``snap.GetTriads``.

    Args:
        G: snap graph to inspect.
        a: Heavy-hitter fraction.
        e: Slack; half of it is subtracted from ``a`` before thresholding.

    Returns:
        List of node ids, in the iteration order of ``G.Nodes()``.
    """
    total = snap.GetTriads(G)
    node_ids = (node.GetId() for node in G.Nodes())
    return [
        nid
        for nid in node_ids
        if snap.GetNodeTriads(G, nid) > (a - e/2)*total
    ]

In [None]:
def open_wedge_hh(G, a, e):
    """Collect the ids of nodes whose open-wedge count passes the threshold.

    The graph-wide total is element ``[2]`` of ``snap.GetTriadsAll(G)`` and
    the per-node value is element ``[2]`` of ``snap.GetNodeTriadsAll(G, v)``.
    A node is reported when its value is strictly greater than ``(a - e/2)``
    times the total.

    Args:
        G: snap graph to inspect.
        a: Heavy-hitter fraction.
        e: Slack; half of it is subtracted from ``a`` before thresholding.

    Returns:
        List of node ids, in the iteration order of ``G.Nodes()``.
    """
    total_open = snap.GetTriadsAll(G)[2]
    node_ids = (node.GetId() for node in G.Nodes())
    return [
        nid
        for nid in node_ids
        if snap.GetNodeTriadsAll(G, nid)[2] > (a - e/2)*total_open
    ]

# Exact Computation

In [None]:
def exact_computation(path, seperator, a):
    """Compute heavy hitters on the full graph loaded from an edge-list file.

    The graph is loaded with ``snap.LoadEdgeList`` (columns 0 and 1 as the
    endpoints) and the per-node triples filled in by ``snap.GetTriads`` are
    scanned once. For each triple, ``GetVal1()`` is reported as a triangle
    heavy hitter when ``GetVal2()`` exceeds ``a`` times element ``[1]`` of
    ``snap.GetTriadsAll``, and as an open-wedge heavy hitter when
    ``GetVal3()`` exceeds ``a`` times element ``[2]``.

    NOTE(review): ``triangle_hh`` normalises by ``snap.GetTriads(G)`` whereas
    this function uses ``snap.GetTriadsAll(G)[1]``; confirm that both denote
    the same total before comparing the two result sets.

    Args:
        path: Path of the edge-list file.
        seperator: Field separator passed to ``snap.LoadEdgeList``.
        a: Heavy-hitter fraction.

    Returns:
        A two-element list ``[triangle_hitters, wedge_hitters]``, each a list
        of ``GetVal1()`` values in the order the triples are iterated.
    """
    graph = snap.LoadEdgeList(snap.PUNGraph, path, 0, 1, seperator)
    totals = snap.GetTriadsAll(graph)

    # snap.GetTriads fills this vector in place with one triple per entry.
    per_node = snap.TIntTrV()
    snap.GetTriads(graph, per_node)

    triangle_hitters, wedge_hitters = [], []
    for record in per_node:
        if record.GetVal2() > a*totals[1]:
            triangle_hitters.append(record.GetVal1())
        if record.GetVal3() > a*totals[2]:
            wedge_hitters.append(record.GetVal1())
    return [triangle_hitters, wedge_hitters]

# Experiment

## NotreDame

In [None]:
%%time

# Heavy-hitter fraction `a` and slack `e` for the NotreDame runs; `a` is also
# read by the exact-computation cell that follows, so run this cell first.
a = 0.000001
e = a/1.2

# Sample the graph with 6 colors: only edges whose endpoints share a color are
# kept. The sample depends on the unseeded `random` shuffle in graph_sampling.
G = graph_sampling_file('data/web-NotreDame.txt', '\t', 6)

# Heavy hitters computed on the sampled graph.
Ht = triangle_hh(G, a, e)
Hw = open_wedge_hh(G, a, e)

# Number of triangle / open-wedge heavy hitters found in the sample.
print(len(Ht), len(Hw))

In [None]:
%%time

# Exact heavy hitters on the full NotreDame graph, reusing the global fraction
# `a` assigned in the sampled-run cell above.
H = exact_computation('data/web-NotreDame.txt', '\t', a)
# H[0] holds the triangle heavy hitters, H[1] the open-wedge heavy hitters.
print(len(H[0]), len(H[1]))

## YouTube

In [None]:
%%time

# Sample the YouTube graph with 8 colors. This rebinds the globals G, Ht and
# Hw that the NotreDame cell also assigns.
G = graph_sampling_file('data/com-youtube.ungraph.txt', '\t', 8)

# Fraction 0.005 with slack 0.005/2, passed as literals rather than through
# the globals `a` and `e`.
Ht = triangle_hh(G, 0.005, 0.005/2)
Hw = open_wedge_hh(G, 0.005, 0.005/2)

# Number of triangle / open-wedge heavy hitters found in the sample.
print(len(Ht), len(Hw))

In [None]:
%%time

# Exact heavy hitters on the full YouTube graph, using the same fraction
# (0.005) as the sampled run.
H = exact_computation('data/com-youtube.ungraph.txt', '\t', 0.005)
# H[0] holds the triangle heavy hitters, H[1] the open-wedge heavy hitters.
print(len(H[0]), len(H[1]))