In [4]:
import random

In [5]:
def load_edges(path):
    edges = []
    with open(path, "r") as f:
        for line in f:
            edge = line.strip().split()
            if edge:
                edges.append(frozenset(edge))
    return edges

edges = load_edges("soc-Epinions1.txt")
print(f"Loaded {len(edges)} edges.")
edges[:5]

Loaded 508837 edges.


[frozenset({'0', '4'}),
 frozenset({'0', '5'}),
 frozenset({'0', '7'}),
 frozenset({'0', '8'}),
 frozenset({'0', '9'})]

In [3]:
def reservoir_sampling_base(edge, k, reservoir, i):
    if i < k:
        [reservoir.append(edge), True, i]
    else:
        p = random.randint(0, i)
        if p < k:
            replace = random.randint(0, k - 1)
            reservoir[replace] = edge
            return [reservoir, True, replace]
    return [reservoir, False, -1]

k = 1000
reservoir = []

for (i, edge) in enumerate(edges):
    [reservoir, _, _] = reservoir_sampling_base(edge, k, reservoir, i)
print(reservoir)
print(len(reservoir))

[frozenset({'12697', '645'}), frozenset({'12776', '47053'}), frozenset({'7412', '3920'}), frozenset({'485', '34'}), frozenset({'20435', '57633'}), frozenset({'549', '2643'}), frozenset({'43152', '27057'}), frozenset({'14374', '5551'}), frozenset({'64755', '70427'}), frozenset({'32059', '35689'}), frozenset({'38709', '5404'}), frozenset({'1975', '2030'}), frozenset({'2199', '58'}), frozenset({'32394', '3869'}), frozenset({'185', '393'}), frozenset({'9386', '348'}), frozenset({'46441', '46442'}), frozenset({'8787', '23194'}), frozenset({'2028', '761'}), frozenset({'14125', '6692'}), frozenset({'388', '14'}), frozenset({'32077', '32074'}), frozenset({'63655', '32753'}), frozenset({'1606', '1392'}), frozenset({'917', '10703'}), frozenset({'4358', '3540'}), frozenset({'10395', '48030'}), frozenset({'12952', '2028'}), frozenset({'1033', '2420'}), frozenset({'3111', '29801'}), frozenset({'136', '105'}), frozenset({'398', '424'}), frozenset({'38695', '5392'}), frozenset({'1225', '4454'}), froz

In [4]:
def generate_wedges(edge, edge_reservoir):
    wedges = []
    total_wedges = 0
    for res_edge in edge_reservoir:
        if len(edge.intersection(res_edge)) == 1:
            wedges.append((edge, res_edge))
            total_wedges += 1
    return [total_wedges, wedges]

print(generate_wedges(frozenset(['1', '2']), reservoir))


[4, [(frozenset({'1', '2'}), frozenset({'1', '3422'})), (frozenset({'1', '2'}), frozenset({'1', '7620'})), (frozenset({'1', '2'}), frozenset({'1', '7426'})), (frozenset({'1', '2'}), frozenset({'1', '1475'}))]]


In [15]:
def streaming_triangles(re_size, rw_size, stream):
    edge_reservoir = []
    wedge_reservoir = []
    is_closed = []
    total_wedges = 0
    totalTriangles = 0

    for i, edge in enumerate(stream):
        print(f"Processing edge {i+1}/{len(stream)}", end='\r')
        print(f"Current triangle estimate: {totalTriangles}", end='\r')
        [edge_reservoir, wedge_reservoir, is_closed, total_wedges] = update(edge, edge_reservoir, wedge_reservoir, is_closed, total_wedges, re_size, rw_size)
        p = sum(is_closed) / len(is_closed) if len(is_closed) > 0 else 0
        k = 3 * p
        totalTriangles = (p*i*i)/(re_size*(re_size-1)) * total_wedges
    return totalTriangles

def update(edge, edge_reservoir, wedge_reservoir, is_closed, total_wedges, re_size, rw_size):
    [new_edge_reservoir, edge_added, replaced_index] = reservoir_sampling_base(edge, re_size, edge_reservoir, len(edge_reservoir))
    edge_reservoir = new_edge_reservoir

    [new_wedges_count, new_wedges] = generate_wedges(edge, edge_reservoir)
    total_wedges += new_wedges_count

    for wedge in new_wedges:
        if len(wedge_reservoir) < rw_size:
            wedge_reservoir.append(wedge)
            is_closed.append(0)
        else:
            p = random.randint(0, total_wedges - 1)
            if p < rw_size:
                replace_index = random.randint(0, rw_size - 1)
                wedge_reservoir[replace_index] = wedge
                is_closed[replace_index] = 0

    return [edge_reservoir, wedge_reservoir, is_closed, total_wedges]
            
streaming_triangles(1000, 1000, edges)
    

Current triangle estimate: 0.0

KeyboardInterrupt: 