In [5]:
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import random
import time


In [6]:
def exact_netx(graph):
  """Count the triangles of ``graph`` exactly with NetworkX.

  ``nx.triangles`` reports, for every node, how many triangles that node is a
  corner of, so each triangle shows up three times in the sum; dividing by 3
  yields the number of distinct triangles.

  Args:
    graph: An undirected NetworkX graph.

  Returns:
    dict: ``{"triangles": <count as float>, "time": <elapsed seconds>}``.
  """
  # perf_counter is monotonic and high resolution, so the measured duration
  # cannot be skewed by system clock adjustments the way time.time() can.
  start = time.perf_counter()
  t = sum(nx.triangles(graph).values())/3
  end = time.perf_counter()
  return {"triangles": t, "time": end - start}

In [7]:
def exact_trace(graph):
  """Count the triangles of ``graph`` exactly as ``trace(A^3) / 6``.

  Entry ``(i, i)`` of the cubed 0/1 adjacency matrix counts the closed walks
  of length 3 through node ``i``.  In a simple undirected graph every triangle
  produces six such walks (three starting corners times two directions),
  hence the division by 6.

  Args:
    graph: An undirected NetworkX graph.  It is not modified.

  Returns:
    dict: ``{"triangles": <count as float>, "time": <elapsed seconds>}``.
  """
  start = time.perf_counter()
  if graph.number_of_nodes() == 0:
    # adjacency_matrix rejects a graph without nodes; it has no triangles.
    t = 0.0
  else:
    if nx.number_of_selfloops(graph) > 0:
      # A self-loop puts a 1 on the diagonal of A and adds closed 3-walks that
      # are not triangles (nx.triangles ignores self-loops).  Strip them from
      # a copy so the caller's graph stays untouched.
      graph = graph.copy()
      graph.remove_edges_from(list(nx.selfloop_edges(graph)))
    # weight=None forces a 0/1 matrix; the default would read each edge's
    # 'weight' attribute and turn the trace into a weighted walk sum.
    adj_matrix = nx.adjacency_matrix(graph, dtype = np.float64, weight = None)
    adj_matrix_cubed = adj_matrix @ adj_matrix @ adj_matrix
    t = adj_matrix_cubed.trace()/6
  end = time.perf_counter()
  return {"triangles": t, "time": end - start}

In [8]:
def uniform_sampling(graph, sample_size):
  """Estimate the triangle count from a uniform sample of nodes.

  ``sample_size`` distinct nodes are drawn uniformly at random.  The number of
  triangles each sampled node takes part in is summed, divided by 3 (every
  triangle has three corners) and scaled by ``n / sample_size`` to extrapolate
  from the sample to all ``n`` nodes.

  Args:
    graph: An undirected NetworkX graph.
    sample_size: Number of distinct nodes to sample, between 1 and the number
      of nodes in ``graph``.

  Returns:
    dict: ``{"triangles": <estimate>, "time": <elapsed seconds>}``.

  Raises:
    ValueError: If ``sample_size`` is smaller than 1 or larger than the number
      of nodes.
  """
  start = time.perf_counter()
  nodes = list(graph.nodes)
  n = len(nodes)
  if not 1 <= sample_size <= n:
    # An empty sample would divide by zero below and an oversized one cannot
    # be drawn without replacement; fail early with an explicit message.
    raise ValueError(
        f"sample_size must be between 1 and the number of nodes ({n}), got {sample_size}"
    )
  node_sample = random.sample(nodes, sample_size)
  # Passing the whole sample as a list returns a {node: triangles} mapping in
  # a single call instead of one NetworkX call per node.
  sample_t = sum(nx.triangles(graph, node_sample).values())/3
  t = sample_t*n/sample_size
  end = time.perf_counter()
  return {"triangles": t, "time": end - start}

In [10]:
def random_sampling_with_degrees(graph, sample_size):
  """Estimate the triangle count by sampling nodes proportionally to degree.

  Nodes are drawn with replacement with probability ``deg(v) / D``, where
  ``D`` is the sum of all degrees.  With ``t(v)`` the number of triangles
  through ``v``, the expected value of ``t(v) / deg(v)`` under that
  distribution is ``sum_v t(v) / D = 3T / D``, so the triangle count ``T`` is
  estimated as ``mean(t(v) / deg(v)) * D / 3``.

  Args:
    graph: An undirected NetworkX graph.
    sample_size: Number of draws (with replacement); must be at least 1.

  Returns:
    dict: ``{"triangles": <estimate>, "time": <elapsed seconds>}``.

  Raises:
    ValueError: If ``sample_size`` is smaller than 1.
  """
  start = time.perf_counter()
  if sample_size < 1:
    raise ValueError(f"sample_size must be at least 1, got {sample_size}")
  nodes = []
  degrees = []
  for node, degree in graph.degree():
    nodes.append(node)
    degrees.append(degree)
  sum_of_degrees = sum(degrees)
  if sum_of_degrees == 0:
    # No edges means no triangles; random.choices would also refuse weights
    # that are all zero.
    t = 0.0
  else:
    node_sample = random.choices(nodes, weights = degrees, k = sample_size)
    # A node may be drawn several times; ask NetworkX once per distinct node.
    triangles_of = nx.triangles(graph, list(set(node_sample)))
    degree_of = dict(zip(nodes, degrees))
    sample_t = sum(triangles_of[node]/degree_of[node] for node in node_sample)
    # Average over the draws (divide by sample_size) before rescaling;
    # without it the estimate grows linearly with the number of draws.
    t = sample_t*sum_of_degrees/(3*sample_size)
  end = time.perf_counter()
  return {"triangles": t, "time": end - start}

In [11]:
def hutchplusplus(graph, queries):
    """Estimate the triangle count with a Hutch++ estimate of ``trace(A^3)``.

    The number of triangles equals ``trace(A^3) / 6`` for the 0/1 adjacency
    matrix ``A`` of a simple undirected graph.  Hutch++ splits the trace into
    two parts:

    * an exact part on a low-dimensional subspace ``Q`` obtained by
      orthonormalising ``A^3 @ S`` for a random sign matrix ``S``;
    * a Hutchinson estimate, using a random sign matrix ``G``, of the
      remainder after projecting ``Q`` out.

    ``A^3`` is never formed: it is applied as three successive sparse
    products, and the projector ``I - Q Q^T`` is applied as
    ``G - Q (Q^T G)`` instead of being built as a dense ``d x d`` matrix.

    Args:
        graph: An undirected NetworkX graph with at least one node.  It is not
            modified.
        queries: Total budget of block products with ``A^3``; a third is spent
            on each of ``S``, ``Q`` and ``G``.  Must be at least 3.

    Returns:
        dict: ``{"triangles": <estimate>, "time": <elapsed seconds>}``.

    Raises:
        ValueError: If ``queries`` is smaller than 3.
    """
    start = time.perf_counter()
    # Array dimensions must be integers, so use floor division.
    k = int(queries) // 3
    if k < 1:
        raise ValueError(f"queries must be at least 3, got {queries}")

    if nx.number_of_selfloops(graph) > 0:
        # Self-loops add closed 3-walks that are not triangles; strip them
        # from a copy so the caller's graph stays untouched.
        graph = graph.copy()
        graph.remove_edges_from(list(nx.selfloop_edges(graph)))
    # weight=None forces a 0/1 matrix regardless of edge 'weight' attributes.
    adj = nx.adjacency_matrix(graph, dtype = np.float64, weight = None)
    d = adj.shape[0]

    def apply_cubed(block):
        # A^3 @ block, evaluated right to left so every product is
        # (sparse matrix) @ (dense d x k block).
        return adj @ (adj @ (adj @ block))

    S = np.random.choice([1.0, -1.0], size = (d, k))
    G = np.random.choice([1.0, -1.0], size = (d, k))
    Q, _ = np.linalg.qr(apply_cubed(S))

    # (I - Q Q^T) G without materialising the d x d projector.
    G_perp = G - Q @ (Q.T @ G)

    # trace(X^T M X) equals the sum of the elementwise product X * (M X).
    low_rank_part = np.sum(Q * apply_cubed(Q))
    residual_part = np.sum(G_perp * apply_cubed(G_perp)) / k

    t = (low_rank_part + residual_part)/6
    end = time.perf_counter()
    return {"triangles": t, "time": end - start}

In [8]:
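# TODO: unfinished Lanczos draft, kept commented out. Before enabling it: Python
# lists use `.append` (there is no `.push`), the `v_j = #TODO` branch has no
# value yet, and the function never returns a result.
# def lanczos(A, m):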
#     n = A.shape[0]
#     v = [np.random.rand(n)]
#     w_prime = [A @ v[0]]
#     alpha = [w_prime[0] * v[0]]
#     beta = []
#     w = [w_prime[0] - alpha[0] @ v[0]]
#     for j in range(2, m + 1):
#         beta_j = np.linalg.norm(w[-1])
#         beta.push(beta_j)
#         if beta_j != 0:
#             v_j = w[-1]/beta_j
#         else:
#             v_j = #TODO
#         v.push(v_j)
#         w_prime.push(A @ v[-1])
#         alpha.push(w_prime[-1]*v[-1])


In [9]:
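# TODO: unfinished EigenTriangle draft, kept commented out. Note that `A` is never
# defined here (the matrix is bound to `adj_matrix`) and the loop body is incomplete.
# def eigenTriangle(graph, sample_size, tol):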
#   adj_matrix = nx.adjacency_matrix(graph, dtype = np.float64)
#   nodes = list(graph.nodes)
#   n = len(nodes)
#   node_sample = random.sample(nodes, sample_size)
#   for node in node_sample:
#     lambda_1 = lanczos(A, 1)
#     eigen = lambda_1
#     i = 2

In [12]:
def testing_graph_methods(graph, accuracies, times):
 
    exact_netx_result = exact_netx(graph)
    exact_triangles = exact_netx_result['triangles']
    print(f"Exact NetworkX Triangles: {exact_triangles}, Time: {exact_netx_result['time']}s")
 
    total_nodes = len(graph.nodes())
    sample_sizes = [int(total_nodes * percentage / 100) for percentage in range(1, 51)]
 
    methods = {
        'exact_netx': exact_netx,
        'exact_trace': exact_trace,
        'uniform_sampling': lambda g: uniform_sampling(g, 404),
        # sample size is currently 404 but can be changed throughout testing
        'random_sampling_with_degrees': lambda g: random_sampling_with_degrees(g, 404),
        # 'hutchplusplus': lambda g: hutchplusplus(g, queries)  # Uncomment when fixed by Om
    }
# create for loop for trials and another for loop for sample size according to sampling; between 1% and 50% of the sample size given.
    # evaluate each method with respective names according to the methods being used
    for method_name, method_func in methods.items():
        for sample_size in sample_sizes:
            
            method_accuracies = []
            method_times = []
 
            # Run 10 trials for the current method and sample size
            for trial in range(1, 11):
                print(f"Trial {trial} for {method_name} with Sample Size: {sample_size}")
                
                if method_name in ['uniform_sampling', 'random_sampling_with_degrees']:
                    result = method_func(graph, sample_size)
                else:
                    result = method_func(graph)
 
                if method_name not in ['exact_netx', 'exact_trace']:
                    rel_error = abs(exact_triangles - result['triangles']) / exact_triangles
                    method_accuracies.append(rel_error)
 
                method_times.append(result['time'])
 
            if method_accuracies:
                avg_acc = sum(method_accuracies) / len(method_accuracies)
                accuracies.append((method_name, sample_size, avg_acc))
            avg_time = sum(method_times) / len(method_times)
            times.append((method_name, sample_size, avg_time))
            print(f"{method_name} with sample size: {sample_size}: average accuracy: {avg_acc if method_accuracies else 'n/a'}, average time: {avg_time}s")
 
 
# Result accumulators; testing_graph_methods appends to both lists in place.
accuracies, times = [], []

# Read the edge list as an undirected graph whose node labels are parsed as ints.
fb_graph = nx.read_edgelist(
    'facebook_combined.txt',
    nodetype=int,
    create_using=nx.Graph(),
)

testing_graph_methods(fb_graph, accuracies=accuracies, times=times)

print("Accuracies:", accuracies)
print("Times:", times)

{'triangles': 1612010.0, 'time': 0.6290624141693115}


ModuleNotFoundError: No module named 'scipy'