In [2]:
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import random
import time
import csv

In [3]:
def exact_netx(graph):
  """Count the triangles of ``graph`` exactly with NetworkX and time the call.

  ``nx.triangles(graph)`` is read as a mapping whose values are per-node
  triangle counts; the values are added up and divided by three because each
  triangle is reported once for each of its three corners.

  Returns:
    dict with ``"triangles"`` (the count, as a float because of the true
    division) and ``"time"`` (elapsed seconds measured with ``time.time``).
  """
  began = time.time()
  per_node_counts = nx.triangles(graph)
  corner_total = 0
  for count in per_node_counts.values():
    corner_total += count
  triangle_count = corner_total / 3
  elapsed = time.time() - began
  return {"triangles": triangle_count, "time": elapsed}

In [4]:
def exact_trace(graph):
  """Count triangles exactly from the adjacency matrix: ``trace(A^3) / 6``.

  For a simple undirected graph every triangle yields six closed walks of
  length three (three starting corners, two directions), hence the division
  by six.

  Only the diagonal of ``A^3`` is needed, so the cube is never formed.
  Instead ``trace(A^3) = sum_ij (A^2)_ij * A_ji`` is evaluated as an
  element-wise product of ``A @ A`` with ``A.T``, which saves one sparse
  matrix product and avoids materialising the denser ``A^3``.

  Returns:
    dict with ``"triangles"`` (the count) and ``"time"`` (elapsed seconds).
  """
  start = time.time()
  adj_matrix = nx.adjacency_matrix(graph, dtype = np.float64)
  two_step_walks = adj_matrix @ adj_matrix
  # The transpose keeps the identity valid even when the matrix is not
  # symmetric; for an undirected graph A.T equals A.
  closed_three_walks = two_step_walks.multiply(adj_matrix.T).sum()
  t = closed_three_walks/6
  end = time.time()
  return {"triangles": t, "time": end - start}

In [5]:
def uniform_sampling(graph, sample_size):
  """Estimate the triangle count from a uniform sample of nodes.

  ``sample_size`` distinct nodes are drawn without replacement. Their
  per-node triangle counts are summed, divided by three (each triangle is
  seen from three corners) and scaled by ``n / sample_size`` to extrapolate
  to the whole graph.

  Args:
    graph: graph exposing ``.nodes`` and accepted by ``nx.triangles``.
    sample_size: number of nodes to sample; must satisfy
      ``1 <= sample_size <= number of nodes``.

  Returns:
    dict with ``"triangles"`` (the estimate) and ``"time"`` (elapsed seconds).

  Raises:
    ValueError: if ``sample_size`` is outside ``1..n``. A size of zero would
      otherwise fail later with a ZeroDivisionError.
  """
  start = time.time()
  nodes = list(graph.nodes)
  n = len(nodes)
  if not 1 <= sample_size <= n:
    raise ValueError(
      f"sample_size must be between 1 and the number of nodes ({n}), got {sample_size}"
    )
  node_sample = random.sample(nodes, sample_size)
  # Each triangle is counted once per corner, hence the division by three.
  sample_t = sum(nx.triangles(graph, node) for node in node_sample)/3
  t = sample_t*n/sample_size
  end = time.time()
  return {"triangles": t, "time": end - start}

In [6]:
def random_sampling_with_degrees(graph, sample_size):
  """Estimate the triangle count by sampling nodes proportionally to degree.

  Node ``v`` is drawn (with replacement) with probability ``d_v / D``, where
  ``D`` is the sum of all degrees. With ``t_v`` the number of triangles
  through ``v``, the expectation of ``t_v / d_v`` under that distribution is
  ``3T / D``. The estimate is therefore

      T ~= D / (3 * sample_size) * sum(t_v / d_v over the sample)

  The division by ``sample_size`` is essential: without it the result grows
  linearly with the number of samples.

  Args:
    graph: graph that can be iterated over its nodes, exposes
      ``degree(node)`` and is accepted by ``nx.triangles``.
    sample_size: number of draws; must be at least 1.

  Returns:
    dict with ``"triangles"`` (the estimate) and ``"time"`` (elapsed seconds).

  Raises:
    ValueError: if ``sample_size`` is smaller than 1.
  """
  start = time.time()
  if sample_size < 1:
    raise ValueError(f"sample_size must be at least 1, got {sample_size}")
  nodes = list(graph)
  degrees = [graph.degree(node) for node in nodes]
  sum_of_degrees = sum(degrees)
  if sum_of_degrees == 0:
    # No edges means no triangles; random.choices would also reject
    # weights that sum to zero.
    t = 0.0
  else:
    # Zero-degree nodes carry zero weight, so the division below is safe.
    node_sample = random.choices(nodes, weights = degrees, k = sample_size)
    sample_t = 0
    for node in node_sample:
      sample_t += nx.triangles(graph, node)/graph.degree(node)
    t = sample_t*sum_of_degrees/(3*sample_size)
  end = time.time()
  return {"triangles": t, "time": end - start}

In [7]:
def hutchplusplus(graph, queries):
    """Estimate the triangle count as ``trace(A^3) / 6`` using Hutch++.

    With ``k = queries // 3`` random sign vectors per sketch:

    1. ``Q`` is an orthonormal basis for the range of ``A^3 S``.
    2. ``term_1 = trace(Q^T A^3 Q)`` is the exact trace on that subspace.
    3. ``term_2`` is a Hutchinson estimate, averaged over the ``k`` columns
       of ``G``, of the trace of ``A^3`` on the orthogonal complement of
       ``Q``.

    The trace estimate is ``term_1 + term_2``; both parts are needed.
    ``A^3`` is never formed: it is applied as three successive products.

    Args:
        graph: graph accepted by ``nx.adjacency_matrix``.
        queries: matrix-vector product budget; must be at least 3 so that
            each sketch has at least one column.

    Returns:
        dict with ``"triangles"`` (the estimate) and ``"time"`` (elapsed
        seconds).

    Raises:
        ValueError: if ``queries`` is smaller than 3.
    """
    start = time.time()
    k = queries//3
    if k < 1:
        raise ValueError(f"queries must be at least 3, got {queries}")
    A = nx.adjacency_matrix(graph, dtype = np.float64)
    d = A.shape[0]
    S = np.random.choice([1, -1], size = (d, k))
    G = np.random.choice([1, -1], size = (d, k))
    Q, _ = np.linalg.qr(A @ (A @ (A @ S)))
    term_1 = np.trace(Q.T @ (A @ (A @ (A @ Q))))
    # Project the probes onto the complement of span(Q).
    term_prod = G - Q @ (Q.T @ G)
    # Average over the k probe columns actually drawn; 3/queries would be off
    # whenever queries is not a multiple of 3.
    term_2 = np.trace(term_prod.T @ (A @ (A @ (A @ term_prod))))/k
    t = (term_1 + term_2)/6
    end = time.time()
    return {"triangles": t, "time": end - start}

In [8]:
# def lanczos(A, m):
#     n = A.shape[0]
#     v = [np.random.rand(n)]
#     w_prime = [A @ v[0]]
#     alpha = [w_prime[0] * v[0]]
#     beta = []
#     w = [w_prime[0] - alpha[0] @ v[0]]
#     for j in range(2, m + 1):
#         beta_j = np.linalg.norm(w[-1])
#         beta.push(beta_j)
#         if beta_j != 0:
#             v_j = w[-1]/beta_j
#         else:
#             v_j = #TODO
#         v.push(v_j)
#         w_prime.push(A @ v[-1])
#         alpha.push(w_prime[-1]*v[-1])


In [9]:
# def eigenTriangle(graph, sample_size, tol):
#   adj_matrix = nx.adjacency_matrix(graph, dtype = np.float64)
#   nodes = list(graph.nodes)
#   n = len(nodes)
#   node_sample = random.sample(nodes, sample_size)
#   for node in node_sample:
#     lambda_1 = lanczos(A, 1)
#     eigen = lambda_1
#     i = 2

In [8]:
# Sampling Testing Function
def testing_graph_methods_sampling(graph, accuracies, times,algo):
 
    exact_netx_result = exact_netx(graph)
    exact_triangles = exact_netx_result['triangles']
    print(f"Exact NetworkX Triangles: {exact_triangles}, Time: {exact_netx_result['time']}s")
 
    total_nodes = len(graph.nodes())
    sample_sizes = [int(total_nodes * percentage / 100) for percentage in range(5,55,5) ]
    algo_name=algo.__name__
 
# create for loop for trials and another for loop for sample size according to sampling; between 5% and 50% in increments of 5% f the sample size given.
    # evaluate each method with respective names according to the methods being used
    for sample_size in sample_sizes:
            method_accuracies = []
            method_times = []
 
            # Run 10 trials for the current method and sample size
            for trial in range(1, 11):
                print(f"Trial {trial} for {algo_name} with Sample Size: {sample_size}")
                
                result = algo(graph, sample_size)
                rel_error = abs(exact_triangles - result['triangles']) / exact_triangles
                method_accuracies.append(rel_error)
 
                method_times.append(result['time'])
                avg_acc = sum(method_accuracies) / len(method_accuracies)
                accuracies.append((algo_name, sample_size, avg_acc))
            avg_time = sum(method_times) / len(method_times)
            times.append((algo_name, sample_size, avg_time))
            print(f"{algo_name} with sample size: {sample_size}: average accuracy: {avg_acc}, average time: {avg_time}s")
 
# Result collectors filled in place by the benchmark harness.
accuracies = []
times = []

fb_graph = nx.read_edgelist('facebook_combined.txt', create_using=nx.Graph(), nodetype=int)

testing_graph_methods_sampling(fb_graph, accuracies, times,uniform_sampling)

print("Accuracies:", accuracies)
print("Times:", times)
# Append mode: every run of this cell adds its rows to the end of the file.
with open('output.csv', 'a', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerows(accuracies)
    # An empty row produces a blank separator line. Passing the string "\n"
    # would be treated as a one-field row and written as a quoted cell that
    # contains a newline.
    writer.writerow([])
    writer.writerows(times)
    writer.writerow([])


Exact NetworkX Triangles: 1612010.0, Time: 0.5682752132415771s
Trial 1 for uniform_sampling with Sample Size: 201
Trial 2 for uniform_sampling with Sample Size: 201
Trial 3 for uniform_sampling with Sample Size: 201
Trial 4 for uniform_sampling with Sample Size: 201
Trial 5 for uniform_sampling with Sample Size: 201
Trial 6 for uniform_sampling with Sample Size: 201
Trial 7 for uniform_sampling with Sample Size: 201
Trial 8 for uniform_sampling with Sample Size: 201
Trial 9 for uniform_sampling with Sample Size: 201
Trial 10 for uniform_sampling with Sample Size: 201
uniform_sampling with sample size: 201: average accuracy: 0.13478540840461398, average time: 0.13154542446136475s
Trial 1 for uniform_sampling with Sample Size: 403
Trial 2 for uniform_sampling with Sample Size: 403
Trial 3 for uniform_sampling with Sample Size: 403
Trial 4 for uniform_sampling with Sample Size: 403
Trial 5 for uniform_sampling with Sample Size: 403
Trial 6 for uniform_sampling with Sample Size: 403
Trial 

In [11]:
#Regular Testing Function
def testing_graph_methods_regular(graph, accuracies, times,algo):
 
    exact_netx_result = exact_netx(graph)
    exact_triangles = exact_netx_result['triangles']
    print(f"Exact NetworkX Triangles: {exact_triangles}, Time: {exact_netx_result['time']}s")
 
    total_nodes = len(graph.nodes())
    algo_name=algo.__name__
 
# create for loop for trials
    method_accuracies = []
    method_times = []
    # Run 10 trials for the current method
    for trial in range(1, 11):
        print(f"Trial {trial} for {algo_name}")
        result = algo(graph)
        rel_error = abs(exact_triangles - result['triangles']) / exact_triangles
        method_accuracies.append(rel_error)
        method_times.append(result['time'])
        avg_acc = sum(method_accuracies) / len(method_accuracies)
        accuracies.append((algo_name, avg_acc))
        avg_time = sum(method_times) / len(method_times)
        times.append((algo_name, avg_time))
        print(f"{algo_name} with average accuracy: {avg_acc}, average time: {avg_time}s")
 
 
# Result collectors filled in place by the benchmark harness.
accuracies = []
times = []

fb_graph = nx.read_edgelist('facebook_combined.txt', create_using=nx.Graph(), nodetype=int)

testing_graph_methods_regular(fb_graph, accuracies, times,exact_trace)

print("Accuracies:", accuracies)
print("Times:", times)
# Append mode: every run of this cell adds its rows to the end of the file.
with open('output.csv', 'a', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerows(accuracies)
    # An empty row produces a blank separator line. Passing the string "\n"
    # would be treated as a one-field row and written as a quoted cell that
    # contains a newline.
    writer.writerow([])
    writer.writerows(times)
    writer.writerow([])



Exact NetworkX Triangles: 1612010.0, Time: 0.48077869415283203s
Trial 1 for exact_trace
exact_trace with average accuracy: 0.0, average time: 1.404020071029663s
Trial 2 for exact_trace
exact_trace with average accuracy: 0.0, average time: 1.1388757228851318s
Trial 3 for exact_trace
exact_trace with average accuracy: 0.0, average time: 0.9810603459676107s
Trial 4 for exact_trace
exact_trace with average accuracy: 0.0, average time: 0.9242104291915894s
Trial 5 for exact_trace
exact_trace with average accuracy: 0.0, average time: 0.875962209701538s
Trial 6 for exact_trace
exact_trace with average accuracy: 0.0, average time: 0.8502724170684814s
Trial 7 for exact_trace
exact_trace with average accuracy: 0.0, average time: 0.8630331584385463s
Trial 8 for exact_trace
exact_trace with average accuracy: 0.0, average time: 0.8562715947628021s
Trial 9 for exact_trace
exact_trace with average accuracy: 0.0, average time: 0.8520763715108236s
Trial 10 for exact_trace
exact_trace with average accura