In [13]:
"""
This script benchmarks various network filtering techniques on simulated networks.

It performs the following steps:
1. Imports necessary modules and adds the parent directory to sys.path.
2. Loads simulated networks from pickle files.
3. Initializes a Filter instance with different filtering techniques.
4. Applies each filtering technique to each network and calculates the Jaccard distance.
5. Prints the results for each network and filtering technique.

The script uses custom modules for network filtering and benchmarking, and relies on
NetworkX for graph operations.
"""

import networkx as nx
import pickle
import sys
import os
import glob

# Make the project packages importable from this notebook.
# `__file__` is not defined inside a Jupyter kernel, so the location is
# derived from the kernel's working directory instead.

# Directory the kernel is currently running in
current_dir = os.getcwd()

# One level above the working directory (expected to be the project's src directory)
parent_dir = os.path.dirname(current_dir)

# Append the parent directory to sys.path only when it is not listed yet
if sys.path.count(parent_dir) == 0:
	sys.path.append(parent_dir)

# Project-local imports: the filtering techniques, the benchmark driver and
# the network-to-network indicator used to score the filtered graphs.
try:
	from net_filtering.filter import Filter
	from benchmark.net_filter_bench import bench_net2net_filtering
	from benchmark.net2net_indicators import calculate_jaccard_distance
except ImportError as import_error:
	# Print a hint about the expected layout, then re-raise so the cell still fails
	print(f"Error importing modules: {import_error}")
	print("Please ensure that the 'src' directory structure is correct and contains the necessary modules.")
	raise

# Get all simulated network files.
# The pattern is relative, so it is resolved against the kernel's working directory.
# glob returns matches in arbitrary order; sorting makes the order in which the
# networks are benchmarked and reported reproducible between runs.
simulated_nets_path = '../../data/simulated_nets/*weighted*.pickle'
simulated_net_files = sorted(glob.glob(simulated_nets_path))

if not simulated_net_files:
	# Carry the explanation in the exception itself so it shows up in the traceback
	raise FileNotFoundError(
		f"Error: No pickle files found in {simulated_nets_path}. "
		"Please ensure that the directory exists and contains pickle files."
	)

# One Filter instance; its bound methods are the techniques being benchmarked
filter_instance = Filter()

# Filtering techniques, looked up by method name in the order they are run
filtering_funcs = [
	getattr(filter_instance, technique_name)
	for technique_name in (
		'mst',
		'pmfg',
		'threshold',
		'local_degree_sparsifier',
		'random_edge_sparsifier',
		'simmelian_sparsifier',
		'disparity_filter',
		'overlapping_trees',
		'k_core_decomposition',
	)
]

# Run bench_net2net_filtering for each technique and each network.
# results maps network name -> {technique name -> score}.
results = {}
for net_file in simulated_net_files:
	# Network name = file name up to its first dot
	net_name = os.path.basename(net_file).split('.')[0]
	print(net_name)

	# Load the network
	# NOTE(review): pickle.load can execute arbitrary code while unpickling;
	# only load files that come from a trusted source.
	try:
		with open(net_file, 'rb') as f:
			original_net = pickle.load(f)
	except Exception as e:
		print(f"Error loading {net_file}: {e}")
		continue

	# Register the network only after a successful load, so a file that failed
	# to load is not reported later as an empty result section.
	results[net_name] = {}

	for func in filtering_funcs:
		# Prepare the filter function with default parameters.
		# `func` is bound through a keyword-only default argument: a plain closure
		# looks the loop variable up at call time, so a wrapper called after the
		# loop has advanced would silently run a different technique.
		if func == filter_instance.threshold:
			filter_func = lambda G, *, _func=func: _func(G, threshold=0.5)
		elif func in [filter_instance.local_degree_sparsifier, filter_instance.random_edge_sparsifier]:
			filter_func = lambda G, *, _func=func: _func(G, target_ratio=0.5)
		else:
			filter_func = func

		# Run the benchmark; the score is stored under the technique's method name
		score = bench_net2net_filtering(original_net, filter_func, calculate_jaccard_distance)
		results[net_name][func.__name__] = score

# Report one section per network, one line per filtering technique
for net_name in results:
	net_results = results[net_name]
	print(f"\nResults for {net_name}:")
	for technique in net_results:
		score = net_results[technique]
		print(f"  {technique}: Jaccard Distance = {score:.4f}")


barabasi_albert_graph_weighted
random_graph_weighted
grid_graph_weighted

Results for barabasi_albert_graph_weighted:
  mst: Jaccard Distance = 0.5468
  pmfg: Jaccard Distance = 0.2100
  threshold: Jaccard Distance = 0.6221
  local_degree_sparsifier: Jaccard Distance = 0.5000
  random_edge_sparsifier: Jaccard Distance = 0.5000
  simmelian_sparsifier: Jaccard Distance = 0.0000
  disparity_filter: Jaccard Distance = 0.9694
  overlapping_trees: Jaccard Distance = 0.1378
  k_core_decomposition: Jaccard Distance = 0.0000

Results for random_graph_weighted:
  mst: Jaccard Distance = 0.8852
  pmfg: Jaccard Distance = 0.7751
  threshold: Jaccard Distance = 0.7354
  local_degree_sparsifier: Jaccard Distance = 0.5000
  random_edge_sparsifier: Jaccard Distance = 0.5000
  simmelian_sparsifier: Jaccard Distance = 0.2848
  disparity_filter: Jaccard Distance = 1.0000
  overlapping_trees: Jaccard Distance = 0.5084
  k_core_decomposition: Jaccard Distance = 0.0633

Results for grid_graph_weighted:
  ms

In [11]:
from benchmark.net_filter_bench import bench_noise_filtering

# Run bench_noise_filtering for each technique and each network.
# results maps network name -> {technique name -> score}; assigning it here
# replaces any dictionary of the same name from an earlier cell.
results = {}
for net_file in simulated_net_files:
	# Network name = file name up to its first dot
	net_name = os.path.basename(net_file).split('.')[0]

	# Load the network
	# NOTE(review): pickle.load can execute arbitrary code while unpickling;
	# only load files that come from a trusted source.
	try:
		with open(net_file, 'rb') as f:
			original_net = pickle.load(f)
	except Exception as e:
		print(f"Error loading {net_file}: {e}")
		continue

	# Register the network only after a successful load, so a file that failed
	# to load is not reported later as an empty result section.
	results[net_name] = {}

	for func in filtering_funcs:
		# Prepare the filter function with default parameters.
		# `func` is bound through a keyword-only default argument: a plain closure
		# looks the loop variable up at call time, so a wrapper called after the
		# loop has advanced would silently run a different technique.
		if func == filter_instance.threshold:
			filter_func = lambda G, *, _func=func: _func(G, threshold=0.5)
		elif func in [filter_instance.local_degree_sparsifier, filter_instance.random_edge_sparsifier]:
			filter_func = lambda G, *, _func=func: _func(G, target_ratio=0.5)
		else:
			filter_func = func

		# Run the benchmark; the score is stored under the technique's method name
		score = bench_noise_filtering(original_net, filter_func)
		results[net_name][func.__name__] = score

# Report one section per network, one line per filtering technique
for net_name in results:
	net_results = results[net_name]
	print(f"\nResults for {net_name}:")
	for technique in net_results:
		score = net_results[technique]
		print(f"  {technique}: Jaccard Score = {score:.4f}")



Results for random_graph:
  local_degree_sparsifier: Jaccard Score = 0.4260
  random_edge_sparsifier: Jaccard Score = 0.4082
  simmelian_sparsifier: Jaccard Score = 0.5323
  disparity_filter: Jaccard Score = 0.0000
  overlapping_trees: Jaccard Score = 0.3521
  k_core_decomposition: Jaccard Score = 0.6271

Results for barabasi_albert_graph:
  local_degree_sparsifier: Jaccard Score = 0.4298
  random_edge_sparsifier: Jaccard Score = 0.4342
  simmelian_sparsifier: Jaccard Score = 0.7626
  disparity_filter: Jaccard Score = 0.0000
  overlapping_trees: Jaccard Score = 0.6074
  k_core_decomposition: Jaccard Score = 0.2819

Results for grid_graph:
  local_degree_sparsifier: Jaccard Score = 0.6650
  random_edge_sparsifier: Jaccard Score = 0.3992
  simmelian_sparsifier: Jaccard Score = 0.7519
  disparity_filter: Jaccard Score = 0.0000
  overlapping_trees: Jaccard Score = 0.6617
  k_core_decomposition: Jaccard Score = 1.0000
