In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import re
import time
import sys
from tqdm import tqdm
from sklearn.metrics import mean_absolute_error

class HistogramDistanceEstimator:
    def __init__(self, histograms_dir="../../large_files/traditional_methods/histogram"):
        """
        Initialize the histogram-based distance estimator
        """
        self.histograms_dir = histograms_dir
        self.histograms = {}
        self.metadata = {}
        self.cache = {}
        
        os.makedirs(f"{histograms_dir}/results/distance", exist_ok=True)
        os.makedirs(f"{histograms_dir}/visualizations/distance", exist_ok=True)
        
        self.load_histograms()
    
    def load_histograms(self):
        """Load all available histograms from the histograms directory."""
        files = os.listdir(self.histograms_dir)
        histogram_files = [f for f in files if f.endswith('_histogram.npy')]
        
        for hf in histogram_files:
            dataset_name = hf.replace('_histogram.npy', '')
            metadata_file = f"{dataset_name}_metadata.npy"
            
            if os.path.exists(os.path.join(self.histograms_dir, metadata_file)):
                print(f"Loading histogram for {dataset_name}...")
                sys.stdout.flush()
                self.histograms[dataset_name] = np.load(
                    os.path.join(self.histograms_dir, hf)
                )
                self.metadata[dataset_name] = np.load(
                    os.path.join(self.histograms_dir, metadata_file),
                    allow_pickle=True
                ).item()
                
        print(f"Loaded {len(self.histograms)} histograms")
        sys.stdout.flush()

    def parse_mbr(self, mbr_str):
        """Parse MBR string from '(x1, y1, x2, y2)'."""
        if isinstance(mbr_str, str):
            coords = mbr_str.strip('"()').split(', ')
            return [float(coord) for coord in coords]
        return mbr_str
    
    def estimate_distance_count(self, dataset_name, object_mbr, min_distance, max_distance):
        """
        Optimized method to estimate objects within a distance range using rectangle pre-filtering
        """
        # Handle caching and input parsing as before
        if isinstance(object_mbr, list):
            object_mbr = tuple(object_mbr)
        
        cache_key = f"{dataset_name}_{object_mbr}_{min_distance}_{max_distance}"
        if cache_key in self.cache:
            return self.cache[cache_key]
        
        if dataset_name not in self.histograms:
            raise ValueError(f"No histogram found for {dataset_name}")
        
        if isinstance(object_mbr, str):
            object_mbr = self.parse_mbr(object_mbr)
            object_mbr = tuple(object_mbr)
        
        grid = self.histograms[dataset_name]
        metadata = self.metadata[dataset_name]
        grid_dim_x, grid_dim_y = metadata['dimensions']
        univ_xmin, univ_ymin, univ_xmax, univ_ymax = metadata['universe']
        
        # Extract object MBR coordinates and calculate center
        o_xmin, o_ymin, o_xmax, o_ymax = object_mbr
        o_center_x = (o_xmin + o_xmax) / 2
        o_center_y = (o_ymin + o_ymax) / 2
        
        # Calculate cell dimensions
        cell_width = (univ_xmax - univ_xmin) / grid_dim_x
        cell_height = (univ_ymax - univ_ymin) / grid_dim_y
        
        # Calculate inner and outer rectangle bounds based on min/max distances
        inner_rect = (
            o_center_x - min_distance, 
            o_center_y - min_distance, 
            o_center_x + min_distance, 
            o_center_y + min_distance
        )
        
        outer_rect = (
            o_center_x - max_distance, 
            o_center_y - max_distance, 
            o_center_x + max_distance, 
            o_center_y + max_distance
        )
        
        # Convert rectangle bounds to grid cell indices
        inner_min_i = max(0, int((inner_rect[0] - univ_xmin) / cell_width))
        inner_min_j = max(0, int((inner_rect[1] - univ_ymin) / cell_height))
        inner_max_i = min(grid_dim_x - 1, int((inner_rect[2] - univ_xmin) / cell_width))
        inner_max_j = min(grid_dim_y - 1, int((inner_rect[3] - univ_ymin) / cell_height))
        
        outer_min_i = max(0, int((outer_rect[0] - univ_xmin) / cell_width))
        outer_min_j = max(0, int((outer_rect[1] - univ_ymin) / cell_height))
        outer_max_i = min(grid_dim_x - 1, int((outer_rect[2] - univ_xmin) / cell_width))
        outer_max_j = min(grid_dim_y - 1, int((outer_rect[3] - univ_ymin) / cell_height))
        
        # Create a mask for all cells in the outer rectangle
        outer_mask = np.zeros((grid_dim_x, grid_dim_y), dtype=bool)
        outer_mask[outer_min_i:outer_max_i+1, outer_min_j:outer_max_j+1] = True
        
        # Create a mask for all cells in the inner rectangle
        inner_mask = np.zeros((grid_dim_x, grid_dim_y), dtype=bool)
        inner_mask[inner_min_i:inner_max_i+1, inner_min_j:inner_max_j+1] = True
        
        # Get the cells between the inner and outer rectangles
        candidate_cells = outer_mask & ~inner_mask
        
        # Create cell center coordinates only for candidate cells
        candidate_indices = np.where(candidate_cells)
        cell_centers_x = univ_xmin + (candidate_indices[0] + 0.5) * cell_width
        cell_centers_y = univ_ymin + (candidate_indices[1] + 0.5) * cell_height
        
        # Calculate distances only for candidate cells
        distances = np.sqrt((cell_centers_x - o_center_x)**2 + (cell_centers_y - o_center_y)**2)
        
        # Create distance mask for accurate filtering
        in_range_mask = (distances >= min_distance) & (distances <= max_distance)
        
        # Sum objects only from cells that are truly in range
        valid_cells = candidate_indices[0][in_range_mask], candidate_indices[1][in_range_mask]
        objects_in_range = np.sum(grid[valid_cells])
        
        # Add any objects in the inner rectangle when min_distance is 0
        if min_distance == 0:
            objects_in_range += np.sum(grid[inner_min_i:inner_max_i+1, inner_min_j:inner_max_j+1])
        
        # Cache and return the result
        self.cache[cache_key] = objects_in_range
        return objects_in_range
    
    def evaluate_on_dataset(self, dataset_name, results_file=None, sample_ratio=0.2):
        if not results_file:
            results_file = f"../../large_files/resultsDistance/{dataset_name}_results.csv"
        
        if not os.path.exists(results_file):
            raise ValueError(f"Results file not found: {results_file}")
        
        print(f"Loading query results from {results_file}")
        sys.stdout.flush()
        
        # Load results file - assume CSV format with Object MBR, Distance Min, Distance Max, Count MBR
        results_df = pd.read_csv(results_file)
        
        sample_size = max(1, int(len(results_df) * sample_ratio))
        print(f"Using {sample_ratio*100}% sample: {sample_size} out of {len(results_df)} queries")
        sys.stdout.flush()
        
        sampled_results = results_df.sample(n=sample_size, random_state=42)
        
        actual_counts = []
        estimated_counts = []
        estimation_times = []
        
        # Use simple progress reporting instead of relying solely on tqdm
        print(f"Processing {dataset_name} queries: ", end="", flush=True)
        sys.stdout.flush()
        
        total_queries = len(sampled_results)
        progress_step = max(1, total_queries // 10)
        
        for i, (index, row) in enumerate(sampled_results.iterrows()):
            # Show simple progress every 10%
            if i % progress_step == 0 or i == total_queries - 1:
                progress = (i+1) / total_queries * 100
                print(f"{progress:.1f}%... ", end="", flush=True)
                sys.stdout.flush()
            
            # Parse query parameters - object MBR, min and max distance
            object_mbr = self.parse_mbr(row['Object MBR'])
            min_distance = row['Distance Min']
            max_distance = row['Distance Max']
            actual_count = row['Count MBR']
            
            start_time = time.time()
            estimated_count = self.estimate_distance_count(dataset_name, object_mbr, min_distance, max_distance)
            end_time = time.time()
            
            actual_counts.append(actual_count)
            estimated_counts.append(estimated_count)
            estimation_times.append((end_time - start_time) * 1000)
        
        print("Done!")
        sys.stdout.flush()
        
        # Convert to arrays for calculations
        actual_counts = np.array(actual_counts)
        estimated_counts = np.array(estimated_counts)
        estimation_times = np.array(estimation_times)
        
        # Ensure non-negative estimates
        estimated_counts = np.maximum(0, estimated_counts)
        
        # Calculate MAE
        mae = mean_absolute_error(actual_counts, estimated_counts)
        
        # Calculate MAPE with handling for zeros
        non_zero_mask = (actual_counts != 0)
        zero_mask = ~non_zero_mask
        mape_sum = 0
        count = len(actual_counts)
        
        if np.any(non_zero_mask):
            mape_sum += np.sum(
                np.abs((actual_counts[non_zero_mask] - estimated_counts[non_zero_mask]) / actual_counts[non_zero_mask])
            )
        
        if np.any(zero_mask):
            mape_sum += np.sum(np.abs(actual_counts[zero_mask] - estimated_counts[zero_mask]) / 100)
        
        mape = mape_sum / count if count > 0 else 0
        
        # Calculate q-score
        valid_indices = (actual_counts != 0) & (estimated_counts != 0)
        if np.any(valid_indices):
            ratios = np.maximum(
                estimated_counts[valid_indices] / actual_counts[valid_indices],
                actual_counts[valid_indices] / estimated_counts[valid_indices]
            )
            q_score = np.mean(ratios)
        else:
            q_score = float('inf')
        
        avg_time_ms = np.mean(estimation_times) if len(estimation_times) > 0 else 0
        
        results = {
            'Dataset': dataset_name,
            'MAE': mae,
            'MAPE': mape,
            'Q_Score': q_score,
            'Avg_Time_ms': avg_time_ms,
            'Num_Queries': len(sampled_results),
            'Sample_Ratio': sample_ratio
        }
        
        results_df_out = pd.DataFrame([results])
        results_df_out.to_csv(
            f"{self.histograms_dir}/results/distance/{dataset_name}_evaluation_sample{int(sample_ratio*100)}.csv",
            index=False
        )
        
        # Generate visualization
        self.visualize_results(dataset_name, actual_counts, estimated_counts, sample_ratio)
        
        print(f"Evaluation results for {dataset_name} ({sample_ratio*100}% sample):")
        print(f"  MAE: {mae:.2f}")
        print(f"  MAPE: {mape:.2%}")
        print(f"  Q-Score: {q_score:.2f}")
        print(f"  Avg. Estimation Time: {avg_time_ms:.4f} ms")
        sys.stdout.flush()
        
        return results
    
    def visualize_results(self, dataset_name, actual_counts, estimated_counts, sample_ratio=0.2):
        plt.figure(figsize=(12, 10))
        plt.scatter(actual_counts, estimated_counts, alpha=0.5, s=8)
        
        max_val = max(np.max(actual_counts), np.max(estimated_counts))
        plt.plot([0, max_val], [0, max_val], 'r--', alpha=0.7)
        
        plt.xlabel('Actual Count')
        plt.ylabel('Estimated Count')
        plt.title(f'Histogram-based Distance Estimation for {dataset_name} ({int(sample_ratio*100)}% sample)')
        plt.grid(True, alpha=0.3)
        
        out_path = f"{self.histograms_dir}/visualizations/distance/{dataset_name}_estimation_sample{int(sample_ratio*100)}.png"
        plt.savefig(out_path, dpi=150)
        plt.close()
        
        sample_size = min(100, len(actual_counts))
        indices = np.random.choice(len(actual_counts), sample_size, replace=False)
        
        plt.figure(figsize=(20, 10))
        plt.scatter(
            range(sample_size),
            actual_counts[indices],
            label='Actual Count',
            s=100, alpha=0.7, marker='o', color='green'
        )
        plt.scatter(
            range(sample_size),
            estimated_counts[indices],
            label='Histogram Estimate',
            s=100, alpha=0.7, marker='x', color='blue'
        )
        
        plt.xlabel('Query Index')
        plt.ylabel('Object Count')
        plt.title(
            f'Histogram Estimation vs. Actual Count for {dataset_name} - '
            f'Sample of {sample_size} Queries ({int(sample_ratio*100)}% dataset)'
        )
        plt.legend()
        plt.grid(True, alpha=0.3)
        
        out_path_comp = f"{self.histograms_dir}/visualizations/distance/{dataset_name}_comparison_sample{int(sample_ratio*100)}.png"
        plt.savefig(out_path_comp, dpi=150)
        plt.close()

def evaluate_all_datasets(histograms_dir="../../large_files/traditional_methods/histogram", sample_ratio=0.2):
    """
    Evaluate histogram-based distance estimation on all available datasets sequentially.
    Shows progress for each dataset independently.
    """
    print("Initializing estimator...")
    sys.stdout.flush()
    
    estimator = HistogramDistanceEstimator(histograms_dir)
    dataset_names = list(estimator.histograms.keys())
    
    print(f"Found {len(dataset_names)} datasets to evaluate")
    sys.stdout.flush()
    
    all_results = []
    
    # Process each dataset with clear separation
    for idx, dataset_name in enumerate(dataset_names, start=1):
        print("\n" + "="*80)
        print(f"DATASET {idx}/{len(dataset_names)}: {dataset_name}")
        print("="*80)
        sys.stdout.flush()
        
        try:
            results = estimator.evaluate_on_dataset(dataset_name, sample_ratio=sample_ratio)
            all_results.append(results)
            print(f"Finished processing {dataset_name} ({idx}/{len(dataset_names)})")
            sys.stdout.flush()
        except Exception as e:
            print(f"Error evaluating {dataset_name}: {e}")
            sys.stdout.flush()
    
    # Combine and save results
    if all_results:
        all_results_df = pd.DataFrame(all_results)
        out_file = f"{histograms_dir}/results/distance/all_datasets_evaluation_sample{int(sample_ratio*100)}.csv"
        all_results_df.to_csv(out_file, index=False)
        print("\nCombined results:")
        print(all_results_df)
    else:
        print("No results were generated")
    
    sys.stdout.flush()

if __name__ == "__main__":
    evaluate_all_datasets(sample_ratio=0.2)
    print("Histogram-based distance estimation evaluation complete!")

Initializing estimator...


Loading histogram for powerthingwaysorted...


Loading histogram for zcta5...


Loading histogram for emergencythingwaysorted...


Loading histogram for yago2...


Loading histogram for aerowaythingnodesorted...


Loading histogram for barrierthingwaysorted...


Loading histogram for leisurewaysorted...


Loading histogram for arealm...


Loading histogram for aerowaythingwaysorted...


Loading histogram for cyclewaythingwaysorted...


Loading histogram for powerthingnodesorted...


Loading histogram for historicthingwaysorted...


Loading histogram for areawater...


Loading histogram for craftwaysorted...


Loaded 14 histograms


Found 14 datasets to evaluate



DATASET 1/14: powerthingwaysorted


Loading query results from ../../large_files/resultsDistance/powerthingwaysorted_results.csv


Using 20.0% sample: 543457 out of 2717289 queries


Processing powerthingwaysorted queries: 

0.0%... 

10.0%... 

20.0%... 

30.0%... 

40.0%... 

50.0%... 

60.0%... 

70.0%... 

80.0%... 

90.0%... 

100.0%... 

100.0%... 

Done!


Evaluation results for powerthingwaysorted (20.0% sample):
  MAE: 162729.15
  MAPE: 37.89%
  Q-Score: 38.36
  Avg. Estimation Time: 0.7613 ms


Finished processing powerthingwaysorted (1/14)



DATASET 2/14: zcta5


Loading query results from ../../large_files/resultsDistance/zcta5_results.csv


Using 20.0% sample: 1325 out of 6626 queries


Processing zcta5 queries: 

0.1%... 

10.0%... 

20.0%... 

30.0%... 

39.9%... 

49.9%... 

59.8%... 

69.8%... 

79.8%... 

89.7%... 

99.7%... 

100.0%... 

Done!


Evaluation results for zcta5 (20.0% sample):
  MAE: 2602.86
  MAPE: 64.73%
  Q-Score: 58.62
  Avg. Estimation Time: 0.0541 ms


Finished processing zcta5 (2/14)



DATASET 3/14: emergencythingwaysorted


Loading query results from ../../large_files/resultsDistance/emergencythingwaysorted_results.csv


Using 20.0% sample: 32302 out of 161514 queries


Processing emergencythingwaysorted queries: 

0.0%... 

10.0%... 

20.0%... 

30.0%... 

40.0%... 

50.0%... 

60.0%... 

70.0%... 

80.0%... 

90.0%... 

100.0%... 

100.0%... 

Done!


Evaluation results for emergencythingwaysorted (20.0% sample):
  MAE: 18387.55
  MAPE: 46.66%
  Q-Score: 20.83
  Avg. Estimation Time: 0.2646 ms


Finished processing emergencythingwaysorted (3/14)



DATASET 4/14: yago2


Loading query results from ../../large_files/resultsDistance/yago2_results.csv


Using 20.0% sample: 179788 out of 898942 queries


Processing yago2 queries: 

0.0%... 

10.0%... 

20.0%... 

30.0%... 

40.0%... 

50.0%... 

60.0%... 

70.0%... 

80.0%... 

90.0%... 

100.0%... 

100.0%... 

Done!


Evaluation results for yago2 (20.0% sample):
  MAE: 83279393.66
  MAPE: 418723.33%
  Q-Score: 3544.17
  Avg. Estimation Time: 0.7627 ms


Finished processing yago2 (4/14)



DATASET 5/14: aerowaythingnodesorted


Loading query results from ../../large_files/resultsDistance/aerowaythingnodesorted_results.csv


Using 20.0% sample: 3168 out of 15843 queries


Processing aerowaythingnodesorted queries: 

0.0%... 

10.0%... 

20.0%... 

30.0%... 

39.9%... 

49.9%... 

59.9%... 

69.9%... 

79.8%... 

89.8%... 

99.8%... 

100.0%... 

Done!


Evaluation results for aerowaythingnodesorted (20.0% sample):
  MAE: 1492.62
  MAPE: 58.67%
  Q-Score: 29.01
  Avg. Estimation Time: 0.0673 ms


Finished processing aerowaythingnodesorted (5/14)



DATASET 6/14: barrierthingwaysorted


Loading query results from ../../large_files/resultsDistance/barrierthingwaysorted_results.csv


Using 20.0% sample: 916334 out of 4581670 queries


Processing barrierthingwaysorted queries: 

0.0%... 

10.0%... 

20.0%... 

30.0%... 

40.0%... 

50.0%... 

60.0%... 

70.0%... 

80.0%... 

90.0%... 

100.0%... 

100.0%... 

Done!


Evaluation results for barrierthingwaysorted (20.0% sample):
  MAE: 466331.32
  MAPE: 37.82%
  Q-Score: 69.42
  Avg. Estimation Time: 0.7672 ms


Finished processing barrierthingwaysorted (6/14)



DATASET 7/14: leisurewaysorted


Loading query results from ../../large_files/resultsDistance/leisurewaysorted_results.csv


Using 20.0% sample: 1175314 out of 5876570 queries


Processing leisurewaysorted queries: 

0.0%... 

10.0%... 

20.0%... 

30.0%... 

40.0%... 

50.0%... 

60.0%... 

70.0%... 

80.0%... 

90.0%... 

100.0%... 

100.0%... 

Done!


Evaluation results for leisurewaysorted (20.0% sample):
  MAE: 584936.10
  MAPE: 40.81%
  Q-Score: 62.84
  Avg. Estimation Time: 0.7793 ms


Finished processing leisurewaysorted (7/14)



DATASET 8/14: arealm


Loading query results from ../../large_files/resultsDistance/arealm_results.csv


Using 20.0% sample: 5166 out of 25833 queries


Processing arealm queries: 

0.0%... 

10.0%... 

20.0%... 

30.0%... 

40.0%... 

50.0%... 

59.9%... 

69.9%... 

79.9%... 

89.9%... 

99.9%... 

100.0%... 

Done!


Evaluation results for arealm (20.0% sample):
  MAE: 7759.20
  MAPE: 61.32%
  Q-Score: 33.83
  Avg. Estimation Time: 0.0806 ms


Finished processing arealm (8/14)



DATASET 9/14: aerowaythingwaysorted


Loading query results from ../../large_files/resultsDistance/aerowaythingwaysorted_results.csv


Using 20.0% sample: 73673 out of 368365 queries


Processing aerowaythingwaysorted queries: 

0.0%... 

10.0%... 

20.0%... 

30.0%... 

40.0%... 

50.0%... 

60.0%... 

70.0%... 

80.0%... 

90.0%... 

100.0%... 

100.0%... 

Done!


Evaluation results for aerowaythingwaysorted (20.0% sample):
  MAE: 21366.42
  MAPE: 39.98%
  Q-Score: 12.80
  Avg. Estimation Time: 0.5636 ms


Finished processing aerowaythingwaysorted (9/14)



DATASET 10/14: cyclewaythingwaysorted


Loading query results from ../../large_files/resultsDistance/cyclewaythingwaysorted_results.csv


Using 20.0% sample: 213412 out of 1067063 queries


Processing cyclewaythingwaysorted queries: 

0.0%... 

10.0%... 

20.0%... 

30.0%... 

40.0%... 

50.0%... 

60.0%... 

70.0%... 

80.0%... 

90.0%... 

100.0%... 

100.0%... 

Done!


Evaluation results for cyclewaythingwaysorted (20.0% sample):
  MAE: 108906.67
  MAPE: 41.91%
  Q-Score: 31.21
  Avg. Estimation Time: 0.7844 ms


Finished processing cyclewaythingwaysorted (10/14)



DATASET 11/14: powerthingnodesorted


Loading query results from ../../large_files/resultsDistance/powerthingnodesorted_results.csv


Using 20.0% sample: 420502 out of 2102514 queries


Processing powerthingnodesorted queries: 

0.0%... 

10.0%... 

20.0%... 

30.0%... 

40.0%... 

50.0%... 

60.0%... 

70.0%... 

80.0%... 

90.0%... 

100.0%... 

100.0%... 

Done!


Evaluation results for powerthingnodesorted (20.0% sample):
  MAE: 124535.30
  MAPE: 38.15%
  Q-Score: 35.28
  Avg. Estimation Time: 0.8000 ms


Finished processing powerthingnodesorted (11/14)



DATASET 12/14: historicthingwaysorted


Loading query results from ../../large_files/resultsDistance/historicthingwaysorted_results.csv


Using 20.0% sample: 71687 out of 358439 queries


Processing historicthingwaysorted queries: 

0.0%... 

10.0%... 

20.0%... 

30.0%... 

40.0%... 

50.0%... 

60.0%... 

70.0%... 

80.0%... 

90.0%... 

100.0%... 

100.0%... 

Done!


Evaluation results for historicthingwaysorted (20.0% sample):
  MAE: 58304.65
  MAPE: 41.24%
  Q-Score: 21.33
  Avg. Estimation Time: 0.5451 ms


Finished processing historicthingwaysorted (12/14)



DATASET 13/14: areawater


Loading query results from ../../large_files/resultsDistance/areawater_results.csv


Using 20.0% sample: 91710 out of 458552 queries


Processing areawater queries: 

0.0%... 

10.0%... 

20.0%... 

30.0%... 

40.0%... 

50.0%... 

60.0%... 

70.0%... 

80.0%... 

90.0%... 

100.0%... 

Done!


Evaluation results for areawater (20.0% sample):
  MAE: 98636.38
  MAPE: 40.09%
  Q-Score: 42.56
  Avg. Estimation Time: 0.6617 ms


Finished processing areawater (13/14)



DATASET 14/14: craftwaysorted


Loading query results from ../../large_files/resultsDistance/craftwaysorted_results.csv


Using 20.0% sample: 4364 out of 21822 queries


Processing craftwaysorted queries: 

0.0%... 

10.0%... 

20.0%... 

30.0%... 

40.0%... 

50.0%... 

60.0%... 

70.0%... 

79.9%... 

89.9%... 

99.9%... 

100.0%... 

Done!


Evaluation results for craftwaysorted (20.0% sample):
  MAE: 4530.32
  MAPE: 60.39%
  Q-Score: 18.00
  Avg. Estimation Time: 0.0783 ms


Finished processing craftwaysorted (14/14)



Combined results:
                    Dataset           MAE         MAPE      Q_Score  \
0       powerthingwaysorted  1.627292e+05     0.378906    38.357922   
1                     zcta5  2.602862e+03     0.647338    58.624370   
2   emergencythingwaysorted  1.838755e+04     0.466580    20.830957   
3                     yago2  8.327939e+07  4187.233250  3544.170221   
4    aerowaythingnodesorted  1.492617e+03     0.586670    29.007545   
5     barrierthingwaysorted  4.663313e+05     0.378185    69.418648   
6          leisurewaysorted  5.849361e+05     0.408077    62.838977   
7                    arealm  7.759196e+03     0.613225    33.829438   
8     aerowaythingwaysorted  2.136642e+04     0.399762    12.795304   
9    cyclewaythingwaysorted  1.089067e+05     0.419126    31.206578   
10     powerthingnodesorted  1.245353e+05     0.381453    35.279154   
11   historicthingwaysorted  5.830465e+04     0.412412    21.326452   
12                areawater  9.863638e+04     0.400870    

Histogram-based distance estimation evaluation complete!
