In [1]:
import gc
import multiprocessing
import os
import re
from time import process_time

import joblib
import matplotlib.patches as patches
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import psutil
from sklearn.metrics import mean_absolute_error as MAE
from sklearn.metrics import r2_score as R2
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from tqdm.notebook import tqdm  # Progress bars for Jupyter

# Configuration options
VISUALIZE_RECTANGLES = False  # Set to True if you want to visualize rectangles
SAVE_INTERMEDIATE_MODELS = True  # Set to False to save only final models

# Flag to control whether to use multiple scales or only the maximum scale
use_multiple_scales = True  # Set to False to use only maximum scale, True for all scales

# Get available CPU cores and set appropriate parallelism
n_cores = multiprocessing.cpu_count()
n_jobs = max(1, n_cores - 1)  # Leave one core free for system processes
print(f"Using {n_jobs} of {n_cores} available CPU cores")

# For reproducibility
RANDOM_SEED = 3
np.random.seed(RANDOM_SEED)

def monitor_memory():
    """Print current memory usage of the process"""
    process = psutil.Process()
    memory_mb = process.memory_info().rss / (1024 * 1024)
    print(f"Memory usage: {memory_mb:.2f} MB")

def MAPE(actual_values, predicted_values):
    """Calculate Mean Absolute Percentage Error with special handling for zeros"""
    # Vectorized implementation
    actual_flat = actual_values.flatten()
    pred_flat = predicted_values.flatten()
    
    # Create mask for non-zero actual values
    non_zero_mask = actual_flat != 0
    zero_mask = ~non_zero_mask
    
    # Calculate MAPE for non-zero elements
    mape_sum = 0
    count = len(actual_flat)
    
    if np.any(non_zero_mask):
        mape_sum += np.sum(np.abs((actual_flat[non_zero_mask] - pred_flat[non_zero_mask]) / actual_flat[non_zero_mask]))
    
    if np.any(zero_mask):
        mape_sum += np.sum(np.abs(actual_flat[zero_mask] - pred_flat[zero_mask]) / 100)
    
    return mape_sum / count

# Load spatial statistics to get universe boundaries for each dataset
print("Loading spatial statistics...")
spatial_stats = pd.read_csv('../spatial_statistics.csv')

# Directory containing the datasets
data_dir = '../large_files/resultsContains/'

# Parse bounding box information
def parse_bbox(bbox_str):
    # Extract coordinates from BOX string using regex
    pattern = r"BOX\(([-\d\.]+) ([-\d\.]+),([-\d\.]+) ([-\d\.]+)\)"
    match = re.search(pattern, bbox_str)
    if match:
        xmin = float(match.group(1))
        ymin = float(match.group(2))
        xmax = float(match.group(3))
        ymax = float(match.group(4))
        return xmin, ymin, xmax, ymax
    return -180, -90, 180, 90  # Default if parsing fails

# Extract universe boundaries for each dataset
universe_boundaries = {}
for _, row in spatial_stats.iterrows():
    table_name = row['Table Name']
    bbox = parse_bbox(row['Universe Limits (Bounding Box)'])
    universe_boundaries[table_name] = bbox

# Get list of all CSV files in the directory
print("Finding dataset files...")
csv_files = [f for f in os.listdir(data_dir) if f.endswith('.csv')]
print(f"Found {len(csv_files)} datasets to process")

# Define the scales of learning
scales = [1000, 5000, 10000, 50000, 100000, 500000, 1000000]

# Create necessary directories
os.makedirs('../large_files/LearnedModels/contain/KNN', exist_ok=True)
os.makedirs('../large_files/LearnedModels/contain/KNN/visualizations', exist_ok=True)
os.makedirs('../large_files/LearnedModels/contain/KNN/results', exist_ok=True)

# Lists to store all results
all_results_list = []

# Process each dataset
for csv_file in tqdm(csv_files, desc="Processing datasets"):
    # Force garbage collection at the start of each dataset
    gc.collect()
    monitor_memory()
    
    # Extract dataset name (remove "_results.csv")
    dataset_name = csv_file.replace('_results.csv', '')
    
    print(f"\nProcessing dataset: {dataset_name}")
    
    # Get universe boundaries for this dataset
    if dataset_name in universe_boundaries:
        univ_xmin, univ_ymin, univ_xmax, univ_ymax = universe_boundaries[dataset_name]
    else:
        # Default values if dataset not found in spatial stats
        univ_xmin, univ_ymin, univ_xmax, univ_ymax = -180, -90, 180, 90
    
    Surface_univ = (univ_xmax - univ_xmin) * (univ_ymax - univ_ymin)
    print(f"Universe boundaries for {dataset_name}: ({univ_xmin}, {univ_ymin}, {univ_xmax}, {univ_ymax})")
    
    # Load dataset - only load required columns
    data_path = os.path.join(data_dir, csv_file)
    print(f"Loading data from {data_path}")
    data = pd.read_csv(data_path, usecols=['Query MBR', 'Count MBR'])
    
    # Extract query MBR column (needs parsing as it's in string format)
    def parse_mbr(mbr_str):
        coords = mbr_str.strip('"()').split(', ')
        return [float(coord) for coord in coords]
    
    # Extract columns - use list comprehension for better performance
    print("Parsing MBR coordinates...")
    Rectangles = np.array([parse_mbr(mbr) for mbr in data['Query MBR']])
    Y = data[['Count MBR']].values  # Using Count MBR as target
    
    # Free up memory
    del data
    gc.collect()
    
    # Calculate basic statistics
    max_count = float(np.max(Y))
    min_count = float(np.min(Y))
    mean_count = float(np.mean(Y))
    median_count = float(np.median(Y))
    total_samples = len(Y)

    # Display basic statistics for the dataset
    print(f"\nBasic statistics for {dataset_name} dataset:")
    print(f"Max count: {max_count}")
    print(f"Min count: {min_count}")
    print(f"Mean count: {mean_count:.2f}")
    print(f"Median count: {median_count:.2f}")
    print(f"Total samples: {total_samples}\n")

    # Calculate rectangles density - vectorized version
    print("Calculating rectangle densities...")
    width = Rectangles[:, 2] - Rectangles[:, 0]
    height = Rectangles[:, 3] - Rectangles[:, 1]
    rectanglesDensity = np.abs(width * height / Surface_univ).reshape(-1, 1)
    
    # Prepare the dataset
    # X = np.append(Rectangles, rectanglesDensity, axis=1)
    X = Rectangles
    
    # Split the data into 80% train and 20% test
    print("Splitting data into train and test sets...")
    X_train, X_test_all, y_train, y_test_all = train_test_split(X, Y, test_size=0.2, random_state=RANDOM_SEED)
    
    # Visualize the first 1000 rectangles (only if enabled)
    if len(Rectangles) > 0 and VISUALIZE_RECTANGLES:
        print("Visualizing rectangles sample...")
        plt.figure(figsize=(10, 8))
        ax = plt.subplot()
        
        # Only visualize a sample to save time
        sample_size = min(1000, len(Rectangles))
        for i in range(sample_size):
            x1, y1, x2, y2 = Rectangles[i]
            color_val = float(rectanglesDensity[i][0]) if hasattr(rectanglesDensity[i], '__len__') else float(rectanglesDensity[i])
            rectangle = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, 
                                        linewidth=1, edgecolor='b', facecolor='none', alpha=min(1.0, color_val*10))
            ax.add_patch(rectangle)
            
        plt.xlim(univ_xmin-20, univ_xmax+20)
        plt.ylim(univ_ymin-10, univ_ymax+10)
        plt.title(f"Sample rectangles from {dataset_name}")
        plt.savefig(f"../large_files/LearnedModels/contain/KNN/visualizations/{dataset_name}_rectangles.png", dpi=150)
        plt.close()  # Close to free memory instead of plt.show()
    
    # Adjust scales to the dataset size
    max_size = len(X_train)
    print(f"Training set size: {max_size}")

    # KNN scales differently from tree-based models - for very large datasets, limit max size
    # if max_size > 100000:
    #     print(f"Large dataset detected - limiting KNN training size to 100,000 samples")
    #     max_size = 100000  # KNN can be slow with larger datasets
        
    if use_multiple_scales:
        # Use multiple scales as before
        adjusted_scales = [s for s in scales if s <= max_size]
        
        # Add the actual max size if it's not already in the list
        if max_size not in adjusted_scales:
            adjusted_scales.append(max_size)
            
        # Sort the scales to ensure they're in ascending order
        adjusted_scales.sort()
    else:
        # Use only the maximum scale
        adjusted_scales = [max_size]

    # List to store dataset-specific results
    dataset_results_list = []

    # Store best parameters from max scale training to reuse
    best_params = None
    
    # Process scales in reversed order (largest first)
    for sample_size in reversed(adjusted_scales):
        print(f"\nTraining with sample size: {sample_size}")
        monitor_memory()
        
        # Create training subset
        X_train_sample = X_train[:sample_size, :]
        y_train_sample = y_train[:sample_size]
        
        # Set reasonable K values for parameter tuning
        k_values = [3, 5, 7, 10, 15, 20, 30]
        
        # KNN parameters - optimize for performance
        params_knn = {
            'n_neighbors': k_values,
            'weights': ['distance'],      # Only use distance weighting
            'algorithm': ['auto'],        # Let KNN decide best algorithm
        }
        
        # Only do GridSearch for the max scale
        if sample_size == max_size or best_params is None:
            print("Performing grid search for optimal parameters...")
            knn = KNeighborsRegressor(n_jobs=n_jobs)
            knn_cv = GridSearchCV(knn, params_knn, cv=3, n_jobs=1)  # Use n_jobs=1 here as KNN uses parallelism internally
            
            # Time the grid search
            t1_start = process_time()
            knn_cv.fit(X_train_sample, y_train_sample.ravel())
            t1_stop = process_time()
            grid_search_time = t1_stop - t1_start
            
            # Store best parameters for reuse
            best_params = knn_cv.best_params_
            print(f"Grid search complete in {grid_search_time:.2f}s")
            print(f"Best parameters: {best_params}")
        else:
            # Skip grid search for smaller scales, use params from max scale
            knn_cv = None
            grid_search_time = 0
            print(f"Using best parameters from max scale: {best_params}")
        
        # Train the model with best parameters
        print("Training KNN model...")
        knn = KNeighborsRegressor(n_jobs=n_jobs, **best_params)
        
        t2_start = process_time()
        knn.fit(X_train_sample, y_train_sample.ravel())
        t2_stop = process_time()
        training_time = t2_stop - t2_start
        
        # Make predictions
        print("Making predictions...")
        y_pred = knn.predict(X_test_all).reshape(-1, 1)  # Reshape to match y_test_all format
        # Ensure predictions are non-negative (counts can't be negative)
        y_pred = np.maximum(0, y_pred)  # Convert any negative predictions to zero

        # Calculate metrics
        r2_score = knn.score(X_test_all, y_test_all)
        mae_value = MAE(y_test_all, y_pred)
        mape_value = MAPE(y_test_all, y_pred)
        
        # Calculate q-score - vectorized version
        print("Calculating performance metrics...")
        
        # Vectorized q-score calculation
        y_true_flat = y_test_all.flatten()
        y_pred_flat = y_pred.flatten() if y_pred.ndim > 1 else y_pred
        
        # Find indices where both values are non-zero
        valid_indices = (y_true_flat != 0) & (y_pred_flat != 0)
        
        if np.any(valid_indices):
            ratios = np.maximum(
                y_pred_flat[valid_indices] / y_true_flat[valid_indices],
                y_true_flat[valid_indices] / y_pred_flat[valid_indices]
            )
            q_score_mean = np.mean(ratios)
        else:
            q_score_mean = 0
        
        # Time prediction performance (10 iterations)
        print("Measuring prediction performance...")
        total_duration = 0
        total_read = 0
        total_write = 0
        
        for _ in range(10):
            io_before = psutil.disk_io_counters()
            t3_start = process_time()
            preds = knn.predict(X_test_all)
            preds = np.maximum(0, preds)  # Include this operation in timing
            t3_stop = process_time()
            io_after = psutil.disk_io_counters()
            
            total_duration += (t3_stop - t3_start)
            total_read += io_after.read_count - io_before.read_count
            total_write += io_after.write_count - io_before.write_count
        
        avg_pred_time_microsec = (total_duration / 10) / len(y_pred) * 1000000
        avg_reads = total_read / 10 / len(y_pred)
        avg_writes = total_write / 10 / len(y_pred)
        
        # Save the model using joblib instead of pickle for better efficiency
        if SAVE_INTERMEDIATE_MODELS or sample_size == max_size:
            print("Saving model...")
            filename = f'../large_files/LearnedModels/contain/KNN/{dataset_name}_knn_{sample_size}_{training_time:.2f}s_{mape_value:.2%}_{mae_value:.2f}.joblib'
            joblib.dump(knn, filename, compress=3)
            # Get model file size in KB
            model_size_kb = os.path.getsize(filename) / 1024
            print(f"Model size: {model_size_kb:.2f} KB")
        else:
            model_size_kb = 0  # Set to 0 if model wasn't saved
        
        # Print results
        print(f"\nResults for {dataset_name}, Sample Size: {sample_size}")
        print(f"Grid Search Time: {grid_search_time:.2f}s, Training Time: {training_time:.2f}s")
        print(f"KNN Parameters: {best_params}")
        print(f"Performance: R² = {r2_score:.4f}, MAE = {mae_value:.2f}, MAPE = {mape_value:.2%}")
        print(f"q-score: {q_score_mean:.2f}")
        print(f"Prediction time: {avg_pred_time_microsec:.4f} μs/sample")
        print(f"I/O: Reads={avg_reads:.6f}, Writes={avg_writes:.6f}")
        print("-" * 80)
        
        # Plot actual vs predicted only for the maximum scale
        if sample_size == adjusted_scales[-1]:  # Check if this is the maximum scale
            print("Generating prediction scatter plot...")
            plt.figure(figsize=(10, 8))
            plt.scatter(y_test_all, y_pred, s=0.5, alpha=0.5)
            plt.xlabel('True Values')
            plt.ylabel('Predictions')
            plt.title(f"{dataset_name} - Sample Size: {sample_size} (Maximum)")
            plt.grid(True, alpha=0.3)
            
            # Add diagonal line for perfect predictions
            max_val = max(np.max(y_test_all), np.max(y_pred))
            plt.plot([0, max_val], [0, max_val], 'r--', alpha=0.5)
            
            plt.savefig(f"../large_files/LearnedModels/contain/KNN/visualizations/{dataset_name}_{sample_size}_prediction.png", dpi=150)
            plt.close()  # Close to free memory
            
            # Create a scatter plot comparing predicted vs real values for first 100 rectangles
            print("Generating side-by-side comparison plot...")
            
            # Get predictions for first 100 test samples
            sample_indices = range(min(100, len(X_test_all)))
            X_sample = X_test_all[sample_indices]
            y_sample_true = y_test_all[sample_indices].flatten()
            y_sample_pred = knn.predict(X_sample)
            # Ensure non-negative predictions
            y_sample_pred = np.maximum(0, y_sample_pred)
            
            plt.figure(figsize=(20, 10))
            plt.scatter(range(len(sample_indices)), y_sample_pred, c='purple', 
                        label='Predicted number of objects (KNN)', alpha=0.7, s=100)
            plt.scatter(range(len(sample_indices)), y_sample_true, c='green', 
                        label='Real number of objects', alpha=0.7, s=100)
            
            plt.title(f'{dataset_name} - First {len(sample_indices)} Rectangles: Predicted vs Real Values', fontsize=16)
            plt.xlabel('Rectangle Index', fontsize=14)
            plt.ylabel('Number of objects in rectangle', fontsize=14)
            plt.legend(fontsize=12)
            plt.grid(True, alpha=0.3)
            plt.tight_layout()
            
            # Save the plot
            plt.savefig(f"../large_files/LearnedModels/contain/KNN/visualizations/{dataset_name}_comparison_plot.png", dpi=150)
            plt.close()
        
        # Store results in list (more efficient than DataFrame concat)
        result_row = {
            'Dataset': dataset_name,
            'Sample_Size': sample_size,
            'Training_Time': training_time,
            'Best_Params': str(best_params),
            'R2_Score': r2_score,
            'MAE': mae_value,
            'MAPE': float(mape_value),
            'Q_Score': q_score_mean,
            'Pred_Time_Microseconds': avg_pred_time_microsec,
            'IO_Reads': avg_reads,
            'IO_Writes': avg_writes,
            'Model_Size_KB': model_size_kb,
            'Max_Count': max_count,
            'Min_Count': min_count,
            'Mean_Count': mean_count,
            'Median_Count': median_count,
            'Total_Samples': total_samples
        }
        
        dataset_results_list.append(result_row)
        all_results_list.append(result_row)
        
        # Clean up to free memory
        if sample_size != max_size:  # Don't delete for max size as we might need it
            del X_train_sample, y_train_sample, knn
            gc.collect()
    
    # Save results for this dataset
    print(f"Saving results for {dataset_name}...")
    dataset_results = pd.DataFrame(dataset_results_list)
    dataset_results.to_csv(f'../large_files/LearnedModels/contain/KNN/results/{dataset_name}_results.csv', index=False)
    
    # Clear memory before next dataset
    del X_train, X_test_all, y_train, y_test_all, Rectangles, Y, rectanglesDensity
    gc.collect()
    
# Save all results
print("Saving combined results...")
all_results = pd.DataFrame(all_results_list)
all_results.to_csv('../large_files/LearnedModels/contain/KNN/all_results.csv', index=False)

print("All processing completed and results saved.")
monitor_memory()

Using 29 of 30 available CPU cores
Loading spatial statistics...
Finding dataset files...
Found 14 datasets to process


Processing datasets:   0%|          | 0/14 [00:00<?, ?it/s]

Memory usage: 243.40 MB

Processing dataset: craftwaysorted
Universe boundaries for craftwaysorted: (-175.2000514, -65.2458821, 175.3397782, 69.6673353)
Loading data from ../large_files/resultsContains/craftwaysorted_results.csv
Parsing MBR coordinates...



Basic statistics for craftwaysorted dataset:
Max count: 108929.0
Min count: 0.0
Mean count: 1705.23
Median count: 0.00
Total samples: 21822

Calculating rectangle densities...
Splitting data into train and test sets...
Training set size: 17457

Training with sample size: 17457
Memory usage: 249.21 MB
Performing grid search for optimal parameters...


Grid search complete in 27.41s
Best parameters: {'algorithm': 'auto', 'n_neighbors': 3, 'weights': 'distance'}
Training KNN model...
Making predictions...
Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 1167.64 KB

Results for craftwaysorted, Sample Size: 17457
Grid Search Time: 27.41s, Training Time: 0.01s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 3, 'weights': 'distance'}
Performance: R² = 0.8607, MAE = 604.33, MAPE = 86.09%
q-score: 3.20
Prediction time: 181.8105 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------
Generating prediction scatter plot...


Generating side-by-side comparison plot...



Training with sample size: 10000
Memory usage: 269.83 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 3, 'weights': 'distance'}
Training KNN model...
Making predictions...
Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 665.63 KB

Results for craftwaysorted, Sample Size: 10000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 3, 'weights': 'distance'}
Performance: R² = 0.8323, MAE = 692.77, MAPE = 105.10%
q-score: 3.88
Prediction time: 140.5892 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------

Training with sample size: 5000
Memory usage: 272.74 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 3, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 333.73 KB

Results for craftwaysorted, Sample Size: 5000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 3, 'weights': 'distance'}
Performance: R² = 0.7180, MAE = 907.30, MAPE = 126.97%
q-score: 4.79
Prediction time: 130.4349 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------

Training with sample size: 1000
Memory usage: 272.79 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 3, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 38.45 KB

Results for craftwaysorted, Sample Size: 1000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 3, 'weights': 'distance'}
Performance: R² = 0.4916, MAE = 1208.47, MAPE = 214.35%
q-score: 8.05
Prediction time: 61.5221 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------
Saving results for craftwaysorted...
Memory usage: 272.79 MB

Processing dataset: powerthingwaysorted
Universe boundaries for powerthingwaysorted: (-179.5002188, -75.1012051, 178.4574038, 82.5247908)
Loading data from ../large_files/resultsContains/powerthingwaysorted_results.csv


Parsing MBR coordinates...



Basic statistics for powerthingwaysorted dataset:
Max count: 13586342.0
Min count: 0.0
Mean count: 236375.56
Median count: 147.00
Total samples: 2717289

Calculating rectangle densities...
Splitting data into train and test sets...


Training set size: 2173831

Training with sample size: 2173831
Memory usage: 570.70 MB
Performing grid search for optimal parameters...


Grid search complete in 10578.43s
Best parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 152128.60 KB

Results for powerthingwaysorted, Sample Size: 2173831
Grid Search Time: 10578.43s, Training Time: 3.17s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.9969, MAE = 12448.44, MAPE = 215.63%
q-score: 4.47
Prediction time: 580.1681 μs/sample
I/O: Reads=0.000022, Writes=0.000034
--------------------------------------------------------------------------------
Generating prediction scatter plot...


Generating side-by-side comparison plot...



Training with sample size: 1000000
Memory usage: 1254.18 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 70169.65 KB

Results for powerthingwaysorted, Sample Size: 1000000
Grid Search Time: 0.00s, Training Time: 1.01s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.9950, MAE = 15845.72, MAPE = 335.75%
q-score: 6.36
Prediction time: 694.7179 μs/sample
I/O: Reads=0.000021, Writes=0.000033
--------------------------------------------------------------------------------

Training with sample size: 500000
Memory usage: 1260.38 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 34968.34 KB

Results for powerthingwaysorted, Sample Size: 500000
Grid Search Time: 0.00s, Training Time: 0.38s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.9929, MAE = 19578.70, MAPE = 566.07%
q-score: 9.95
Prediction time: 602.4710 μs/sample
I/O: Reads=0.000033, Writes=0.000330
--------------------------------------------------------------------------------

Training with sample size: 100000
Memory usage: 1260.42 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 6752.64 KB

Results for powerthingwaysorted, Sample Size: 100000
Grid Search Time: 0.00s, Training Time: 0.05s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.9785, MAE = 34236.88, MAPE = 1628.26%
q-score: 25.66
Prediction time: 575.0394 μs/sample
I/O: Reads=0.000021, Writes=0.000177
--------------------------------------------------------------------------------

Training with sample size: 50000
Memory usage: 1260.42 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 3349.71 KB

Results for powerthingwaysorted, Sample Size: 50000
Grid Search Time: 0.00s, Training Time: 0.02s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.9687, MAE = 42614.45, MAPE = 2714.03%
q-score: 41.84
Prediction time: 387.3196 μs/sample
I/O: Reads=0.000010, Writes=0.000048
--------------------------------------------------------------------------------



Training with sample size: 10000
Memory usage: 1260.42 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 673.61 KB

Results for powerthingwaysorted, Sample Size: 10000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.9188, MAE = 74087.30, MAPE = 5266.27%
q-score: 79.56
Prediction time: 343.4957 μs/sample
I/O: Reads=0.000010, Writes=0.000043
--------------------------------------------------------------------------------

Training with sample size: 5000
Memory usage: 1260.42 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 338.04 KB

Results for powerthingwaysorted, Sample Size: 5000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.8792, MAE = 94026.69, MAPE = 6447.24%
q-score: 97.03
Prediction time: 301.6182 μs/sample
I/O: Reads=0.000010, Writes=0.000019
--------------------------------------------------------------------------------

Training with sample size: 1000
Memory usage: 1260.42 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 39.09 KB

Results for powerthingwaysorted, Sample Size: 1000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.7129, MAE = 148199.24, MAPE = 11045.44%
q-score: 161.51
Prediction time: 203.9859 μs/sample
I/O: Reads=0.000010, Writes=0.000012
--------------------------------------------------------------------------------
Saving results for powerthingwaysorted...
Memory usage: 1194.08 MB

Processing dataset: barrierthingwaysorted
Universe boundaries for barrierthingwaysorted: (-179.7595238, -70.776382, 179.19591350000002, 78.2501675)
Loading data from ../large_files/resultsContains/barrierthingwaysorted_results.csv


Parsing MBR coordinates...



Basic statistics for barrierthingwaysorted dataset:
Max count: 22908267.0
Min count: 0.0
Mean count: 399928.77
Median count: 329.00
Total samples: 4581670

Calculating rectangle densities...
Splitting data into train and test sets...


Training set size: 3665336

Training with sample size: 3665336
Memory usage: 1428.54 MB
Performing grid search for optimal parameters...


Grid search complete in 19714.71s
Best parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 250793.43 KB

Results for barrierthingwaysorted, Sample Size: 3665336
Grid Search Time: 19714.71s, Training Time: 5.82s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.9957, MAE = 23391.01, MAPE = 251.19%
q-score: 4.80
Prediction time: 618.2219 μs/sample
I/O: Reads=0.000025, Writes=0.000037
--------------------------------------------------------------------------------
Generating prediction scatter plot...


Generating side-by-side comparison plot...



Training with sample size: 1000000
Memory usage: 1854.43 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 70263.38 KB

Results for barrierthingwaysorted, Sample Size: 1000000
Grid Search Time: 0.00s, Training Time: 1.03s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.9913, MAE = 34368.49, MAPE = 448.52%
q-score: 7.65
Prediction time: 529.9781 μs/sample
I/O: Reads=0.000019, Writes=0.000022
--------------------------------------------------------------------------------

Training with sample size: 500000
Memory usage: 1854.66 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 35016.31 KB

Results for barrierthingwaysorted, Sample Size: 500000
Grid Search Time: 0.00s, Training Time: 0.37s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.9873, MAE = 42338.40, MAPE = 581.61%
q-score: 9.47
Prediction time: 486.1089 μs/sample
I/O: Reads=0.000019, Writes=0.000025
--------------------------------------------------------------------------------

Training with sample size: 100000
Memory usage: 1854.66 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 6761.01 KB

Results for barrierthingwaysorted, Sample Size: 100000
Grid Search Time: 0.00s, Training Time: 0.05s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.9667, MAE = 71598.76, MAPE = 1889.51%
q-score: 28.28
Prediction time: 465.0589 μs/sample
I/O: Reads=0.000013, Writes=0.000027
--------------------------------------------------------------------------------

Training with sample size: 50000
Memory usage: 1854.66 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 3353.77 KB

Results for barrierthingwaysorted, Sample Size: 50000
Grid Search Time: 0.00s, Training Time: 0.02s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.9512, MAE = 89709.88, MAPE = 2790.76%
q-score: 40.58
Prediction time: 406.9231 μs/sample
I/O: Reads=0.000012, Writes=0.000038
--------------------------------------------------------------------------------



Training with sample size: 10000
Memory usage: 1854.66 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 674.53 KB

Results for barrierthingwaysorted, Sample Size: 10000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.8618, MAE = 160361.94, MAPE = 5478.87%
q-score: 77.55
Prediction time: 259.5786 μs/sample
I/O: Reads=0.000006, Writes=0.000021
--------------------------------------------------------------------------------

Training with sample size: 5000
Memory usage: 1854.66 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 338.50 KB

Results for barrierthingwaysorted, Sample Size: 5000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.8206, MAE = 187675.03, MAPE = 6835.48%
q-score: 91.79
Prediction time: 224.4812 μs/sample
I/O: Reads=0.000012, Writes=0.000010
--------------------------------------------------------------------------------

Training with sample size: 1000
Memory usage: 1854.66 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 39.14 KB

Results for barrierthingwaysorted, Sample Size: 1000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.6332, MAE = 266062.01, MAPE = 11684.82%
q-score: 141.00
Prediction time: 139.3543 μs/sample
I/O: Reads=0.000006, Writes=0.000008
--------------------------------------------------------------------------------
Saving results for barrierthingwaysorted...
Memory usage: 1679.88 MB

Processing dataset: cyclewaythingwaysorted
Universe boundaries for cyclewaythingwaysorted: (-175.2093065, -75.1027861, 176.92582230000002, 71.0488105)
Loading data from ../large_files/resultsContains/cyclewaythingwaysorted_results.csv


Parsing MBR coordinates...



Basic statistics for cyclewaythingwaysorted dataset:
Max count: 5334899.0
Min count: 0.0
Mean count: 76781.20
Median count: 0.00
Total samples: 1067063

Calculating rectangle densities...
Splitting data into train and test sets...
Training set size: 853650

Training with sample size: 853650
Memory usage: 1543.03 MB
Performing grid search for optimal parameters...


Grid search complete in 3147.55s
Best parameters: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Training KNN model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 57862.34 KB

Results for cyclewaythingwaysorted, Sample Size: 853650
Grid Search Time: 3147.55s, Training Time: 0.76s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Performance: R² = 0.9873, MAE = 7902.72, MAPE = 332.09%
q-score: 7.36
Prediction time: 939.6030 μs/sample
I/O: Reads=0.000030, Writes=0.000064
--------------------------------------------------------------------------------
Generating prediction scatter plot...


Generating side-by-side comparison plot...



Training with sample size: 500000
Memory usage: 1514.32 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Training KNN model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 34754.45 KB

Results for cyclewaythingwaysorted, Sample Size: 500000
Grid Search Time: 0.00s, Training Time: 0.38s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Performance: R² = 0.9827, MAE = 9249.21, MAPE = 409.46%
q-score: 8.65
Prediction time: 718.2128 μs/sample
I/O: Reads=0.000000, Writes=0.000037
--------------------------------------------------------------------------------

Training with sample size: 100000
Memory usage: 1514.32 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 6710.54 KB

Results for cyclewaythingwaysorted, Sample Size: 100000
Grid Search Time: 0.00s, Training Time: 0.05s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Performance: R² = 0.9566, MAE = 15497.89, MAPE = 890.02%
q-score: 17.46
Prediction time: 411.3981 μs/sample
I/O: Reads=0.000000, Writes=0.000395
--------------------------------------------------------------------------------

Training with sample size: 50000
Memory usage: 1514.32 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 3328.84 KB

Results for cyclewaythingwaysorted, Sample Size: 50000
Grid Search Time: 0.00s, Training Time: 0.02s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Performance: R² = 0.9367, MAE = 19123.30, MAPE = 1165.36%
q-score: 22.21
Prediction time: 364.1415 μs/sample
I/O: Reads=0.000027, Writes=0.000098
--------------------------------------------------------------------------------



Training with sample size: 10000
Memory usage: 1514.32 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 669.22 KB

Results for cyclewaythingwaysorted, Sample Size: 10000
Grid Search Time: 0.00s, Training Time: 0.01s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Performance: R² = 0.8467, MAE = 31682.61, MAPE = 2388.08%
q-score: 44.57
Prediction time: 229.4561 μs/sample
I/O: Reads=0.000000, Writes=0.000007
--------------------------------------------------------------------------------

Training with sample size: 5000
Memory usage: 1514.32 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 335.84 KB

Results for cyclewaythingwaysorted, Sample Size: 5000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Performance: R² = 0.7842, MAE = 38209.41, MAPE = 2956.15%
q-score: 54.66
Prediction time: 189.6204 μs/sample
I/O: Reads=0.000000, Writes=0.000057
--------------------------------------------------------------------------------

Training with sample size: 1000
Memory usage: 1514.32 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 38.68 KB

Results for cyclewaythingwaysorted, Sample Size: 1000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Performance: R² = 0.5959, MAE = 56403.42, MAPE = 5255.59%
q-score: 93.12
Prediction time: 121.8473 μs/sample
I/O: Reads=0.000027, Writes=0.000004
--------------------------------------------------------------------------------
Saving results for cyclewaythingwaysorted...
Memory usage: 1514.32 MB

Processing dataset: zcta5
Universe boundaries for zcta5: (-176.684744, -14.373776, 145.830505, 71.341324)
Loading data from ../large_files/resultsContains/zcta5_results.csv


Parsing MBR coordinates...

Basic statistics for zcta5 dataset:
Max count: 33131.0
Min count: 0.0
Mean count: 662.21
Median count: 0.00
Total samples: 6626

Calculating rectangle densities...
Splitting data into train and test sets...
Training set size: 5300

Training with sample size: 5300
Memory usage: 1514.32 MB
Performing grid search for optimal parameters...


Grid search complete in 4.44s
Best parameters: {'algorithm': 'auto', 'n_neighbors': 5, 'weights': 'distance'}
Training KNN model...
Making predictions...
Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 348.77 KB

Results for zcta5, Sample Size: 5300
Grid Search Time: 4.44s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 5, 'weights': 'distance'}
Performance: R² = 0.8711, MAE = 215.69, MAPE = 154.39%
q-score: 13.93
Prediction time: 45.9008 μs/sample
I/O: Reads=0.000000, Writes=0.000151
--------------------------------------------------------------------------------
Generating prediction scatter plot...
Generating side-by-side comparison plot...



Training with sample size: 5000
Memory usage: 1514.32 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 5, 'weights': 'distance'}
Training KNN model...
Making predictions...
Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 329.63 KB

Results for zcta5, Sample Size: 5000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 5, 'weights': 'distance'}
Performance: R² = 0.8588, MAE = 240.07, MAPE = 162.09%
q-score: 14.37
Prediction time: 43.9122 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------

Training with sample size: 1000
Memory usage: 1514.33 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 5, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 37.78 KB

Results for zcta5, Sample Size: 1000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 5, 'weights': 'distance'}
Performance: R² = 0.7192, MAE = 396.45, MAPE = 227.75%
q-score: 17.61
Prediction time: 39.4769 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------
Saving results for zcta5...


Memory usage: 1514.32 MB

Processing dataset: aerowaythingnodesorted
Universe boundaries for aerowaythingnodesorted: (-179.88088960000002, -90.0, 179.951004, 83.08333590000001)
Loading data from ../large_files/resultsContains/aerowaythingnodesorted_results.csv
Parsing MBR coordinates...

Basic statistics for aerowaythingnodesorted dataset:
Max count: 79139.0
Min count: 0.0
Mean count: 1260.61
Median count: 7.00
Total samples: 15843

Calculating rectangle densities...
Splitting data into train and test sets...
Training set size: 12674

Training with sample size: 12674
Memory usage: 1513.32 MB
Performing grid search for optimal parameters...


Grid search complete in 34.78s
Best parameters: {'algorithm': 'auto', 'n_neighbors': 5, 'weights': 'distance'}
Training KNN model...
Making predictions...
Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 841.17 KB

Results for aerowaythingnodesorted, Sample Size: 12674
Grid Search Time: 34.78s, Training Time: 0.01s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 5, 'weights': 'distance'}
Performance: R² = 0.9534, MAE = 348.01, MAPE = 191.77%
q-score: 4.26
Prediction time: 315.4197 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------
Generating prediction scatter plot...


Generating side-by-side comparison plot...



Training with sample size: 10000
Memory usage: 1513.32 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 5, 'weights': 'distance'}
Training KNN model...
Making predictions...
Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 668.85 KB

Results for aerowaythingnodesorted, Sample Size: 10000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 5, 'weights': 'distance'}
Performance: R² = 0.9516, MAE = 367.51, MAPE = 214.91%
q-score: 4.64
Prediction time: 232.6608 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------

Training with sample size: 5000
Memory usage: 1513.33 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 5, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 335.54 KB

Results for aerowaythingnodesorted, Sample Size: 5000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 5, 'weights': 'distance'}
Performance: R² = 0.9309, MAE = 434.03, MAPE = 272.27%
q-score: 7.10
Prediction time: 255.7081 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------

Training with sample size: 1000
Memory usage: 1513.33 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 5, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 38.83 KB

Results for aerowaythingnodesorted, Sample Size: 1000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 5, 'weights': 'distance'}
Performance: R² = 0.7569, MAE = 749.69, MAPE = 467.38%
q-score: 9.82
Prediction time: 112.7730 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------
Saving results for aerowaythingnodesorted...
Memory usage: 1513.33 MB

Processing dataset: leisurewaysorted
Universe boundaries for leisurewaysorted: (-179.8728244, -89.6957847, 179.8091866, 81.0280175)
Loading data from ../large_files/resultsContains/leisurewaysorted_results.csv


Parsing MBR coordinates...



Basic statistics for leisurewaysorted dataset:
Max count: 29382686.0
Min count: 0.0
Mean count: 489264.44
Median count: 253.00
Total samples: 5000000

Calculating rectangle densities...
Splitting data into train and test sets...


Training set size: 4000000

Training with sample size: 4000000
Memory usage: 1830.09 MB
Performing grid search for optimal parameters...


Grid search complete in 30509.98s
Best parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 281945.02 KB

Results for leisurewaysorted, Sample Size: 4000000
Grid Search Time: 30509.98s, Training Time: 6.87s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.9969, MAE = 24634.17, MAPE = 288.70%
q-score: 5.42
Prediction time: 825.0542 μs/sample
I/O: Reads=0.000113, Writes=0.000052
--------------------------------------------------------------------------------
Generating prediction scatter plot...


Generating side-by-side comparison plot...



Training with sample size: 1000000
Memory usage: 2098.33 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 70230.46 KB

Results for leisurewaysorted, Sample Size: 1000000
Grid Search Time: 0.00s, Training Time: 1.01s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.9929, MAE = 37573.05, MAPE = 533.31%
q-score: 8.94
Prediction time: 854.4114 μs/sample
I/O: Reads=0.000871, Writes=0.000130
--------------------------------------------------------------------------------

Training with sample size: 500000
Memory usage: 2098.45 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 35000.79 KB

Results for leisurewaysorted, Sample Size: 500000
Grid Search Time: 0.00s, Training Time: 0.38s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.9895, MAE = 46886.59, MAPE = 742.33%
q-score: 11.88
Prediction time: 841.9944 μs/sample
I/O: Reads=0.000029, Writes=0.000037
--------------------------------------------------------------------------------

Training with sample size: 100000
Memory usage: 2098.45 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 6758.75 KB

Results for leisurewaysorted, Sample Size: 100000
Grid Search Time: 0.00s, Training Time: 0.05s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.9706, MAE = 79968.08, MAPE = 2042.34%
q-score: 30.89
Prediction time: 829.5611 μs/sample
I/O: Reads=0.000152, Writes=0.000104
--------------------------------------------------------------------------------

Training with sample size: 50000
Memory usage: 2098.45 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 3352.39 KB

Results for leisurewaysorted, Sample Size: 50000
Grid Search Time: 0.00s, Training Time: 0.03s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.9547, MAE = 102333.27, MAPE = 3076.81%
q-score: 46.31
Prediction time: 734.6785 μs/sample
I/O: Reads=0.000028, Writes=0.000027
--------------------------------------------------------------------------------



Training with sample size: 10000
Memory usage: 2098.45 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 674.29 KB

Results for leisurewaysorted, Sample Size: 10000
Grid Search Time: 0.00s, Training Time: 0.01s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.8819, MAE = 172479.00, MAPE = 8219.90%
q-score: 123.51
Prediction time: 487.1672 μs/sample
I/O: Reads=0.000017, Writes=0.000025
--------------------------------------------------------------------------------

Training with sample size: 5000
Memory usage: 2098.45 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 338.60 KB

Results for leisurewaysorted, Sample Size: 5000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.8535, MAE = 208481.28, MAPE = 10429.67%
q-score: 155.64
Prediction time: 421.7404 μs/sample
I/O: Reads=0.000013, Writes=0.000024
--------------------------------------------------------------------------------

Training with sample size: 1000
Memory usage: 2098.45 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 39.17 KB

Results for leisurewaysorted, Sample Size: 1000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.6306, MAE = 327598.58, MAPE = 25319.28%
q-score: 353.89
Prediction time: 272.5177 μs/sample
I/O: Reads=0.000011, Writes=0.000014
--------------------------------------------------------------------------------
Saving results for leisurewaysorted...
Memory usage: 1938.23 MB

Processing dataset: areawater
Universe boundaries for areawater: (-179.231086, -14.601813, 179.859681, 71.441059)
Loading data from ../large_files/resultsContains/areawater_results.csv


Parsing MBR coordinates...



Basic statistics for areawater dataset:
Max count: 2292728.0
Min count: 0.0
Mean count: 43929.12
Median count: 0.00
Total samples: 458552

Calculating rectangle densities...
Splitting data into train and test sets...
Training set size: 366841

Training with sample size: 366841
Memory usage: 1787.63 MB
Performing grid search for optimal parameters...


Grid search complete in 2381.28s
Best parameters: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Training KNN model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 24556.38 KB

Results for areawater, Sample Size: 366841
Grid Search Time: 2381.28s, Training Time: 0.25s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Performance: R² = 0.9862, MAE = 4802.28, MAPE = 141.85%
q-score: 7.99
Prediction time: 514.5995 μs/sample
I/O: Reads=0.000000, Writes=0.000026
--------------------------------------------------------------------------------
Generating prediction scatter plot...
Generating side-by-side comparison plot...



Training with sample size: 100000
Memory usage: 1786.66 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 6613.97 KB

Results for areawater, Sample Size: 100000
Grid Search Time: 0.00s, Training Time: 0.06s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Performance: R² = 0.9701, MAE = 7489.19, MAPE = 354.25%
q-score: 17.15
Prediction time: 636.2443 μs/sample
I/O: Reads=0.000062, Writes=0.000043
--------------------------------------------------------------------------------

Training with sample size: 50000
Memory usage: 1786.66 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 3280.74 KB

Results for areawater, Sample Size: 50000
Grid Search Time: 0.00s, Training Time: 0.02s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Performance: R² = 0.9542, MAE = 9462.18, MAPE = 652.32%
q-score: 32.40
Prediction time: 586.3871 μs/sample
I/O: Reads=0.000000, Writes=0.000027
--------------------------------------------------------------------------------



Training with sample size: 10000
Memory usage: 1786.66 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 660.35 KB

Results for areawater, Sample Size: 10000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Performance: R² = 0.8882, MAE = 15727.10, MAPE = 2640.36%
q-score: 122.16
Prediction time: 433.5300 μs/sample
I/O: Reads=0.000000, Writes=0.000198
--------------------------------------------------------------------------------

Training with sample size: 5000
Memory usage: 1786.66 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 330.24 KB

Results for areawater, Sample Size: 5000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Performance: R² = 0.8388, MAE = 19198.33, MAPE = 3666.74%
q-score: 164.35
Prediction time: 366.3478 μs/sample
I/O: Reads=0.000000, Writes=0.000015
--------------------------------------------------------------------------------

Training with sample size: 1000
Memory usage: 1786.66 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 37.73 KB

Results for areawater, Sample Size: 1000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Performance: R² = 0.6167, MAE = 30946.96, MAPE = 6521.14%
q-score: 317.44
Prediction time: 238.5888 μs/sample
I/O: Reads=0.000062, Writes=0.000088
--------------------------------------------------------------------------------
Saving results for areawater...


Memory usage: 1786.66 MB

Processing dataset: yago2
Universe boundaries for yago2: (-179.98473, -90.0, 180.0, 90.0)
Loading data from ../large_files/resultsContains/yago2_results.csv


Parsing MBR coordinates...



Basic statistics for yago2 dataset:
Max count: 4494666.0
Min count: 0.0
Mean count: 79087.58
Median count: 678.00
Total samples: 898942

Calculating rectangle densities...
Splitting data into train and test sets...
Training set size: 719153

Training with sample size: 719153
Memory usage: 1787.65 MB
Performing grid search for optimal parameters...


Grid search complete in 5497.79s
Best parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 49653.32 KB

Results for yago2, Sample Size: 719153
Grid Search Time: 5497.79s, Training Time: 0.60s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.9968, MAE = 4848.33, MAPE = 327.19%
q-score: 4.63
Prediction time: 545.3638 μs/sample
I/O: Reads=0.000032, Writes=0.000021
--------------------------------------------------------------------------------
Generating prediction scatter plot...


Generating side-by-side comparison plot...



Training with sample size: 500000
Memory usage: 1786.66 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 35157.62 KB

Results for yago2, Sample Size: 500000
Grid Search Time: 0.00s, Training Time: 0.37s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.9957, MAE = 5502.60, MAPE = 401.61%
q-score: 5.45
Prediction time: 786.9995 μs/sample
I/O: Reads=0.000032, Writes=0.000674
--------------------------------------------------------------------------------

Training with sample size: 100000
Memory usage: 1786.67 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 6791.95 KB

Results for yago2, Sample Size: 100000
Grid Search Time: 0.00s, Training Time: 0.06s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.9883, MAE = 9362.72, MAPE = 967.00%
q-score: 11.68
Prediction time: 826.4363 μs/sample
I/O: Reads=0.000032, Writes=0.000476
--------------------------------------------------------------------------------

Training with sample size: 50000
Memory usage: 1786.67 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 3369.42 KB

Results for yago2, Sample Size: 50000
Grid Search Time: 0.00s, Training Time: 0.02s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.9826, MAE = 11758.65, MAPE = 1426.74%
q-score: 16.74
Prediction time: 754.4817 μs/sample
I/O: Reads=0.000032, Writes=0.000112
--------------------------------------------------------------------------------



Training with sample size: 10000
Memory usage: 1786.67 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 677.32 KB

Results for yago2, Sample Size: 10000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.9530, MAE = 19346.90, MAPE = 3612.58%
q-score: 40.92
Prediction time: 489.5610 μs/sample
I/O: Reads=0.000032, Writes=0.000101
--------------------------------------------------------------------------------

Training with sample size: 5000
Memory usage: 1786.67 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 340.25 KB

Results for yago2, Sample Size: 5000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.9274, MAE = 24845.03, MAPE = 5351.34%
q-score: 59.99
Prediction time: 435.1939 μs/sample
I/O: Reads=0.000000, Writes=0.000065
--------------------------------------------------------------------------------

Training with sample size: 1000
Memory usage: 1786.67 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 39.58 KB

Results for yago2, Sample Size: 1000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.7613, MAE = 42524.27, MAPE = 12638.47%
q-score: 140.10
Prediction time: 280.9233 μs/sample
I/O: Reads=0.000032, Writes=0.000011
--------------------------------------------------------------------------------
Saving results for yago2...


Memory usage: 1786.67 MB

Processing dataset: powerthingnodesorted
Universe boundaries for powerthingnodesorted: (-177.92741900000001, -77.8453164, 178.47197400000002, 78.2256315)
Loading data from ../large_files/resultsContains/powerthingnodesorted_results.csv


Parsing MBR coordinates...



Basic statistics for powerthingnodesorted dataset:
Max count: 10512575.0
Min count: 0.0
Mean count: 174964.97
Median count: 41.00
Total samples: 2102514

Calculating rectangle densities...
Splitting data into train and test sets...


Training set size: 1682011

Training with sample size: 1682011
Memory usage: 1788.67 MB
Performing grid search for optimal parameters...


Grid search complete in 11803.78s
Best parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 114970.86 KB

Results for powerthingnodesorted, Sample Size: 1682011
Grid Search Time: 11803.78s, Training Time: 1.93s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.9963, MAE = 10245.40, MAPE = 777.38%
q-score: 14.53
Prediction time: 469.0636 μs/sample
I/O: Reads=0.000014, Writes=0.000017
--------------------------------------------------------------------------------
Generating prediction scatter plot...


Generating side-by-side comparison plot...



Training with sample size: 1000000
Memory usage: 1786.93 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 70104.23 KB

Results for powerthingnodesorted, Sample Size: 1000000
Grid Search Time: 0.00s, Training Time: 0.99s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.9949, MAE = 12101.10, MAPE = 967.59%
q-score: 17.78
Prediction time: 924.0420 μs/sample
I/O: Reads=0.000027, Writes=0.000056
--------------------------------------------------------------------------------

Training with sample size: 500000
Memory usage: 1794.89 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 34938.56 KB

Results for powerthingnodesorted, Sample Size: 500000
Grid Search Time: 0.00s, Training Time: 0.39s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.9923, MAE = 14997.00, MAPE = 1306.54%
q-score: 23.55
Prediction time: 816.0680 μs/sample
I/O: Reads=0.000027, Writes=0.000037
--------------------------------------------------------------------------------

Training with sample size: 100000
Memory usage: 1794.89 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 6747.15 KB

Results for powerthingnodesorted, Sample Size: 100000
Grid Search Time: 0.00s, Training Time: 0.06s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.9804, MAE = 25522.33, MAPE = 4229.49%
q-score: 74.16
Prediction time: 771.2600 μs/sample
I/O: Reads=0.000027, Writes=0.000035
--------------------------------------------------------------------------------

Training with sample size: 50000
Memory usage: 1794.89 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 3346.68 KB

Results for powerthingnodesorted, Sample Size: 50000
Grid Search Time: 0.00s, Training Time: 0.02s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.9718, MAE = 31693.10, MAPE = 6436.13%
q-score: 111.95
Prediction time: 724.0337 μs/sample
I/O: Reads=0.000027, Writes=0.000030
--------------------------------------------------------------------------------



Training with sample size: 10000
Memory usage: 1794.89 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 673.19 KB

Results for powerthingnodesorted, Sample Size: 10000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.9159, MAE = 55757.37, MAPE = 14118.51%
q-score: 242.83
Prediction time: 517.9273 μs/sample
I/O: Reads=0.000028, Writes=0.000044
--------------------------------------------------------------------------------

Training with sample size: 5000
Memory usage: 1794.89 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 337.86 KB

Results for powerthingnodesorted, Sample Size: 5000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.8748, MAE = 70494.08, MAPE = 19852.77%
q-score: 341.02
Prediction time: 443.0718 μs/sample
I/O: Reads=0.000014, Writes=0.000030
--------------------------------------------------------------------------------

Training with sample size: 1000
Memory usage: 1794.89 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 39.05 KB

Results for powerthingnodesorted, Sample Size: 1000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 10, 'weights': 'distance'}
Performance: R² = 0.6561, MAE = 112378.87, MAPE = 33959.16%
q-score: 582.17
Prediction time: 266.7445 μs/sample
I/O: Reads=0.000014, Writes=0.000015
--------------------------------------------------------------------------------
Saving results for powerthingnodesorted...


Memory usage: 1794.89 MB

Processing dataset: emergencythingwaysorted
Universe boundaries for emergencythingwaysorted: (-175.221337, -53.7941359, 179.3313189, 78.22019230000001)
Loading data from ../large_files/resultsContains/emergencythingwaysorted_results.csv


Parsing MBR coordinates...



Basic statistics for emergencythingwaysorted dataset:
Max count: 807533.0
Min count: 0.0
Mean count: 13253.75
Median count: 15.00
Total samples: 161514

Calculating rectangle densities...
Splitting data into train and test sets...
Training set size: 129211

Training with sample size: 129211
Memory usage: 1796.88 MB
Performing grid search for optimal parameters...


Grid search complete in 670.88s
Best parameters: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 8894.09 KB

Results for emergencythingwaysorted, Sample Size: 129211
Grid Search Time: 670.88s, Training Time: 0.08s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Performance: R² = 0.9619, MAE = 2495.41, MAPE = 189.71%
q-score: 4.29
Prediction time: 669.9303 μs/sample
I/O: Reads=0.000000, Writes=0.000170
--------------------------------------------------------------------------------
Generating prediction scatter plot...
Generating side-by-side comparison plot...



Training with sample size: 100000
Memory usage: 1795.87 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 6709.68 KB

Results for emergencythingwaysorted, Sample Size: 100000
Grid Search Time: 0.00s, Training Time: 0.06s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Performance: R² = 0.9576, MAE = 2684.03, MAPE = 220.77%
q-score: 4.84
Prediction time: 711.3742 μs/sample
I/O: Reads=0.000176, Writes=0.000731
--------------------------------------------------------------------------------

Training with sample size: 50000
Memory usage: 1795.88 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 3327.08 KB

Results for emergencythingwaysorted, Sample Size: 50000
Grid Search Time: 0.00s, Training Time: 0.02s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Performance: R² = 0.9464, MAE = 3168.98, MAPE = 271.17%
q-score: 5.81
Prediction time: 637.7534 μs/sample
I/O: Reads=0.000000, Writes=0.000570
--------------------------------------------------------------------------------



Training with sample size: 10000
Memory usage: 1795.88 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 669.42 KB

Results for emergencythingwaysorted, Sample Size: 10000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Performance: R² = 0.8885, MAE = 5210.46, MAPE = 561.54%
q-score: 10.47
Prediction time: 411.7407 μs/sample
I/O: Reads=0.000000, Writes=0.000046
--------------------------------------------------------------------------------

Training with sample size: 5000
Memory usage: 1795.88 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 336.07 KB

Results for emergencythingwaysorted, Sample Size: 5000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Performance: R² = 0.8326, MAE = 6525.92, MAPE = 856.58%
q-score: 15.74
Prediction time: 352.1806 μs/sample
I/O: Reads=0.000000, Writes=0.000006
--------------------------------------------------------------------------------

Training with sample size: 1000
Memory usage: 1795.88 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 38.78 KB

Results for emergencythingwaysorted, Sample Size: 1000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Performance: R² = 0.6277, MAE = 9957.38, MAPE = 1833.40%
q-score: 32.43
Prediction time: 207.4622 μs/sample
I/O: Reads=0.000000, Writes=0.000350
--------------------------------------------------------------------------------
Saving results for emergencythingwaysorted...


Memory usage: 1795.88 MB

Processing dataset: historicthingwaysorted
Universe boundaries for historicthingwaysorted: (-179.99526020000002, -85.0036942, 179.99597930000002, 78.06750650000001)
Loading data from ../large_files/resultsContains/historicthingwaysorted_results.csv


Parsing MBR coordinates...



Basic statistics for historicthingwaysorted dataset:
Max count: 1792176.0
Min count: 0.0
Mean count: 29764.89
Median count: 16.00
Total samples: 358439

Calculating rectangle densities...
Splitting data into train and test sets...
Training set size: 286751

Training with sample size: 286751
Memory usage: 1796.89 MB
Performing grid search for optimal parameters...


Grid search complete in 1736.79s
Best parameters: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 19703.56 KB

Results for historicthingwaysorted, Sample Size: 286751
Grid Search Time: 1736.79s, Training Time: 0.20s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Performance: R² = 0.9803, MAE = 3615.24, MAPE = 115.67%
q-score: 3.09
Prediction time: 295.7983 μs/sample
I/O: Reads=0.000000, Writes=0.000013
--------------------------------------------------------------------------------
Generating prediction scatter plot...


Generating side-by-side comparison plot...



Training with sample size: 100000
Memory usage: 1795.89 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 6711.67 KB

Results for historicthingwaysorted, Sample Size: 100000
Grid Search Time: 0.00s, Training Time: 0.05s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Performance: R² = 0.9626, MAE = 5238.34, MAPE = 195.11%
q-score: 4.46
Prediction time: 275.9145 μs/sample
I/O: Reads=0.000080, Writes=0.000010
--------------------------------------------------------------------------------

Training with sample size: 50000
Memory usage: 1795.90 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 3329.32 KB

Results for historicthingwaysorted, Sample Size: 50000
Grid Search Time: 0.00s, Training Time: 0.02s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Performance: R² = 0.9499, MAE = 6388.50, MAPE = 241.29%
q-score: 5.26
Prediction time: 252.4569 μs/sample
I/O: Reads=0.000000, Writes=0.000584
--------------------------------------------------------------------------------



Training with sample size: 10000
Memory usage: 1795.90 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 669.79 KB

Results for historicthingwaysorted, Sample Size: 10000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Performance: R² = 0.8852, MAE = 10507.29, MAPE = 492.21%
q-score: 9.40
Prediction time: 165.6234 μs/sample
I/O: Reads=0.000000, Writes=0.000006
--------------------------------------------------------------------------------

Training with sample size: 5000
Memory usage: 1795.90 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 336.11 KB

Results for historicthingwaysorted, Sample Size: 5000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Performance: R² = 0.8299, MAE = 13301.86, MAPE = 653.64%
q-score: 12.10
Prediction time: 138.1393 μs/sample
I/O: Reads=0.000000, Writes=0.000004
--------------------------------------------------------------------------------

Training with sample size: 1000
Memory usage: 1795.90 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 38.76 KB

Results for historicthingwaysorted, Sample Size: 1000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Performance: R² = 0.6036, MAE = 22211.12, MAPE = 2328.74%
q-score: 39.35
Prediction time: 98.4103 μs/sample
I/O: Reads=0.000000, Writes=0.000152
--------------------------------------------------------------------------------
Saving results for historicthingwaysorted...


Memory usage: 1795.90 MB

Processing dataset: aerowaythingwaysorted
Universe boundaries for aerowaythingwaysorted: (-179.88131460000002, -79.7773063, 179.426138, 85.05258450000001)
Loading data from ../large_files/resultsContains/aerowaythingwaysorted_results.csv


Parsing MBR coordinates...



Basic statistics for aerowaythingwaysorted dataset:
Max count: 1841551.0
Min count: 0.0
Mean count: 32184.18
Median count: 226.00
Total samples: 368365

Calculating rectangle densities...
Splitting data into train and test sets...
Training set size: 294692

Training with sample size: 294692
Memory usage: 1798.88 MB
Performing grid search for optimal parameters...


Grid search complete in 1877.78s
Best parameters: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 20326.62 KB

Results for aerowaythingwaysorted, Sample Size: 294692
Grid Search Time: 1877.78s, Training Time: 0.20s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Performance: R² = 0.9913, MAE = 3075.69, MAPE = 218.30%
q-score: 4.26
Prediction time: 296.0994 μs/sample
I/O: Reads=0.000000, Writes=0.000012
--------------------------------------------------------------------------------
Generating prediction scatter plot...


Generating side-by-side comparison plot...



Training with sample size: 100000
Memory usage: 1797.90 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 6749.24 KB

Results for aerowaythingwaysorted, Sample Size: 100000
Grid Search Time: 0.00s, Training Time: 0.06s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Performance: R² = 0.9843, MAE = 4322.98, MAPE = 330.82%
q-score: 6.01
Prediction time: 289.6342 μs/sample
I/O: Reads=0.000077, Writes=0.000657
--------------------------------------------------------------------------------

Training with sample size: 50000
Memory usage: 1797.91 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 3348.22 KB

Results for aerowaythingwaysorted, Sample Size: 50000
Grid Search Time: 0.00s, Training Time: 0.03s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Performance: R² = 0.9757, MAE = 5381.94, MAPE = 416.40%
q-score: 7.16
Prediction time: 253.6450 μs/sample
I/O: Reads=0.000000, Writes=0.000029
--------------------------------------------------------------------------------



Training with sample size: 10000
Memory usage: 1797.91 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 673.65 KB

Results for aerowaythingwaysorted, Sample Size: 10000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Performance: R² = 0.9474, MAE = 8642.04, MAPE = 956.67%
q-score: 15.71
Prediction time: 170.3300 μs/sample
I/O: Reads=0.000000, Writes=0.000005
--------------------------------------------------------------------------------

Training with sample size: 5000
Memory usage: 1797.91 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 338.08 KB

Results for aerowaythingwaysorted, Sample Size: 5000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Performance: R² = 0.9251, MAE = 10560.44, MAPE = 1148.64%
q-score: 18.28
Prediction time: 141.9709 μs/sample
I/O: Reads=0.000000, Writes=0.000007
--------------------------------------------------------------------------------

Training with sample size: 1000
Memory usage: 1797.91 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 39.12 KB

Results for aerowaythingwaysorted, Sample Size: 1000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Performance: R² = 0.7985, MAE = 16718.79, MAPE = 2107.03%
q-score: 32.05
Prediction time: 83.7863 μs/sample
I/O: Reads=0.000000, Writes=0.000011
--------------------------------------------------------------------------------
Saving results for aerowaythingwaysorted...


Memory usage: 1797.91 MB

Processing dataset: arealm
Universe boundaries for arealm: (-179.147236, -14.548699, 179.77847, 71.359879)
Loading data from ../large_files/resultsContains/arealm_results.csv
Parsing MBR coordinates...



Basic statistics for arealm dataset:
Max count: 129097.0
Min count: 0.0
Mean count: 2282.26
Median count: 0.00
Total samples: 25833

Calculating rectangle densities...
Splitting data into train and test sets...
Training set size: 20666

Training with sample size: 20666
Memory usage: 1797.02 MB
Performing grid search for optimal parameters...


Grid search complete in 61.91s
Best parameters: {'algorithm': 'auto', 'n_neighbors': 5, 'weights': 'distance'}
Training KNN model...
Making predictions...
Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 1360.55 KB

Results for arealm, Sample Size: 20666
Grid Search Time: 61.91s, Training Time: 0.01s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 5, 'weights': 'distance'}
Performance: R² = 0.9227, MAE = 604.02, MAPE = 340.23%
q-score: 22.15
Prediction time: 137.9439 μs/sample
I/O: Reads=0.000000, Writes=0.005051
--------------------------------------------------------------------------------
Generating prediction scatter plot...


Generating side-by-side comparison plot...



Training with sample size: 10000
Memory usage: 1797.01 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 5, 'weights': 'distance'}
Training KNN model...
Making predictions...
Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 659.63 KB

Results for arealm, Sample Size: 10000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 5, 'weights': 'distance'}
Performance: R² = 0.8975, MAE = 755.80, MAPE = 213.83%
q-score: 11.16
Prediction time: 137.4708 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------

Training with sample size: 5000
Memory usage: 1797.02 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 5, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 329.99 KB

Results for arealm, Sample Size: 5000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 5, 'weights': 'distance'}
Performance: R² = 0.8804, MAE = 866.50, MAPE = 661.67%
q-score: 41.20
Prediction time: 121.6070 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------

Training with sample size: 1000
Memory usage: 1797.02 MB
Using best parameters from max scale: {'algorithm': 'auto', 'n_neighbors': 5, 'weights': 'distance'}
Training KNN model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 37.75 KB

Results for arealm, Sample Size: 1000
Grid Search Time: 0.00s, Training Time: 0.00s
KNN Parameters: {'algorithm': 'auto', 'n_neighbors': 5, 'weights': 'distance'}
Performance: R² = 0.6014, MAE = 1641.08, MAPE = 2294.12%
q-score: 150.79
Prediction time: 65.7581 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------
Saving results for arealm...


Saving combined results...
All processing completed and results saved.
Memory usage: 1797.02 MB
