In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from time import process_time
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error as MAE
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor
import psutil
import os
import re
import multiprocessing
import joblib
import gc
from tqdm.notebook import tqdm  # Progress bars for Jupyter

# Configuration options
VISUALIZE_OBJECTS = False  # Set to True if you want to visualize objects
SAVE_INTERMEDIATE_MODELS = True  # Set to False to save only final models

# Flag to control whether to use multiple scales or only the maximum scale
use_multiple_scales = True  # Set to False to use only maximum scale, True for all scales

# Get available CPU cores and set appropriate parallelism
n_cores = multiprocessing.cpu_count()
n_jobs = max(1, n_cores - 1)  # Leave one core free for system processes
print(f"Using {n_jobs} of {n_cores} available CPU cores")

def monitor_memory():
    """Print current memory usage of the process"""
    process = psutil.Process()
    memory_mb = process.memory_info().rss / (1024 * 1024)
    print(f"Memory usage: {memory_mb:.2f} MB")

def MAPE(actual_values, predicted_values):
    """Calculate Mean Absolute Percentage Error with special handling for zeros"""
    # Vectorized implementation
    actual_flat = actual_values.flatten()
    pred_flat = predicted_values.flatten()
    
    # Create mask for non-zero actual values
    non_zero_mask = actual_flat != 0
    zero_mask = ~non_zero_mask
    
    # Calculate MAPE for non-zero elements
    mape_sum = 0
    count = len(actual_flat)
    
    if np.any(non_zero_mask):
        mape_sum += np.sum(np.abs((actual_flat[non_zero_mask] - pred_flat[non_zero_mask]) / actual_flat[non_zero_mask]))
    
    if np.any(zero_mask):
        mape_sum += np.sum(np.abs(actual_flat[zero_mask] - pred_flat[zero_mask]) / 100)
    
    return mape_sum / count

# Load spatial statistics to get universe boundaries for each dataset
print("Loading spatial statistics...")
spatial_stats = pd.read_csv('../spatial_statistics.csv')

# Directory containing the datasets
data_dir = '../large_files/resultsDistance/'  # Changed to distance folder

# Parse bounding box information
def parse_bbox(bbox_str):
    # Extract coordinates from BOX string using regex
    pattern = r"BOX\(([-\d\.]+) ([-\d\.]+),([-\d\.]+) ([-\d\.]+)\)"
    match = re.search(pattern, bbox_str)
    if match:
        xmin = float(match.group(1))
        ymin = float(match.group(2))
        xmax = float(match.group(3))
        ymax = float(match.group(4))
        return xmin, ymin, xmax, ymax
    return -180, -90, 180, 90  # Default if parsing fails

# Parse object MBR from format like "(x1, y1, x2, y2)"
def parse_mbr(mbr_str):
    coords = mbr_str.strip('"()').split(', ')
    return [float(coord) for coord in coords]

# Extract universe boundaries for each dataset
universe_boundaries = {}
for _, row in spatial_stats.iterrows():
    table_name = row['Table Name']
    bbox = parse_bbox(row['Universe Limits (Bounding Box)'])
    universe_boundaries[table_name] = bbox

# Get list of all CSV files in the directory
print("Finding dataset files...")
csv_files = [f for f in os.listdir(data_dir) if f.endswith('.csv')]
print(f"Found {len(csv_files)} datasets to process")

# Define the scales of learning
scales = [1000, 5000, 10000, 50000, 100000, 500000, 1000000]

# Create necessary directories
os.makedirs('../large_files/LearnedModels/distance/RF', exist_ok=True)
os.makedirs('../large_files/LearnedModels/distance/RF/visualizations', exist_ok=True)
os.makedirs('../large_files/LearnedModels/distance/RF/results', exist_ok=True)

# Lists to store all results
all_results_list = []

# Process each dataset
for csv_file in tqdm(csv_files, desc="Processing datasets"):
    # Force garbage collection at the start of each dataset
    gc.collect()
    monitor_memory()
    
    # Extract dataset name (remove "_results.csv")
    dataset_name = csv_file.replace('_results.csv', '')
    
    print(f"\nProcessing dataset: {dataset_name}")
    
    # Get universe boundaries for this dataset
    if dataset_name in universe_boundaries:
        univ_xmin, univ_ymin, univ_xmax, univ_ymax = universe_boundaries[dataset_name]
    else:
        # Default values if dataset not found in spatial stats
        univ_xmin, univ_ymin, univ_xmax, univ_ymax = -180, -90, 180, 90
    
    Surface_univ = (univ_xmax - univ_xmin) * (univ_ymax - univ_ymin)
    print(f"Universe boundaries for {dataset_name}: ({univ_xmin}, {univ_ymin}, {univ_xmax}, {univ_ymax})")
    
    # Load dataset - loading required columns for distance dataset
    data_path = os.path.join(data_dir, csv_file)
    print(f"Loading data from {data_path}")
    data = pd.read_csv(data_path, usecols=['Object MBR', 'Distance Min', 'Distance Max', 'Count MBR'])
    
    # Extract object information - ONLY using MBR data
    print("Parsing object coordinates...")
    Objects_MBR = np.array([parse_mbr(mbr) for mbr in data['Object MBR']])
    
    # Extract distance information
    Distance_Min = data['Distance Min'].values.reshape(-1, 1)
    Distance_Max = data['Distance Max'].values.reshape(-1, 1)
    
    # Target variable: Count MBR
    Y = data[['Count MBR']].values
    
    # Free up memory
    del data
    gc.collect()
    
    # Calculate basic statistics
    max_count = float(np.max(Y))
    min_count = float(np.min(Y))
    mean_count = float(np.mean(Y))
    median_count = float(np.median(Y))
    total_samples = len(Y)

    # Display basic statistics for the dataset
    print(f"\nBasic statistics for {dataset_name} dataset:")
    print(f"Max count: {max_count}")
    print(f"Min count: {min_count}")
    print(f"Mean count: {mean_count:.2f}")
    print(f"Median count: {median_count:.2f}")
    print(f"Total samples: {total_samples}\n")

    # Prepare features using Object MBR instead of True Shape
    print("Calculating object features...")

    # Extract MBR coordinates
    x1 = Objects_MBR[:, 0].reshape(-1, 1)  # Left
    y1 = Objects_MBR[:, 1].reshape(-1, 1)  # Bottom
    x2 = Objects_MBR[:, 2].reshape(-1, 1)  # Right
    y2 = Objects_MBR[:, 3].reshape(-1, 1)  # Top

    # Calculate MBR center points
    obj_x = (x1 + x2) / 2  # Center X
    obj_y = (y1 + y2) / 2  # Center Y

    # Calculate MBR dimensions
    mbr_width = (x2 - x1)
    mbr_height = (y2 - y1)
    mbr_area = mbr_width * mbr_height

    # Normalized coordinates of MBR center (0-1 range within universe)
    norm_x = (obj_x - univ_xmin) / (univ_xmax - univ_xmin) if (univ_xmax - univ_xmin) != 0 else 0.5
    norm_y = (obj_y - univ_ymin) / (univ_ymax - univ_ymin) if (univ_ymax - univ_ymin) != 0 else 0.5

    # Distance range
    distance_range = Distance_Max - Distance_Min

    # Distance ratio (max/min)
    # Avoid division by zero
    min_non_zero = np.where(Distance_Min == 0, 0.0001, Distance_Min)
    distance_ratio = Distance_Max / min_non_zero
    
    # Combine all features
    X = np.hstack((
        obj_x,           # X coordinate of MBR center
        obj_y,           # Y coordinate of MBR center
        mbr_width,       # Width of MBR
        mbr_height,      # Height of MBR
        mbr_area,        # Area of MBR
        norm_x,          # Normalized X position (0-1)
        norm_y,          # Normalized Y position (0-1)
        Distance_Min,    # Minimum distance
        Distance_Max,    # Maximum distance
        distance_range,  # Range of distance
        distance_ratio.reshape(-1, 1)  # Ratio of max/min distance
    ))
    
    # Split the data into 80% train and 20% test
    print("Splitting data into train and test sets...")
    X_train, X_test_all, y_train, y_test_all = train_test_split(X, Y, test_size=0.2, random_state=3)
    
    # Visualize the first 100 objects using MBRs
    if VISUALIZE_OBJECTS:
        print("Visualizing objects sample...")
        plt.figure(figsize=(10, 8))
        
        # Only visualize a sample to save time
        sample_size = min(100, len(Objects_MBR))
        
        # Plot universe boundaries
        plt.plot([univ_xmin, univ_xmax, univ_xmax, univ_xmin, univ_xmin], 
                 [univ_ymin, univ_ymin, univ_ymax, univ_ymax, univ_ymin], 
                 'k-', linewidth=1, alpha=0.5)
        
        # Plot MBRs
        for i in range(sample_size):
            x1, y1, x2, y2 = Objects_MBR[i]
            rect = patches.Rectangle((x1, y1), x2-x1, y2-y1, 
                                    linewidth=1, edgecolor='b', facecolor='none', 
                                    alpha=min(1.0, 0.3 + float(Distance_Min[i])/20))
            plt.gca().add_patch(rect)
            
        plt.colorbar(plt.cm.ScalarMappable(cmap='viridis'), label='Minimum Distance')
        plt.xlim(univ_xmin-20, univ_xmax+20)
        plt.ylim(univ_ymin-10, univ_ymax+10)
        plt.title(f"Sample MBRs from {dataset_name}")
        plt.savefig(f"../large_files/LearnedModels/distance/RF/visualizations/{dataset_name}_objects_mbr.png", dpi=150)
        plt.close()
    
    # Adjust scales to the dataset size
    max_size = len(X_train)
    print(f"Training set size: {max_size}")

    if use_multiple_scales:
        # Use multiple scales as before
        adjusted_scales = [s for s in scales if s <= max_size]
        
        # Add intermediate 1 million increments for large datasets
        if max_size > 1000000:
            million_increments = list(range(2000000, max_size, 1000000))
            adjusted_scales.extend(million_increments)
            
        # Add the actual max size if it's not already in the list
        if max_size not in adjusted_scales:
            adjusted_scales.append(max_size)
            
        # Sort the scales to ensure they're in ascending order
        adjusted_scales.sort()
    else:
        # Use only the maximum scale
        adjusted_scales = [max_size]

    # List to store dataset-specific results
    dataset_results_list = []

    # Store best parameters from max scale training to reuse
    best_params = None
    
    # Process scales in reversed order (largest first)
    for sample_size in reversed(adjusted_scales):
        print(f"\nTraining with sample size: {sample_size}")
        monitor_memory()
        
        # Create training subset
        X_train_sample = X_train[:sample_size, :]
        y_train_sample = y_train[:sample_size]
        
        # Random Forest Regressor parameters - optimized for performance
        params_rf = {
            "n_estimators": [50, 100, 200],
            "max_depth": [10, 20, 30, None],
            "min_samples_split": [2, 5, 10]
        }
        
        # For very large datasets, use smaller parameter grid
        if sample_size > 100000:
            params_rf = {
                "n_estimators": [50],
                "max_depth": [None],
                "min_samples_split": [5]
            }
            
        # Only do GridSearch for the max scale
        if sample_size == max_size or best_params is None:
            print("Performing grid search for optimal parameters...")
            # Use a smaller max_features value to reduce memory usage
            rf = RandomForestRegressor(random_state=3, max_features='sqrt', n_jobs=n_jobs)
            rf_cv = GridSearchCV(rf, params_rf, cv=3, n_jobs=1, verbose=1)  # Use n_jobs=1 here as RF already uses parallelism
            
            # Time the grid search
            t1_start = process_time()
            rf_cv.fit(X_train_sample, y_train_sample.ravel())  # Use ravel for 1D array
            t1_stop = process_time()
            grid_search_time = t1_stop - t1_start
            
            # Store best parameters for reuse
            best_params = rf_cv.best_params_
            print(f"Grid search complete in {grid_search_time:.2f}s")
            print(f"Best parameters: {best_params}")
        else:
            # Skip grid search for smaller scales, use params from max scale
            rf_cv = None
            grid_search_time = 0
            print(f"Using best parameters from max scale: {best_params}")
        
        # Train the model with best parameters
        print("Training random forest model...")
        rf = RandomForestRegressor(random_state=3, **best_params, n_jobs=n_jobs)
        t2_start = process_time()
        rf.fit(X_train_sample, y_train_sample.ravel())  # Use ravel for 1D array
        t2_stop = process_time()
        training_time = t2_stop - t2_start
        
        # Make predictions
        print("Making predictions...")
        y_pred = rf.predict(X_test_all).reshape(-1, 1)  # Reshape to match y_test_all format
        
        # Calculate metrics
        r2_score = rf.score(X_test_all, y_test_all.ravel())
        mae_value = MAE(y_test_all, y_pred)
        mape_value = MAPE(y_test_all, y_pred)
        
        # Calculate q-score - vectorized version
        print("Calculating performance metrics...")
        
        # Vectorized q-score calculation
        y_true_flat = y_test_all.flatten()
        y_pred_flat = y_pred.flatten() if y_pred.ndim > 1 else y_pred
        
        # Find indices where both values are non-zero
        valid_indices = (y_true_flat != 0) & (y_pred_flat != 0)
        
        if np.any(valid_indices):
            ratios = np.maximum(
                y_pred_flat[valid_indices] / y_true_flat[valid_indices],
                y_true_flat[valid_indices] / y_pred_flat[valid_indices]
            )
            q_score_mean = np.mean(ratios)
        else:
            q_score_mean = 0
        
        # Time prediction performance (10 iterations)
        print("Measuring prediction performance...")
        total_duration = 0
        total_read = 0
        total_write = 0
        
        for _ in range(10):
            io_before = psutil.disk_io_counters()
            t3_start = process_time()
            preds = rf.predict(X_test_all)
            preds = np.maximum(0, preds)  # Include this operation in timing
            t3_stop = process_time()
            io_after = psutil.disk_io_counters()
            
            total_duration += (t3_stop - t3_start)
            total_read += io_after.read_count - io_before.read_count
            total_write += io_after.write_count - io_before.write_count
        
        avg_pred_time_microsec = (total_duration / 10) / len(y_pred) * 1000000
        avg_reads = total_read / 10 / len(y_pred)
        avg_writes = total_write / 10 / len(y_pred)
        
        # Save the model using joblib instead of pickle for better efficiency
        if SAVE_INTERMEDIATE_MODELS or sample_size == max_size:
            print("Saving model...")
            filename = f'../large_files/LearnedModels/distance/RF/{dataset_name}_rf_{sample_size}_{training_time:.2f}s_{mape_value:.2%}_{mae_value:.2f}.joblib'
            joblib.dump(rf, filename, compress=3)
            # Get model file size in KB
            model_size_kb = os.path.getsize(filename) / 1024
            print(f"Model size: {model_size_kb:.2f} KB")
        else:
            model_size_kb = 0  # Set to 0 if model wasn't saved
        
        # Print results
        print(f"\nResults for {dataset_name}, Sample Size: {sample_size}")
        print(f"Grid Search Time: {grid_search_time:.2f}s, Training Time: {training_time:.2f}s")
        print(f"Random Forest Parameters: {best_params}")
        print(f"Performance: R² = {r2_score:.4f}, MAE = {mae_value:.2f}, MAPE = {mape_value:.2%}")
        print(f"q-score: {q_score_mean:.2f}")
        print(f"Prediction time: {avg_pred_time_microsec:.4f} μs/sample")
        print(f"I/O: Reads={avg_reads:.6f}, Writes={avg_writes:.6f}")
        print("-" * 80)
        
        # Plot actual vs predicted only for the maximum scale
        if sample_size == adjusted_scales[-1]:  # Check if this is the maximum scale
            print("Generating prediction scatter plot...")
            plt.figure(figsize=(10, 8))
            plt.scatter(y_test_all, y_pred, s=0.5, alpha=0.5)
            plt.xlabel('True Values')
            plt.ylabel('Predictions')
            plt.title(f"{dataset_name} - Sample Size: {sample_size} (Maximum)")
            plt.grid(True, alpha=0.3)
            
            # Add diagonal line for perfect predictions
            max_val = max(np.max(y_test_all), np.max(y_pred))
            plt.plot([0, max_val], [0, max_val], 'r--', alpha=0.5)
            
            plt.savefig(f"../large_files/LearnedModels/distance/RF/visualizations/{dataset_name}_{sample_size}_prediction.png", dpi=150)
            plt.close()  # Close to free memory
            
            # Feature importance plot
            if hasattr(rf, 'feature_importances_'):
                print("Generating feature importance plot...")
                plt.figure(figsize=(12, 6))
                feature_names = [
                    'Center X', 'Center Y', 'Width', 'Height', 'Area',
                    'Norm X', 'Norm Y', 'Min Dist', 'Max Dist', 
                    'Dist Range', 'Dist Ratio'
                ]
                importances = rf.feature_importances_
                indices = np.argsort(importances)[::-1]
                
                plt.bar(range(X.shape[1]), importances[indices])
                plt.xticks(range(X.shape[1]), [feature_names[i] for i in indices], rotation=45)
                plt.title(f'Feature Importances for {dataset_name}')
                plt.tight_layout()
                plt.savefig(f"../large_files/LearnedModels/distance/RF/visualizations/{dataset_name}_feature_importance.png", dpi=150)
                plt.close()
            
            # Create a scatter plot comparing predicted vs real values for first 100 objects
            print("Generating side-by-side comparison plot...")
            
            # Get predictions for first 100 test samples
            sample_indices = range(min(100, len(X_test_all)))
            X_sample = X_test_all[sample_indices]
            y_sample_true = y_test_all[sample_indices].flatten()
            y_sample_pred = rf.predict(X_sample)
            # Ensure non-negative predictions
            y_sample_pred = np.maximum(0, y_sample_pred)
            
            plt.figure(figsize=(20, 10))
            plt.scatter(range(len(sample_indices)), y_sample_pred, c='blue', 
                        label='Predicted number of objects (Random Forest)', alpha=0.7, s=100)
            plt.scatter(range(len(sample_indices)), y_sample_true, c='green', 
                        label='Real number of objects', alpha=0.7, s=100)
            
            plt.title(f'{dataset_name} - First {len(sample_indices)} Objects: Predicted vs Real Values', fontsize=16)
            plt.xlabel('Object Index', fontsize=14)
            plt.ylabel('Number of objects within distance', fontsize=14)
            plt.legend(fontsize=12)
            plt.grid(True, alpha=0.3)
            plt.tight_layout()
            
            # Save the plot
            plt.savefig(f"../large_files/LearnedModels/distance/RF/visualizations/{dataset_name}_comparison_plot.png", dpi=150)
            plt.close()
        
        # Store results in list (more efficient than DataFrame concat)
        result_row = {
            'Dataset': dataset_name,
            'Sample_Size': sample_size,
            'Training_Time': training_time,
            'Best_Params': str(best_params),
            'R2_Score': r2_score,
            'MAE': mae_value,
            'MAPE': float(mape_value),
            'Q_Score': q_score_mean,
            'Pred_Time_Microseconds': avg_pred_time_microsec,
            'IO_Reads': avg_reads,
            'IO_Writes': avg_writes,
            'Model_Size_KB': model_size_kb,
            'Max_Count': max_count,
            'Min_Count': min_count,
            'Mean_Count': mean_count,
            'Median_Count': median_count,
            'Total_Samples': total_samples
        }
        
        dataset_results_list.append(result_row)
        all_results_list.append(result_row)
        
        # Clean up to free memory
        if sample_size != max_size:  # Don't delete for max size as we might need it
            del X_train_sample, y_train_sample, rf
            gc.collect()
    
    # Save results for this dataset
    print(f"Saving results for {dataset_name}...")
    dataset_results = pd.DataFrame(dataset_results_list)
    dataset_results.to_csv(f'../large_files/LearnedModels/distance/RF/results/{dataset_name}_results.csv', index=False)
    
    # Clear memory before next dataset
    del X_train, X_test_all, y_train, y_test_all, Objects_MBR, Distance_Min, Distance_Max
    gc.collect()
    
# Save all results
print("Saving combined results...")
all_results = pd.DataFrame(all_results_list)
all_results.to_csv('../large_files/LearnedModels/distance/RF/all_results.csv', index=False)

print("All processing completed and results saved.")
monitor_memory()

Using 29 of 30 available CPU cores
Loading spatial statistics...
Finding dataset files...
Found 14 datasets to process


Processing datasets:   0%|          | 0/14 [00:00<?, ?it/s]

Memory usage: 249.42 MB

Processing dataset: craftwaysorted
Universe boundaries for craftwaysorted: (-175.2000514, -65.2458821, 175.3397782, 69.6673353)
Loading data from ../large_files/resultsDistance/craftwaysorted_results.csv
Parsing object coordinates...



Basic statistics for craftwaysorted dataset:
Max count: 100369.0
Min count: 0.0
Mean count: 11615.73
Median count: 3842.00
Total samples: 21822

Calculating object features...
Splitting data into train and test sets...
Training set size: 17457

Training with sample size: 17457
Memory usage: 258.79 MB
Performing grid search for optimal parameters...
Fitting 3 folds for each of 36 candidates, totalling 108 fits


Grid search complete in 633.60s
Best parameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 200}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 63614.89 KB

Results for craftwaysorted, Sample Size: 17457
Grid Search Time: 633.60s, Training Time: 53.72s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 200}
Performance: R² = 0.9199, MAE = 1787.10, MAPE = 182.73%
q-score: 2.88
Prediction time: 189.0582 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------
Generating prediction scatter plot...


Generating feature importance plot...


Generating side-by-side comparison plot...



Training with sample size: 10000
Memory usage: 969.07 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 200}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 36830.16 KB

Results for craftwaysorted, Sample Size: 10000
Grid Search Time: 0.00s, Training Time: 26.85s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 200}
Performance: R² = 0.8992, MAE = 2088.24, MAPE = 187.63%
q-score: 2.94
Prediction time: 166.6527 μs/sample
I/O: Reads=0.000000, Writes=0.000092
--------------------------------------------------------------------------------

Training with sample size: 5000
Memory usage: 523.79 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 200}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 18637.83 KB

Results for craftwaysorted, Sample Size: 5000
Grid Search Time: 0.00s, Training Time: 13.83s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 200}
Performance: R² = 0.8678, MAE = 2565.39, MAPE = 296.25%
q-score: 4.04
Prediction time: 142.7261 μs/sample
I/O: Reads=0.000000, Writes=0.000183
--------------------------------------------------------------------------------

Training with sample size: 1000
Memory usage: 525.95 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 200}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 3876.04 KB

Results for craftwaysorted, Sample Size: 1000
Grid Search Time: 0.00s, Training Time: 3.03s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 200}
Performance: R² = 0.7665, MAE = 4096.17, MAPE = 620.62%
q-score: 7.32
Prediction time: 101.2341 μs/sample
I/O: Reads=0.000000, Writes=0.000115
--------------------------------------------------------------------------------
Saving results for craftwaysorted...


Memory usage: 525.98 MB

Processing dataset: powerthingwaysorted
Universe boundaries for powerthingwaysorted: (-179.5002188, -75.1012051, 178.4574038, 82.5247908)
Loading data from ../large_files/resultsDistance/powerthingwaysorted_results.csv


Parsing object coordinates...



Basic statistics for powerthingwaysorted dataset:
Max count: 13577236.0
Min count: 0.0
Mean count: 885963.11
Median count: 467788.00
Total samples: 2717289

Calculating object features...


Splitting data into train and test sets...


Training set size: 2173831

Training with sample size: 2173831
Memory usage: 1381.24 MB
Performing grid search for optimal parameters...
Fitting 3 folds for each of 1 candidates, totalling 3 fits


Grid search complete in 2563.49s
Best parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 1092076.78 KB

Results for powerthingwaysorted, Sample Size: 2173831
Grid Search Time: 2563.49s, Training Time: 2776.33s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9965, MAE = 27360.36, MAPE = 43.35%
q-score: 1.38
Prediction time: 56.5407 μs/sample
I/O: Reads=0.000000, Writes=0.000002
--------------------------------------------------------------------------------
Generating prediction scatter plot...


Generating feature importance plot...


Generating side-by-side comparison plot...



Training with sample size: 2000000
Memory usage: 11471.75 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 1005352.90 KB

Results for powerthingwaysorted, Sample Size: 2000000
Grid Search Time: 0.00s, Training Time: 2661.89s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9963, MAE = 28236.19, MAPE = 47.60%
q-score: 1.42
Prediction time: 55.5024 μs/sample
I/O: Reads=0.000000, Writes=0.000002
--------------------------------------------------------------------------------

Training with sample size: 1000000
Memory usage: 1955.23 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 505564.41 KB

Results for powerthingwaysorted, Sample Size: 1000000
Grid Search Time: 0.00s, Training Time: 1184.68s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9940, MAE = 35596.87, MAPE = 76.97%
q-score: 1.67
Prediction time: 47.0288 μs/sample
I/O: Reads=0.000000, Writes=0.000003
--------------------------------------------------------------------------------

Training with sample size: 500000
Memory usage: 2957.98 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 254323.41 KB

Results for powerthingwaysorted, Sample Size: 500000
Grid Search Time: 0.00s, Training Time: 545.21s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9905, MAE = 45032.54, MAPE = 104.88%
q-score: 1.93
Prediction time: 37.7854 μs/sample
I/O: Reads=0.000000, Writes=0.000002
--------------------------------------------------------------------------------

Training with sample size: 100000
Memory usage: 3096.95 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 51656.26 KB

Results for powerthingwaysorted, Sample Size: 100000
Grid Search Time: 0.00s, Training Time: 74.26s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9730, MAE = 76966.73, MAPE = 181.04%
q-score: 2.59
Prediction time: 17.3165 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------

Training with sample size: 50000
Memory usage: 3096.95 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 26013.98 KB

Results for powerthingwaysorted, Sample Size: 50000
Grid Search Time: 0.00s, Training Time: 33.34s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9615, MAE = 95036.39, MAPE = 236.65%
q-score: 3.07
Prediction time: 11.7265 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------

Training with sample size: 10000
Memory usage: 3096.95 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 5332.84 KB

Results for powerthingwaysorted, Sample Size: 10000
Grid Search Time: 0.00s, Training Time: 6.54s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9109, MAE = 157729.59, MAPE = 525.59%
q-score: 5.84
Prediction time: 8.2260 μs/sample
I/O: Reads=0.000000, Writes=0.001064
--------------------------------------------------------------------------------

Training with sample size: 5000
Memory usage: 3098.95 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 2705.20 KB

Results for powerthingwaysorted, Sample Size: 5000
Grid Search Time: 0.00s, Training Time: 2.73s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.8775, MAE = 190214.57, MAPE = 771.05%
q-score: 8.13
Prediction time: 7.1631 μs/sample
I/O: Reads=0.000000, Writes=0.000001
--------------------------------------------------------------------------------



Training with sample size: 1000
Memory usage: 3098.95 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 564.12 KB

Results for powerthingwaysorted, Sample Size: 1000
Grid Search Time: 0.00s, Training Time: 0.60s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.7672, MAE = 279224.37, MAPE = 1549.40%
q-score: 15.51
Prediction time: 4.7558 μs/sample
I/O: Reads=0.000000, Writes=0.000002
--------------------------------------------------------------------------------
Saving results for powerthingwaysorted...


Memory usage: 2829.44 MB

Processing dataset: barrierthingwaysorted
Universe boundaries for barrierthingwaysorted: (-179.7595238, -70.776382, 179.19591350000002, 78.2501675)
Loading data from ../large_files/resultsDistance/barrierthingwaysorted_results.csv


Parsing object coordinates...



Basic statistics for barrierthingwaysorted dataset:
Max count: 22854431.0
Min count: 0.0
Mean count: 2474469.83
Median count: 1209323.00
Total samples: 4581670

Calculating object features...


Splitting data into train and test sets...


Training set size: 3665336

Training with sample size: 3665336
Memory usage: 3659.36 MB
Performing grid search for optimal parameters...
Fitting 3 folds for each of 1 candidates, totalling 3 fits


Grid search complete in 4747.31s
Best parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 1872693.45 KB

Results for barrierthingwaysorted, Sample Size: 3665336
Grid Search Time: 4747.31s, Training Time: 5116.37s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9973, MAE = 61976.78, MAPE = 65.46%
q-score: 1.61
Prediction time: 65.3166 μs/sample
I/O: Reads=0.000000, Writes=0.000004
--------------------------------------------------------------------------------
Generating prediction scatter plot...


Generating feature importance plot...


Generating side-by-side comparison plot...



Training with sample size: 3000000
Memory usage: 18718.13 MB


Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 1535317.48 KB

Results for barrierthingwaysorted, Sample Size: 3000000
Grid Search Time: 0.00s, Training Time: 4365.03s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9970, MAE = 65802.97, MAPE = 60.56%
q-score: 1.56
Prediction time: 61.8785 μs/sample
I/O: Reads=0.000006, Writes=0.000003
--------------------------------------------------------------------------------

Training with sample size: 2000000
Memory usage: 5348.31 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 1027180.24 KB

Results for barrierthingwaysorted, Sample Size: 2000000
Grid Search Time: 0.00s, Training Time: 2571.93s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9960, MAE = 75273.63, MAPE = 75.46%
q-score: 1.70
Prediction time: 56.2757 μs/sample
I/O: Reads=0.000000, Writes=0.000003
--------------------------------------------------------------------------------

Training with sample size: 1000000
Memory usage: 4937.55 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 516582.80 KB

Results for barrierthingwaysorted, Sample Size: 1000000
Grid Search Time: 0.00s, Training Time: 1199.79s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9939, MAE = 93956.13, MAPE = 111.98%
q-score: 2.05
Prediction time: 47.5184 μs/sample
I/O: Reads=0.000000, Writes=0.000002
--------------------------------------------------------------------------------

Training with sample size: 500000
Memory usage: 5374.32 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 259651.50 KB

Results for barrierthingwaysorted, Sample Size: 500000
Grid Search Time: 0.00s, Training Time: 534.74s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9903, MAE = 118364.53, MAPE = 173.16%
q-score: 2.61
Prediction time: 38.1970 μs/sample
I/O: Reads=0.000000, Writes=0.000002
--------------------------------------------------------------------------------

Training with sample size: 100000
Memory usage: 5421.30 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 52724.78 KB

Results for barrierthingwaysorted, Sample Size: 100000
Grid Search Time: 0.00s, Training Time: 80.77s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9714, MAE = 205032.86, MAPE = 395.60%
q-score: 4.57
Prediction time: 17.5143 μs/sample
I/O: Reads=0.000000, Writes=0.000001
--------------------------------------------------------------------------------

Training with sample size: 50000
Memory usage: 5421.30 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 26519.17 KB

Results for barrierthingwaysorted, Sample Size: 50000
Grid Search Time: 0.00s, Training Time: 38.43s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9598, MAE = 252495.71, MAPE = 640.85%
q-score: 6.75
Prediction time: 11.7632 μs/sample
I/O: Reads=0.000000, Writes=0.000002
--------------------------------------------------------------------------------

Training with sample size: 10000
Memory usage: 5421.30 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 5421.27 KB

Results for barrierthingwaysorted, Sample Size: 10000
Grid Search Time: 0.00s, Training Time: 6.68s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9027, MAE = 413817.89, MAPE = 1170.91%
q-score: 11.39
Prediction time: 8.2069 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------

Training with sample size: 5000
Memory usage: 5421.30 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 2747.18 KB

Results for barrierthingwaysorted, Sample Size: 5000
Grid Search Time: 0.00s, Training Time: 3.11s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.8822, MAE = 492828.60, MAPE = 1579.77%
q-score: 15.21
Prediction time: 6.9225 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------



Training with sample size: 1000
Memory usage: 5421.30 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 576.78 KB

Results for barrierthingwaysorted, Sample Size: 1000
Grid Search Time: 0.00s, Training Time: 0.69s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.7783, MAE = 775412.13, MAPE = 3277.70%
q-score: 30.94
Prediction time: 4.4458 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------
Saving results for barrierthingwaysorted...


Memory usage: 4966.88 MB

Processing dataset: cyclewaythingwaysorted
Universe boundaries for cyclewaythingwaysorted: (-175.2093065, -75.1027861, 176.92582230000002, 71.0488105)
Loading data from ../large_files/resultsDistance/cyclewaythingwaysorted_results.csv


Parsing object coordinates...



Basic statistics for cyclewaythingwaysorted dataset:
Max count: 5317936.0
Min count: 0.0
Mean count: 503879.05
Median count: 222046.00
Total samples: 1067063

Calculating object features...
Splitting data into train and test sets...


Training set size: 853650

Training with sample size: 853650
Memory usage: 4274.68 MB
Performing grid search for optimal parameters...
Fitting 3 folds for each of 1 candidates, totalling 3 fits


Grid search complete in 899.37s
Best parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 427842.65 KB

Results for cyclewaythingwaysorted, Sample Size: 853650
Grid Search Time: 899.37s, Training Time: 999.18s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9937, MAE = 20025.39, MAPE = 138.45%
q-score: 2.25
Prediction time: 46.7700 μs/sample
I/O: Reads=0.000000, Writes=0.000001
--------------------------------------------------------------------------------
Generating prediction scatter plot...


Generating feature importance plot...


Generating side-by-side comparison plot...



Training with sample size: 500000
Memory usage: 6356.19 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 252141.25 KB

Results for cyclewaythingwaysorted, Sample Size: 500000
Grid Search Time: 0.00s, Training Time: 540.45s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9908, MAE = 23988.01, MAPE = 166.61%
q-score: 2.51
Prediction time: 39.2594 μs/sample
I/O: Reads=0.000028, Writes=0.000005
--------------------------------------------------------------------------------

Training with sample size: 100000
Memory usage: 4326.27 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 51414.62 KB

Results for cyclewaythingwaysorted, Sample Size: 100000
Grid Search Time: 0.00s, Training Time: 77.95s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9759, MAE = 41012.48, MAPE = 330.21%
q-score: 3.89
Prediction time: 18.3745 μs/sample
I/O: Reads=0.000000, Writes=0.000001
--------------------------------------------------------------------------------

Training with sample size: 50000
Memory usage: 4328.23 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 25953.19 KB

Results for cyclewaythingwaysorted, Sample Size: 50000
Grid Search Time: 0.00s, Training Time: 40.17s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9630, MAE = 51927.62, MAPE = 461.67%
q-score: 5.09
Prediction time: 12.8998 μs/sample
I/O: Reads=0.000000, Writes=0.000007
--------------------------------------------------------------------------------

Training with sample size: 10000
Memory usage: 4328.23 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 5323.73 KB

Results for cyclewaythingwaysorted, Sample Size: 10000
Grid Search Time: 0.00s, Training Time: 5.96s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9084, MAE = 85196.45, MAPE = 1025.89%
q-score: 9.89
Prediction time: 9.1727 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------

Training with sample size: 5000
Memory usage: 4328.23 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 2710.78 KB

Results for cyclewaythingwaysorted, Sample Size: 5000
Grid Search Time: 0.00s, Training Time: 2.86s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.8804, MAE = 103064.82, MAPE = 1363.99%
q-score: 12.97
Prediction time: 7.9441 μs/sample
I/O: Reads=0.000000, Writes=0.000001
--------------------------------------------------------------------------------



Training with sample size: 1000
Memory usage: 4328.23 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 567.35 KB

Results for cyclewaythingwaysorted, Sample Size: 1000
Grid Search Time: 0.00s, Training Time: 0.64s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.7891, MAE = 158544.22, MAPE = 2611.77%
q-score: 24.51
Prediction time: 5.3294 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------
Saving results for cyclewaythingwaysorted...


Memory usage: 4328.23 MB

Processing dataset: zcta5
Universe boundaries for zcta5: (-176.684744, -14.373776, 145.830505, 71.341324)
Loading data from ../large_files/resultsDistance/zcta5_results.csv


Parsing object coordinates...

Basic statistics for zcta5 dataset:
Max count: 31899.0
Min count: 0.0
Mean count: 5892.11
Median count: 4178.50
Total samples: 6626

Calculating object features...
Splitting data into train and test sets...
Training set size: 5300

Training with sample size: 5300
Memory usage: 4265.43 MB
Performing grid search for optimal parameters...
Fitting 3 folds for each of 36 candidates, totalling 108 fits


Grid search complete in 257.47s
Best parameters: {'max_depth': 30, 'min_samples_split': 2, 'n_estimators': 200}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 19370.19 KB

Results for zcta5, Sample Size: 5300
Grid Search Time: 257.47s, Training Time: 15.89s
Random Forest Parameters: {'max_depth': 30, 'min_samples_split': 2, 'n_estimators': 200}
Performance: R² = 0.9576, MAE = 601.90, MAPE = 100.33%
q-score: 1.88
Prediction time: 245.0617 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------
Generating prediction scatter plot...


Generating feature importance plot...


Generating side-by-side comparison plot...



Training with sample size: 5000
Memory usage: 4272.89 MB
Using best parameters from max scale: {'max_depth': 30, 'min_samples_split': 2, 'n_estimators': 200}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 18296.95 KB

Results for zcta5, Sample Size: 5000
Grid Search Time: 0.00s, Training Time: 15.25s
Random Forest Parameters: {'max_depth': 30, 'min_samples_split': 2, 'n_estimators': 200}
Performance: R² = 0.9579, MAE = 608.67, MAPE = 107.42%
q-score: 1.96
Prediction time: 244.4647 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------

Training with sample size: 1000
Memory usage: 4272.90 MB
Using best parameters from max scale: {'max_depth': 30, 'min_samples_split': 2, 'n_estimators': 200}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 3822.00 KB

Results for zcta5, Sample Size: 1000
Grid Search Time: 0.00s, Training Time: 3.17s
Random Forest Parameters: {'max_depth': 30, 'min_samples_split': 2, 'n_estimators': 200}
Performance: R² = 0.9149, MAE = 948.60, MAPE = 204.24%
q-score: 2.87
Prediction time: 188.8030 μs/sample
I/O: Reads=0.000000, Writes=0.000452
--------------------------------------------------------------------------------
Saving results for zcta5...
Memory usage: 4272.90 MB

Processing dataset: aerowaythingnodesorted
Universe boundaries for aerowaythingnodesorted: (-179.88088960000002, -90.0, 179.951004, 83.08333590000001)
Loading data from ../large_files/resultsDistance/aerowaythingnodesorted_results.csv


Parsing object coordinates...

Basic statistics for aerowaythingnodesorted dataset:
Max count: 74280.0
Min count: 0.0
Mean count: 4340.59
Median count: 2002.00
Total samples: 15843

Calculating object features...
Splitting data into train and test sets...
Training set size: 12674

Training with sample size: 12674
Memory usage: 4272.90 MB
Performing grid search for optimal parameters...
Fitting 3 folds for each of 36 candidates, totalling 108 fits


Grid search complete in 434.95s
Best parameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 200}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 45670.93 KB

Results for aerowaythingnodesorted, Sample Size: 12674
Grid Search Time: 434.95s, Training Time: 31.01s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 200}
Performance: R² = 0.9352, MAE = 720.30, MAPE = 137.25%
q-score: 2.43
Prediction time: 189.2304 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------
Generating prediction scatter plot...


Generating feature importance plot...


Generating side-by-side comparison plot...



Training with sample size: 10000
Memory usage: 4278.90 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 200}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 36187.45 KB

Results for aerowaythingnodesorted, Sample Size: 10000
Grid Search Time: 0.00s, Training Time: 27.02s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 200}
Performance: R² = 0.9283, MAE = 761.37, MAPE = 151.99%
q-score: 2.58
Prediction time: 181.5026 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------

Training with sample size: 5000
Memory usage: 4278.90 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 200}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 18294.55 KB

Results for aerowaythingnodesorted, Sample Size: 5000
Grid Search Time: 0.00s, Training Time: 12.41s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 200}
Performance: R² = 0.9014, MAE = 909.43, MAPE = 215.76%
q-score: 3.23
Prediction time: 159.0201 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------

Training with sample size: 1000
Memory usage: 4278.90 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 200}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 3793.48 KB

Results for aerowaythingnodesorted, Sample Size: 1000
Grid Search Time: 0.00s, Training Time: 2.83s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 200}
Performance: R² = 0.8085, MAE = 1263.73, MAPE = 489.47%
q-score: 5.99
Prediction time: 118.1792 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------
Saving results for aerowaythingnodesorted...


Memory usage: 4278.90 MB

Processing dataset: leisurewaysorted
Universe boundaries for leisurewaysorted: (-179.8728244, -89.6957847, 179.8091866, 81.0280175)
Loading data from ../large_files/resultsDistance/leisurewaysorted_results.csv


Parsing object coordinates...



Basic statistics for leisurewaysorted dataset:
Max count: 29357364.0
Min count: 0.0
Mean count: 2752172.81
Median count: 1238338.00
Total samples: 5876570

Calculating object features...


Splitting data into train and test sets...


Training set size: 4701256

Training with sample size: 4701256
Memory usage: 4681.35 MB
Performing grid search for optimal parameters...
Fitting 3 folds for each of 1 candidates, totalling 3 fits


Grid search complete in 6303.06s
Best parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 2395613.94 KB

Results for leisurewaysorted, Sample Size: 4701256
Grid Search Time: 6303.06s, Training Time: 6942.31s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9979, MAE = 63250.11, MAPE = 42.03%
q-score: 1.39
Prediction time: 67.5724 μs/sample
I/O: Reads=0.000005, Writes=0.000004
--------------------------------------------------------------------------------
Generating prediction scatter plot...


Generating feature importance plot...


Generating side-by-side comparison plot...



Training with sample size: 4000000
Memory usage: 21844.59 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 2040806.76 KB

Results for leisurewaysorted, Sample Size: 4000000
Grid Search Time: 0.00s, Training Time: 5719.56s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9976, MAE = 66805.86, MAPE = 50.25%
q-score: 1.47
Prediction time: 65.0944 μs/sample
I/O: Reads=0.000005, Writes=0.000004
--------------------------------------------------------------------------------

Training with sample size: 3000000
Memory usage: 6153.56 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 1534110.80 KB

Results for leisurewaysorted, Sample Size: 3000000
Grid Search Time: 0.00s, Training Time: 4209.67s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9971, MAE = 73006.82, MAPE = 56.58%
q-score: 1.53
Prediction time: 60.6853 μs/sample
I/O: Reads=0.000005, Writes=0.000003
--------------------------------------------------------------------------------

Training with sample size: 2000000
Memory usage: 4719.46 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 1026019.95 KB

Results for leisurewaysorted, Sample Size: 2000000
Grid Search Time: 0.00s, Training Time: 2564.25s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9962, MAE = 83378.81, MAPE = 74.37%
q-score: 1.69
Prediction time: 55.6102 μs/sample
I/O: Reads=0.000005, Writes=0.000004
--------------------------------------------------------------------------------

Training with sample size: 1000000
Memory usage: 5018.61 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 515759.27 KB

Results for leisurewaysorted, Sample Size: 1000000
Grid Search Time: 0.00s, Training Time: 1191.34s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9940, MAE = 104479.39, MAPE = 103.78%
q-score: 1.92
Prediction time: 46.7803 μs/sample
I/O: Reads=0.000005, Writes=0.000003
--------------------------------------------------------------------------------

Training with sample size: 500000
Memory usage: 4780.35 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 259197.89 KB

Results for leisurewaysorted, Sample Size: 500000
Grid Search Time: 0.00s, Training Time: 547.92s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9907, MAE = 128666.91, MAPE = 158.05%
q-score: 2.36
Prediction time: 37.4151 μs/sample
I/O: Reads=0.000000, Writes=0.000006
--------------------------------------------------------------------------------

Training with sample size: 100000
Memory usage: 4789.17 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 52512.63 KB

Results for leisurewaysorted, Sample Size: 100000
Grid Search Time: 0.00s, Training Time: 78.82s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9734, MAE = 221955.07, MAPE = 552.05%
q-score: 5.96
Prediction time: 17.2396 μs/sample
I/O: Reads=0.000000, Writes=0.000001
--------------------------------------------------------------------------------

Training with sample size: 50000
Memory usage: 4789.31 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 26408.05 KB

Results for leisurewaysorted, Sample Size: 50000
Grid Search Time: 0.00s, Training Time: 37.77s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9590, MAE = 283010.36, MAPE = 1057.27%
q-score: 10.63
Prediction time: 11.5487 μs/sample
I/O: Reads=0.000000, Writes=0.000001
--------------------------------------------------------------------------------

Training with sample size: 10000
Memory usage: 4789.31 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 5397.71 KB

Results for leisurewaysorted, Sample Size: 10000
Grid Search Time: 0.00s, Training Time: 6.82s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9050, MAE = 467540.00, MAPE = 3749.05%
q-score: 36.53
Prediction time: 7.9888 μs/sample
I/O: Reads=0.000000, Writes=0.000001
--------------------------------------------------------------------------------

Training with sample size: 5000
Memory usage: 4789.31 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 2745.02 KB

Results for leisurewaysorted, Sample Size: 5000
Grid Search Time: 0.00s, Training Time: 2.95s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.8789, MAE = 553029.07, MAPE = 4935.81%
q-score: 47.61
Prediction time: 6.7761 μs/sample
I/O: Reads=0.000000, Writes=0.000001
--------------------------------------------------------------------------------



Training with sample size: 1000
Memory usage: 4789.31 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 571.70 KB

Results for leisurewaysorted, Sample Size: 1000
Grid Search Time: 0.00s, Training Time: 0.64s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.7694, MAE = 800738.57, MAPE = 6881.07%
q-score: 65.47
Prediction time: 4.3398 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------
Saving results for leisurewaysorted...


Memory usage: 4394.77 MB

Processing dataset: areawater
Universe boundaries for areawater: (-179.231086, -14.601813, 179.859681, 71.441059)
Loading data from ../large_files/resultsDistance/areawater_results.csv


Parsing object coordinates...



Basic statistics for areawater dataset:
Max count: 2292056.0
Min count: 0.0
Mean count: 483232.05
Median count: 331295.00
Total samples: 458552

Calculating object features...
Splitting data into train and test sets...
Training set size: 366841

Training with sample size: 366841
Memory usage: 4399.71 MB
Performing grid search for optimal parameters...
Fitting 3 folds for each of 1 candidates, totalling 3 fits


Grid search complete in 373.03s
Best parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 180957.66 KB

Results for areawater, Sample Size: 366841
Grid Search Time: 373.03s, Training Time: 411.30s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9971, MAE = 14697.12, MAPE = 176.96%
q-score: 2.61
Prediction time: 35.0832 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------
Generating prediction scatter plot...


Generating feature importance plot...


Generating side-by-side comparison plot...



Training with sample size: 100000
Memory usage: 4421.34 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 50042.46 KB

Results for areawater, Sample Size: 100000
Grid Search Time: 0.00s, Training Time: 97.58s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9934, MAE = 22074.49, MAPE = 462.57%
q-score: 5.10
Prediction time: 18.5167 μs/sample
I/O: Reads=0.000000, Writes=0.000011
--------------------------------------------------------------------------------

Training with sample size: 50000
Memory usage: 4405.34 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 25201.49 KB

Results for areawater, Sample Size: 50000
Grid Search Time: 0.00s, Training Time: 44.86s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9900, MAE = 27461.78, MAPE = 1510.11%
q-score: 15.33
Prediction time: 14.4386 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------

Training with sample size: 10000
Memory usage: 4405.34 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...
Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 5175.94 KB

Results for areawater, Sample Size: 10000
Grid Search Time: 0.00s, Training Time: 7.15s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9757, MAE = 44765.93, MAPE = 6073.14%
q-score: 60.29
Prediction time: 10.6196 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------

Training with sample size: 5000
Memory usage: 4405.34 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...
Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 2631.03 KB

Results for areawater, Sample Size: 5000
Grid Search Time: 0.00s, Training Time: 3.28s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9590, MAE = 56884.77, MAPE = 7589.98%
q-score: 75.29
Prediction time: 9.5816 μs/sample
I/O: Reads=0.000000, Writes=0.000002
--------------------------------------------------------------------------------



Training with sample size: 1000
Memory usage: 4405.34 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 556.77 KB

Results for areawater, Sample Size: 1000
Grid Search Time: 0.00s, Training Time: 0.75s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.8945, MAE = 93563.91, MAPE = 11563.07%
q-score: 114.95
Prediction time: 7.0636 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------
Saving results for areawater...


Memory usage: 4405.34 MB

Processing dataset: yago2
Universe boundaries for yago2: (-179.98473, -90.0, 180.0, 90.0)
Loading data from ../large_files/resultsDistance/yago2_results.csv


Parsing object coordinates...



Basic statistics for yago2 dataset:
Max count: 4470390.0
Min count: 0.0
Mean count: 279738.01
Median count: 172925.00
Total samples: 898942

Calculating object features...
Splitting data into train and test sets...


Training set size: 719153

Training with sample size: 719153
Memory usage: 4410.31 MB
Performing grid search for optimal parameters...
Fitting 3 folds for each of 1 candidates, totalling 3 fits


Grid search complete in 741.10s
Best parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 362173.94 KB

Results for yago2, Sample Size: 719153
Grid Search Time: 741.10s, Training Time: 791.45s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9852, MAE = 14691.61, MAPE = 46.93%
q-score: 1.36
Prediction time: 44.1506 μs/sample
I/O: Reads=0.000000, Writes=0.000001
--------------------------------------------------------------------------------
Generating prediction scatter plot...


Generating feature importance plot...


Generating side-by-side comparison plot...



Training with sample size: 500000
Memory usage: 5822.65 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 252500.05 KB

Results for yago2, Sample Size: 500000
Grid Search Time: 0.00s, Training Time: 556.55s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9822, MAE = 16344.46, MAPE = 59.09%
q-score: 1.45
Prediction time: 39.4966 μs/sample
I/O: Reads=0.000000, Writes=0.000004
--------------------------------------------------------------------------------

Training with sample size: 100000
Memory usage: 4369.20 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 51220.77 KB

Results for yago2, Sample Size: 100000
Grid Search Time: 0.00s, Training Time: 86.76s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9631, MAE = 25944.65, MAPE = 133.13%
q-score: 2.02
Prediction time: 18.1174 μs/sample
I/O: Reads=0.000033, Writes=0.000003
--------------------------------------------------------------------------------

Training with sample size: 50000
Memory usage: 4369.20 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 25797.98 KB

Results for yago2, Sample Size: 50000
Grid Search Time: 0.00s, Training Time: 41.88s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9510, MAE = 31151.40, MAPE = 140.95%
q-score: 1.98
Prediction time: 13.3800 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------

Training with sample size: 10000
Memory usage: 4369.20 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 5301.90 KB

Results for yago2, Sample Size: 10000
Grid Search Time: 0.00s, Training Time: 6.55s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9118, MAE = 45117.45, MAPE = 514.48%
q-score: 4.65
Prediction time: 9.4268 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------

Training with sample size: 5000
Memory usage: 4369.20 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 2692.38 KB

Results for yago2, Sample Size: 5000
Grid Search Time: 0.00s, Training Time: 3.39s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.8837, MAE = 53019.38, MAPE = 647.62%
q-score: 5.52
Prediction time: 8.2966 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------



Training with sample size: 1000
Memory usage: 4369.20 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 563.82 KB

Results for yago2, Sample Size: 1000
Grid Search Time: 0.00s, Training Time: 0.64s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.8246, MAE = 68501.18, MAPE = 857.62%
q-score: 6.97
Prediction time: 5.6013 μs/sample
I/O: Reads=0.000000, Writes=0.000001
--------------------------------------------------------------------------------
Saving results for yago2...


Memory usage: 4369.20 MB

Processing dataset: powerthingnodesorted
Universe boundaries for powerthingnodesorted: (-177.92741900000001, -77.8453164, 178.47197400000002, 78.2256315)
Loading data from ../large_files/resultsDistance/powerthingnodesorted_results.csv


Parsing object coordinates...



Basic statistics for powerthingnodesorted dataset:
Max count: 10461467.0
Min count: 0.0
Mean count: 665028.42
Median count: 348182.50
Total samples: 2102514

Calculating object features...


Splitting data into train and test sets...


Training set size: 1682011

Training with sample size: 1682011
Memory usage: 4375.11 MB
Performing grid search for optimal parameters...
Fitting 3 folds for each of 1 candidates, totalling 3 fits


Grid search complete in 1833.64s
Best parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 841069.00 KB

Results for powerthingnodesorted, Sample Size: 1682011
Grid Search Time: 1833.64s, Training Time: 1991.43s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9961, MAE = 21963.51, MAPE = 123.44%
q-score: 2.20
Prediction time: 53.5396 μs/sample
I/O: Reads=0.000000, Writes=0.000005
--------------------------------------------------------------------------------
Generating prediction scatter plot...


Generating feature importance plot...


Generating side-by-side comparison plot...



Training with sample size: 1000000
Memory usage: 9353.76 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 502109.92 KB

Results for powerthingnodesorted, Sample Size: 1000000
Grid Search Time: 0.00s, Training Time: 1065.51s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9946, MAE = 26005.03, MAPE = 168.52%
q-score: 2.61
Prediction time: 47.1062 μs/sample
I/O: Reads=0.000014, Writes=0.000004
--------------------------------------------------------------------------------

Training with sample size: 500000
Memory usage: 4794.23 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 252482.90 KB

Results for powerthingnodesorted, Sample Size: 500000
Grid Search Time: 0.00s, Training Time: 484.97s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9917, MAE = 32466.31, MAPE = 237.19%
q-score: 3.26
Prediction time: 37.6654 μs/sample
I/O: Reads=0.000000, Writes=0.000002
--------------------------------------------------------------------------------

Training with sample size: 100000
Memory usage: 4794.23 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 51246.13 KB

Results for powerthingnodesorted, Sample Size: 100000
Grid Search Time: 0.00s, Training Time: 74.84s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9772, MAE = 54979.14, MAPE = 676.83%
q-score: 7.48
Prediction time: 17.3488 μs/sample
I/O: Reads=0.000000, Writes=0.000001
--------------------------------------------------------------------------------

Training with sample size: 50000
Memory usage: 4794.23 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 25818.77 KB

Results for powerthingnodesorted, Sample Size: 50000
Grid Search Time: 0.00s, Training Time: 34.29s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9663, MAE = 68377.10, MAPE = 931.64%
q-score: 9.95
Prediction time: 12.2664 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------

Training with sample size: 10000
Memory usage: 4794.23 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 5269.64 KB

Results for powerthingnodesorted, Sample Size: 10000
Grid Search Time: 0.00s, Training Time: 5.37s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9174, MAE = 113987.89, MAPE = 1777.18%
q-score: 18.19
Prediction time: 8.3761 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------

Training with sample size: 5000
Memory usage: 4794.23 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 2682.69 KB

Results for powerthingnodesorted, Sample Size: 5000
Grid Search Time: 0.00s, Training Time: 2.72s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.8842, MAE = 139691.36, MAPE = 2141.05%
q-score: 21.77
Prediction time: 7.1926 μs/sample
I/O: Reads=0.000000, Writes=0.001369
--------------------------------------------------------------------------------



Training with sample size: 1000
Memory usage: 4794.23 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 557.07 KB

Results for powerthingnodesorted, Sample Size: 1000
Grid Search Time: 0.00s, Training Time: 0.58s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.7933, MAE = 196257.42, MAPE = 3966.19%
q-score: 39.46
Prediction time: 4.8373 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------
Saving results for powerthingnodesorted...


Memory usage: 4794.23 MB

Processing dataset: emergencythingwaysorted
Universe boundaries for emergencythingwaysorted: (-175.221337, -53.7941359, 179.3313189, 78.22019230000001)
Loading data from ../large_files/resultsDistance/emergencythingwaysorted_results.csv


Parsing object coordinates...



Basic statistics for emergencythingwaysorted dataset:
Max count: 802965.0
Min count: 0.0
Mean count: 67511.32
Median count: 16253.00
Total samples: 161514

Calculating object features...
Splitting data into train and test sets...
Training set size: 129211

Training with sample size: 129211
Memory usage: 4795.21 MB
Performing grid search for optimal parameters...
Fitting 3 folds for each of 1 candidates, totalling 3 fits


Grid search complete in 94.72s
Best parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...
Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 62629.46 KB

Results for emergencythingwaysorted, Sample Size: 129211
Grid Search Time: 94.72s, Training Time: 121.43s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9589, MAE = 7410.52, MAPE = 163.34%
q-score: 2.64
Prediction time: 27.8663 μs/sample
I/O: Reads=0.000000, Writes=0.000009
--------------------------------------------------------------------------------
Generating prediction scatter plot...


Generating feature importance plot...


Generating side-by-side comparison plot...



Training with sample size: 100000
Memory usage: 4794.23 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...
Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 48674.89 KB

Results for emergencythingwaysorted, Sample Size: 100000
Grid Search Time: 0.00s, Training Time: 82.35s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9528, MAE = 8077.83, MAPE = 180.95%
q-score: 2.80
Prediction time: 25.2617 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------

Training with sample size: 50000
Memory usage: 4794.24 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...
Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 24644.08 KB

Results for emergencythingwaysorted, Sample Size: 50000
Grid Search Time: 0.00s, Training Time: 37.80s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9350, MAE = 9908.51, MAPE = 230.65%
q-score: 3.29
Prediction time: 20.3437 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------

Training with sample size: 10000
Memory usage: 4794.24 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...
Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 5094.60 KB

Results for emergencythingwaysorted, Sample Size: 10000
Grid Search Time: 0.00s, Training Time: 6.69s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.8737, MAE = 15386.08, MAPE = 419.83%
q-score: 5.05
Prediction time: 14.9215 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------

Training with sample size: 5000
Memory usage: 4794.24 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...
Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 2601.88 KB

Results for emergencythingwaysorted, Sample Size: 5000
Grid Search Time: 0.00s, Training Time: 3.40s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.8442, MAE = 18164.52, MAPE = 620.07%
q-score: 6.91
Prediction time: 14.0168 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------



Training with sample size: 1000
Memory usage: 4794.24 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 554.58 KB

Results for emergencythingwaysorted, Sample Size: 1000
Grid Search Time: 0.00s, Training Time: 0.67s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.7631, MAE = 23984.36, MAPE = 937.56%
q-score: 9.90
Prediction time: 10.9921 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------
Saving results for emergencythingwaysorted...


Memory usage: 4794.24 MB

Processing dataset: historicthingwaysorted
Universe boundaries for historicthingwaysorted: (-179.99526020000002, -85.0036942, 179.99597930000002, 78.06750650000001)
Loading data from ../large_files/resultsDistance/historicthingwaysorted_results.csv


Parsing object coordinates...



Basic statistics for historicthingwaysorted dataset:
Max count: 1790224.0
Min count: 0.0
Mean count: 261684.06
Median count: 134375.00
Total samples: 358439

Calculating object features...
Splitting data into train and test sets...
Training set size: 286751

Training with sample size: 286751
Memory usage: 4796.20 MB
Performing grid search for optimal parameters...
Fitting 3 folds for each of 1 candidates, totalling 3 fits


Grid search complete in 230.34s
Best parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 143842.40 KB

Results for historicthingwaysorted, Sample Size: 286751
Grid Search Time: 230.34s, Training Time: 276.51s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9902, MAE = 11654.85, MAPE = 97.98%
q-score: 1.99
Prediction time: 34.4713 μs/sample
I/O: Reads=0.000000, Writes=0.000795
--------------------------------------------------------------------------------
Generating prediction scatter plot...


Generating feature importance plot...


Generating side-by-side comparison plot...



Training with sample size: 100000
Memory usage: 4796.22 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...
Calculating performance metrics...


Measuring prediction performance...


Saving model...


Model size: 50635.03 KB

Results for historicthingwaysorted, Sample Size: 100000
Grid Search Time: 0.00s, Training Time: 77.67s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9808, MAE = 16475.81, MAPE = 137.52%
q-score: 2.38
Prediction time: 20.4699 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------

Training with sample size: 50000
Memory usage: 4746.48 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...
Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 25505.55 KB

Results for historicthingwaysorted, Sample Size: 50000
Grid Search Time: 0.00s, Training Time: 34.45s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9702, MAE = 20593.95, MAPE = 201.93%
q-score: 3.02
Prediction time: 15.3508 μs/sample
I/O: Reads=0.000000, Writes=0.000006
--------------------------------------------------------------------------------

Training with sample size: 10000
Memory usage: 4746.48 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...
Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 5228.05 KB

Results for historicthingwaysorted, Sample Size: 10000
Grid Search Time: 0.00s, Training Time: 6.20s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9395, MAE = 32707.19, MAPE = 420.52%
q-score: 5.21
Prediction time: 11.8428 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------

Training with sample size: 5000
Memory usage: 4746.48 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...
Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 2663.26 KB

Results for historicthingwaysorted, Sample Size: 5000
Grid Search Time: 0.00s, Training Time: 3.13s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9183, MAE = 39985.01, MAPE = 780.70%
q-score: 8.78
Prediction time: 10.1094 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------



Training with sample size: 1000
Memory usage: 4746.48 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 560.04 KB

Results for historicthingwaysorted, Sample Size: 1000
Grid Search Time: 0.00s, Training Time: 0.62s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.8511, MAE = 60149.54, MAPE = 1206.84%
q-score: 12.98
Prediction time: 7.6145 μs/sample
I/O: Reads=0.000000, Writes=0.000003
--------------------------------------------------------------------------------
Saving results for historicthingwaysorted...


Memory usage: 4746.48 MB

Processing dataset: aerowaythingwaysorted
Universe boundaries for aerowaythingwaysorted: (-179.88131460000002, -79.7773063, 179.426138, 85.05258450000001)
Loading data from ../large_files/resultsDistance/aerowaythingwaysorted_results.csv


Parsing object coordinates...



Basic statistics for aerowaythingwaysorted dataset:
Max count: 1829409.0
Min count: 0.0
Mean count: 107433.16
Median count: 54304.00
Total samples: 368365

Calculating object features...
Splitting data into train and test sets...
Training set size: 294692

Training with sample size: 294692
Memory usage: 4748.44 MB
Performing grid search for optimal parameters...
Fitting 3 folds for each of 1 candidates, totalling 3 fits


Grid search complete in 248.16s
Best parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 147847.79 KB

Results for aerowaythingwaysorted, Sample Size: 294692
Grid Search Time: 248.16s, Training Time: 285.95s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9866, MAE = 7239.43, MAPE = 46.06%
q-score: 1.47
Prediction time: 33.8925 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------
Generating prediction scatter plot...


Generating feature importance plot...


Generating side-by-side comparison plot...



Training with sample size: 100000
Memory usage: 4792.04 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 50609.47 KB

Results for aerowaythingwaysorted, Sample Size: 100000
Grid Search Time: 0.00s, Training Time: 73.79s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9740, MAE = 9997.23, MAPE = 67.61%
q-score: 1.68
Prediction time: 20.4784 μs/sample
I/O: Reads=0.000000, Writes=0.002757
--------------------------------------------------------------------------------

Training with sample size: 50000
Memory usage: 4792.04 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...
Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 25476.52 KB

Results for aerowaythingwaysorted, Sample Size: 50000
Grid Search Time: 0.00s, Training Time: 39.26s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9620, MAE = 12273.15, MAPE = 101.81%
q-score: 2.01
Prediction time: 15.3859 μs/sample
I/O: Reads=0.000000, Writes=0.001789
--------------------------------------------------------------------------------

Training with sample size: 10000
Memory usage: 4792.04 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...


Making predictions...
Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 5205.76 KB

Results for aerowaythingwaysorted, Sample Size: 10000
Grid Search Time: 0.00s, Training Time: 5.83s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.9156, MAE = 19366.33, MAPE = 292.81%
q-score: 3.88
Prediction time: 11.2547 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------

Training with sample size: 5000
Memory usage: 4792.04 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 2641.78 KB

Results for aerowaythingwaysorted, Sample Size: 5000
Grid Search Time: 0.00s, Training Time: 2.76s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.8790, MAE = 23340.21, MAPE = 401.43%
q-score: 4.94
Prediction time: 9.8261 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------



Training with sample size: 1000
Memory usage: 4792.04 MB
Using best parameters from max scale: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Training random forest model...
Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...
Model size: 550.57 KB

Results for aerowaythingwaysorted, Sample Size: 1000
Grid Search Time: 0.00s, Training Time: 0.63s
Random Forest Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Performance: R² = 0.7820, MAE = 34120.02, MAPE = 760.06%
q-score: 8.47
Prediction time: 7.4163 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------


Saving results for aerowaythingwaysorted...
Memory usage: 4792.04 MB

Processing dataset: arealm
Universe boundaries for arealm: (-179.147236, -14.548699, 179.77847, 71.359879)
Loading data from ../large_files/resultsDistance/arealm_results.csv


Parsing object coordinates...

Basic statistics for arealm dataset:
Max count: 128233.0
Min count: 0.0
Mean count: 23800.58
Median count: 15460.00
Total samples: 25833

Calculating object features...
Splitting data into train and test sets...
Training set size: 20666

Training with sample size: 20666
Memory usage: 4792.04 MB
Performing grid search for optimal parameters...
Fitting 3 folds for each of 36 candidates, totalling 108 fits


Grid search complete in 832.74s
Best parameters: {'max_depth': 30, 'min_samples_split': 2, 'n_estimators': 200}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 76542.31 KB

Results for arealm, Sample Size: 20666
Grid Search Time: 832.74s, Training Time: 72.60s
Random Forest Parameters: {'max_depth': 30, 'min_samples_split': 2, 'n_estimators': 200}
Performance: R² = 0.9807, MAE = 1763.98, MAPE = 129.49%
q-score: 2.25
Prediction time: 170.3357 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------
Generating prediction scatter plot...


Generating feature importance plot...


Generating side-by-side comparison plot...



Training with sample size: 10000
Memory usage: 4828.73 MB
Using best parameters from max scale: {'max_depth': 30, 'min_samples_split': 2, 'n_estimators': 200}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 37329.63 KB

Results for arealm, Sample Size: 10000
Grid Search Time: 0.00s, Training Time: 34.75s
Random Forest Parameters: {'max_depth': 30, 'min_samples_split': 2, 'n_estimators': 200}
Performance: R² = 0.9656, MAE = 2255.06, MAPE = 188.95%
q-score: 2.78
Prediction time: 147.3723 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------

Training with sample size: 5000
Memory usage: 4828.73 MB
Using best parameters from max scale: {'max_depth': 30, 'min_samples_split': 2, 'n_estimators': 200}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 18829.85 KB

Results for arealm, Sample Size: 5000
Grid Search Time: 0.00s, Training Time: 17.90s
Random Forest Parameters: {'max_depth': 30, 'min_samples_split': 2, 'n_estimators': 200}
Performance: R² = 0.9562, MAE = 2719.76, MAPE = 354.32%
q-score: 4.37
Prediction time: 131.3817 μs/sample
I/O: Reads=0.000000, Writes=0.000000
--------------------------------------------------------------------------------

Training with sample size: 1000
Memory usage: 4828.73 MB
Using best parameters from max scale: {'max_depth': 30, 'min_samples_split': 2, 'n_estimators': 200}
Training random forest model...


Making predictions...


Calculating performance metrics...
Measuring prediction performance...


Saving model...


Model size: 3932.16 KB

Results for arealm, Sample Size: 1000
Grid Search Time: 0.00s, Training Time: 3.07s
Random Forest Parameters: {'max_depth': 30, 'min_samples_split': 2, 'n_estimators': 200}
Performance: R² = 0.9161, MAE = 4171.37, MAPE = 1098.04%
q-score: 11.68
Prediction time: 94.4986 μs/sample
I/O: Reads=0.000000, Writes=0.025876
--------------------------------------------------------------------------------
Saving results for arealm...
Saving combined results...
All processing completed and results saved.
Memory usage: 4828.73 MB
