In [None]:
import time
import os
from selenium.webdriver.chrome.options import Options
import sys
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from tools import *

# Shapefile inputs for the Porto road network (paths are relative to the
# directory the notebook is run from).
node_file, edge_file = (
    '../data/porto_network/nodes.shp',
    '../data/porto_network/edges.shp',
)

# load_road_network is not defined in this file; presumably it is provided by
# the `tools` star-import. Its four results are kept as module-level names
# because process_trajectory reads them directly.
G, nodes, edges, edge_id_map = load_road_network(node_file, edge_file)

In [None]:
# `pickle` is imported explicitly here: no visible cell imports it, so the
# load would otherwise depend on the `tools` star-import exposing the name.
import pickle

# NOTE(security): unpickling can execute arbitrary code, so only load this
# file when it comes from a trusted source.
with open('../../porto_demo.pkl', 'rb') as f:
    trajectories = pickle.load(f)

In [None]:
# Keep only the first 20 entries of the loaded trajectories for this run.
trajectories = trajectories[:20]

In [None]:
import os
import time
import threading
from tqdm import tqdm
import concurrent.futures
import json

# Shared settings read by the functions defined in this cell.
CONFIG = {
    # Number of parallel threads; not read by any function visible in this cell.
    'max_workers': 8,
    # Root data directory; not read by any function visible in this cell.
    'base_dir': '../data',
    # Directory the per-trajectory HTML files are written to.
    'html_dir': '../data/last_trajectory_htmls',
    # Directory used to build the PNG path reported for each trajectory.
    'images_dir': '../data/last_trajectory_images',
    # Number of trailing trajectory points kept by preprocess_trajectory.
    'trajectory_limit': 20,
}

def ensure_dirs_exist():
    """Create the HTML and image output directories if they are missing.

    Reads ``CONFIG['html_dir']`` and ``CONFIG['images_dir']`` and prints one
    status line per directory.
    """
    for dir_path in (CONFIG['html_dir'], CONFIG['images_dir']):
        if os.path.exists(dir_path):
            print(f"Directory already exists: {dir_path}")
            continue

        print(f"Creating directory: {dir_path}")
        # exist_ok=True closes the check-then-create race: if something else
        # creates the directory between the exists() test above and this
        # call, makedirs no longer raises FileExistsError.
        os.makedirs(dir_path, exist_ok=True)

def preprocess_trajectory(trajectory, limit=None):
    """Return a copy of ``trajectory`` with reordered, truncated ``'o_geo'`` points.

    Each pair in ``trajectory['o_geo']`` is unpacked as ``(lat, lon)`` and
    re-emitted as ``[lon, lat]``; only the last ``limit`` pairs are kept.

    The input record is left untouched. Swapping the pair order in place would
    make a second call on the same record swap the coordinates back, so the
    function works on a shallow copy and is safe to call repeatedly.

    Args:
        trajectory: Record supporting ``trajectory['o_geo']`` item access,
            whose ``'o_geo'`` is an iterable of two-element pairs.
        limit: Number of trailing points to keep. Defaults to
            ``CONFIG['trajectory_limit']``. A limit of 0 keeps every point,
            because ``[-0:]`` is the full slice.

    Returns:
        A shallow copy of ``trajectory`` whose ``'o_geo'`` entry is the new
        list of ``[lon, lat]`` pairs.
    """
    import copy

    if limit is None:
        limit = CONFIG['trajectory_limit']

    # Swap the order of each pair.
    swapped = [[lon, lat] for lat, lon in trajectory['o_geo']]

    # Shallow copy: every other field is shared with the original record, only
    # the 'o_geo' entry is replaced on the copy.
    prepared = copy.copy(trajectory)
    prepared['o_geo'] = swapped[-limit:]
    return prepared

def process_trajectory(trajectory):
    """Generate and save the HTML file for one trajectory (HTML only).

    The trajectory's ``'devid'`` is used as its ID and to build the HTML and
    PNG paths under ``CONFIG['html_dir']`` / ``CONFIG['images_dir']``.

    Returns:
        A dict whose ``'status'`` is one of:
        * ``'html_exists'``  - the HTML file was already on disk; nothing done.
        * ``'html_created'`` - the map was generated and saved.
        * ``'error'``        - an exception was raised; its text is stored
          under ``'error'`` and no paths are included.
    """
    traj_id = trajectory['devid']
    html_path = f"{CONFIG['html_dir']}/trajectory_{traj_id}.html"
    png_path = f"{CONFIG['images_dir']}/trajectory_{traj_id}.png"

    def html_result(status, message):
        """Build the result dict shared by the two non-error outcomes."""
        return {
            'status': status,
            'id': traj_id,
            'html_path': html_path,
            'png_path': png_path,
            'message': message,
        }

    # Nothing to do when the HTML file is already on disk.
    if os.path.exists(html_path):
        return html_result(
            'html_exists',
            f"HTML file for trajectory ID {traj_id} already exists",
        )

    try:
        prepared = preprocess_trajectory(trajectory)
        rendered_map = visualize_trajectory_and_connections(
            prepared, G, nodes, edges, edge_id_map, traj_id
        )
        rendered_map.save(html_path)
        return html_result(
            'html_created',
            f"Created HTML file for trajectory ID {traj_id}",
        )
    except Exception as exc:
        # Any failure is reported as a result so the caller's loop can
        # continue with the next trajectory.
        return {
            'status': 'error',
            'id': traj_id,
            'message': f"Error occurred while processing trajectory ID {traj_id}: {exc}",
            'error': str(exc),
        }

def process_all_trajectories(trajectories_to_process):
    """Process all trajectories serially, generating only HTML files.

    Ensures the output directories exist, runs ``process_trajectory`` on each
    item behind a tqdm progress bar, and keeps the bar's postfix updated with
    the created / exists / error tallies and the average throughput.

    Args:
        trajectories_to_process: Iterable of trajectory records accepted by
            ``process_trajectory``.

    Returns:
        List of the result dicts returned by ``process_trajectory``, in
        processing order.
    """
    ensure_dirs_exist()

    results = []
    total_start_time = time.time()

    # Stage timings are summed from an optional 'performance' dict on each
    # 'html_created' result. Results without that key contribute nothing, so
    # the summary below is printed only when timings were actually supplied.
    cumulative_times = {
        'preprocess_time': 0,
        'map_generation_time': 0,
        'save_time': 0,
        'total_time': 0
    }

    # Running tallies, updated once per result instead of rescanning the
    # whole `results` list on every iteration.
    status_counts = {'html_created': 0, 'html_exists': 0, 'error': 0}

    progress_bar = tqdm(trajectories_to_process, desc="Generating HTML files")

    for trajectory in progress_bar:
        result = process_trajectory(trajectory)
        results.append(result)

        status = result['status']
        status_counts[status] = status_counts.get(status, 0) + 1

        if status == 'html_created' and 'performance' in result:
            performance = result['performance']
            for key in cumulative_times:
                if key in performance:
                    cumulative_times[key] += performance[key]

        # Average throughput since the start of the run.
        elapsed = time.time() - total_start_time
        speed = len(results) / elapsed if elapsed > 0 else 0

        progress_bar.set_postfix({
            'Created': status_counts['html_created'],
            'Exists': status_counts['html_exists'],
            'Error': status_counts['error'],
            'Speed': f"{speed:.2f}traj/sec"
        })

    # Performance summary: only meaningful when timings were accumulated and
    # at least one HTML file was created.
    total_time = cumulative_times['total_time']
    processed_count = status_counts['html_created']
    if total_time > 0 and processed_count > 0:
        print("\nPerformance Analysis:")
        print(f"Total processed trajectories: {processed_count}")
        print(f"Total time: {total_time:.2f}seconds")
        print(f"Average total time per trajectory: {total_time/processed_count:.2f}seconds")
        print(f"Total preprocessing time: {cumulative_times['preprocess_time']:.2f}seconds "
              f"({cumulative_times['preprocess_time']/total_time*100:.1f}%)")
        print(f"Total map generation time: {cumulative_times['map_generation_time']:.2f}seconds "
              f"({cumulative_times['map_generation_time']/total_time*100:.1f}%)")
        print(f"Total file save time: {cumulative_times['save_time']:.2f}seconds "
              f"({cumulative_times['save_time']/total_time*100:.1f}%)")

    return results

def generate_snapshot_info(results):
    """Collect the HTML files that need a screenshot.

    Keeps every result whose status is 'html_exists' or 'html_created' and
    reduces it to its 'id', 'html_path' and 'png_path'; all other results
    are skipped.
    """
    wanted_statuses = ('html_exists', 'html_created')
    return [
        {
            'id': entry['id'],
            'html_path': entry['html_path'],
            'png_path': entry['png_path'],
        }
        for entry in results
        if entry['status'] in wanted_statuses
    ]

def print_statistics(results):
    """Print a summary of the processing results.

    Reports the counts per status, the messages of the first 10 results, and
    (when there are any) the error text of up to 5 failed trajectories.
    """
    def count_status(wanted):
        """Number of results carrying the given status."""
        return len([entry for entry in results if entry['status'] == wanted])

    created = count_status('html_created')
    existing = count_status('html_exists')
    failures = [entry for entry in results if entry['status'] == 'error']

    print("\n===== Processing Result Statistics =====")
    print(f"Total processed: {len(results)} trajectories")
    print(f"HTML newly created: {created}")
    print(f"HTML already exists: {existing}")
    print(f"Processing errors: {len(failures)}")
    print("=======================================")

    # A short sample of per-trajectory messages.
    print("\nFirst 10 processing results:")
    for entry in results[:10]:
        print(entry['message'])

    if not failures:
        return

    # Error details, capped at the first 5 failures.
    print("\nError details examples:")
    for entry in failures[:5]:
        print(f"Trajectory ID {entry['id']}: {entry.get('error', 'Unknown error')}")

# Main execution logic
def main():
    """Generate HTML files for the module-level ``trajectories`` and report on them.

    Returns:
        The list of per-trajectory result dicts from
        ``process_all_trajectories``.
    """
    pending = trajectories
    print(f"Total of {len(pending)} trajectories need processing")

    # HTML generation only, followed by the printed summary.
    outcome = process_all_trajectories(pending)
    print_statistics(outcome)
    return outcome

# Run the pipeline when this code is executed as the main module; the list
# returned by main() is bound to the module-level name `results`.
if __name__ == "__main__":
    results = main()