# Mode Prediction Experiment

This experiment simulates the mode choices of 1000 agents in Cambridge and compares them with real data.

###  Run experiments

In [None]:
import json
import os
import random
from tqdm import tqdm
import pandas as pd
import concurrent.futures
from functools import partial

from mobility_agent.agent import MobilityAgent
from baseline.eval import cal_group_kl_divergence

def process_row(row, num_samples, profile_columns, city, reference_city,
                reference_file, init_memory):
    """Predict the travel mode for one trip row using a fresh MobilityAgent.

    Args:
        row: A pandas Series for one trip; must contain ``profile_columns``,
            ``'trip_purpose'`` and ``'start_time'``. ``row.name`` is used as
            the row identifier.
        num_samples: Forwarded to the agent as ``sample_num``. When it equals
            0 the agent is queried with ``use_reference=False``, otherwise
            with ``use_reference=True``.
        profile_columns: Column labels whose values form the agent profile
            (serialised to JSON).
        city: Forwarded to ``MobilityAgent``.
        reference_city: Forwarded to ``MobilityAgent``.
        reference_file: Forwarded to ``MobilityAgent``.
        init_memory: Single entry placed in the agent's ``working_memory``.

    Returns:
        A dict with keys ``idx``, ``predicted_mode``, ``predicted_duration``,
        ``selection_reason`` and ``choice_weights``; or ``None`` if anything
        raised while processing the row (the error is printed).
    """
    try:
        row_id = row.name
        persona = row[profile_columns].to_dict()
        trip_purpose = row['trip_purpose']
        departure = row['start_time']

        agent = MobilityAgent(
            profile=json.dumps(persona),
            sample_num=num_samples,
            city=city,
            reference_city=reference_city,
            reference_file=reference_file,
        )
        agent.working_memory = [init_memory]

        # Reference trips are consulted only when at least one sample is requested.
        consult_reference = not (num_samples == 0)
        preference = agent.get_mode_prefernce(
            desire=trip_purpose,
            time=departure,
            use_reference=consult_reference,
        )

        options = preference['choice_weights']
        labels = [option['primary_mode'] for option in options]
        option_weights = [option['weight'] for option in options]

        # Draw one option index at random, proportionally to the returned weights.
        (pick,) = random.choices(range(len(labels)), weights=option_weights, k=1)
        chosen = options[pick]

        outcome = {'idx': row_id}
        outcome['predicted_mode'] = chosen['primary_mode']
        outcome['predicted_duration'] = chosen['duration_minutes']
        outcome['selection_reason'] = json.dumps(preference['think'])
        outcome['choice_weights'] = json.dumps(options)
        return outcome
    except Exception as e:
        # Best-effort: a failing row is reported and skipped, not fatal.
        print(f"Error processing row {row.name}: {e}")
        return None

# Columns written by run_experiments, in the order they are created.
_RESULT_COLUMNS = ("predicted_mode", "predicted_duration", "selection_reason", "choice_weights")


def _merge_results(df, results):
    """Copy each result dict's prediction fields into ``df`` at row label ``res['idx']``."""
    for res in results:
        row_label = res['idx']
        for column in _RESULT_COLUMNS:
            df.loc[row_label, column] = res[column]


def run_experiments(eval_df,max_workers,num_samples,save_path,city='Cambridge,MA',reference_city='Cambridge,MA',reference_file='data/reference/replica-cambridge_trips.csv',init_memory='Today is a normal weekday'):
    """Run ``process_row`` over every row of ``eval_df`` in a thread pool, save and evaluate.

    Predictions are written into ``eval_df`` in place (columns
    ``predicted_mode``, ``predicted_duration``, ``selection_reason`` and
    ``choice_weights``) and the frame is saved to ``save_path`` as CSV. A
    partial CSV is also written each time another 100 rows have succeeded.
    Finally ``cal_group_kl_divergence`` is called on a copy of the frame and
    the overall KL divergence and MAE are printed.

    Args:
        eval_df: DataFrame of evaluation trips; mutated in place. Any
            prediction columns already present (e.g. from an earlier run on
            the same frame) are dropped first so that rows which fail in this
            run do not keep stale predictions.
        max_workers: Thread pool size.
        num_samples: Forwarded to ``process_row``.
        save_path: Destination CSV path; its parent directory is created if
            the path has one.
        city: Forwarded to ``process_row``.
        reference_city: Forwarded to ``process_row``.
        reference_file: Forwarded to ``process_row``.
        init_memory: Forwarded to ``process_row``.

    Returns:
        None.
    """
    # Set up parameters
    profile_columns = ['age_group', 'income_group', 'employment_status', 'household_size', 'available_vehicles', 'education']

    # os.makedirs('') raises, so only create a directory when the path names one.
    out_dir = os.path.dirname(save_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Discard predictions left over from a previous run on this same frame.
    stale_columns = [column for column in _RESULT_COLUMNS if column in eval_df.columns]
    if stale_columns:
        eval_df.drop(columns=stale_columns, inplace=True)

    print(f"=======Mobility Agent (num_samples={num_samples})=======")

    # Create a partial function with fixed parameters
    process_row_partial = partial(process_row, num_samples=num_samples, profile_columns=profile_columns,city=city,reference_city=reference_city,reference_file=reference_file,init_memory=init_memory)

    # Process rows in parallel
    results = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Submit all jobs
        futures = [executor.submit(process_row_partial, row)
                   for _, row in eval_df.iterrows()]

        # Process results as they complete with a progress bar
        for future in tqdm(concurrent.futures.as_completed(futures), total=len(eval_df)):
            result = future.result()
            if not result:
                # process_row returns None for rows that failed; nothing to record.
                continue
            results.append(result)

            # Checkpoint only when a new success brings the count to a multiple
            # of 100 (never at 0, and not again after a subsequent failed row).
            if len(results) % 100 == 0:
                checkpoint_df = eval_df.copy()
                _merge_results(checkpoint_df, results)
                checkpoint_df.to_csv(save_path)

    # Update final dataframe with all results
    _merge_results(eval_df, results)

    # Save final results
    eval_df.to_csv(save_path)

    print(f"=======Evaluating model=======")
    result_df = eval_df.copy()
    kl_df, overall_kl, overall_mae = cal_group_kl_divergence(result_df=result_df)
    print(f"Overall average KL divergence: {overall_kl:.4f}")
    print(f"Overall mean absolute error: {overall_mae:.4f}")

### Get Results from local LLM

In [None]:
# Change BASEURL and APIKEY in .env to use ollama
from baseline.data import load_data
import os
import shutil

# Remove the on-disk graph cache directory, if it exists, before this run.
cache_graph_folder = "cache/graph"
if os.path.exists(cache_graph_folder):
    shutil.rmtree(cache_graph_folder)

# Load the Cambridge evaluation trips and preview the first two rows.
eval_file = 'data/eval/replica-cambridge_trips_eval.csv'
eval_df = load_data(eval_file)
eval_df.head(2)

In [None]:

# Sample sizes to sweep: 0, 10, ..., 100 followed by 200, 300, ..., 1000.
l1 = [i*10 for i in range(11)]
l2 = [i*100 for i in range(2,11)]
all_samples = l1 + l2
# os.cpu_count() may return None when the count cannot be determined,
# which would make min() raise; fall back to a single worker in that case.
max_workers = min(os.cpu_count() or 1, 8)

for num_samples in all_samples:
    save_path = f"results/cambridge/mobility_agent_{num_samples}.csv"
    # run_experiments writes prediction columns into the frame it is given;
    # pass a copy so one run's predictions cannot leak into the next run.
    run_experiments(eval_df.copy(),max_workers=max_workers,num_samples=num_samples,save_path=save_path)

### Cambridge reference predict SF

In [None]:
from baseline.data import load_data
import os
import shutil

# Remove the on-disk graph cache directory, if it exists, before this run.
cache_graph_folder = "cache/graph"
if os.path.exists(cache_graph_folder):
    shutil.rmtree(cache_graph_folder)

# Switch the evaluation data to the San Francisco trips and preview two rows.
eval_file = 'data/eval/replica-sf_trips_eval.csv'
eval_df = load_data(eval_file)
eval_df.head(2)

In [None]:
max_workers = 4
# With num_samples == 0, process_row queries the agent with use_reference=False.
num_samples = 0
city="San Francisco,CA"
reference_city="Cambridge,MA"
reference_file = 'data/reference/replica-cambridge_trips.csv'
save_path = f"results/sanfransico/mobility_agent_sf_{num_samples}.csv"
# Run on San Francisco trips, passing the Cambridge reference city and file.
run_experiments(eval_df,max_workers=max_workers,
                num_samples=num_samples,     
                save_path=save_path,
                city=city,
                reference_city=reference_city,
                reference_file=reference_file,
                )

In [None]:
max_workers = 4
# Non-zero num_samples: process_row queries the agent with use_reference=True.
num_samples = 50
city="San Francisco,CA"
reference_city="Cambridge,MA"
reference_file = 'data/reference/replica-cambridge_trips.csv'
save_path = f"results/sanfransico/mobility_agent_sf_{num_samples}.csv"
# Run on San Francisco trips, passing the Cambridge reference city and file.
run_experiments(eval_df,max_workers=max_workers,
                num_samples=num_samples,     
                save_path=save_path,
                city=city,
                reference_city=reference_city,
                reference_file=reference_file,
                )

### SF reference predict SF

In [None]:
from baseline.data import load_data
import os
import shutil


# Remove the on-disk graph cache directory, if it exists, before this run.
cache_graph_folder = "cache/graph"
if os.path.exists(cache_graph_folder):
    shutil.rmtree(cache_graph_folder)

# Reload the San Francisco evaluation trips and preview the first two rows.
eval_file = 'data/eval/replica-sf_trips_eval.csv'
eval_df = load_data(eval_file)
eval_df.head(2)



In [None]:
max_workers = 4
# Non-zero num_samples: process_row queries the agent with use_reference=True.
num_samples = 50
city="San Francisco,CA"
reference_city="San Francisco,CA"
reference_file = 'data/reference/replica-sf_trips.csv'
save_path = f"results/sanfransico/mobility_agent_sf_refsf_{num_samples}.csv"
# Run on San Francisco trips, passing the San Francisco reference city and file.
run_experiments(eval_df,max_workers=max_workers,
                num_samples=num_samples,     
                save_path=save_path,
                city=city,
                reference_city=reference_city,
                reference_file=reference_file,
                )

### SF reference predict Cambridge

In [None]:
from baseline.data import load_data
import os
import shutil


# Remove the on-disk graph cache directory, if it exists, before this run.
cache_graph_folder = "cache/graph"
if os.path.exists(cache_graph_folder):
    shutil.rmtree(cache_graph_folder)

# Reload the Cambridge evaluation trips and preview the first two rows.
eval_file = 'data/eval/replica-cambridge_trips_eval.csv'
eval_df = load_data(eval_file)
eval_df.head(2)



In [None]:
max_workers = 4
# Non-zero num_samples: process_row queries the agent with use_reference=True.
num_samples = 50
city="Cambridge,MA"
reference_city="San Francisco,CA"
reference_file = 'data/reference/replica-sf_trips.csv'
save_path = f"results/cambridge/mobility_agent_refsf_{num_samples}.csv"
# Run on Cambridge trips, passing the San Francisco reference city and file.
run_experiments(eval_df,max_workers=max_workers,
                num_samples=num_samples,     
                save_path=save_path,
                city=city,
                reference_city=reference_city,
                reference_file=reference_file,
                )