# Mode Prediction Experiment

This experiment simulates the mode choices of 1000 agents in Cambridge and compares them with real data.

### Get Evaluation Data

In [1]:
# Load the evaluation trips that the simulated mode choices are compared against.
from baseline.data import load_data
# Relative path: resolved against the notebook's working directory.
eval_file = 'data/eval/replica-cambridge_trips_eval.csv'
# NOTE(review): load_data is project-local; the displayed output suggests it
# returns a pandas DataFrame with one row per trip — confirm.
eval_df = load_data(eval_file)
# Preview the first two rows as the cell output.
eval_df.head(2)

Unnamed: 0,person_id,age,gender,employment_status,household_size,household_income,available_vehicles,industry,education,trip_purpose,start_time,primary_mode,duration_minutes,age_group,income_group
0,14941376504966255761,16,male,not_in_labor_force,4,140343,two,not_working,k_12,eat,14,walking,10-20,Under 18,$100k-$150k
1,741506727884677094,60,male,employed,1,37850,one,naics445110,bachelors_degree,eat,14,walking,50-60,55-64,$10k-$50k


###  Get Results

In [None]:
import json
import os
import random
from tqdm import tqdm

from mobility_agent.agent import MobilityAgent
from baseline.eval import cal_group_kl_divergence,cal_topk_acc

# Traveller attributes that are serialised to JSON and handed to each agent as its profile.
profile_columns = ['age_group','income_group','employment_status','household_size','available_vehicles','education']

# Columns passed to cal_group_kl_divergence as its grouping features.
group_features = ['age_group','income_group', 'employment_status', 'household_size','available_vehicles', 'education', 'trip_purpose','start_time']

# Columns written by every sweep. They are cleared at the start of each sweep so
# that a row whose agent call fails is left empty instead of silently keeping
# the prediction produced by the previous num_samples setting.
prediction_columns = ['predicted_mode', 'predicted_duration', 'selection_reason', 'choice_weights']

SAMPLING_SEED = 42      # seed for the weighted draw of the final mode
CHECKPOINT_EVERY = 100  # write a checkpoint CSV when the row label is a multiple of this
TOP_K = 3               # k used for the top-k accuracy metric

# Metrics of each sweep, keyed by num_samples.
eval_results = {}
for i in range(0,11):
    num_samples = i*100
    print(f"=======Mobility Agent (num_samples={num_samples})=======")
    save_path = f"models/mobility_agent/mobility_agent_{num_samples}.csv"
    os.makedirs(os.path.dirname(save_path),exist_ok=True)

    # Drop whatever an earlier sweep (or an earlier run of this cell) left behind.
    for column in prediction_columns:
        eval_df[column] = None

    # Re-seeded per sweep: every num_samples setting sees the same sequence of
    # random draws, so the draw itself is repeatable. The agent's own output
    # may still differ between runs.
    rng = random.Random(SAMPLING_SEED)
    failed_rows = 0

    for idx, row in tqdm(eval_df.iterrows(),total=len(eval_df)):
        try:
            profile = row[profile_columns].to_dict()
            desire = row['trip_purpose']
            start_time = row['start_time']
            agent = MobilityAgent(profile=json.dumps(profile),sample_num=num_samples)
            agent.working_memory = ["Today is a normal weekday"]
            # With zero samples there is nothing to reference, so the
            # reference-free entry point is used instead.
            if num_samples == 0:
                mode_preference = agent.get_mode_prefernce_without_reference(desire=desire,time=start_time)
            else:
                mode_preference = agent.get_mode_prefernce(desire=desire,time=start_time)
            choice_weights = mode_preference['choice_weights']
            weights = [choice['weight'] for choice in choice_weights]
            # Sample one candidate in proportion to its weight.
            selected_mode = rng.choices(choice_weights, weights=weights, k=1)[0]

            # Compute every value before the first write so that a failure
            # here cannot leave the row half-filled.
            predicted_mode = selected_mode['primary_mode']
            predicted_duration = selected_mode['duration_minutes']
            selection_reason = json.dumps(mode_preference['think'])
            choice_weights_json = json.dumps(choice_weights)

            eval_df.loc[idx,"predicted_mode"] = predicted_mode
            eval_df.loc[idx,"predicted_duration"] = predicted_duration
            eval_df.loc[idx,"selection_reason"] = selection_reason
            eval_df.loc[idx,"choice_weights"] = choice_weights_json
            if idx % CHECKPOINT_EVERY == 0:
                eval_df.to_csv(save_path)
        except Exception as e:
            # Best effort: one failing agent call must not abort a sweep that
            # takes hours, but record which row failed and why.
            failed_rows += 1
            print(f"row {idx} failed: {type(e).__name__}: {e}")
    eval_df.to_csv(save_path)
    if failed_rows:
        print(f"{failed_rows} of {len(eval_df)} rows have no prediction")

    print("=======Evaluating model=======")
    kl_df, overall_kl,overall_mape = cal_group_kl_divergence(result_df=eval_df,group_features=group_features)
    topk_accuracies = cal_topk_acc(result_df=eval_df,k=TOP_K)
    print(f"Top {TOP_K} accuracy: { topk_accuracies['average']:.4f}")
    print(f"Overall average KL divergence: {overall_kl:.4f}")
    print(f"Overall mean absolute percentage error: {overall_mape:.4f}")

    # Keep the metrics so that later cells can compare the sweep settings.
    eval_results[num_samples] = {
        "topk_accuracy": topk_accuracies['average'],
        "overall_kl": overall_kl,
        "overall_mape": overall_mape,
        "failed_rows": failed_rows,
    }

 13%|█▎        | 132/1000 [29:18<3:12:44, 13.32s/it]


KeyboardInterrupt: 

### Visualization

### Comparison of Duration Choice Distribution

### Mode Prediction 2: Changing Conditions