# Route Data Playground

# Utils

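We keep a single zone per route, sampled from one of its stops: `reshape_json` overwrites `Zone` on every stop, so the route ends up with the zone of the last stop in its `stops` mapping (an arbitrary pick rather than a truly random one).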

In [16]:
import json

def extract_zone_id_prefix(json_data):
    """Shorten every stop's ``zone_id`` to the text before its first ``.``.

    Stop records are modified in place and the same ``json_data`` object is
    returned.

    Args:
        json_data: Mapping of route id -> route record. A route record may
            hold a ``"stops"`` mapping of stop code -> stop record.

    Returns:
        The ``json_data`` object that was passed in, with string zone ids
        truncated.
    """
    for route_data in json_data.values():
        # Routes without a "stops" entry are skipped, matching the tolerance
        # reshape_json already has for them.
        for item in route_data.get("stops", {}).values():
            zone_id = item.get("zone_id")
            # Only non-empty strings can be truncated. Missing, null, empty or
            # non-string values (json.load yields a float for a bare NaN) are
            # left exactly as they are instead of raising.
            if isinstance(zone_id, str) and zone_id:
                item["zone_id"] = zone_id.partition(".")[0]
    return json_data

def encode_stop_type(json_data):
    """Replace every stop's ``type`` label with an integer code.

    ``"Dropoff"`` becomes 1 and ``"Station"`` becomes 2. Any other value,
    including a missing or null ``type``, becomes 0. Stop records are modified
    in place and the same ``json_data`` object is returned.

    Args:
        json_data: Mapping of route id -> route record. A route record may
            hold a ``"stops"`` mapping of stop code -> stop record.

    Returns:
        The ``json_data`` object that was passed in, with encoded stop types.
    """
    # Fixed label -> code table; 0 is reserved for "unknown / not set".
    type_mapping = {"Dropoff": 1, "Station": 2}
    for route_data in json_data.values():
        # Routes without a "stops" entry are skipped, matching the tolerance
        # reshape_json already has for them.
        for item in route_data.get("stops", {}).values():
            item["type"] = type_mapping.get(item.get("type"), 0)
    return json_data

def reshape_json(data):
    """Flatten the per-route mapping into a list with one row dict per route.

    Each row starts with ``RouteID`` followed by the route-level fields
    ``station_code``, ``date_YYYY_MM_DD``, ``departure_time_utc`` and
    ``executor_capacity_cm3``. Every stop then adds a column named after its
    stop code that holds the stop's ``type`` (``""`` when absent). ``Zone`` is
    rewritten for each stop, so it ends up as the ``zone_id`` of the last stop
    iterated; a route without stops gets no ``Zone`` key at all.

    Args:
        data: Mapping of route id -> route record.

    Returns:
        A list of row dicts, in the iteration order of ``data``.

    Raises:
        KeyError: If a route record lacks one of the route-level fields.
    """
    route_fields = (
        "station_code",
        "date_YYYY_MM_DD",
        "departure_time_utc",
        "executor_capacity_cm3",
    )
    rows = []
    for route_id, route in data.items():
        row = {"RouteID": route_id}
        row.update((field, route[field]) for field in route_fields)
        for code, stop in route.get("stops", {}).items():
            # "Zone" is overwritten on every pass, so the last stop wins.
            row.update({code: stop.get("type", ""), "Zone": stop.get("zone_id", "")})
        rows.append(row)
    return rows


In [17]:
ROUTE_DATA_RAW_DATASET_PATH = "../datasets/raw/eval_route_data_formatted.json"
ROUTE_DATA_PROCESSED_DATASET_PATH = "../datasets/processed/eval_route_data_formatted.json"

# Load the raw evaluation routes. The encoding is pinned to UTF-8 so that
# decoding does not depend on the platform's locale default.
with open(ROUTE_DATA_RAW_DATASET_PATH, "r", encoding="utf-8") as eval_route:
    data_eval_route = json.load(eval_route)

In [18]:
# Run the three preprocessing stages innermost-first: truncate zone ids,
# encode stop types, then flatten to one row per route. The first two stages
# modify data_eval_route in place.
eval_route_processed = reshape_json(
    encode_stop_type(
        extract_zone_id_prefix(data_eval_route)
    )
)

In [19]:
# Persist the reshaped rows as pretty-printed JSON (4-space indent).
with open(ROUTE_DATA_PROCESSED_DATASET_PATH, mode="w") as out_file:
    json.dump(obj=eval_route_processed, fp=out_file, indent=4)