In [7]:
import os
from openai import OpenAI
from dotenv import load_dotenv
import pandas as pd
import json

In [8]:
# Load variables from a local .env file into the process environment
# (API keys, etc.) before the OpenAI client is constructed.
load_dotenv()
# Module-level OpenAI client; generate_route() issues its chat-completion
# requests through this object.
client = OpenAI()

In [147]:
def get_enriched_prompt(predicted_info, max_distance, max_emission, max_emission_grams, predicted_mode):
    """Build the user prompt that asks the model for a multi-segment freight route.

    Args:
        predicted_info: Prediction details appended verbatim (via ``str()``)
            under the "Predicted Info" heading.
        max_distance: Upper bound on the total route distance, in km.
        max_emission: Upper bound on total CO₂ emission, in kg.
        max_emission_grams: The same emission bound expressed in grams.
        predicted_mode: Transport mode the route should primarily use.

    Returns:
        The fully formatted prompt string.
    """
    # The field names and the output shape below mirror the `Routes` / `Route`
    # response schema that generate_route() enforces through `response_format`
    # (`start_from`, top-level `routes`). Route totals are not requested from
    # the model because the schema has no slot for them; parse_routes()
    # recomputes them from the segments.
    template = f"""
You are a sustainability logistics planner. Based on the predicted route information below, generate a full multi-segment route that:

- Stays within a total distance of {max_distance} km.
- Stays under a total CO₂ emission of {max_emission} kg (i.e., {max_emission_grams} grams).
- Primarily uses the predicted mode: {predicted_mode}, but you may use other modes to optimize for emissions or realism.
- If predicted mode is 'Medium Truck', choose sustainable alternatives: Cargo Train, Container Ship, Electric Van.
- Choose realistic transport paths (city-to-city freight rail, ports, road access).
- Prefer sustainable alternatives: Cargo Train, Container Ship, Electric Van.
- Limit to maximum 3 hops

Emission rates:
- Air Freight: 600 g/km
- Container Ship: 10 g/km
- Cargo Train: 50 g/km
- Heavy Truck: 500 g/km
- Medium Truck: 300 g/km
- Small Van: 180 g/km
- Electric Van: 40 g/km

Each segment must include: `start_from`, `to`, `country`, `mode`, `vehicle`, `distance_km`, `emission_g`

Output JSON:
{{
  "routes": [{{...}}]
}}

### Predicted Info:
{predicted_info}
"""
    return template

In [148]:
from pydantic import BaseModel

# One segment (hop) of a generated route. Documented with comments rather than
# a class docstring so the schema derived from this model is left untouched.
class Route(BaseModel):
    # Origin of the segment. Presumably named `start_from` because `from` is a
    # reserved word in Python — TODO confirm.
    start_from: str
    # Destination of the segment.
    to: str
    country: str
    # Transport mode and the vehicle used for this segment.
    mode: str
    vehicle: str
    # Segment length in kilometres.
    distance_km: int
    # Segment emission in grams; downstream code divides by 1000 to get kg.
    emission_g: int

# Top-level structured-output schema passed as `response_format` in
# generate_route(): a single `routes` key holding the ordered segments.
class Routes(BaseModel):
    routes: list[Route]


In [149]:
def generate_route(predicted_info, model="gpt-4o", temperature=0.7, n=3):
    """Ask the model for up to ``n`` candidate routes for one prediction.

    Args:
        predicted_info: Mapping with at least the keys ``"distance"``,
            ``"emission"`` (kg) and ``"transportation_mode"``.
        model: Chat model name passed to the OpenAI client.
        temperature: Sampling temperature for each request.
        n: Number of independent requests to issue.

    Returns:
        A list of raw JSON strings (one per successful request), each expected
        to conform to the ``Routes`` schema. Failed or empty responses are
        reported via ``print`` and skipped, so the list may be shorter than
        ``n``.
    """
    max_emission = predicted_info["emission"]
    enriched_prompt = get_enriched_prompt(
        predicted_info,
        predicted_info["distance"],
        max_emission,
        # round() before int(): a bare int() truncates products such as
        # 1.001 * 1000 == 1000.9999999999999 down to 1000.
        int(round(float(max_emission) * 1000)),
        predicted_info["transportation_mode"],
    )
    messages = [
        {"role": "system", "content": "You are a helpful sustainability logistics planner."},
        {"role": "user", "content": enriched_prompt},
    ]

    routes = []
    for attempt in range(1, n + 1):
        # The request itself is the step that can fail (network, rate limit,
        # schema validation), so it lives inside the try; one bad sample must
        # not discard the candidates already collected.
        try:
            response = client.chat.completions.parse(
                model=model,
                messages=messages,
                response_format=Routes,
                temperature=temperature,
            )
            content = response.choices[0].message.content
        except Exception as e:
            print(f"⚠️ Route generation attempt {attempt}/{n} failed: {e}")
            continue

        # `content` can be None (e.g. when the model refuses); appending it
        # would make json.loads() in parse_routes() raise a TypeError later.
        if content is None:
            print(f"⚠️ Route generation attempt {attempt}/{n} returned no content")
            continue
        routes.append(content)
    return routes

In [1]:

def parse_routes(raw_outputs):
    """Decode raw JSON route strings and attach per-route totals.

    Args:
        raw_outputs: Iterable of JSON strings, each holding a ``"routes"``
            list of segment dicts with ``"distance_km"`` and ``"emission_g"``.

    Returns:
        A list of dicts with the keys ``"route"`` (the segment list),
        ``"total_distance_km"`` and ``"total_emission_g"``.
    """
    def _total(legs, field):
        # Sum one numeric field across all segments of a route.
        return sum(leg[field] for leg in legs)

    def _summarise(raw):
        legs = json.loads(raw)["routes"]
        return {
            "route": legs,
            "total_distance_km": _total(legs, "distance_km"),
            "total_emission_g": _total(legs, "emission_g"),
        }

    return [_summarise(raw) for raw in raw_outputs]

In [168]:
def validate_route(route, max_distance_km, max_emission_kg):
    """Check a parsed route against distance and emission limits.

    Args:
        route: Dict with ``"total_distance_km"`` and ``"total_emission_g"``.
        max_distance_km: Maximum allowed total distance, in km.
        max_emission_kg: Maximum allowed total emission, in kg.

    Returns:
        Dict with the route totals (emission converted to kg), one boolean per
        limit, and ``"status"`` set to ``"valid"`` only when both limits hold.
    """
    distance_km = route["total_distance_km"]
    emission_kg = route["total_emission_g"] / 1000  # grams -> kilograms

    within_distance = distance_km <= max_distance_km
    within_emission = emission_kg <= max_emission_kg

    if within_distance and within_emission:
        verdict = "valid"
    else:
        verdict = "invalid"

    return {
        "total_distance": distance_km,
        "total_emission": emission_kg,
        "distance_valid": within_distance,
        "emission_valid": within_emission,
        "status": verdict,
    }


In [169]:
def export_routes_summary_to_excel(routes, predicted_info):
    """Build a one-row-per-route summary table for the given parsed routes.

    Despite the name, nothing is written to disk here: the function only
    returns the DataFrame and leaves exporting to the caller.

    Args:
        routes: Parsed routes as produced by ``parse_routes`` (dicts with
            ``"route"``, ``"total_distance_km"`` and ``"total_emission_g"``).
        predicted_info: Mapping whose ``"distance"`` (km) and ``"emission"``
            (kg) entries are used as the validation limits.

    Returns:
        A ``pandas.DataFrame`` with one row per non-empty route. Routes with
        no segments are skipped, so the result may have fewer rows than
        ``routes`` (``"Route #"`` still reflects the original position).
    """
    # float() rather than int(): int() raises on decimal strings such as
    # "120.5", and truncating the limit gains nothing because route totals
    # are compared with <= anyway.
    predicted_max_distance_km = float(predicted_info["distance"])
    predicted_max_emission_kg = float(predicted_info["emission"])

    summary_rows = []

    for i, route in enumerate(routes, 1):
        segments = route["route"]
        if not segments:
            # No first/last segment to read source and destination from;
            # indexing [0] / [-1] would raise IndexError.
            continue

        validation = validate_route(route, predicted_max_distance_km, predicted_max_emission_kg)

        # Compact, human-readable rendering of every segment.
        route_summary = " | ".join(
            f"{seg['start_from']}→{seg['to']} ({seg['mode']}, {seg['distance_km']} km, {round(seg['emission_g']/1000,2)} kg)"
            for seg in segments
        )

        # NOTE(review): the "Predicted ..." and "Real ..." columns are both
        # derived from the same route totals, so they only differ by rounding.
        # Confirm whether "Predicted" was meant to carry the values from
        # `predicted_info` instead.
        summary_rows.append({
            "Route #": i,
            "Source": segments[0]["start_from"],
            "Destination": segments[-1]["to"],
            "Predicted Distance (km)": route["total_distance_km"],
            "Predicted Emission (kg CO₂)": round(route["total_emission_g"] / 1000, 2),
            "Real Distance (km)": validation["total_distance"],
            "Real Emission (kg CO₂)": validation["total_emission"],
            "Distance Valid": validation["distance_valid"],
            "Emission Valid": validation["emission_valid"],
            "Overall Status": validation["status"],
            "Route Summary": route_summary
        })

    return pd.DataFrame(summary_rows)


In [2]:
import ast


def generate_full_routes(row):
    """Generate, rank and summarise candidate routes for one test-set row.

    Args:
        row: Record with the fields ``source``, ``source_country``,
            ``destination``, ``destination_country`` and ``vae_output``.
            ``vae_output`` is a string holding a Python dict literal with the
            keys ``distance``, ``emission`` and ``mode``.

    Returns:
        The summary DataFrame built by ``export_routes_summary_to_excel`` for
        the (up to three) lowest-emission candidates.
    """
    predicted_info = {
        "source": row["source"],
        "source_country": row["source_country"],
        "destination": row["destination"],
        "destination_country": row["destination_country"],
    }

    # literal_eval only evaluates Python literals, so the stored string cannot
    # execute arbitrary code.
    vae_prediction = ast.literal_eval(row["vae_output"])
    predicted_info["distance"] = vae_prediction["distance"]
    predicted_info["emission"] = vae_prediction["emission"]
    predicted_info["transportation_mode"] = vae_prediction["mode"]

    # Query the model exactly once per row. A second, discarded call here
    # would double the number of paid API requests without changing the
    # result.
    candidate_routes = generate_route(predicted_info, model="gpt-4o", temperature=0.7, n=3)
    parsed_routes = parse_routes(candidate_routes)

    # Rank by total emission, lowest first.
    top_routes = sorted(parsed_routes, key=lambda r: r["total_emission_g"])[:3]
    return export_routes_summary_to_excel(
        routes=top_routes,
        predicted_info=predicted_info,
    )

In [None]:
# Generate routes for every row of the VAE test output and write them to one
# flat Excel sheet.
test_df = pd.read_excel("results/vae_outputs/test_result_VAE_output.xlsx")

# generate_full_routes() returns a DataFrame per row. Collect those frames
# explicitly: DataFrame.apply(axis=1) would wrap them in a container of
# DataFrame objects instead of producing one concatenable list of tables.
df_list = [generate_full_routes(row) for _, row in test_df.iterrows()]

# pd.concat() raises on an empty list, so fall back to an empty frame.
df = pd.concat(df_list, ignore_index=True) if df_list else pd.DataFrame()

# to_excel() does not create missing parent directories.
os.makedirs("results/generated_routes", exist_ok=True)
df.to_excel("results/generated_routes/test_set_generated_routes.xlsx", index=False)