In [11]:
import os
import time
from langchain_huggingface import ChatHuggingFace
from langchain_core.messages import HumanMessage
from transformers import pipeline
from langchain_community.llms import HuggingFacePipeline
from huggingface_hub import InferenceClient
from langchain_huggingface import HuggingFaceEndpoint
# Never hardcode the access token in the notebook. Reuse HF_TOKEN when the
# environment already provides it (so a real token is not clobbered by a
# placeholder); otherwise ask for it interactively so it is never saved to disk.
if not os.environ.get("HF_TOKEN"):
    from getpass import getpass

    os.environ["HF_TOKEN"] = getpass("Hugging Face access token: ")


In [2]:
# --- Mock inputs standing in for the upstream layers ---

# Preprocessing layer: detected intent and the entities extracted with it.
intent = "route_delay_insight"
entities = {"departure_airport": "ORD", "arrival_airport": "LAX"}

# Baseline retrieval: rows shaped like the results of a Cypher query.
baseline_results = [
    {"flight": "AA235", "route": "ORD → LAX", "avg_delay_minutes": 27, "passenger_rating": 3.2},
    {"flight": "UA876", "route": "ORD → LAX", "avg_delay_minutes": 18, "passenger_rating": 3.8},
]

# Embedding retrieval: flights with a similarity score; `route` is optional.
embedding_results = [
    {"flight": "AA235", "similarity": 0.91},
    {"flight": "DL442", "route": "JFK → LAX", "similarity": 0.87},
]


In [None]:
def _first_present(record, *keys):
    """Return the first value stored in ``record`` under ``keys`` that is not missing.

    Only ``None`` and the empty string count as missing, so legitimate falsy
    values such as ``0`` or ``0.0`` are returned instead of being skipped.
    Returns ``None`` when none of the keys holds a usable value.
    """
    for key in keys:
        value = record.get(key)
        if value is not None and value != "":
            return value
    return None


def merge_retrieval_results(baseline_results, embedding_results):
    """Merge baseline (Cypher / KG) rows and embedding hits into one record per flight.

    Records are keyed by the first available of ``flight``, ``flight_id`` or
    ``id``; items without any of them are skipped.

    Args:
        baseline_results: Iterable of dicts from the baseline query. Recognised
            keys: ``route``, ``avg_delay_minutes``/``avg_delay``,
            ``passenger_rating``/``rating``. ``None`` is treated as empty.
        embedding_results: Iterable of dicts from the embedding retrieval.
            Recognised keys: ``similarity``/``score`` and optionally ``route``.
            ``None`` is treated as empty.

    Returns:
        A list of dicts with the keys ``flight``, ``route``, ``avg_delay``,
        ``rating``, ``similarity`` and ``source`` (a list containing
        ``"cypher"`` and/or ``"embedding"``), in first-seen order.
    """
    merged = {}

    # Baseline (Cypher / KG) results seed the records.
    for item in baseline_results or []:
        flight_id = _first_present(item, "flight", "flight_id", "id")
        if flight_id is None:
            continue

        merged[flight_id] = {
            "flight": flight_id,
            "route": item.get("route"),
            # `_first_present` instead of `a or b`: a delay or rating of 0 is
            # real data and must not be replaced by the fallback key / None.
            "avg_delay": _first_present(item, "avg_delay_minutes", "avg_delay"),
            "rating": _first_present(item, "passenger_rating", "rating"),
            "similarity": None,
            "source": ["cypher"],
        }

    # Embedding hits either enrich an existing record or create a new one.
    for item in embedding_results or []:
        flight_id = _first_present(item, "flight", "flight_id", "id")
        if flight_id is None:
            continue

        similarity = _first_present(item, "similarity", "score")
        entry = merged.get(flight_id)

        if entry is None:
            merged[flight_id] = {
                "flight": flight_id,
                # Keep the route when the embedding hit carries one.
                "route": item.get("route"),
                "avg_delay": None,
                "rating": None,
                "similarity": similarity,
                "source": ["embedding"],
            }
            continue

        if similarity is not None:
            entry["similarity"] = similarity
        if entry["route"] is None:
            entry["route"] = item.get("route")
        # Repeated hits for the same flight must not duplicate the source tag.
        if "embedding" not in entry["source"]:
            entry["source"].append("embedding")

    return list(merged.values())


In [3]:
def build_context(baseline, embeddings):
    """Render the retrieval results as the plain-text context block for the prompt.

    Args:
        baseline: Iterable of dicts with the keys ``flight``, ``route``,
            ``avg_delay_minutes`` and ``passenger_rating``.
        embeddings: Iterable of dicts with the keys ``flight`` and
            ``similarity``.

    Returns:
        A header, one sentence per baseline row, a second header, and one
        sentence per embedding hit, each terminated by a newline.

    Raises:
        KeyError: If an item lacks one of the keys listed above.
    """
    kg_sentences = [
        f"Flight {row['flight']} on route {row['route']} "
        f"has an average delay of {row['avg_delay_minutes']} minutes "
        f"and passenger rating {row['passenger_rating']}.\n"
        for row in baseline
    ]
    similarity_sentences = [
        f"Flight {hit['flight']} has similarity score {hit['similarity']}.\n"
        for hit in embeddings
    ]

    # Join once instead of growing the string piece by piece.
    return "".join(
        [
            "Retrieved airline operational insights:\n\n",
            *kg_sentences,
            "\nSimilar flights or routes based on embeddings:\n",
            *similarity_sentences,
        ]
    )


# Render the mock retrieval results into the context text and show it.
kg_context = build_context(baseline=baseline_results, embeddings=embedding_results)
print(kg_context)


Retrieved airline operational insights:

Flight AA235 on route ORD → LAX has an average delay of 27 minutes and passenger rating 3.2.
Flight UA876 on route ORD → LAX has an average delay of 18 minutes and passenger rating 3.8.

Similar flights or routes based on embeddings:
Flight AA235 has similarity score 0.91.
Flight DL442 has similarity score 0.87.



In [4]:
def build_prompt(context, question):
    """Assemble the full LLM prompt: persona, retrieved context, task and question.

    Args:
        context: Text describing the retrieved knowledge-graph facts; inserted
            verbatim under the "Context" section.
        question: The user's question; inserted verbatim at the end.

    Returns:
        The prompt string. It starts with a blank line, separates the four
        sections with blank lines, and ends with a newline.
    """
    persona_section = (
        "Persona:\n"
        "You are an airline operations insight assistant.\n"
        "You help airline companies analyze flight delays,\n"
        "routes performance, and passenger satisfaction.\n"
    )
    context_section = (
        "Context:\n"
        "The following information was retrieved from the airline knowledge graph.\n"
        "Use ONLY this information. Do NOT use external knowledge.\n"
        "\n"
        f"{context}\n"
    )
    task_section = (
        "Task:\n"
        "Provide clear operational insights and recommendations\n"
        "based only on the context above.\n"
        "If the information is insufficient, say so clearly.\n"
    )
    question_section = f"User Question:\n{question}\n"

    # Leading newline plus one blank line between consecutive sections.
    sections = [persona_section, context_section, task_section, question_section]
    return "\n" + "\n".join(sections)
# Example question, combined with the retrieved context into the final prompt.
user_question = "Which route has the highest delays and what should the airline improve?"
prompt = build_prompt(context=kg_context, question=user_question)
print(prompt)




Persona:
You are an airline operations insight assistant.
You help airline companies analyze flight delays,
routes performance, and passenger satisfaction.

Context:
The following information was retrieved from the airline knowledge graph.
Use ONLY this information. Do NOT use external knowledge.

Retrieved airline operational insights:

Flight AA235 on route ORD → LAX has an average delay of 27 minutes and passenger rating 3.2.
Flight UA876 on route ORD → LAX has an average delay of 18 minutes and passenger rating 3.8.

Similar flights or routes based on embeddings:
Flight AA235 has similarity score 0.91.
Flight DL442 has similarity score 0.87.


Task:
Provide clear operational insights and recommendations
based only on the context above.
If the information is insufficient, say so clearly.

User Question:
Which route has the highest delays and what should the airline improve?



In [5]:
def load_chat_model(repo_id, max_new_tokens=300, temperature=0.2):
    """Load a Hub model into a local text-generation pipeline wrapped for LangChain.

    Args:
        repo_id: Hugging Face Hub repository id of the model to load.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature. A positive value enables sampling;
            ``0`` or ``None`` selects greedy decoding.

    Returns:
        A ``HuggingFacePipeline`` LLM whose ``invoke(prompt)`` runs the
        underlying ``transformers`` pipeline.
    """
    # Imported locally so this function uses the maintained class even though the
    # module-level name refers to the deprecated langchain_community variant.
    from langchain_huggingface import HuggingFacePipeline

    generation_kwargs = {"max_new_tokens": max_new_tokens}
    if temperature is not None and temperature > 0:
        # `temperature` only takes effect when sampling is enabled; under
        # greedy decoding it is ignored.
        generation_kwargs.update(do_sample=True, temperature=temperature)
    else:
        generation_kwargs["do_sample"] = False

    pipe = pipeline("text-generation", model=repo_id, **generation_kwargs)
    return HuggingFacePipeline(pipeline=pipe)


In [16]:
# Models to compare: display name -> Hugging Face Hub repository id.
# run_all_models() iterates this mapping and passes each repo id to load_chat_model().
# NOTE(review): some of these repos may require accepted access terms and a valid
# HF_TOKEN — confirm before running.
MODELS = {
    "Mistral": "mistralai/Mistral-7B-Instruct-v0.2",
    "Zephyr": "HuggingFaceH4/zephyr-7b-beta",
    "Llama2": "meta-llama/Llama-2-7b-chat-hf"
}


In [17]:
def run_all_models(prompt, models=None, loader=None):
    """Run ``prompt`` through every model and collect response and latency.

    Args:
        prompt: Prompt passed to each model's ``invoke``.
        models: Optional mapping of display name -> model identifier.
            Defaults to the module-level ``MODELS``.
        loader: Optional callable turning a model identifier into an object
            with an ``invoke(prompt)`` method. Defaults to ``load_chat_model``.

    Returns:
        A dict keyed by display name; each value is
        ``{"response": <model output>, "time_seconds": <float, 2 decimals>}``.
        Only the ``invoke`` call is timed, not model loading.
    """
    import gc

    if models is None:
        models = MODELS
    if loader is None:
        loader = load_chat_model

    results = {}

    for name, repo in models.items():
        print(f"Running {name}...")
        llm = loader(repo)

        # perf_counter is monotonic, so the interval is immune to clock changes.
        start = time.perf_counter()
        response = llm.invoke(prompt)
        elapsed = time.perf_counter() - start

        results[name] = {
            "response": response,
            "time_seconds": round(elapsed, 2)
        }

        # Release this model before the next one loads; otherwise both sets of
        # weights are held in memory at the same time.
        del llm
        gc.collect()

    return results

In [None]:
# Heavy cell: in the recorded run below, the first model alone fetched three
# weight shards (about 14.5 GB in total) and generation ran on CPU.
outputs = run_all_models(prompt)
outputs


Running Mistral...


Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

Device set to use cpu
  llm = HuggingFacePipeline(pipeline=pipe)
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
