In [1]:
# Always put this in the first cell of your notebook to activate text wrapping in the output cells
from IPython.display import HTML, display

def set_css(info=None):
  """Inject a CSS rule that makes ``<pre>`` output wrap instead of overflowing.

  Registered below as an IPython ``pre_run_cell`` callback, so the style is
  re-emitted before each cell runs.

  Args:
    info: Optional object that IPython passes to ``pre_run_cell`` callbacks.
      It is accepted so the function matches the documented callback
      signature; it is not used.
  """
  display(HTML('''
  <style>
    pre {
        white-space: pre-wrap;
    }
  </style>
  '''))
# Hook set_css into IPython's 'pre_run_cell' event so it is invoked before every cell execution.
get_ipython().events.register('pre_run_cell', set_css)

In [2]:
!pip install groq langchain langchain_groq -q

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/127.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.5/127.5 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
# Import the packages
import os
from google.colab import drive, userdata
from langchain.prompts import PromptTemplate
from langchain_groq import ChatGroq
from langchain.prompts import ChatPromptTemplate
from langchain.chains import LLMChain
#from langchain.chains.router import MultiPromptChain
#from langchain.chains.router.llm_router import LLMRouterChain,RouterOutputParser

In [4]:
# Mount Google Drive
# The mount point passed to drive.mount is /content/drive/; a progress line is printed first.
print("Mounting Google Drive...")
drive.mount('/content/drive/')

Mounting Google Drive...
Mounted at /content/drive/


In [5]:
# Access the Groq API key
# The key is read from Colab's `userdata` store and exported as an environment variable.
# No api_key is passed to ChatGroq below, so the client presumably reads GROQ_API_KEY
# from the environment — TODO confirm.
os.environ['GROQ_API_KEY'] = userdata.get('GROQ_API_KEY')

# Initialize the language model
# NOTE(review): temperature=0.8 is fairly high for prompts that demand strict JSON output;
# confirm this is intentional.
llm = ChatGroq(
    model="llama-3.3-70b-versatile",
    temperature=0.8
)

In [6]:
# Get a driver utterance
def get_utterance():
    """Prompt on standard input and return the typed text unchanged.

    Returns:
        str: Whatever was entered at the "Tell me something: " prompt.
    """
    return input("\nTell me something: ")

In [7]:
# Prompt for the audio-media domain: asks the model to split a driver utterance into atomic
# requests and to return one JSON object (intent, slots, confidence, rationale) per request.
# The only placeholder is {user_utterance} (used twice); literal JSON braces are doubled
# ({{ }}) so that format-style templating leaves them intact.
audio_media_domain_template = """You are a multilingual linguist for an in-car voice assistant.
You understand synonyms and paraphrases across different languages.
You specialize in audio media commands.

## Task:
Analyze the following driver utterance:
"{user_utterance}"

Follow these steps:
1. If the utterance contains multiple requests, split them into atomic statements (each with a single instruction).
2. For each atomic statement:
   - Identify the most appropriate **intent** from the list below.
   - Extract up to **three** relevant **slot–value pairs**.
3. Treat polite requests, questions, and indirect expressions as equivalent to direct commands.
4. Focus on the meaning of the utterance — ignore tone, politeness, or sentence structure.
5. Resolve general modifiers like “all” or “two songs ahead” into appropriate slot–value pairs (e.g. "scope": "all", "quantity": 2).
6. If a slot is implied but not directly named (e.g. “Turn it on”), infer based on common usage (e.g. assume “radio” if ambiguous) or leave it empty.
7. For each atomic request, output only the JSON in the specified format. Include a short rationale and a confidence score as fields in the JSON.
8. Do not add any title or explanation. Return only the JSON output.

## Available Intents:
* lower_volume: Decrease the audio playback volume.
* pause_track: Temporarily stop the current audio track.
* play_track: Start playing a specific audio track.
* raise_volume: Increase the audio playback volume.
* resume_track: Continue playing the paused audio track.
* skip_track: Move forward to the next track in the playlist.
* stop_track: Completely stop audio playback.

## Available Slots and Their Descriptions:
* direction: The skip direction if applicable (e.g. "forward", "backward").
* media_source: The origin of the media (e.g. "my playlist", "jazz playlist", a named radio station).
* media_type: The category of media (e.g. music, podcast, radio station).
* music_genre: The genre of music (e.g. classical, jazz, pop).
* music_mood: The mood of the music (e.g. "upbeat", "relaxing").
* quantity: A number indicating how many tracks to skip or repeat.
* scope: Broad modifiers like "all" (e.g. "play all songs").
* target_device: The system or hardware to control (e.g. radio, volume).
* target_media: The specific media being controlled (e.g. podcast, album, song).

## Guidelines
* Interpret "skip forward N songs" as moving ahead past N songs. For example, from song A, "skip forward 2 songs" means play song D.
* Interpret "play the N-th next song" (e.g., “2 songs ahead”) as the destination song. From song A, "2 songs ahead" refers to song C.
* In Japanese:
- 「Xつ先の曲」 refers to the destination song (e.g., song C).
- 「曲をXつスキップして」 means skip X songs and play the one after (e.g., song D).
- 「Xつ前の曲」 refers to the X-th previous song.
* Avoid using unnatural expressions like 「曲をXつ後ろにスキップして」. Prefer 「Xつ前の曲に戻って」 or 「X曲前に戻って」.
* Always distinguish between the number of songs to skip and the destination index when extracting slot values.

## Output Format (JSON):
For each atomic request, return:
{{"intent": ..., "slots": {{...}}, "confidence": <float from 0 to 1>, "rationale": "<brief explanation of how you identified the intent and slot(s)>"}}

## Examples:
* "Lower the radio volume"
  → {{"intent": "lower_volume", "slots": {{"target_device": "radio"}}, "confidence": 0.96, "rationale": "Clear command to decrease volume with specified device."}}

* "Pause the podcast"
  → {{"intent": "pause_track", "slots": {{"target_media": "podcast"}}, "confidence": 0.95, "rationale": "Direct pause command with clear media type."}}

* "Play some classical music from my playlist"
  → {{"intent": "play_track", "slots": {{"media_source": "my playlist", "media_type": "music", "music_genre": "classical"}}, "confidence": 0.94, "rationale": "Explicit intent to play music with genre and source specified."}}

* "Turn up the radio"
  → {{"intent": "raise_volume", "slots": {{"target_device": "radio"}}, "confidence": 0.93, "rationale": "Common phrasing for increasing radio volume."}}

* "Resume the podcast"
  → {{"intent": "resume_track", "slots": {{"target_media": "podcast"}}, "confidence": 0.92, "rationale": "Standard resume action with clear media context."}}

* "Skip forward two songs"
  → {{"intent": "skip_track", "slots": {{"direction": "forward", "target_media": "song", "quantity": 2}}, "confidence": 0.91, "rationale": "Includes direction and quantity, explicitly referencing songs."}}

* "Stop playing the song"
  → {{"intent": "stop_track", "slots": {{"target_media": "song"}}, "confidence": 0.93, "rationale": "Clear stop command targeting specific media type."}}

* "Could you turn it down a bit?"
  → {{"intent": "lower_volume", "slots": {{"target_device": "audio system"}}, "confidence": 0.68, "rationale": "Polite indirect phrasing; assumes context refers to audio volume."}}

* "Let's hear something upbeat"
  → {{"intent": "play_track", "slots": {{"music_mood": "upbeat"}}, "confidence": 0.61, "rationale": "Vague suggestion to play music; genre and source not specified, mood inferred."}}

* "Change the track"
  → {{"intent": "skip_track", "slots": {{"direction": "forward"}}, "confidence": 0.75, "rationale": "Unclear whether skipping or selecting specific track; 'change' implies forward skip."}}

* "Make it quieter"
  → {{"intent": "lower_volume", "slots": {{"target_device": "audio system"}}, "confidence": 0.70, "rationale": "Ambiguous but likely volume-related; device assumed from context."}}

Now extract the intents and slots from the utterance:
"{user_utterance}"
"""

# Prompt for the climate-control domain (a/c, fan, seat heating): asks the model to split an
# utterance into atomic requests and to return one JSON object per request.
# The only placeholder is {user_utterance} (used twice); literal JSON braces are doubled
# ({{ }}) so that format-style templating leaves them intact.
# NOTE(review): step 2 caps slots at three, but several examples below return more;
# decide which rule should win.
climate_control_domain_template = """You are a multilingual linguist working on an in-car voice assistant.
You are highly skilled in understanding synonyms, paraphrases, and indirect commands across multiple languages.
You specialize in interpreting vehicle climate control instructions.
Climate control devices refer to systems that control the cabin climate like the air conditioner (a/c), fan, and seat heating. This category excludes physical operation or configuration of the vehicle, such as windows, mirrors, and lights, which are treated separately.

## Task:
Analyze the following user utterance:
"{user_utterance}"

Follow these steps:
1. If the utterance contains more than one instruction, split them into atomic statements (each with a single instruction).
2. For each atomic statement:
   - Identify the most appropriate **intent** from the list below.
   - Extract up to **three** relevant **slot–value pairs**.
3. Treat polite requests, questions, and indirect expressions as equivalent to direct commands.
4. Focus on the meaning of the utterance — ignore tone, politeness, or sentence structure.
5. Resolve general modifiers like “fully,” “all,” or “a little” into specific slot values when possible (e.g. "level": "fully", "scope": "all").
6. If a device or slot is implied (e.g. “Turn it off”), infer the most likely value based on context. If uncertain, leave it empty.
7. If the utterance is ambiguous (e.g. "Turn up the a/c"), consider multiple interpretations:
   - One may refer to adjusting **cooling intensity** (e.g. "increase a/c power").
   - Another may refer to changing the **direction** of airflow (e.g. "tilt the vents upward").
   Choose the most plausible interpretation based on context. If unclear, return multiple atomic interpretations.
8. For each atomic request, output only the JSON in the specified format. Include a short rationale and a confidence score as fields in the JSON.
9. Do not add any title or explanation. Return only the JSON output.

## Available Intents:
* set_climate_condition: Change a measurable climate condition (e.g. temperature, humidity).
* set_device: Adjust a specific part or feature of the climate control system (e.g. airflow direction or fan speed).
* switch_off: Turn off a device.
* switch_on: Turn on a device.

## Available Slots and Their Descriptions:
* application_target: The object to effect the climate condition (e.g. "towards the footwells").
* climate_condition: The climate condition to act upon (e.g. "temperature", "humidity").
* device_feature: The part of the target device to operate (e.g. the vents of the a/c).
* direction: Movement direction (e.g. "up", "down")
* level: Degree or extent of operation (e.g. "fully", "a little").
* position: The location or orientation of the device (e.g. "rear left", "driver's side", "overhead", "footwells").
* quantity: The numerical value of measurement (e.g. "seventy", "eighteen", "twenty-four").
* scope: Broad modifiers indicating coverage (e.g. "all", "both").
* target_device: The system or hardware to control (e.g. a/c, fan, seat heating).
* unit: The unit of measurement (e.g. "degrees Celsius", "degrees Fahrenheit", "percent")

## Output Format (JSON):
For each atomic request, return:
{{"intent": ..., "slots": {{...}}, "confidence": <float from 0 to 1>, "rationale": "<brief explanation of how you identified the intent and slot(s)>"}}

## Examples:
* "Start the ventilation fan at level four"
  → {{"intent": "switch_on", "slots": {{"target_device": "ventilation fan", "level": "level four"}}, "confidence": 0.95, "rationale": "Explicit device and level mentioned, typical startup command."}}

* "Turn on the a/c at eighteen degrees Celsius"
  → {{"intent": "switch_on", "slots": {{"target_device": "a/c", "quantity": "eighteen", "unit": "degrees Celsius"}}, "confidence": 0.96, "rationale": "Clear switch-on intent with precise temperature."}}

* "Switch off the fan at the front"
  → {{"intent": "switch_off", "slots": {{"target_device": "fan", "position": "front"}}, "confidence": 0.94, "rationale": "Standard turn-off phrasing and position provided."}}

* "Turn up the heating to twenty-four degrees"
  → {{"intent": "set_climate_condition", "slots": {{"target_device": "seat heating", "climate_condition": "temperature", "quantity": "twenty-four", "unit": "degrees Celsius"}}, "confidence": 0.91, "rationale": "Intent is to raise temperature, with explicit target value."}}

* "Turn up the a/c by five degrees"
  → {{"intent": "set_climate_condition", "slots": {{"target_device": "a/c", "quantity": "five", "unit": "degrees Celsius", "direction": "up"}}, "confidence": 0.89, "rationale": "Implied relative increase in cooling level."}}

* "Decrease the humidity inside the cabin by thirty percent"
  → {{"intent": "set_climate_condition", "slots": {{"target_device": "humidifier", "climate_condition": "humidity", "quantity": "thirty", "unit": "percent", "direction": "down", "application_target": "cabin"}}, "confidence": 0.93, "rationale": "Well-specified target and condition with clear direction and scope."}}

* "Direct the airflow to the footwells"
  → {{"intent": "set_device", "slots": {{"target_device": "fan", "application_target": "footwells"}}, "confidence": 0.88, "rationale": "Common command with clear redirection of airflow."}}

* "Tilt the a/c vents upward"
  → {{"intent": "set_device", "slots": {{"target_device": "a/c", "direction": "up", "device_feature": "vents"}}, "confidence": 0.87, "rationale": "Vents and direction mentioned explicitly."}}

* "Defrost the windshield quickly"
  → {{"intent": "switch_on", "slots": {{"target_device": "defroster", "application_target": "windshield", "level": "high"}}, "confidence": 0.92, "rationale": "Speed implied 'quickly' maps to high level, device and target clear."}}

* "It's chilly in here"
  → {{"intent": "set_climate_condition", "slots": {{"target_device": "heater", "climate_condition": "temperature", "direction": "up"}}, "confidence": 0.66, "rationale": "User implies need for heat, but does not mention any action or value."}}

* "Can you make it more comfortable?"
  → {{"intent": "set_climate_condition", "slots": {{"target_device": "climate system", "direction": "adjust"}}, "confidence": 0.52, "rationale": "Very vague; system must infer from context whether user is hot, cold, or stuffy."}}

* "Turn up the a/c"
  → {{"intent": "set_device", "slots": {{"target_device": "a/c", "device_feature": "power", "direction": "up"}}, "confidence": 0.78, "rationale": "Could refer to fan speed, temperature, or vent strength; assuming power."}}

Now extract the intents and slots from the utterance:
"{user_utterance}"
"""

# Prompt for the communication domain (calls and messages): asks the model to split an
# utterance into atomic requests and to return one JSON object per request.
# The only placeholder is {user_utterance} (used twice); literal JSON braces are doubled
# ({{ }}) so that format-style templating leaves them intact.
communication_domain_template = """You are a multilingual linguist for an in-car voice assistant.
You understand synonyms and paraphrases across different languages.
You specialize in vehicle communication.

## Task:
Analyze the following driver utterance:
"{user_utterance}"

Follow these steps:
1. If the utterance contains multiple requests, split them into atomic statements (each with a single instruction).
2. For each atomic statement:
   - Identify the most appropriate **intent** from the list below.
   - Extract up to **three** relevant **slot–value pairs**.
3. Treat polite requests, questions, and indirect expressions as equivalent to direct commands.
4. Focus on the meaning of the utterance — ignore tone, politeness, or sentence structure.
5. If a slot is implied (e.g. “Call her”), infer likely values based on common use cases (e.g. the most recent female contact) or leave the slot empty.
6. For each atomic request, output only the JSON in the specified format. Include a short rationale and a confidence score as fields in the JSON.
7. Do not add any title or explanation. Return only the JSON output.

## Available Intents:
* make_call: Call a specific contactee.
* read_out_message: Read out a message for the driver to hear.
* send_message: Compose a voice message and send it to a specific contactee.

## Available Slots and Their Descriptions:
* call_history: Refers to a previously dialed number (e.g. "last dialed number")
* contactee: The person to call or message (e.g. "my mum", "John")
* contact_list: The list where the contactee is saved (e.g. "my contacts", "contact list three")
* location_specification: Specifies which number to use (e.g. "her office", "his mobile")
* message_inbox: Refers to received messages (e.g. "latest message")
* message_text: The content of the message to send.
* message_to_read: The specific message to read aloud.
* message_type: Type of message to send (e.g. "text", "voice")
* recipient: The person receiving the message.
* source_device: The driver’s device used to place the call (e.g. "my mobile phone")
* target_device: The contactee’s device (e.g. "his cell phone")

## Output Format (JSON):
For each atomic request, return:
{{"intent": ..., "slots": {{...}}, "confidence": <float from 0 to 1>, "rationale": "<brief explanation of how you identified the intent and slot(s)>"}}

## Examples:
* "Dial Jane from my contacts"
  → {{"intent": "make_call", "slots": {{"contactee": "Jane", "contact_list": "my contacts"}}, "confidence": 0.94, "rationale": "Explicit action verb 'Dial', clear reference to contact name and contact list."}}

* "Phone James on his mobile device"
  → {{"intent": "make_call", "slots": {{"contactee": "James", "target_device": "his mobile device"}}, "confidence": 0.91, "rationale": "'Phone' and 'mobile device' clearly indicate a call intent and device."}}

* "Call Tom at his office"
  → {{"intent": "make_call", "slots": {{"contactee": "Tom", "location_specification": "his office"}}, "confidence": 0.90, "rationale": "'Call' verb plus location tag suggests work contact context."}}

* "Ring Mom from my cell phone"
  → {{"intent": "make_call", "slots": {{"contactee": "Mom", "source_device": "my cell phone"}}, "confidence": 0.89, "rationale": "'Ring' is a common informal synonym for calling; source device specified."}}

* "Call the last dialed number"
  → {{"intent": "make_call", "slots": {{"call_history": "last dialed number"}}, "confidence": 0.93, "rationale": "Refers to a previous interaction in the call log, implying a redial command."}}

* "Read out the latest message received"
  → {{"intent": "read_out_message", "slots": {{"message_inbox": "latest message"}}, "confidence": 0.92, "rationale": "Action verb 'read out' and mention of 'latest message' points to message retrieval."}}

* "Send a voice message to David saying 'Sorry, I'll be late'"
  → {{"intent": "send_message", "slots": {{"message_text": "Sorry, I'll be late", "recipient": "David", "message_type": "voice"}}, "confidence": 0.96, "rationale": "Clear structure of message composition with type, recipient, and content."}}

* "Message Anna"
  → {{"intent": "send_message", "slots": {{"recipient": "Anna"}}, "confidence": 0.72, "rationale": "The verb 'Message' implies sending a message, but the lack of message type or content makes intent partially underspecified."}}

* "Can you call?"
  → {{"intent": "make_call", "slots": {{}}, "confidence": 0.65, "rationale": "The phrase suggests a calling intent, but it lacks both recipient and target device info, making it ambiguous."}}

* "Tell John I’ll be there"
  → {{"intent": "send_message", "slots": {{"recipient": "John", "message_text": "I’ll be there"}}, "confidence": 0.78, "rationale": "Likely a message intent, but phrased more like spoken speech than a clear command to send."}}

Now extract the intents and slots from the utterance:
"{user_utterance}"
"""

# Prompt for the device-control domain (windows, mirrors, lights): asks the model to split an
# utterance into atomic requests and to return one JSON object per request.
# The only placeholder is {user_utterance} (used twice); literal JSON braces are doubled
# ({{ }}) so that format-style templating leaves them intact.
# NOTE(review): step 2 caps slots at three, but one example below returns five;
# decide which rule should win.
device_control_domain_template = """You are a multilingual linguist working on an in-car voice assistant.
You are highly skilled in understanding synonyms, paraphrases, and indirect commands across multiple languages.
You specialize in interpreting vehicle device control instructions.
Vehicle devices refer to components that control the physical operation or configuration of the vehicle, such as windows, mirrors, and lights. This category excludes climate control systems like the air conditioner (A/C), fan, and seat heating, which are treated separately.

## Task:
Analyze the following user utterance:
"{user_utterance}"

Follow these steps:
1. If the utterance contains more than one instruction, split them into atomic statements (each with a single instruction).
2. For each atomic statement:
   - Identify the most appropriate **intent** from the list below.
   - Extract up to **three** relevant **slot–value pairs**.
3. Treat polite requests, questions, and indirect expressions as equivalent to direct commands.
4. Focus on the meaning of the utterance — ignore tone, politeness, or sentence structure.
5. Resolve general modifiers like “fully,” “all,” or “a little” into specific slot values when possible (e.g. "level": "fully", "scope": "all").
6. If a device or slot is implied (e.g. “Turn it off”), infer the most likely value based on context. If uncertain, leave it empty.
7. For each atomic request, output only the JSON in the specified format. Include a short rationale and a confidence score as fields in the JSON.
8. Do not add any title or explanation. Return only the JSON output.

## Available Intents:
* close_device: Close a physical device or part of the vehicle (e.g. window, sunroof).
* open_device: Open a physical device or part of the vehicle (e.g. window, sunroof).
* set_device: Adjust a specific feature or configuration of a non-climate control device (e.g. mirror angle, headlight beam level).
* turn_on_device: Power on an electronic or mechanical device (e.g. reading light, wipers).
* turn_off_device: Power off an electronic or mechanical device (e.g. reading light, wipers).

## Available Slots and Their Descriptions:
* car_part: The specific part of the vehicle to control (e.g. window, light, rearview mirror).
* desired_state: The target condition of the device (e.g. "blue").
* direction: Movement direction (e.g. "up", "down").
* feature: Specific function being modified (e.g. "brightness", "temperature").
* level: Degree or extent of operation (e.g. "fully", "a little", "low", "high").
* position: The location or orientation of the device (e.g. "rear left", "driver's side", "overhead").
* quantity: The numerical value of measurement (e.g. "seventy", "eighteen", "twenty-four").
* scope: Broad modifiers indicating coverage (e.g. "all", "both").
* unit: The unit of measurement (e.g. "percent")

## Output Format (JSON):
For each atomic request, return:
{{"intent": ..., "slots": {{...}}, "confidence": <float from 0 to 1>, "rationale": "<brief explanation of how you identified the intent and slot(s)>"}}

## Examples:
* "Fully open the rear left window"
  → {{"intent": "open_device", "slots": {{"car_part": "window", "level": "fully", "position": "rear left"}}, "confidence": 0.95, "rationale": "Clear command structure indicating full opening, target part, and position."}}

* "Close the driver's side window by a quarter"
  → {{"intent": "close_device", "slots": {{"car_part": "window", "level": "a quarter", "position": "driver's side"}}, "confidence": 0.92, "rationale": "Contains degree modifier and exact window position with close action."}}

* "Turn on all the ambient lighting in blue"
  → {{"intent": "turn_on_device", "slots": {{"car_part": "ambient lighting", "desired_state": "blue", "scope": "all"}}, "confidence": 0.90, "rationale": "Explicit request to activate a visual setting across all instances."}}

* "Dim the ambient lighting"
  → {{"intent": "set_device", "slots": {{"car_part": "ambient lighting", "feature": "brightness", "direction": "down"}}, "confidence": 0.88, "rationale": "‘Dim’ is a common expression implying lowering brightness."}}

* "Switch off the overhead cabin light"
  → {{"intent": "turn_off_device", "slots": {{"car_part": "cabin light", "position": "overhead"}}, "confidence": 0.93, "rationale": "Clearly indicates a specific light to be turned off."}}

* "Set the front right light to seventy percent brightness"
  → {{"intent": "set_device", "slots": {{"car_part": "light", "feature": "brightness", "quantity": "seventy", "unit": "percent", "position": "front right"}}, "confidence": 0.89, "rationale": "Structured value assignment to brightness with specified location."}}

* "Adjust the rearview mirror downwards slightly"
  → {{"intent": "set_device", "slots": {{"car_part": "rearview mirror", "direction": "downwards", "level": "slightly"}}, "confidence": 0.87, "rationale": "Adjustment instruction with target, direction, and intensity level."}}

* "Make it brighter in here"
  → {{"intent": "set_device", "slots": {{"car_part": "ambient lighting", "feature": "brightness", "direction": "up"}}, "confidence": 0.76, "rationale": "The phrase likely refers to ambient lighting brightness, but lacks explicit reference to lighting or level."}}

* "Turn it down a bit"
  → {{"intent": "set_device", "slots": {{"feature": "volume", "direction": "down", "level": "a bit"}}, "confidence": 0.64, "rationale": "'It' is ambiguous; the user could be referring to volume, fan speed, or brightness depending on prior context."}}

* "Open the window halfway"
  → {{"intent": "open_device", "slots": {{"car_part": "window", "level": "halfway"}}, "confidence": 0.84, "rationale": "Clear action and part, but no position specified (e.g., which window), reducing confidence slightly."}}

Now extract the intents and slots from the utterance:
"{user_utterance}"
"""

# Prompt that turns implicit expressions of discomfort or desire into a direct command:
# the model returns one JSON object (driver_issue, solution_providing_command, confidence,
# rationale) per atomic request.
# The only placeholder is {user_utterance} (used twice); literal JSON braces are doubled
# ({{ }}) so that format-style templating leaves them intact.
# NOTE(review): the variable name is misspelled ("implict"); it is kept unchanged here
# because code outside this cell may reference it.
implict_to_direct_domain_template = """You are a multilingual linguist for an in-car voice assistant.
You understand synonyms, paraphrases, and implicit meanings across different languages.
Your task is to interpret indirect expressions of discomfort or desire, infer the driver’s intent, and generate a direct command that would resolve the situation.

## Task:
Analyze the following driver utterance:
"{user_utterance}"

Follow these steps:
1. If the utterance contains multiple requests, split it into **atomic statements** (each containing only one actionable request).
2. For each atomic statement:
   - Identify the **driver's underlying issue or desire**.
   - Generate a **direct command** that would address or resolve the issue.
3. Treat questions, polite expressions, and indirect or vague phrases as equivalent to direct commands.
4. Focus entirely on **semantic intent** — ignore tone, sentence structure, or politeness.
5. Identify expressions that signal a **problem or need**, then convert them into a command that would bring about the opposite or desired state.
   - Example: “I’m cold” → The driver wants to be warm → Generate a command that increases cabin temperature.
6. For each atomic request, output only the JSON in the specified format. Include a short rationale and a confidence score as fields in the JSON.
7. Do not add any title or explanation. Return only the JSON output.

## Output Format (JSON):
For each atomic request, return:
{{"driver_issue": ..., "solution_providing_command": ..., "confidence": <float from 0 to 1>, "rationale": "<brief explanation of how you identified the issue and chose the command>"}}

## Examples:
* "It's hot in here"
  → {{"driver_issue": "The cabin is too hot", "solution_providing_command": "Set the A/C to 20 degrees Celsius", "confidence": 0.94, "rationale": "The complaint is about temperature discomfort, and the typical corrective action is adjusting the air conditioning."}}

* "I'm hungry"
  → {{"driver_issue": "The driver wants to eat", "solution_providing_command": "Find the nearest restaurant", "confidence": 0.91, "rationale": "Expressing hunger implies a desire to find food, usually solved by navigating to a restaurant."}}

* "I don't want the wind on me"
  → {{"driver_issue": "Airflow is directed at the driver", "solution_providing_command": "Turn off the fan or redirect airflow", "confidence": 0.89, "rationale": "The user is reacting to airflow discomfort, implying a need to adjust fan direction or intensity."}}

* "The fan is too strong"
  → {{"driver_issue": "Fan speed is too high", "solution_providing_command": "Lower the fan speed", "confidence": 0.93, "rationale": "The driver is objecting to fan strength, which clearly maps to reducing fan speed."}}

* "It's a bit dark in here"
  → {{"driver_issue": "The cabin is too dark", "solution_providing_command": "Turn on the interior lights", "confidence": 0.90, "rationale": "The complaint about darkness suggests a need for illumination, solvable by enabling cabin lights."}}

* "I feel like listening to something"
  → {{"driver_issue": "The driver wants to hear audio", "solution_providing_command": "Play some music", "confidence": 0.87, "rationale": "The phrase implies a desire for entertainment, with music being a common default."}}

* "It's stuffy in here"
  → {{"driver_issue": "The air feels stagnant", "solution_providing_command": "Open a window or turn on ventilation", "confidence": 0.72, "rationale": "The phrase suggests discomfort due to poor airflow, but the optimal solution may vary by user intent."}}

* "It's too quiet"
  → {{"driver_issue": "The cabin is too quiet", "solution_providing_command": "Play background music or turn on the radio", "confidence": 0.68, "rationale": "The driver may want audio entertainment, but could also be referencing conversation or mechanical noise."}}

* "I can't see very well"
  → {{"driver_issue": "Visibility is poor", "solution_providing_command": "Adjust the headlights or clean the windshield", "confidence": 0.65, "rationale": "The cause of poor visibility is unclear—could be lighting, weather, or windshield obstruction."}}

* "It's not comfortable"
  → {{"driver_issue": "The driver is feeling discomfort", "solution_providing_command": "Adjust seat or cabin temperature", "confidence": 0.60, "rationale": "General discomfort could stem from many sources—temperature, seating, posture, or noise."}}

* "It's noisy"
  → {{"driver_issue": "Cabin noise is too high", "solution_providing_command": "Close the window or mute audio", "confidence": 0.58, "rationale": "‘Noisy’ is vague; it could refer to road noise, passengers, or media volume."}}

Now extract the issue and generate the corresponding direct command for the utterance:
"{user_utterance}"
"""

# Prompt for the navigation domain: asks the model to split an utterance into atomic
# requests and to return one JSON object (intent, slots, confidence, rationale) per request.
# The only placeholder is {user_utterance} (used twice); literal JSON braces are doubled
# ({{ }}) so that format-style templating leaves them intact.
navigation_domain_template = """You are a multilingual linguist for an in-car voice assistant.
You understand synonyms, paraphrases, and user intent across different languages.
You specialize in vehicle navigation.

## Task:
Analyze the following user utterance:
"{user_utterance}"

Follow these steps:
1. If the utterance contains multiple requests, split them into atomic statements (each with a single instruction).
2. For each atomic statement:
   - Identify the most appropriate **intent** from the list below.
   - Extract up to **three** relevant **slot–value pairs**.
3. Treat polite requests, questions, and indirect expressions as equivalent to direct commands.
4. Focus on the meaning of the utterance — ignore tone, politeness, or sentence structure.
5. For each atomic request, output only the JSON in the specified format. Include a short rationale and a confidence score as fields in the JSON.
6. Do not add any title or explanation. Return only the JSON output.

## Available Intents:
* add_stopover: Insert an additional stop or waypoint into the current route.
* find_place: Search for a location or point of interest (e.g. restaurant, McDonald's, the Alps).
* navigate_to: Start navigation to a specified destination.
* save_location: Store a location for future reference (e.g. save as "home" or "favorite").
* set_route_preference: Adjust route options or preferences (e.g. avoid tolls, prefer highways).

## Available Slots and Their Descriptions:
* destination: A specific place to navigate to (e.g. street, business name, city, personal POI).
* label: A name given to a saved location (e.g. "home", "favorite parking spot").
* location: A specific place to search or locate (same categories as destination).
* modifier: Descriptive constraints (e.g. "nearest", "cheap", "not crowded").
* route_preference: A preferred way to travel (e.g. "avoid tolls", "via the M25", "fastest route").
* stopover: A place to stop at along the route (same categories as destination).
* target_device: A device to act upon (e.g. map, screen).

## Clarification
- If the user is requesting a change to their current route to avoid traffic, the intent is "set_route_preference".
- If the user is asking to view possible roads or options — without necessarily changing the route — the intent is "find_place".

## Output Format (JSON):
For each atomic request, return:
{{"intent": ..., "slots": {{...}}, "confidence": <float from 0 to 1>, "rationale": "<brief explanation of how you identified the intent and slot(s)>"}}

## Examples:
* "Add Sainsbury's as a stopover on this journey"
  → {{"intent": "add_stopover", "slots": {{"stopover": "Sainsbury's"}}, "confidence": 0.95, "rationale": "The phrase 'add ... as a stopover' explicitly requests inserting a stop at a named location during the journey."}}

* "Where is the nearest petrol station?"
  → {{"intent": "find_place", "slots": {{"location": "petrol station", "modifier": "nearest"}}, "confidence": 0.92, "rationale": "This is a location query for information purposes, not a request to navigate."}}

* "Take me to the hospital"
  → {{"intent": "navigate_to", "slots": {{"destination": "hospital"}}, "confidence": 0.97, "rationale": "The phrase 'take me to' strongly indicates a request to begin navigation to a destination."}}

* "Mark this spot as my favorite parking location"
  → {{"intent": "save_location", "slots": {{"label": "my favorite parking location"}}, "confidence": 0.91, "rationale": "The command involves saving a location with a user-defined label, matching save_location intent."}}

* "Avoid the toll roads"
  → {{"intent": "set_route_preference", "slots": {{"route_preference": "avoid tolls"}}, "confidence": 0.94, "rationale": "The user expresses a routing preference, which clearly aligns with modifying the route strategy."}}

* "Navigate to 25 Oxford Street, London via the M25"
  → {{"intent": "navigate_to", "slots": {{"destination": "25 Oxford Street, London", "route_preference": "via the M25"}}, "confidence": 0.96, "rationale": "Both the destination and the preferred route are clearly stated, making this a direct navigation command."}}

* "Show alternative roads to avoid traffic"
  → {{"intent": "find_place", "slots": {{"location": "alternative roads", "route_preference": "avoid traffic"}}, "confidence": 0.85, "rationale": "The request is for a visual display or options, not a command to change the route immediately."}}

* "Avoid traffic"
  → {{"intent": "set_route_preference", "slots": {{"route_preference": "avoid traffic"}}, "confidence": 0.90, "rationale": "This is a succinct command to modify the route based on real-time or general traffic preferences."}}

Now extract the intents and slots from the utterance:
"{user_utterance}"
"""

In [8]:
# Prompt for the first NLU stage: split a driver utterance into atomic segments
# and label each segment with one of the supported domains.
# The model is asked to answer with a JSON list of
# {"domain": ..., "atomic_segment": ...} objects.
# Literal JSON braces in the template are doubled ({{ }}) so that only
# {user_utterance} is treated as a template variable.
# The few-shot examples must be valid JSON and must only use domains listed
# under "Available Domains", since the model tends to imitate them verbatim.
domain_classification_prompt = PromptTemplate(
    input_variables=["user_utterance"],
    template = """You are a helpful multilingual linguist for an in-car voice assistant.
You have an excellent understanding of vocabulary, synonyms, paraphrasing, and intent recognition across multiple languages.

## Task:
Analyze the following user utterance:
"{user_utterance}"

Follow these steps:
1. If the utterance contains multiple requests, split them into atomic statements (each with a single instruction).
2. For each atomic statement, match the utterance to one of the predefined **domains** below, based on the **underlying goal** or **task** it expresses — not its surface form.
3. Do not add any title or explanation (e.g. "navigation").
4. Treat polite requests, questions, and indirect expressions as equivalent to direct commands.
5. Focus on the meaning of the utterance — ignore tone, politeness, or sentence structure.
6. Classify an utterance with no verb and is not a direct command into the "implicit_to_direct" domain.

## Available Domains:
* **audio_media**: Playing, pausing, skipping, or adjusting music, radio, podcasts, or playlists.
* **climate_control**: Adjusting or querying temperature, a/c, fan speed, heating, or airflow.
* **communication**: Making a call or sending/reading messages — to known contacts or by number.
* **device_control**: Opening, closing, or adjusting physical car elements like windows, mirrors, or lights.
* **implicit_to_direct**: Expressing a problem, discomfort, desire, or emotional state that implies a change is needed (e.g. hunger, cold, boredom).
* **navigation**: Finding locations, routing, or requesting directions.

## Output Format (JSON):
Return a list of objects. Each object should include:
- "domain": one of the valid domains
- "atomic_segment": the part of the utterance that corresponds to this domain

## Examples:
* "Play some jazz from my playlist" → [{{"domain": "audio_media", "atomic_segment": "Play some jazz from my playlist"}}]
* "Search for podcasts about gardening" → [{{"domain": "audio_media", "atomic_segment": "Search for podcasts about gardening"}}]
* "Turn on the A/C at twenty-two degrees Celsius" → [{{"domain": "climate_control", "atomic_segment": "Turn on the A/C at twenty-two degrees Celsius"}}]
* "Switch off the seat heating" → [{{"domain": "climate_control", "atomic_segment": "Switch off the seat heating"}}]
* "Call Pam on her cell phone" → [{{"domain": "communication", "atomic_segment": "Call Pam on her cell phone"}}]
* "Dial Kevin at work" → [{{"domain": "communication", "atomic_segment": "Dial Kevin at work"}}]
* "Roll down the windows" → [{{"domain": "device_control", "atomic_segment": "Roll down the windows"}}]
* "Turn off the ambient light" → [{{"domain": "device_control", "atomic_segment": "Turn off the ambient light"}}]
* "I fancy some steak" → [{{"domain": "implicit_to_direct", "atomic_segment": "I fancy some steak"}}]
* "I feel like some purple lighting" → [{{"domain": "implicit_to_direct", "atomic_segment": "I feel like some purple lighting"}}]
* "Where is the nearest petrol station?" → [{{"domain": "navigation", "atomic_segment": "Where is the nearest petrol station"}}]
* "Tell me where the nearest petrol station is" → [{{"domain": "navigation", "atomic_segment": "Tell me where the nearest petrol station is"}}]
* "Close all the windows then switch on the a/c" → [{{"domain": "device_control", "atomic_segment": "Close all the windows"}}, {{"domain": "climate_control", "atomic_segment": "switch on the a/c"}}]
* "Lower the radio volume and call my mum on her mobile" → [{{"domain": "audio_media", "atomic_segment": "Lower the radio volume"}}, {{"domain": "communication", "atomic_segment": "call my mum on her mobile"}}]

Now classify the domain of the following utterance:
"{user_utterance}"
"""
)

In [9]:
# Define the chains

# A chain that takes an utterance and determines the domain for each atomic segment.
# It is built exactly once here; nothing below needs to re-create it.
domain_classification_chain = LLMChain(llm=llm, prompt=domain_classification_prompt)

# Links a domain to its intent+slots classification template.
# The "domain" values must match the domain names the classification prompt can
# return, because they become the lookup keys of `destination_chains`.
# NOTE(review): `implict_to_direct_domain_template` is spelled "implict" here; it has
# to match the name the template was defined under earlier in the notebook — confirm.
domain_router = [
    {"domain": "audio_media", "prompt_template": audio_media_domain_template},
    {"domain": "climate_control", "prompt_template": climate_control_domain_template},
    {"domain": "communication", "prompt_template": communication_domain_template},
    {"domain": "device_control", "prompt_template": device_control_domain_template},
    {"domain": "implicit_to_direct", "prompt_template": implict_to_direct_domain_template},
    {"domain": "navigation", "prompt_template": navigation_domain_template}
]

# `destination_chains` maps each domain name to the LLMChain that extracts
# intents and slots from atomic utterances of that domain.
destination_chains = {}

for p_info in domain_router:
    domain = p_info["domain"]
    template = p_info["prompt_template"]
    prompt = ChatPromptTemplate.from_template(template)
    chain = LLMChain(llm=llm, prompt=prompt)
    destination_chains[domain] = chain


In [10]:
import json

def _strip_code_fence(text):
    """Return `text` without a surrounding Markdown code fence, if it has one."""
    if not isinstance(text, str):
        return text

    cleaned = text.strip()
    if not cleaned.startswith("```"):
        return cleaned

    # Drop the opening fence line, which may carry a language tag (e.g. ```json).
    first_line, newline, remainder = cleaned.partition("\n")
    cleaned = remainder if newline else first_line[3:]

    cleaned = cleaned.rstrip()
    if cleaned.endswith("```"):
        cleaned = cleaned[:-3]
    return cleaned.strip()


def run_multi_domain_router(user_utterance, classification_chain, destination_chains):
    """
    Classify an utterance and route each atomic segment to its domain chain.

    1. Classify utterance into atomic segments per domain
    2. Route each atomic segment to its corresponding domain-specific chain
    3. Return combined results

    Args:
        user_utterance: The raw utterance; passed as-is to
            `classification_chain.run`.
        classification_chain: Object with a `run` method whose return value is
            JSON text for a list of {"domain", "atomic_segment"} objects.
            A Markdown code fence around the JSON is tolerated.
        destination_chains: Mapping from domain name to an object with a `run`
            method; it is called with {"user_utterance": <segment>}.

    Returns:
        A list of {"domain", "segment", "response"} dicts, one per atomic
        segment, in classification order. "response" is the unmodified return
        value of the domain chain.

    Raises:
        ValueError: If the classification output is not valid JSON, is not a
            list, contains an entry that is not an object, or names a domain
            that is missing from `destination_chains`.
    """
    # Step 1: Run classification chain to get domains and atomic segments
    classification_response = classification_chain.run(user_utterance)

    try:
        # Chat models often wrap JSON in ```json ... ``` fences; remove them first.
        atomic_segments = json.loads(_strip_code_fence(classification_response))
    except json.JSONDecodeError as e:
        raise ValueError(f"Could not parse classification output as JSON: {classification_response}") from e

    if not isinstance(atomic_segments, list):
        raise ValueError(f"Expected a list of domain/segment pairs, got: {atomic_segments}")

    # Validate every entry before calling any domain chain, so a malformed entry
    # late in the list does not waste LLM calls on the earlier ones.
    routed_segments = []
    for item in atomic_segments:
        if not isinstance(item, dict):
            raise ValueError(f"Expected each classification entry to be an object, got: {item!r}")

        domain = item.get("domain")
        segment = item.get("atomic_segment")

        if domain not in destination_chains:
            raise ValueError(f"Unknown domain '{domain}' in classification output.")

        routed_segments.append((domain, segment))

    results = []

    # Step 2: Process each atomic segment through the appropriate domain-specific chain
    for domain, segment in routed_segments:
        response = destination_chains[domain].run({"user_utterance": segment})
        results.append({
            "domain": domain,
            "segment": segment,
            "response": response
        })

    return results

In [38]:
# Run the NLU module to classify an utterance
def main(max_followups=5):
    """
    Read one utterance from the user and run it through the NLU pipeline.

    Each result is printed as "[domain] segment -> response". When a segment is
    classified as 'implicit_to_direct' and its response carries a
    'solution_providing_command', that command is queued and processed like a
    new utterance.

    Args:
        max_followups: Upper bound on how many solution commands may be
            re-queued during one call. Without a bound, a solution that is
            itself classified as 'implicit_to_direct' could keep the loop
            running (and calling the LLM) indefinitely.

    Raises:
        ValueError: Propagated from `run_multi_domain_router` when the
            classification output cannot be used.
    """
    # Start with the user's original utterance
    utterance_queue = [get_utterance()]

    # Normalised utterances that were already queued, so the same command is
    # never processed twice.
    already_queued = {utterance_queue[0].strip().lower()}
    followups_used = 0

    while utterance_queue:
        current_utterance = utterance_queue.pop(0)

        # Run multi-domain classification and intent+slot extraction
        results = run_multi_domain_router(
            current_utterance,
            classification_chain=domain_classification_chain,
            destination_chains=destination_chains
        )

        for r in results:
            domain = r['domain']
            segment = r['segment']
            response = r['response']
            print(f"[{domain}] {segment} -> {response}")

            # Only 'implicit_to_direct' responses can carry a solution command
            if domain != 'implicit_to_direct':
                continue

            try:
                parsed_response = json.loads(response)
            except json.JSONDecodeError:
                print("Warning: Could not parse JSON from implicit_to_direct response.")
                continue

            # The response may be a single object or a list of objects; anything
            # else has no solution command to extract.
            candidates = parsed_response if isinstance(parsed_response, list) else [parsed_response]

            for candidate in candidates:
                if not isinstance(candidate, dict):
                    continue

                solution = candidate.get('solution_providing_command')
                if not isinstance(solution, str) or not solution.strip():
                    continue

                solution_key = solution.strip().lower()
                if solution_key in already_queued:
                    continue

                if followups_used >= max_followups:
                    print("Warning: Follow-up limit reached; skipping further solution commands.")
                    continue

                # Feed solution back into the loop for re-processing
                already_queued.add(solution_key)
                followups_used += 1
                utterance_queue.append(solution)

In [39]:
# Entry point: start the interactive NLU loop when this code runs as the main module.
if __name__ == "__main__":
    main()


Tell me something: Close all the windows then switch on the a/c at 22 degrees
[device_control] Close all the windows -> {"intent": "close_device", "slots": {"car_part": "windows", "scope": "all"}, "confidence": 0.96, "rationale": "Direct command with clear action and broad coverage modifier."}
[climate_control] switch on the a/c at 22 degrees -> {"intent": "switch_on", "slots": {"target_device": "a/c", "quantity": "22", "unit": "degrees"}, "confidence": 0.96, "rationale": "Clear switch-on intent with precise temperature, assuming Celsius as the unit due to its common usage in climate control contexts."}
