In [43]:
import os
from pathlib import Path

from dotenv import load_dotenv


import whisper  # git+https://github.com/openai/whisper.git
# import whisperx # git+https://github.com/m-bain/whisperx.git

from pydantic import BaseModel, Field, ConfigDict
import torch
from langchain_groq import ChatGroq

load_dotenv()


ROOT_DIR = Path(".").resolve()
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

GROQ_BASE_URL = "https://api.groq.com/openai/v1/chat/completions"
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
GROQ_MODEL_NAME = "openai/gpt-oss-20b"

In [38]:
llm = ChatGroq(api_key=GROQ_API_KEY, model=GROQ_MODEL_NAME)

In [40]:
prompt = "Tell me a joke"

llm.invoke(prompt).content

'Why don’t skeletons fight each other?\n\nThey just don’t have the guts!'

In [33]:
model = whisper.load_model("base", device="cpu")

model

Whisper(
  (encoder): AudioEncoder(
    (conv1): Conv1d(80, 512, kernel_size=(3,), stride=(1,), padding=(1,))
    (conv2): Conv1d(512, 512, kernel_size=(3,), stride=(2,), padding=(1,))
    (blocks): ModuleList(
      (0-5): 6 x ResidualAttentionBlock(
        (attn): MultiHeadAttention(
          (query): Linear(in_features=512, out_features=512, bias=True)
          (key): Linear(in_features=512, out_features=512, bias=False)
          (value): Linear(in_features=512, out_features=512, bias=True)
          (out): Linear(in_features=512, out_features=512, bias=True)
        )
        (attn_ln): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (mlp): Sequential(
          (0): Linear(in_features=512, out_features=2048, bias=True)
          (1): GELU(approximate='none')
          (2): Linear(in_features=2048, out_features=512, bias=True)
        )
        (mlp_ln): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
      )
    )
    (ln_post): LayerNorm((512,), eps=1e-05,

In [44]:
audio_path = str(ROOT_DIR / ".secrets" / "Sample recording.mp3")
transcription = model.transcribe(audio_path)



In [45]:
transcription

{'text': " Thank you for calling Jacob's plumbing. This is Nick. Hey Nick, Steve and Manley, how are you, sir? Pretty good. Tell me Steve, how can I assist you today? Yeah, so I'm looking at, I'm going to have a shop that I'm wanting to turn into an apartment. I have plumbing there, so well, I have the, you know, lavatory, um, toilet things like that, but I won't have a shower and, um, I'll be wondering if I can ask somebody to come out and give me an estimate for that. Sure, no problem. Let me just go ahead and double check that we have coverage in your area. Is it okay if you provide me with your address? Yes, so it's... Let me just one second. Let me go ahead and confirm if we have coverage. Let me just a few moments, please. Okay. Okay, so we do have coverage in your area. Let me just take a look here at what we have available. Is there like a specific day that you would want someone to go out there? No, yeah, I'm just as long as I get heads up, I'll make sure I'm available. Okay, 

## Task 1:  Analyze the Recording

* Listen to the provided call between **Nick (Jacobs Plumbing)** and **Steven Manley (customer)**.
* Note the **flow of the conversation**, including:

  * Greeting and introduction
  * Collecting customer details (name, address, phone, email, service request)
  * Confirming availability
  * Scheduling the appointment
  * Reconfirming the details with the customer


* Note:

  * Should be real-time processing, like ... user's intent recognition.

In [None]:
class CustomerDetail(BaseModel):
    name: str
    address: str
    phone: str
    email: str = Field(default="", description="Customer's email")
    service: str


class ConfirmAvailabitiy(BaseModel):
    is_availability: bool
    reasons: str


class ConversationNote(BaseModel):
    greeting: str
    customer_detail: CustomerDetail
    confirm_availablity: ConfirmAvailabitiy
    appointment_time: str
    confirmation: str


# TODO: customer's sentiment evaluate.

In [48]:
llm_task1 = ChatGroq(api_key=GROQ_API_KEY, model=GROQ_MODEL_NAME)

llm_evaluate_dialog = llm_task1.bind_tools([ConversationNote])

In [59]:
conversation_note = llm_evaluate_dialog.invoke(transcription.get("text"))


conversation_note.tool_calls[0].get("args", {})

{'appointment_time': 'Tomorrow at 11:00 AM',
 'confirm_availablity': {'is_availability': True,
  'reasons': 'Coverage confirmed in area.'},
 'confirmation': 'Appointment confirmed for tomorrow at 11:00 AM.',
 'customer_detail': {'address': '',
  'email': '',
  'name': 'Steve',
  'phone': '',
  'service': 'Shower installation estimate'},
 'greeting': "Hello Steve, thank you for choosing Jacob's Plumbing."}

## Task 2: Create a Prompt for the AI

* Draft a **prompt (instruction set)** that guides the AI on **how to handle calls**.
* The prompt must ensure the AI:

  * Greets politely and professionally.
  * Asks for all required details (name, address, phone number, email, service request).
  * Confirms available service slots.
  * Books the appointment.
  * Confirms details back to the customer.


In [62]:
from langchain.schema import SystemMessage, HumanMessage, AIMessage
from langchain.memory import ConversationBufferMemory
from langchain.prompts import MessagesPlaceholder
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.memory import ConversationBufferMemory
from langchain.schema import SystemMessage, HumanMessage
from langchain.schema.runnable import Runnable
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts.chat import ChatPromptTemplate

OWNER = "Jacobs Plumbing"
INSTRUCTIONS = """
1. Greet them.
2. Collect their name, address, phone number, email, and plumbing service request.
3. Offer available time slots (you can make up times).
4. Confirm the appointment details back to them.
5. Close politely.
"""


SYSTEM_PROMPT = """
You are a polite and professional AI assistant working for {owner}
You are on a call with a customer. Your task is to:
{insructions}

Wait for user input at each step. Keep it conversational and natural. 
ONLY ask one thing at a time. Do not skip ahead.
When the customer says goodbye or ends the call, stop the conversation.
"""

system_prompt = SYSTEM_PROMPT.format(owner=OWNER, insructions=INSTRUCTIONS)

In [79]:
llm_task2 = ChatGroq(
    api_key=GROQ_API_KEY,
    model="compound-beta",
)

In [None]:
chat_prompt = ChatPromptTemplate.from_messages(
    [
        SystemMessage(content=system_prompt),
        MessagesPlaceholder(variable_name="history"),
        HumanMessage(content="{input}"),
    ]
)

memory = ConversationBufferMemory(return_messages=True)

chain = chat_prompt | llm | StrOutputParser()

print(
    "📞 Call started. Type as if you're the customer. Type 'bye' or 'thank you' to end.\n"
)

while True:
    user_input = input()
    memory.chat_memory.add_user_message(user_input)

    print(f"👤 Customer: {user_input}")
    if any(x in user_input.lower() for x in ["bye", "goodbye", "thank you", "thanks"]):
        break
    ai_reply = chain.invoke(
        {"input": user_input, "history": memory.chat_memory.messages}
    )

    print("🤖 AI:", ai_reply)
    memory.chat_memory.add_ai_message(ai_reply)

📞 Call started. Type as if you're the customer. Type 'bye' or 'thank you' to end.

👤 Customer: hi
🤖 AI: Hello! Thank you for contacting Jacobs Plumbing. How can I assist you today?
👤 Customer: how can you assist me?
🤖 AI: Great! I’m here to help you schedule a plumbing service. Could you please tell me your name?
👤 Customer: sure! my name is Minh
🤖 AI: Nice to meet you, Minh! Could you please provide your address so we can locate your property?
👤 Customer: Yah! I live in Ho Chi Minh city, district 1
🤖 AI: Thanks for that, Minh! Could you give me a more specific address or a street name so we can pinpoint your location?
👤 Customer: I think just that is okay
🤖 AI: Got it—thanks for letting me know your location, Minh!  
Could you please share a contact phone number where we can reach you?
👤 Customer: yes, 0706208820
🤖 AI: Thank you! Lastly, could you provide an email address where we can send the appointment confirmation and any updates?
👤 Customer: I do not use email, I use Zalo with th

In [81]:
memory.chat_memory.messages

[HumanMessage(content='hi', additional_kwargs={}, response_metadata={}),
 AIMessage(content='Hello! Thank you for contacting Jacobs Plumbing. How can I assist you today?', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='how can you assist me?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='Great! I’m here to help you schedule a plumbing service. Could you please tell me your name?', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='sure! my name is Minh', additional_kwargs={}, response_metadata={}),
 AIMessage(content='Nice to meet you, Minh! Could you please provide your address so we can locate your property?', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='Yah! I live in Ho Chi Minh city, district 1', additional_kwargs={}, response_metadata={}),
 AIMessage(content='Thanks for that, Minh! Could you give me a more specific address or a street name so we can pinpoint your location?', additional_kwargs={}, res

In [82]:
dialog_passage = ""

for entity in memory.chat_memory.messages:
    content = entity.content
    dialog_passage += f"{content} "

In [85]:
conversation_note_task2 = llm_evaluate_dialog.invoke(dialog_passage)


conversation_note_task2.tool_calls[0].get("args", {})

{'appointment_time': 'Tomorrow, 9:00 AM – 10:00 AM',
 'confirm_availablity': {'is_availability': True, 'reasons': 'Slot confirmed'},
 'confirmation': 'Confirmed',
 'customer_detail': {'address': 'Ho Chi Minh City, District 1',
  'email': '',
  'name': 'Minh',
  'phone': '0706208820',
  'service': 'Drain cleaning'},
 'greeting': 'hi Hello! Thank you for contacting Jacobs Plumbing. How can I assist you today?'}

## Task 3: Implement the Agentic AI (Python)

* Develop a **Python script** that implements a simple agentic AI.
* Requirements:

  * Use the prompt from Step 2.
  * Add **context handling** (so the AI remembers details already collected).
  * Add **tool calls** (mock functions for scheduling, checking availability, etc.).
  * Make it capable of running through a conversation like the recording.


## Task 4: Provide a Full Script (CSV)

* Recreate the **entire conversation** from the recording as if the AI handled it.
* Save it in **CSV format** with columns:

  ```
  Speaker, Dialogue
  ```
* Each row = one turn (e.g., AI asks, Caller responds).
* The flow should match the recording: greeting → info collection → appointment confirmation → closing.


In [97]:
import pandas as pd


sample_df = pd.read_excel(
    str(ROOT_DIR / ".secrets" / "Sample Script in CSV Format.xlsx")
)
sample_df.head()

Unnamed: 0,Speaker,Dialogue,Unnamed: 2
0,AI,Thank you for calling Jacobs Plumbing. How can...,
1,Caller,Hi,I'm Steven Manley. I need to schedule a plumbi...
2,AI,Great,Steven. Can you please provide your address?
3,Caller,123 Main Street,Springfield.
4,AI,Thank you. Let me confirm we service that area...,we do. What service do you need?
