In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Checkpoint id shared by the tokenizer and the model weights.
model_name = "meta-llama/Llama-3.2-1B-Instruct"

# The tokenizer and the weights are loaded independently from the same id.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", device_map="auto")


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [45]:
def generate(prompt:str):
    """Sample a continuation of ``prompt`` and return only the new text.

    Uses the module-level ``tokenizer`` and ``model``. Sampling is enabled
    (top-p 0.9, temperature 0.7) and capped at 100 new tokens, so repeated
    calls with the same prompt may return different strings.

    Args:
        prompt: Text fed to the model as-is.

    Returns:
        The decoded continuation with special tokens removed and surrounding
        whitespace stripped.
    """
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_token_count = encoded['input_ids'].shape[1]

    sampling_options = dict(
        max_new_tokens=100,
        do_sample=True,
        top_p=0.9,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id,
    )
    sequences = model.generate(**encoded, **sampling_options)

    # The returned sequence starts with the prompt tokens; keep only what follows.
    continuation = sequences[0][prompt_token_count:]
    return tokenizer.decode(continuation, skip_special_tokens=True).strip()

generate("What is the capital of France?")

'Paris.\nWhat is the capital of Germany?\nBerlin.\nWhat is the capital of Italy?\nRome.\nWhat is the capital of Spain?\nMadrid.\nWhat is the capital of the United States?\nWashington, D.C.\nWhat is the capital of Canada?\nToronto.\nWhat is the capital of Australia?\nCanberra.\nWhat is the capital of New Zealand?\nWellington.\nWhat is the capital of Sweden?\nStockholm.\nWhat is the capital of Norway?\nOslo.\nWhat is the capital of'

#### Sentiment Expert

In [46]:
def sentiment_expert(text_input: str) -> str:
    """Ask the model to label the emotional tone of ``text_input``.

    The prompt defines three labels (Positive, Negative, Neutral), requests a
    label plus a short explanation, and supplies five worked examples before
    the text to analyze.

    Args:
        text_input: Text to classify; interpolated into the prompt unchanged.

    Returns:
        The raw text produced by ``generate`` for this prompt.
    """
    return generate(f"""
You are an AI trained to act solely as a **sentiment expert**. Your job is to analyze the **emotional tone** of the input text and classify it into one of the following three categories:

- **Positive** – The text expresses happiness, satisfaction, excitement, appreciation, or any other positive emotion.
- **Negative** – The text expresses disappointment, frustration, anger, sadness, criticism, or other negative feelings.
- **Neutral** – The text is emotionally balanced, factual, or shows no strong emotional content.

Your response must only contain:

1. **Sentiment:** One of the three labels – `Positive`, `Negative`, or `Neutral`
2. **Explanation:** A concise reason that supports the label, based only on emotional tone, word choice, or sentiment-laden phrases.

You must not:
- Provide summaries
- Offer personal opinions
- Evaluate content quality or logic
- Infer intent beyond emotional expression

Stick strictly to **sentiment analysis**.

### Few-Shot Examples:

1. **Text:** "Absolutely love this app – it's made my life so much easier!"
   **Sentiment:** Positive
   **Explanation:** The phrase "absolutely love" strongly conveys enthusiasm and satisfaction.

2. **Text:** "I'm really disappointed with the service. It was slow and rude."
   **Sentiment:** Negative
   **Explanation:** Words like "disappointed", "slow", and "rude" clearly express dissatisfaction.

3. **Text:** "The package arrived on Tuesday as scheduled."
   **Sentiment:** Neutral
   **Explanation:** This sentence is factual with no emotional language.

4. **Text:** "Not sure how I feel about this – it's kind of a mixed bag."
   **Sentiment:** Neutral
   **Explanation:** Ambiguous phrasing and lack of strong emotion suggest a neutral sentiment.

5. **Text:** "This is the worst experience I've had in months."
   **Sentiment:** Negative
   **Explanation:** The phrase "worst experience" indicates strong dissatisfaction.

Now analyze the following text:

**Text:** "{text_input}"
""")

#### Persuasion Expert

In [47]:
def persuassion_expert(text_input: str) -> str:
    """Ask the model which persuasion strategy ``text_input`` relies on.

    The prompt defines a closed set of persuasion strategies, requests one
    label plus a short explanation, and supplies worked examples before the
    text to analyze. The function name keeps its original spelling because
    callers reference it.

    Args:
        text_input: Text to analyze; interpolated into the prompt unchanged.

    Returns:
        The raw text produced by ``generate`` for this prompt.
    """
    # This prompt must describe persuasion analysis, not sentiment analysis:
    # the sentiment expert already covers emotional tone.
    prompt = f"""
You are an AI trained to act solely as a **persuasion expert**. Your job is to analyze **how the speaker tries to influence or convince** the listener and classify the input text with the single most prominent persuasion strategy from the following categories:

- **Emotional Appeal** – The text tries to convince through feelings such as fear, frustration, excitement, pride, or sympathy.
- **Logical Appeal** – The text tries to convince through facts, figures, reasons, comparisons, or cause-and-effect arguments.
- **Credibility Appeal** – The text relies on expertise, authority, reputation, guarantees, or trustworthiness.
- **Social Proof** – The text points to what other people do, say, or recommend.
- **Scarcity or Urgency** – The text stresses limited time, limited availability, or the cost of waiting.
- **Personal Experience** – The text supports a view with the speaker's own experience or situation.
- **None** – The text makes no attempt to persuade.

Your response must only contain:

1. **Persuasion Strategy:** One of the labels listed above
2. **Explanation:** A concise reason that supports the label, based only on the wording or rhetorical devices used in the text.

You must not:
- Provide summaries
- Offer personal opinions
- Classify sentiment or intent
- Judge whether the argument is correct

Stick strictly to **persuasion analysis**.

### Few-Shot Examples:

1. **Text:** "Do you even test this before releasing? It's full of bugs."
   **Persuasion Strategy:** Emotional Appeal
   **Explanation:** The rhetorical question and the accusatory tone push the point through frustration.

2. **Text:** "This plan costs 20% less and covers battery damage, so it is the better deal."
   **Persuasion Strategy:** Logical Appeal
   **Explanation:** The speaker argues with a concrete figure and a stated benefit.

3. **Text:** "With over twenty years in the insurance business, we know how to settle claims quickly."
   **Persuasion Strategy:** Credibility Appeal
   **Explanation:** The speaker relies on experience to establish trust.

4. **Text:** "Most of our customers with electric vehicles choose this add-on."
   **Persuasion Strategy:** Social Proof
   **Explanation:** The choice of other customers is used as the reason to follow.

5. **Text:** "The discount ends tonight, so you should decide today."
   **Persuasion Strategy:** Scarcity or Urgency
   **Explanation:** A deadline is used to push for an immediate decision.

6. **Text:** "Honestly, this is the most useful app I've ever used."
   **Persuasion Strategy:** Personal Experience
   **Explanation:** The claim is backed only by the speaker's own use of the app.

7. **Text:** "The package arrived on Tuesday as scheduled."
   **Persuasion Strategy:** None
   **Explanation:** The sentence states a fact and does not try to convince anyone.

Now analyze the following text:

**Text:** "{text_input}"
"""

    return generate(prompt)

#### Keyterm Expert

In [48]:
def keyterms_expert(text_input: str) -> str:
    """Ask the model for the key terms and phrases in ``text_input``.

    The prompt requests a comma-separated list only (no summary, no full
    sentences) and supplies three worked examples before the text.

    Args:
        text_input: Text to analyze; interpolated into the prompt unchanged.

    Returns:
        The raw text produced by ``generate`` for this prompt.
    """
    extraction_prompt = f"""
You are a **Keyterm Expert**. Your job is to extract the most important **key terms or phrases** from the input text. These terms should:

- Reflect the **core concepts**, **entities**, **topics**, or **important actions** in the text.
- Be **noun phrases**, **domain-specific vocabulary**, or **verb-based actions** relevant to the subject.

You must **not**:
- Summarize the text
- Explain or describe the text
- Output full sentences

Your response must include only a list of **key terms or phrases**, separated by commas.

### Few-Shot Examples:

1. **Text:** "Artificial intelligence is transforming industries like healthcare, finance, and education by automating tasks and providing data-driven insights."
   **Key Terms:** Artificial intelligence, healthcare, finance, education, automating tasks, data-driven insights

2. **Text:** "The Amazon rainforest, often referred to as the lungs of the Earth, is being threatened by illegal logging and wildfires."
   **Key Terms:** Amazon rainforest, lungs of the Earth, illegal logging, wildfires

3. **Text:** "Quantum computing uses principles of superposition and entanglement to perform complex calculations much faster than classical computers."
   **Key Terms:** Quantum computing, superposition, entanglement, complex calculations, classical computers

Now extract the key terms from the following text:

**Text:** "{text_input}"
"""
    return generate(extraction_prompt)


#### Intent Expert

In [49]:
def intent_expert(text_input: str) -> str:
    """Ask the model what the speaker of ``text_input`` is trying to achieve.

    The prompt lists sample action-oriented intent labels, requests one label
    plus a short explanation, and supplies five worked examples before the
    text to analyze.

    Args:
        text_input: Text to analyze; interpolated into the prompt unchanged.

    Returns:
        The raw text produced by ``generate`` for this prompt.
    """
    return generate(f"""
You are an **Intent Expert**. Your task is to analyze the user’s input and identify the **underlying intent** – what the person is trying to do, ask, or achieve with the message.

Intent should be classified in the form of **short, action-oriented phrases** such as:
- "ask a question"
- "make a complaint"
- "request help"
- "give feedback"
- "express gratitude"
- "seek information"
- "report an issue"
- "make a purchase inquiry"

You must provide:

1. **Intent:** A concise label summarizing the user's goal  
2. **Explanation:** A short justification based solely on the user’s wording or phrasing

You must **not**:
- Provide summaries
- Infer sentiment unless directly related to intent
- Rewrite or rephrase the input

Focus only on what the user is trying to achieve.

### Few-Shot Examples:

1. **Text:** "Can you help me reset my password?"  
   **Intent:** request help  
   **Explanation:** The user is directly asking for assistance with resetting their password.

2. **Text:** "This app keeps crashing every time I open it."  
   **Intent:** report an issue  
   **Explanation:** The user is describing a recurring problem with the app.

3. **Text:** "Is there a student discount available for this software?"  
   **Intent:** ask a question  
   **Explanation:** The user is seeking information about discounts.

4. **Text:** "Thanks so much for the quick response!"  
   **Intent:** express gratitude  
   **Explanation:** The user is showing appreciation using thankful language.

5. **Text:** "I’m interested in subscribing to your premium plan."  
   **Intent:** make a purchase inquiry  
   **Explanation:** The user is expressing interest in a paid product or service.

Now identify the intent for the following text:

**Text:** "{text_input}"
""")


#### 1)Language Detection

In [50]:
from langdetect import detect, DetectorFactory
DetectorFactory.seed = 0  # For consistent results


In [51]:

def detect_language(text):
    """Describe the detected language of ``text`` in one sentence.

    Detection is best-effort: any ordinary error raised while detecting is
    mapped to a fallback message instead of propagating.

    Args:
        text: Text passed to ``detect``.

    Returns:
        ``"The language of the text is: <code>"`` on success, otherwise
        ``"Could not detect language"``.
    """
    try:
        language = detect(text)
        return "The language of the text is: " + language
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt and SystemExit
        # must still be able to stop a long-running batch.
        return "Could not detect language"

detect_language("Hi how are you")

'The language of the text is: en'

#### 2)POS

In [52]:
def pos(stentence)->str:
    """Ask the model to tag every word of a sentence with a part-of-speech tag.

    The prompt requests Penn Treebank tags in a flat ``word/TAG`` string and
    supplies eight worked examples before the sentence to tag.

    Args:
        stentence: Sentence to tag; interpolated into the prompt unchanged.

    Returns:
        The raw text produced by ``generate`` for this prompt.
    """
    tagging_prompt = f"""
You are an advanced natural language model and a domain expert in English grammar and syntax. Your role is to identify the Part of Speech (POS) for each word in an English sentence using the standard Penn Treebank POS tag set (such as NN, VB, JJ, DT, RB, IN, etc.). You tag each word accurately based on its grammatical role in the sentence.

Return the result as a single plain string, formatted like this:

word1/POS1 word2/POS2 word3/POS3 ...

Do not return a list, tuple, dictionary, or any structured data. The output should be a flat string, where each word is immediately followed by a '/' and its corresponding POS tag. Words are separated by single spaces.

Few-shot Examples:

Input: The quick brown fox jumps over the lazy dog.  
Output: The/DT quick/JJ brown/JJ fox/NN jumps/VBZ over/IN the/DT lazy/JJ dog/NN

Input: She is reading a book under the tree.  
Output: She/PRP is/VBZ reading/VBG a/DT book/NN under/IN the/DT tree/NN

Input: Can you help me with this project?  
Output: Can/MD you/PRP help/VB me/PRP with/IN this/DT project/NN ?/.

Input: I have never seen such a beautiful painting before.  
Output: I/PRP have/VBP never/RB seen/VBN such/JJ a/DT beautiful/JJ painting/NN before/RB ./.

Input: They will be arriving at noon tomorrow.  
Output: They/PRP will/MD be/VB arriving/VBG at/IN noon/NN tomorrow/NN ./.

Input: After the storm, the sky looked incredibly clear.  
Output: After/IN the/DT storm/NN ,/, the/DT sky/NN looked/VBD incredibly/RB clear/JJ ./.

Input: John and Mary went to the market and bought some fresh vegetables.  
Output: John/NNP and/CC Mary/NNP went/VBD to/TO the/DT market/NN and/CC bought/VBD some/DT fresh/JJ vegetables/NNS ./.

Input: Although it was raining, they decided to go hiking.  
Output: Although/IN it/PRP was/VBD raining/VBG ,/, they/PRP decided/VBD to/TO go/VB hiking/VBG ./.

Now, analyze the following sentence and return the POS-tagged output in the specified format.
Sentence:{stentence}
"""
    tagged = generate(tagging_prompt)
    return tagged


#### 3)NER

In [53]:
def ner(sentence):
    """Ask the model to label the named entities in ``sentence``.

    The prompt lists the allowed entity types, requests a flat
    ``word/ENTITY`` string with ``O`` for non-entity words, and supplies seven
    worked examples before the sentence to tag.

    Args:
        sentence: Sentence to tag; interpolated into the prompt unchanged.

    Returns:
        The raw text produced by ``generate`` for this prompt.
    """
    return generate(f"""
You are a highly skilled natural language model and a domain expert in Named Entity Recognition (NER). Your task is to analyze a given English sentence and label all named entities using standard entity types such as:

- PERSON: Names of people
- ORGANIZATION: Companies, institutions, etc.
- LOCATION: Geographical locations such as cities, countries, rivers
- GPE: Geopolitical entities (countries, cities, states)
- DATE: Specific dates or time expressions
- TIME: Times of day
- MONEY: Monetary values
- PERCENT: Percentage values
- FACILITY: Buildings, airports, highways, etc.
- PRODUCT: Consumer products
- EVENT: Named events (e.g. Olympic Games)
- WORK_OF_ART: Titles of books, songs, etc.
- LAW: Named legal documents
- LANGUAGE: Named languages

Return the result as a single plain string. The format must be:

word1/ENTITY1 word2/ENTITY2 word3/O ...

Each word should be followed by a `/` and its corresponding entity label. Use `O` (for "Outside") if a word is **not** part of a named entity. Words are separated by single spaces.

Do not return structured data like lists or dictionaries. The output should be a flat string exactly as specified.

---

Few-shot Examples:

Input: Barack Obama was born in Hawaii.  
Output: Barack/PERSON Obama/PERSON was/O born/O in/O Hawaii/GPE ./O

Input: Google was founded on September 4, 1998.  
Output: Google/ORGANIZATION was/O founded/O on/O September/DATE 4/DATE ,/O 1998/DATE ./O

Input: Apple released the iPhone in 2007.  
Output: Apple/ORGANIZATION released/O the/O iPhone/PRODUCT in/O 2007/DATE ./O

Input: I visited the Eiffel Tower in Paris last summer.  
Output: I/O visited/O the/O Eiffel/FACILITY Tower/FACILITY in/O Paris/GPE last/O summer/O ./O

Input: Elon Musk is the CEO of SpaceX and Tesla.  
Output: Elon/PERSON Musk/PERSON is/O the/O CEO/O of/O SpaceX/ORGANIZATION and/O Tesla/ORGANIZATION ./O

Input: Shakespeare wrote Hamlet in English.  
Output: Shakespeare/PERSON wrote/O Hamlet/WORK_OF_ART in/O English/LANGUAGE ./O

Input: The United Nations held a meeting in New York City.  
Output: The/O United/ORGANIZATION Nations/ORGANIZATION held/O a/O meeting/O in/O New/GPE York/GPE City/GPE ./O

---

Now, analyze the following sentence and return the NER-tagged output in the specified format.
sentence:{sentence}
""")


#### 4)Co Reference

In [54]:
def co_reference(sentence):
    """Ask the model to resolve the coreferences in ``sentence``.

    The prompt asks for the original text with the antecedent of every pronoun
    or ambiguous mention added in square brackets right after it, and supplies
    seven worked examples before the text to resolve.

    Args:
        sentence: Sentence or paragraph to resolve; interpolated into the
            prompt unchanged.

    Returns:
        The raw text produced by ``generate`` for this prompt.
    """
    prompt = f"""
You are a highly capable natural language model with expert-level understanding of **coreference resolution**. Your task is to analyze a given English paragraph or sentence and resolve all **coreferences**. A coreference occurs when multiple expressions in a text refer to the same person, object, or concept.

Your output must clearly identify all references that refer to the same entity and replace pronouns or ambiguous references with their explicit antecedents in **brackets**, immediately following the pronoun or referring word.

---

### Output Format:

Replace pronouns or other coreferent mentions with their antecedents in square brackets `[ ]` directly after the word. Keep the sentence structure intact. Only add the brackets for clarification—do not delete or rearrange any words.

Do **not** output a list, dictionary, or structured object—return a single modified **string**.

---

### Few-shot Examples:

**Input:** Mary went to the park. She enjoyed the fresh air.  
**Output:** Mary went to the park. She [Mary] enjoyed the fresh air.

**Input:** John gave his dog a bath. He did not enjoy it.  
**Output:** John gave his dog a bath. He [John] did not enjoy it [the bath].

**Input:** The book was on the table. It looked old and dusty.  
**Output:** The book was on the table. It [The book] looked old and dusty.

**Input:** Sarah and Emma went shopping. They bought dresses for the party.  
**Output:** Sarah and Emma went shopping. They [Sarah and Emma] bought dresses for the party.

**Input:** Michael met Tom at the station. He was running late.  
**Output:** Michael met Tom at the station. He [Michael or Tom] was running late.

(Note: If ambiguity exists, preserve it but mention both possible antecedents.)

**Input:** The students talked to the professor before they left.  
**Output:** The students talked to the professor before they [the students] left.

**Input:** Alice put the keys on the table and left. When she came back, they were gone.  
**Output:** Alice put the keys on the table and left. When she [Alice] came back, they [the keys] were gone.

---

Now, resolve the coreferences in the following text and return the result using the format described above.
sentence:{sentence}
"""
    # Send the full prompt: passing the bare sentence would drop the task
    # description and the examples, leaving the model to free-run on the text.
    return generate(prompt)

#### 5)Topic Segmentation

In [55]:
def topic_segment(sentence):
    """Ask the model to split ``sentence`` into topic segments.

    The prompt asks for the original text with a blank line inserted at every
    topic boundary and supplies three worked examples before the text.

    Args:
        sentence: Text to segment; interpolated into the prompt unchanged.

    Returns:
        The raw text produced by ``generate`` for this prompt.
    """
    segmentation_prompt = f"""
You are an expert language model specialized in discourse analysis and topic segmentation. Your task is to perform **topic segmentation** on a given piece of text. Topic segmentation involves dividing a paragraph, article, or passage into coherent segments, where each segment discusses a distinct topic or subtopic.

---

### Task:

Given a continuous block of text, identify **where** the topic shifts and split the text into **clearly separated segments**. A topic shift can occur when:

- A new subject or event is introduced
- The focus shifts from one person/place/idea to another
- The writer moves from one argument or theme to another

Return the segmented text as a single string, with each segment **separated by a blank line** (`\\n\\n`). Keep all original words, grammar, and sentence structure intact. Only insert line breaks between topic boundaries.

---

### Few-shot Examples:

**Input:**

Alice loves baking cakes. She spends her weekends experimenting with new recipes. Her kitchen is always full of sweet smells and delicious treats.  
Recently, she started training for a marathon. Running helps her stay focused and healthy. She trains every morning before work.

**Output:**

Alice loves baking cakes. She spends her weekends experimenting with new recipes. Her kitchen is always full of sweet smells and delicious treats.

Recently, she started training for a marathon. Running helps her stay focused and healthy. She trains every morning before work.

---

**Input:**

The Great Wall of China is one of the most famous landmarks in the world. It stretches over 13,000 miles and was built to protect against invasions. Tourists from all over the world visit the wall every year.  
In other parts of Asia, ancient architecture also draws large crowds. Angkor Wat in Cambodia, for example, is another stunning historic site.

**Output:**

The Great Wall of China is one of the most famous landmarks in the world. It stretches over 13,000 miles and was built to protect against invasions. Tourists from all over the world visit the wall every year.

In other parts of Asia, ancient architecture also draws large crowds. Angkor Wat in Cambodia, for example, is another stunning historic site.

---

**Input:**

Tom works in advertising. He creates campaigns for tech companies and often travels for work.  
On weekends, Tom enjoys hiking in the mountains. He finds it refreshing after spending the week in meetings and on video calls.

**Output:**

Tom works in advertising. He creates campaigns for tech companies and often travels for work.

On weekends, Tom enjoys hiking in the mountains. He finds it refreshing after spending the week in meetings and on video calls.

---

Now, segment the following text based on topic shifts. Return the segmented version as a single string, with each segment separated by a blank line.
sentence:{sentence}
"""
    segmented = generate(segmentation_prompt)
    return segmented


In [60]:
import json
import re

def convert_structured_to_jsonl(
    text_block: str,
    i: int,
    output_path: str = "/DATA/rohan_kirti/niladri/dataset3/router/router_response.jsonl",
) -> str:
    """Serialize one response as a JSON line, append it to a file and return it.

    Args:
        text_block: Response text; surrounding whitespace is stripped before
            it is stored under the ``"answer"`` key.
        i: Record number stored under the ``"id_json"`` key.
        output_path: JSONL file to append to. Defaults to the router response
            file used by this notebook.

    Returns:
        The JSON string that was written (without the trailing newline).
    """
    record = {
        "id_json": i,
        "answer": text_block.strip(),
    }
    json_line = json.dumps(record)

    # Append mode: every call adds one line, so the file accumulates across runs.
    with open(output_path, "a", encoding="utf-8") as f:
        f.write(json_line + "\n")
    return json_line



In [61]:
import pandas as pd

# Load CSV
def csv_load(i: int, file_path: str = '/DATA/rohan_kirti/niladri/dataset3/conversation.csv') -> list:
    """Return the turns of one conversation as ``"speaker: utterance"`` strings.

    Args:
        i: Value of the ``conversation_id`` column to select.
        file_path: CSV file to read. It must provide the columns
            ``conversation_id``, ``turn_no``, ``speaker`` and ``utterance``.
            Defaults to the conversation file used by this notebook.

    Returns:
        One string per matching row, ordered by ``turn_no``; an empty list
        when no row has the requested conversation id.
    """
    df = pd.read_csv(file_path)

    # sort_values returns a new frame, so the filtered slice is not mutated in place.
    turns = df[df['conversation_id'] == i].sort_values(by="turn_no")

    return [
        f"{speaker}: {utterance}"
        for speaker, utterance in zip(turns['speaker'], turns['utterance'])
    ]




### Selecting expert

In [65]:
# ---------- Router Function ----------
def route_experts(sentence: str) -> list:
    """Ask the model which primary experts should analyze ``sentence``.

    The prompt offers four experts (intent, keyterm, persuasion, sentiment)
    and asks for a ``Selected Experts: [...]`` line. The first such line in
    the response is parsed.

    Args:
        sentence: Text to route; interpolated into the prompt unchanged.

    Returns:
        Lower-cased expert names in the order the model listed them. Empty
        when no ``Selected Experts: [...]`` line is found or when generation
        or parsing raises.
    """
    prompt = f"""
You are a well-trained expert selector.
Your job is to analyze the input sentence and determine which of the following expert modules are required.

You MUST choose from the following list:
1 Intent Expert
2 Keyterm Expert
3 Persuasion Expert
4 Sentiment Expert

You may select 1, 2, 3, or all 4 — but only those that are clearly needed based on the text.

Always respond in **this below exact format**:
Input: [original sentence]
Selected Experts: [Expert1, Expert2, etc]
Reason: [one sentence explaining why those experts were selected]

Below is few shot examples to help you understand the format and reasoning:

Example #1

Input: Can someone please help me reset my password?
Selected Experts: [Intent Expert, Keyterm Expert]
Reason: The speaker is making a request (intent) and referring to a specific issue (keyterm).

Example #2
Input: This app is a complete disaster. It crashes every time I try to open it.
Selected Experts: [Intent Expert, Sentiment Expert, Keyterm Expert]
Reason: This is a complaint (intent), expresses negative emotion (sentiment), and includes technical keywords (keyterms).

Example #3
Input: Reset password link not working again.
Selected Experts: [Keyterm Expert]
Reason: Technical/factual content, no emotion or intent expressed.

Example #4
Input: I love how smooth the new interface feels – you guys nailed it!
Selected Experts: [Sentiment Expert, Persuasion Expert]
Reason: Positive emotional tone and praise as persuasion.

### Now process the following:
Input: {sentence}
"""
    try:
        response = generate(prompt)

        # re.search returns the first match, i.e. the model's answer for this
        # input rather than any further examples it may go on to invent.
        match = re.search(r"Selected Experts:\s*\[(.*?)\]", response)
        if not match:
            return []

        # The list is parsed as plain text; model output is never evaluated as code.
        names = (
            item.strip().strip('"\'').strip().lower()
            for item in match.group(1).split(',')
        )
        return [name for name in names if name]
    except Exception as e:
        # Routing is best-effort: a failure here means "no primary experts".
        print("Error routing experts:", e)
        return []






# ---------- Synthesis Function ----------
def generate_combined_analysis(dialogue, pos_output, ner_output, topicseg_output, langdetect_output,  coref_output,
                               intent=None, key=None, persu=None, senti=None ):
    """Ask the model for one agent-style reply informed by all expert outputs.

    The prompt describes the expert fields, shows six worked examples, and
    ends with the fields for the current dialogue followed by ``Agent Reply:``.

    Args:
        dialogue: The utterance to respond to.
        pos_output: Part-of-speech expert output.
        ner_output: Named-entity expert output.
        topicseg_output: Topic-segmentation expert output.
        langdetect_output: Language-detection output.
        coref_output: Coreference expert output.
        intent: Intent expert output, or None when that expert was not run.
        key: Keyterm expert output, or None when that expert was not run.
        persu: Persuasion expert output, or None when that expert was not run.
        senti: Sentiment expert output, or None when that expert was not run.

    Returns:
        The raw text produced by ``generate`` for this prompt.
    """
    # Every argument is interpolated as-is, so a None value appears as the text "None".
    synthesis_prompt = f"""You are a trained virtual agent.

Your job is to respond to user dialogue in a way that sounds like a helpful, respectful, and professional human agent.  
You are given internal expert insights to guide your understanding, including a mix of high-confidence and lower-confidence sources.

### You will receive:
Primary (High-Confidence) Expert Outputs:
- Intent: What the user is trying to do or ask for  
- Keyterms: Important phrases the user mentioned  
- Sentiment: The emotional tone of the message  
- Persuasion: How the user is expressing or reinforcing their view  

Additional (Supporting) Expert Outputs:
- POS: Part-of-speech tags for each word  
- NER: Named entities detected in the input  
- Topic Segmentation: Where the topic changes within the sentence or paragraph  
- Language Detection: The language of the input  
- Coreference Resolution: What pronouns or vague terms refer to

### Important:
- Do **not** include or repeat any of the expert outputs or the dialogue itself.
- Use the insights only to inform your internal understanding.
- Respond with **only one professional, human-sounding agent reply**.
- No bullet points, analysis, labels, or reference to the expert systems.

–––– Few-shot Examples ––––

**Example 1**  
Dialogue: "Why does this feature never work? It’s so frustrating."

Intent: Problem with feature functionality  
Keyterms: "feature", "never work", "frustrating"  
Sentiment: Negative  
Persuasion: Expressed through emotional frustration  
POS: Why/WRB does/VBZ this/DT feature/NN never/RB work/VB ?/.  
NER: None  
Topic Segmentation: 1. Functionality issue 2. Emotional response  
Language Detection: English  
Coreference Resolution: "this feature" = the app feature being referenced

**Agent Reply:**  
I’m sorry that feature isn’t working as expected—let’s get this sorted for you as quickly as possible.

––––

**Example 2**  
Dialogue: "Do you even test this before releasing? It's full of bugs."

Intent: Complaint about quality/testing  
Keyterms: "test", "bugs", "releasing"  
Sentiment: Strongly negative  
Persuasion: Accusatory, uses rhetorical question  
POS: Do/VBP you/PRP even/RB test/VB this/DT before/IN releasing/VBG ?/.  
NER: None  
Topic Segmentation: 1. Doubt over testing 2. Frustration with result  
Language Detection: English  
Coreference Resolution: "this" = recent update or product release; "it" = the software

**Agent Reply:**  
I completely understand how frustrating that must be. I’ll make sure your feedback reaches our team so we can resolve these issues.

––––

**Example 3**  
Dialogue: "Can you guys add an option to export in PDF format?"

Intent: Feature request  
Keyterms: "export", "PDF format", "option"  
Sentiment: Neutral  
Persuasion: Polite and constructive  
POS: Can/MD you/PRP guys/NNS add/VB an/DT option/NN to/TO export/VB in/IN PDF/NN format/NN ?/.  
NER: PDF/PRODUCT  
Topic Segmentation: Single topic — feature enhancement  
Language Detection: English  
Coreference Resolution: No ambiguous references

**Agent Reply:**  
That’s a great suggestion—PDF export could be really useful. I’ll share this with our team for consideration.

––––

**Example 4**  
Dialogue: "You say it's for our benefit, but it just feels like more red tape."

Intent: Expressing doubt or disagreement  
Keyterms: "benefit", "red tape"  
Sentiment: Skeptical  
Persuasion: Uses contrast to highlight distrust  
POS: You/PRP say/VBP it/PRP 's/VBZ for/IN our/PRP$ benefit/NN ,/, but/CC it/PRP just/RB feels/VBZ like/IN more/JJR red/JJ tape/NN ./.  
NER: None  
Topic Segmentation: 1. Claimed benefit 2. Perceived burden  
Language Detection: English  
Coreference Resolution: "it" = the policy/process being discussed

**Agent Reply:**  
I hear your concern, and you’re right—it shouldn’t feel like added work. We’ll do what we can to make the process genuinely helpful.

––––

**Example 5**  
Dialogue: "Honestly, this is the most useful app I’ve ever used."

Intent: Praise  
Keyterms: "most useful", "app", "ever used"  
Sentiment: Very positive  
Persuasion: Personal experience  
POS: Honestly/RB ,/, this/DT is/VBZ the/DT most/RBS useful/JJ app/NN I/PRP ’ve/VBP ever/RB used/VBN ./.  
NER: app/PRODUCT  
Topic Segmentation: Single topic — positive feedback  
Language Detection: English  
Coreference Resolution: "this" = the app being used

**Agent Reply:**  
That’s amazing to hear—thank you! We're thrilled the app has been so helpful for you.

––––

**Example 6**  
Dialogue: "It’s annoying how I have to log in every single time."

Intent: Frustration with login experience  
Keyterms: "log in", "every time", "annoying"  
Sentiment: Mildly negative  
Persuasion: Based on repeated inconvenience  
POS: It/PRP ’s/VBZ annoying/JJ how/WRB I/PRP have/VBP to/TO log/VB in/RP every/DT single/JJ time/NN ./.  
NER: log in/PROCESS  
Topic Segmentation: Single topic — authentication friction  
Language Detection: English  
Coreference Resolution: "It" = the login process or app behavior

**Agent Reply:**  
That does sound inconvenient—I'll check whether there’s a setting or update that can simplify your login experience.

––––

Now, based on the input below, respond exactly as a trained agent would.

**Do not repeat or reference the dialogue or the expert fields in your reply.  
Just return the final agent-style response. Nothing else.**

Dialogue: {dialogue}  
Intent: {intent}  
Keyterms: {key}  
Sentiment: {senti}  
Persuasion: {persu}  
POS: {pos_output}  
NER: {ner_output}  
Topic Segmentation: {topicseg_output}  
Language Detection: {langdetect_output}  
Coreference: {coref_output}  

Agent Reply:"""

    agent_reply = generate(synthesis_prompt)
    return agent_reply





# ---------- Main Selector Function ----------
def process_input_with_selector_model(sentence: str) -> str:
    """Run the full pipeline for one utterance and return the agent reply.

    The router picks the primary experts, only those are run, the five
    supporting analyses are always run, and everything is handed to
    ``generate_combined_analysis``. The router's selection is printed.

    Args:
        sentence: The utterance to process.

    Returns:
        The reply produced by ``generate_combined_analysis``.
    """
    selected_experts = route_experts(sentence)
    print(f"Selected Experts: {selected_experts}")

    # Primary experts in the order they are consulted; the label is the
    # lower-cased name the router reports.
    primary_experts = (
        ("intent expert", intent_expert),
        ("keyterm expert", keyterms_expert),
        ("sentiment expert", sentiment_expert),
        ("persuasion expert", persuassion_expert),
    )
    # An expert the router did not select contributes None.
    insights = {
        label: (run(sentence) if label in selected_experts else None)
        for label, run in primary_experts
    }

    # Keyword arguments are evaluated top to bottom, which fixes the order in
    # which the supporting analyses run.
    return generate_combined_analysis(
        dialogue=sentence,
        pos_output=pos(sentence),
        ner_output=ner(sentence),
        coref_output=co_reference(sentence),
        langdetect_output=detect_language(sentence),
        topicseg_output=topic_segment(sentence),
        intent=insights["intent expert"],
        key=insights["keyterm expert"],
        persu=insights["persuasion expert"],
        senti=insights["sentiment expert"],
    )


In [63]:
# Flat list of "speaker: utterance" strings for conversations 1 through 4.
result = []
for i in range(1, 5):
    result += csv_load(i)

len(result)


46

In [66]:
# Run every utterance through the pipeline and append each reply to the JSONL
# file; records are numbered from 1 in processing order.
for i, sentence in enumerate(result, start=1):
    final_output = process_input_with_selector_model(sentence)
    convert_structured_to_jsonl(final_output, i)
    print(sentence)

Selected Experts: ['keyterm expert', 'sentiment expert']
User: Hi, I'm looking to get motor insurance for my new electric vehicle. It's a 2024 Tesla Model 3.
Selected Experts: ['sentiment expert', 'persuasion expert']
Agent: Great choice! The Tesla Model 3 is an excellent vehicle. Since you've opted for an EV, are you particularly interested in coverage specific to electric vehicles, like battery protection?
Selected Experts: ['sentiment expert', 'persuasion expert']
User: Yes, battery protection is definitely a concern. It's a big investment, and I want to make sure it's covered.
Selected Experts: ['intent expert', 'persuasion expert']
Agent: Absolutely. The battery is the heart of your Tesla. With Tata AIG, you get rapid claims resolution combining thorough coverage with rapid claims resolution. It integrates technology with traditional risk management practices, ensuring that claims are processed quickly and effectively.
Selected Experts: ['keyterm expert', 'intent expert']
User: Wh

In [68]:
import json
import re

# Function to clean markdown and formatting from text
def clean_text(text):
    """Strip markdown punctuation from ``text`` and normalize its whitespace.

    Every run of ``*``, backtick, ``_``, ``>``, ``#``, backslash, ``-``,
    carriage return or newline becomes a single space; whitespace runs are
    then collapsed to one space and the ends are trimmed.
    """
    without_markup = re.sub(r'[*`_>#\\\-\r\n]+', ' ', text)
    # split() with no argument drops leading/trailing whitespace and treats any
    # whitespace run as one separator, so joining on ' ' collapses and trims at once.
    return ' '.join(without_markup.split())

# Source JSONL written by the pipeline, and the destination for the cleaned copy
input_file = "/DATA/rohan_kirti/niladri/dataset3/router/router_response.jsonl"   # Replace with your actual input filename
output_file = "/DATA/rohan_kirti/niladri/dataset3/router/cleaned_output.jsonl"

# Stream record by record: the input is opened first, so a missing input file
# fails before the output file is created or truncated.
with open(input_file, "r", encoding="utf-8") as infile:
    with open(output_file, "w", encoding="utf-8") as outfile:
        for raw_line in infile:
            record = json.loads(raw_line)
            record["answer"] = clean_text(record["answer"])
            print(json.dumps(record), file=outfile)

print(f"Cleaned data written to {output_file}")


Cleaned data written to /DATA/rohan_kirti/niladri/dataset3/router/cleaned_output.jsonl
