In [20]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Local directory holding the checkpoint that both the model and the tokenizer
# are loaded from.
model_name = "/scratch/rohank__iitp/qwen2_5_7b_instruct"

# "auto" delegates dtype selection and device placement to the library.
# NOTE(review): device_map="auto" presumably requires `accelerate` -- confirm.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto"
)
# The tokenizer is loaded from the same path so it matches the model weights.
tokenizer = AutoTokenizer.from_pretrained(model_name)


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [173]:
def generate(prompt: str):
    """Return the model's sampled continuation of ``prompt`` as plain text.

    Relies on the module-level ``tokenizer`` and ``model``. At most 100 new
    tokens are sampled (nucleus sampling with top_p=0.9, temperature=0.7).
    Only the newly generated tokens are decoded, so the prompt itself is not
    echoed back; surrounding whitespace is stripped from the result.
    """
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_len = encoded['input_ids'].shape[1]

    sampling_options = dict(
        max_new_tokens=100,
        do_sample=True,
        top_p=0.9,
        temperature=0.7,
    )
    sequences = model.generate(**encoded, **sampling_options)

    # The returned sequence starts with the prompt tokens; keep only the rest.
    completion_ids = sequences[0][prompt_len:]
    completion = tokenizer.decode(completion_ids, skip_special_tokens=True)
    return completion.strip()

# Smoke test: one short question to confirm the model and tokenizer respond.
generate("What is the capital of France?")

'The capital of France is Paris. \n\nParis is not only the capital but also the largest city in France, located in the northern central part of the country. It is known for its rich history, culture, art, architecture, and fashion. Some famous landmarks in Paris include the Eiffel Tower, Louvre Museum, Notre-Dame Cathedral, and Champs-Élysées. Paris has been a significant center of political power, finance, education, and tourism in Europe and around the world'

#### Sentiment Expert

In [174]:
def sentiment_expert(text_input: str) -> str:
   """Ask the language model to classify the sentiment of ``text_input``.

   Builds a few-shot prompt that requests a ``Positive`` / ``Negative`` /
   ``Neutral`` label plus a short explanation, embeds ``text_input`` as the
   text to analyze, and returns whatever ``generate`` produces for it. The
   returned string is the raw model output; it is not parsed or validated.
   """

   prompt = f"""
You are an AI trained to act solely as a **sentiment expert**. Your job is to analyze the **emotional tone** of the input text and classify it into one of the following three categories:

- **Positive** – The text expresses happiness, satisfaction, excitement, appreciation, or any other positive emotion.
- **Negative** – The text expresses disappointment, frustration, anger, sadness, criticism, or other negative feelings.
- **Neutral** – The text is emotionally balanced, factual, or shows no strong emotional content.

Your response must only contain:

1. **Sentiment:** One of the three labels – `Positive`, `Negative`, or `Neutral`
2. **Explanation:** A concise reason that supports the label, based only on emotional tone, word choice, or sentiment-laden phrases.

You must not:
- Provide summaries
- Offer personal opinions
- Evaluate content quality or logic
- Infer intent beyond emotional expression

Stick strictly to **sentiment analysis**.

### Few-Shot Examples:

1. **Text:** "Absolutely love this app – it's made my life so much easier!"
   **Sentiment:** Positive
   **Explanation:** The phrase "absolutely love" strongly conveys enthusiasm and satisfaction.

2. **Text:** "I'm really disappointed with the service. It was slow and rude."
   **Sentiment:** Negative
   **Explanation:** Words like "disappointed", "slow", and "rude" clearly express dissatisfaction.

3. **Text:** "The package arrived on Tuesday as scheduled."
   **Sentiment:** Neutral
   **Explanation:** This sentence is factual with no emotional language.

4. **Text:** "Not sure how I feel about this – it's kind of a mixed bag."
   **Sentiment:** Neutral
   **Explanation:** Ambiguous phrasing and lack of strong emotion suggest a neutral sentiment.

5. **Text:** "This is the worst experience I've had in months."
   **Sentiment:** Negative
   **Explanation:** The phrase "worst experience" indicates strong dissatisfaction.

Now analyze the following text:

**Text:** "{text_input}"
"""


   # The prompt ends right after the quoted text, so the model's continuation
   # is expected to be the Sentiment/Explanation pair.
   return generate(prompt)

#### Persuasion Expert

In [175]:
def persuassion_expert(text_input: str) -> str:
    """Ask the language model which persuasion strategy ``text_input`` uses.

    The previous prompt was a verbatim copy of the sentiment prompt, so the
    "persuasion" insight was really a second sentiment label. This version
    asks for the persuasion strategy instead and keeps the same two-field
    answer layout (label + explanation) used by the other experts.

    The misspelled function name is kept because callers reference it.

    Args:
        text_input: The utterance to analyze; embedded verbatim in the prompt.

    Returns:
        The raw model output from ``generate`` (not parsed or validated).
    """
    prompt = f"""
You are an AI trained to act solely as a **persuasion expert**. Your job is to analyze **how the speaker tries to influence the listener** and identify the main persuasion strategy used in the input text. Choose the single best-fitting label from the following list:

- **Emotional Appeal** – Tries to influence through feelings such as fear, excitement, frustration, or reassurance.
- **Logical Appeal** – Tries to influence through facts, numbers, comparisons, or step-by-step reasoning.
- **Credibility Appeal** – Tries to influence by pointing to expertise, reputation, guarantees, or trustworthiness.
- **Social Proof** – Tries to influence by pointing to what other people do, think, or recommend.
- **Urgency or Scarcity** – Tries to influence by stressing limited time, limited availability, or the cost of waiting.
- **Accusatory Tone** – Tries to influence through blame, pressure, or pointed rhetorical questions.
- **None** – The text makes no attempt to persuade; it is purely informational or a plain question.

Your response must only contain:

1. **Persuasion:** One of the labels listed above
2. **Explanation:** A concise reason that supports the label, based only on the wording and rhetorical devices used in the text.

You must not:
- Provide summaries
- Offer personal opinions
- Classify sentiment or intent
- Judge whether the persuasion is effective

Stick strictly to **persuasion analysis**.

### Few-Shot Examples:

1. **Text:** "Thousands of drivers have already switched to this plan – you should too."
   **Persuasion:** Social Proof
   **Explanation:** The speaker points to what "thousands of drivers" have done to encourage the same choice.

2. **Text:** "This offer ends tonight, so you need to decide now."
   **Persuasion:** Urgency or Scarcity
   **Explanation:** The deadline "ends tonight" and "decide now" push for an immediate decision.

3. **Text:** "Do you even test this before releasing? It's full of bugs."
   **Persuasion:** Accusatory Tone
   **Explanation:** The rhetorical question assigns blame in order to pressure the listener.

4. **Text:** "With the add-on you pay 5% more but save the full cost of a battery replacement."
   **Persuasion:** Logical Appeal
   **Explanation:** The speaker compares a small extra cost with a large saving to justify the choice.

5. **Text:** "The package arrived on Tuesday as scheduled."
   **Persuasion:** None
   **Explanation:** The sentence only reports a fact and does not try to influence anyone.

Now analyze the following text:

**Text:** "{text_input}"
"""

    return generate(prompt)

#### Keyterm Expert

In [176]:
def keyterms_expert(text_input: str) -> str:
   """Ask the language model to extract key terms from ``text_input``.

   Builds a few-shot prompt that requests a comma-separated list of key terms
   or phrases, embeds ``text_input`` as the text to process, and returns the
   raw output of ``generate`` without parsing it.
   """

   prompt = f"""
You are a **Keyterm Expert**. Your job is to extract the most important **key terms or phrases** from the input text. These terms should:

- Reflect the **core concepts**, **entities**, **topics**, or **important actions** in the text.
- Be **noun phrases**, **domain-specific vocabulary**, or **verb-based actions** relevant to the subject.

You must **not**:
- Summarize the text
- Explain or describe the text
- Output full sentences

Your response must include only a list of **key terms or phrases**, separated by commas.

### Few-Shot Examples:

1. **Text:** "Artificial intelligence is transforming industries like healthcare, finance, and education by automating tasks and providing data-driven insights."
   **Key Terms:** Artificial intelligence, healthcare, finance, education, automating tasks, data-driven insights

2. **Text:** "The Amazon rainforest, often referred to as the lungs of the Earth, is being threatened by illegal logging and wildfires."
   **Key Terms:** Amazon rainforest, lungs of the Earth, illegal logging, wildfires

3. **Text:** "Quantum computing uses principles of superposition and entanglement to perform complex calculations much faster than classical computers."
   **Key Terms:** Quantum computing, superposition, entanglement, complex calculations, classical computers

Now extract the key terms from the following text:

**Text:** "{text_input}"
"""

   # The prompt ends right after the quoted text, so the model's continuation
   # is expected to be the "Key Terms" line.
   return generate(prompt)


#### Intent Expert

In [177]:
def intent_expert(text_input: str) -> str:
   """Ask the language model to identify the intent behind ``text_input``.

   Builds a few-shot prompt that requests a short, action-oriented intent
   label plus a brief explanation, embeds ``text_input`` as the text to
   analyze, and returns the raw output of ``generate`` without parsing it.
   """

   prompt = f"""
You are an **Intent Expert**. Your task is to analyze the user’s input and identify the **underlying intent** – what the person is trying to do, ask, or achieve with the message.

Intent should be classified in the form of **short, action-oriented phrases** such as:
- "ask a question"
- "make a complaint"
- "request help"
- "give feedback"
- "express gratitude"
- "seek information"
- "report an issue"
- "make a purchase inquiry"

You must provide:

1. **Intent:** A concise label summarizing the user's goal  
2. **Explanation:** A short justification based solely on the user’s wording or phrasing

You must **not**:
- Provide summaries
- Infer sentiment unless directly related to intent
- Rewrite or rephrase the input

Focus only on what the user is trying to achieve.

### Few-Shot Examples:

1. **Text:** "Can you help me reset my password?"  
   **Intent:** request help  
   **Explanation:** The user is directly asking for assistance with resetting their password.

2. **Text:** "This app keeps crashing every time I open it."  
   **Intent:** report an issue  
   **Explanation:** The user is describing a recurring problem with the app.

3. **Text:** "Is there a student discount available for this software?"  
   **Intent:** ask a question  
   **Explanation:** The user is seeking information about discounts.

4. **Text:** "Thanks so much for the quick response!"  
   **Intent:** express gratitude  
   **Explanation:** The user is showing appreciation using thankful language.

5. **Text:** "I’m interested in subscribing to your premium plan."  
   **Intent:** make a purchase inquiry  
   **Explanation:** The user is expressing interest in a paid product or service.

Now identify the intent for the following text:

**Text:** "{text_input}"
"""

   # The prompt ends right after the quoted text, so the model's continuation
   # is expected to be the Intent/Explanation pair.
   return generate(prompt)


In [178]:
import json
import re

# Default destination for router responses; kept so existing callers that pass
# only (text_block, i) keep writing to the same file as before.
_ROUTER_RESPONSE_PATH = "/home/rohank__iitp/Work/niladri/dataset2/router/router_response.jsonl"


def convert_structured_to_jsonl(
    text_block: str,
    i: int,
    output_path: str = _ROUTER_RESPONSE_PATH,
) -> str:
    """Serialize one response as a JSON line and append it to ``output_path``.

    Args:
        text_block: The response text; leading/trailing whitespace is removed
            before it is stored under the ``"answer"`` key.
        i: Identifier stored under the ``"id_json"`` key.
        output_path: JSONL file to append to. Defaults to the router response
            file used by the notebook.

    Returns:
        The JSON string that was written (without the trailing newline).

    Raises:
        OSError: If ``output_path`` cannot be opened for appending.
    """
    record = {
        "id_json": i,
        "answer": text_block.strip(),
    }
    line = json.dumps(record)

    # Append mode: each call adds exactly one line, so repeated runs accumulate.
    with open(output_path, "a", encoding="utf-8") as f:
        f.write(line + "\n")
    return line



In [179]:
import pandas as pd

# Load CSV
def csv_load(i: int, file_path: str = '/home/rohank__iitp/Work/niladri/dataset2/conversation.csv') -> list:
    """Return the turns of conversation ``i`` as ``"speaker: utterance"`` strings.

    The CSV is expected to have the columns ``conversation_id``, ``turn_no``,
    ``speaker`` and ``utterance``. Rows belonging to conversation ``i`` are
    ordered by ``turn_no`` and every one of them is included in the result.

    Args:
        i: The ``conversation_id`` to select.
        file_path: CSV file to read. Defaults to the notebook's dataset path.

    Returns:
        One formatted string per turn, in turn order; an empty list when no
        row matches ``i``.
    """
    df = pd.read_csv(file_path)

    # sort_values returns a new frame; sorting the filtered slice in place
    # (as before) triggers pandas' SettingWithCopyWarning.
    turns = df[df['conversation_id'] == i].sort_values(by="turn_no")

    return [
        f"{speaker}: {utterance}"
        for speaker, utterance in zip(turns['speaker'], turns['utterance'])
    ]




### Selecting expert

In [180]:
# ---------- Router Function ----------
def route_experts(sentence: str) -> list:
    """Ask the language model which expert modules ``sentence`` needs.

    A few-shot prompt instructs the model to answer with a line of the form
    ``Selected Experts: [Expert1, Expert2, ...]``. The first such line in the
    response is parsed with a regular expression.

    The earlier version carried a second prompt and an ``eval``-based parser
    after the ``try``/``except``; both branches of that statement return, so
    that code could never run and has been removed.

    Args:
        sentence: The utterance to route; embedded verbatim in the prompt.

    Returns:
        Lower-cased expert names (e.g. ``"intent expert"``) in the order the
        model listed them. Empty when generation fails or when the response
        contains no ``Selected Experts: [...]`` line.
    """
    prompt = f"""
You are a well-trained expert selector.
Your job is to analyze the input sentence and determine which of the following expert modules are required.

You MUST choose from the following list:
1 Intent Expert
2 Keyterm Expert
3 Persuasion Expert
4 Sentiment Expert

You may select 1, 2, 3, or all 4 — but only those that are clearly needed based on the text.

Always respond in **this below exact format**:
Input: [original sentence]
Selected Experts: [Expert1, Expert2, etc]
Reason: [one sentence explaining why those experts were selected]

Below is few shot examples to help you understand the format and reasoning:

Example #1

Input: Can someone please help me reset my password?
Selected Experts: [Intent Expert, Keyterm Expert]
Reason: The speaker is making a request (intent) and referring to a specific issue (keyterm).

Example #2
Input: This app is a complete disaster. It crashes every time I try to open it.
Selected Experts: [Intent Expert, Sentiment Expert, Keyterm Expert]
Reason: This is a complaint (intent), expresses negative emotion (sentiment), and includes technical keywords (keyterms).

Example #3
Input: Reset password link not working again.
Selected Experts: [Keyterm Expert]
Reason: Technical/factual content, no emotion or intent expressed.

Example #4
Input: I love how smooth the new interface feels – you guys nailed it!
Selected Experts: [Sentiment Expert, Persuasion Expert]
Reason: Positive emotional tone and praise as persuasion.

### Now process the following:
Input: {sentence}
"""
    # Routing is best-effort: a failed generation must not abort a long batch,
    # so the error is reported and no experts are selected for this sentence.
    try:
        response = generate(prompt)
    except Exception as e:
        print("Error routing experts:", e)
        return []

    match = re.search(r"Selected Experts:\s*\[(.*?)\]", response)
    if not match:
        return []

    # Split the bracketed list by hand; the names are not quoted, so this is
    # plain text rather than a Python/JSON literal.
    return [
        item.strip().strip('"\'').lower()
        for item in match.group(1).split(',')
        if item.strip()
    ]






# ---------- Synthesis Function ----------
def generate_combined_analysis(dialogue, intent=None, key=None, persu=None, senti=None):
    """Generate an agent-style reply to ``dialogue`` guided by expert insights.

    The previous version used a plain (non-f) string whose placeholders
    (``{dialogue}``, ``{intent_output}``, ...) were never substituted, so the
    model only ever saw the literal placeholder text. The input section is now
    built from the actual arguments. Insights that are missing or empty are
    left out entirely, matching the few-shot examples, which list only the
    fields that are available.

    Args:
        dialogue: The user/agent utterance to respond to.
        intent: Output of the intent expert, or ``None`` if not selected.
        key: Output of the keyterm expert, or ``None`` if not selected.
        persu: Output of the persuasion expert, or ``None`` if not selected.
        senti: Output of the sentiment expert, or ``None`` if not selected.

    Returns:
        The raw model output from ``generate``.
    """
    instructions = """You are a trained virtual agent.

Your job is to respond to user dialogue in a way that sounds like a helpful, respectful, and professional human agent.  
You will be given internal expert insights that help guide your understanding, but your final output must be a clean, agent-style reply only.

The expert insights may include:
- Intent: What the user is trying to do or ask for  
- Keyterms: Important phrases the user mentioned  
- Sentiment: The emotional tone of the message  
- Persuasion: How the user is expressing or reinforcing their view  

**Important Instructions – You must follow these strictly:**
- Your final response must always sound like a real human agent: calm, clear, empathetic, and helpful.
- Do not include or repeat the input dialogue or any expert outputs in your reply.
- Use only the expert insights given — never guess or make up missing ones.
- Do not describe, explain, or mention the experts or their findings in your response.
- Output a **single, polished agent-style reply only** — no labels, bullet points, or formatting.

Your tone should:
- Acknowledge and validate the user's message  
- Offer clarity, support, or next steps when needed  
- Be persuasive when appropriate, but always respectful  
- Remain professional and human-like, no matter the sentiment

–––– Few-shot Examples ––––

Example 1  
Dialogue: "Why does this feature never work? It’s so frustrating."  
Intent: Wants the issue fixed  
Keyterms: "feature never work", "frustrating"  

Agent Reply: I’m sorry that feature isn’t working as expected—let’s get this sorted for you as quickly as possible.

––––

Example 2  
Dialogue: "The latest update is fantastic. Everything runs smoother now."  
Sentiment: Positive  

Agent Reply: That’s wonderful to hear—thanks for the kind words! We're glad the update made a difference.

––––

Example 3  
Dialogue: "Do you even test this before releasing? It's full of bugs."  
Sentiment: Negative  
Keyterms: "test", "bugs"  
Persuasion: Accusatory tone  

Agent Reply: I completely understand how frustrating that must be. I’ll make sure your feedback reaches our team so we can resolve these issues.

––––

Example 4  
Dialogue: "Can you guys add an option to export in PDF format?"  
Intent: Request for a new feature  

Agent Reply: That’s a great suggestion—PDF export could be really useful. I’ll share this with our team for consideration.

––––

Example 5  
Dialogue: "You say it's for our benefit, but it just feels like more red tape."  
Intent: Questioning the purpose  
Sentiment: Skeptical  
Persuasion: Contrasts stated benefit with added burden  

Agent Reply: I hear your concern, and you’re right—it shouldn’t feel like added work. We’ll do what we can to make the process genuinely helpful.

––––

Example 6  
Dialogue: "Honestly, this is the most useful app I’ve ever used."  
Keyterms: "most useful", "ever used"  
Sentiment: Very positive  

Agent Reply: That’s amazing to hear—thank you! We're thrilled the app has been so helpful for you.

––––

Example 7  
Dialogue: "It’s annoying how I have to log in every single time."  
Intent: Frustration with repetitive process  
Keyterms: "log in", "every time"  

Agent Reply: That does sound inconvenient—I'll check whether there’s a setting or update that can simplify your login experience.

––––

Now, based on the input below, respond exactly as a trained agent would.

**Do not repeat or reference the dialogue or the expert fields in your reply.  
Just return the final agent-style response. Nothing else.**
"""

    # Same field order as the original template.
    insights = (
        ("Intent", intent),
        ("Keyterms", key),
        ("Sentiment", senti),
        ("Persuasion", persu),
    )

    input_lines = [f"Dialogue: {dialogue}  "]
    for label, value in insights:
        if not value:
            continue
        # Expert outputs are often multi-line; collapse them so each insight
        # stays on its own single labelled line like in the examples.
        input_lines.append(f"{label}: {' '.join(str(value).split())}  ")

    prompt = instructions + "\n" + "\n".join(input_lines) + "\n\nAgent Reply:"
    return generate(prompt)





# ---------- Main Selector Function ----------
def process_input_with_selector_model(sentence: str) -> str:
    """Route ``sentence`` to the selected experts and synthesize a reply.

    The router decides which experts apply; the chosen list is printed, each
    selected expert is run on ``sentence``, and the results (``None`` for
    experts that were not selected) are handed to
    ``generate_combined_analysis`` together with the sentence itself.
    """
    chosen = route_experts(sentence)
    print(f"Selected Experts: {chosen}")

    # Conditional expressions evaluate lazily, so only the selected experts
    # are invoked, in the order intent, keyterm, sentiment, persuasion.
    intent_out = intent_expert(sentence) if "intent expert" in chosen else None
    keyterm_out = keyterms_expert(sentence) if "keyterm expert" in chosen else None
    sentiment_out = sentiment_expert(sentence) if "sentiment expert" in chosen else None
    persuasion_out = persuassion_expert(sentence) if "persuasion expert" in chosen else None

    insights = {
        "intent": intent_out,
        "key": keyterm_out,
        "persu": persuasion_out,
        "senti": sentiment_out,
    }
    return generate_combined_analysis(dialogue=sentence, **insights)


In [181]:
# Gather the formatted turns of conversations 5 through 20 (range end is
# exclusive) into one flat list of "speaker: utterance" strings.
result=list()
for i in range(5,21):
    res = csv_load(i)
    # res.pop(0)
    result.extend(res)  # extend (not append) keeps `result` a flat list of turns
    
# Notebook display: total number of turns collected.
len(result)


209

In [182]:
# Run the full router -> experts -> synthesis pipeline on every collected turn
# and append each reply to the JSONL file. `i` is the running id stored with
# each record; it starts at 46 and increases by one per turn.
i=46
for sentence in result:
    final_output = process_input_with_selector_model(sentence)
    res = convert_structured_to_jsonl(final_output,i)
    i+=1
    # Echo the processed turn so progress is visible in the cell output.
    print(sentence)

Selected Experts: ['intent expert', 'keyterm expert']
User: Hi, I'm looking for motor insurance for my 2022 Hyundai Kona Electric. Can you help?
Selected Experts: ['keyterm expert', 'intent expert']
Agent: Absolutely! The Hyundai Kona Electric is a fantastic car. Given it's an EV, are you particularly concerned about battery coverage or charging-related issues?
Selected Experts: ['keyterm expert', 'sentiment expert']
User: Yes, battery coverage is a big concern. I've heard those repairs can be super expensive.
Selected Experts: ['keyterm expert', 'intent expert']
Agent: I understand. With Tata AIG, we understand the nuances of EVs. Our policy is designed to address modern vehicle risks, it ensures claims are processed quickly and effectively.
Selected Experts: ['keyterm expert', 'intent expert']
User: Okay, good. What kind of coverage options do you offer for the battery specifically?
Selected Experts: ['keyterm expert', 'sentiment expert']
Agent: We offer a comprehensive plan that inc

In [183]:
import json
import re

# Function to clean markdown and formatting from text
def clean_text(text):
    """Strip markdown-style symbols from ``text`` and normalize whitespace.

    Every run of ``*``, backtick, ``_``, ``>``, ``#``, backslash, ``-``,
    carriage return or newline becomes a single space; whitespace runs are
    then collapsed to one space and the ends are trimmed. Hyphens and
    underscores inside words are replaced too.
    """
    without_symbols = re.sub(r'[*`_>#\\\-\r\n]+', ' ', text)
    single_spaced = re.sub(r'\s+', ' ', without_symbols)
    return single_spaced.strip()

# Input: the raw router responses appended by convert_structured_to_jsonl.
# Output: the same records with the "answer" field cleaned.
input_file = "/home/rohank__iitp/Work/niladri/dataset2/router/router_response.jsonl"   # raw responses
output_file = "/home/rohank__iitp/Work/niladri/dataset2/router/cleaned_output.jsonl"

# Stream line by line: each input line is one JSON object. The output file is
# opened in "w" mode, so it is rewritten from scratch on every run.
with open(input_file, "r", encoding="utf-8") as infile, open(output_file, "w", encoding="utf-8") as outfile:
    for line in infile:
        data = json.loads(line)
        # Only the "answer" field is cleaned; other keys pass through unchanged.
        data["answer"] = clean_text(data["answer"])
        outfile.write(json.dumps(data) + "\n")

print(f"Cleaned data written to {output_file}")


Cleaned data written to /home/rohank__iitp/Work/niladri/dataset2/router/cleaned_output.jsonl
