In [1]:
# Progress message printed before the slow imports and checkpoint load below.
print("Loading model and tokenizer...")
# NOTE(review): torch is not referenced directly in the cells visible here.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Hub identifier shared by the model load and the tokenizer load below.
model_name = "microsoft/Phi-3-medium-128k-instruct"

# "auto" for dtype and device map presumably defers both choices to the
# library — TODO confirm against the installed transformers version.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto"
)
# The tokenizer is loaded from the same identifier as the model.
tokenizer = AutoTokenizer.from_pretrained(model_name)


Loading model and tokenizer...


  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 6/6 [00:08<00:00,  1.44s/it]


In [2]:
# Wrap the already-loaded model and tokenizer in a text-generation pipeline.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Keyword arguments unpacked into each `pipe(...)` call made by generate().
# NOTE(review): sampling is enabled (do_sample=True, temperature=0.7) and no
# seed is set in the visible cells, so repeated runs may label differently.
generation_args = dict(
    max_new_tokens=500,
    return_full_text=False,
    temperature=0.7,
    do_sample=True,
)

Device set to use cuda:0


In [3]:
def generate(sentence:str):
    """Ask the model which persuasion strategy ``sentence`` uses.

    Embeds ``sentence`` in a few-shot labelling prompt, sends it as a
    system + user chat through the module-level ``pipe`` (called with
    ``generation_args``), and returns the text of the first generation
    with surrounding whitespace stripped.

    Args:
        sentence: The sentence to label.

    Returns:
        The stripped ``generated_text`` of the first pipeline result.
    """
    user_prompt = f"""You are given a sentence.  
Your task is to identify and label the persuasion strategy it uses from the defined list of six strategies.  
You must also clearly explain why that strategy was chosen, and your explanation MUST be enclosed inside <reason> and </reason> tags exactly.  

Guidelines for labeling:  
- Read the sentence carefully and determine its primary persuasion approach.  
- Only select ONE label from the given six strategies.  
- The label should match the strategy whose description aligns most closely with the intent and tone of the sentence.  
- When writing the reasoning, explain the specific features, keywords, or message style in the sentence that make it fit the chosen persuasion strategy.  
- The reasoning MUST be enclosed inside <reason> and </reason> tags — this is mandatory.  
- Be concise but clear, ensuring the explanation makes it obvious why that persuasion type applies.  

Persuasion strategies:  
1. Logical appeal – Uses factual reasoning, comparisons, or cost-benefit arguments to persuade.  
2. Credibility appeal – Highlights trustworthiness, reputation, or reliability of the provider or source.  
3. Emotional appeal – Focuses on feelings of safety, comfort, reassurance, or peace of mind.  
4. Personal appeal – Directly tailors the recommendation to the user’s stated needs or preferences.  
5. Persona appeal – Aligns the recommendation with the user’s lifestyle, habits, or personal identity.  
6. Default – Provides neutral, purely informative content without persuasion.  

Output format:  
Label: <strategy name>  
<reason>Reasoning for why this strategy was chosen, based on sentence features.</reason>  

Few-shot examples:  

Example 1:  
Sentence: "This plan saves you $200 annually compared to your current policy while offering the same coverage."  
Label: Logical appeal  
<reason>The plan presents a cost comparison and equivalent coverage, which is a fact-based logical argument.</reason>  

Example 2:  
Sentence: "Our company has been rated #1 in customer satisfaction for five years in a row."  
Label: Credibility appeal  
<reason>It highlights proven reputation and customer satisfaction history, which builds trust and credibility.</reason>  

Example 3:  
Sentence: "With this policy, you can relax knowing your family is protected in any situation."  
Label: Emotional appeal  
<reason>The focus is on feelings of security and reassurance, aiming to evoke emotional comfort.</reason>  

Example 4:  
Sentence: "Since you mentioned wanting full coverage for long road trips, this plan includes unlimited roadside assistance."  
Label: Personal appeal  
<reason>The recommendation is tailored to the user's stated need for long-trip coverage and roadside support.</reason>  

Example 5:  
Sentence: "If you enjoy weekend camping trips, this plan covers damage from off-road driving."  
Label: Persona appeal  
<reason>The plan connects to the user's lifestyle and activities, linking features to camping and off-road travel.</reason>  

Example 6:  
Sentence: "This policy covers liability, collision, and comprehensive damage."  
Label: Default  
<reason>The policy coverage is purely informative and describes coverage without persuasive elements.</reason>  

Now, for the sentence provided below, identify the persuasion strategy and give the reasoning inside <reason> tags as instructed.  

Sentence: "{sentence}"  
Label:  
<reason></reason>  
"""

    # Two-turn chat: a fixed system instruction followed by the filled-in prompt.
    chat = [
        {"role": "system", "content": "you are a helpful assistant. Label the sentence based on Persuassion Strategy and provide the reason"},
        {"role": "user", "content": user_prompt},
    ]

    # Only the first returned generation is used.
    generations = pipe(chat, **generation_args)
    return generations[0]['generated_text'].strip()


In [5]:
# Smoke test: run one sample sentence through generate() and print the raw reply.
print(generate("Our comprehensive motor insurance covers accidents, theft, and damage, with premiums starting from just $1.20 a day — less than your morning coffee."))

Label: Logical appeal  
<reason>The sentence presents a factual comparison between the cost of the insurance premium and a daily expense (morning coffee), which is a logical and cost-benefit argument.</reason>


In [15]:
import pandas as pd
import os
import re

# Regex patterns used to parse the text returned by generate().
#
# LABEL_RE captures the text that follows "Label:" up to the end of that line
# or the next "<". The capture must start with a non-whitespace character and
# must not start at a "<reason>" tag, so an empty "Label:" line that is
# directly followed by "<reason>…" yields no match instead of capturing the
# reason markup as the label.
LABEL_RE = re.compile(r"Label\s*:\s*(?!<reason>)(\S.*?)\s*(?:\n|$|<)", re.IGNORECASE)
# REASON_RE captures everything between the first <reason>…</reason> pair;
# DOTALL lets the reasoning span several lines.
REASON_RE = re.compile(r"<reason>(.*?)</reason>", re.IGNORECASE | re.DOTALL)

def parse_extraction(text: str) ->tuple:
    """Split a model reply into its ``(label, reason)`` parts.

    Each part is the stripped first capture group of ``LABEL_RE`` /
    ``REASON_RE``; a part whose pattern does not match is returned as ``""``.
    Falsy input (``None`` or an empty string) yields ``("", "")``.
    """
    if not text:
        return "", ""

    def _first_group(pattern):
        # Stripped group 1 of the first match, or "" when there is none.
        found = pattern.search(text)
        return found.group(1).strip() if found else ""

    return _first_group(LABEL_RE), _first_group(REASON_RE)

def _append_row_to_csv(row, output_path: str, write_header: bool) -> None:
    """Append a one-row DataFrame to ``output_path`` and force it to disk."""
    # newline="" leaves line-ending handling to the CSV writer.
    with open(output_path, "a", newline="", encoding="utf-8") as handle:
        row.to_csv(handle, header=write_header, index=False)
        # Flush Python's buffer, then ask the OS to persist this file only.
        handle.flush()
        os.fsync(handle.fileno())


def process_csv_with_pandas(input_path: str, output_path: str, start_from: int = 0):
    """
    Label every row of the input CSV and append the results to the output CSV.

    For each processed row the text in the 'new_agent_reply' column is passed
    to ``generate`` and the reply is split with ``parse_extraction``; the
    original row plus 'label' and 'reason' columns is appended to the output
    file immediately, so progress survives an interruption. Requires
    ``generate(sentence)`` and ``parse_extraction(text)`` in scope.

    Rows whose 'new_agent_reply' is missing or blank are not sent to the
    model; they are written with an empty label and reason.

    Resuming: if the output file already holds rows, processing starts at
    ``max(start_from, rows_already_in_output)``. This treats the number of
    existing output rows as an input row index, which only holds when the
    earlier run started from row 0 — with a non-zero ``start_from`` an
    interrupted run cannot be resumed reliably by simply calling again.
    Appended rows are assumed to have the same columns, in the same order,
    as the rows already in the output file.

    Args:
        input_path: Path to input CSV
        output_path: Path to output CSV
        start_from: Row index (0-based) to start processing from

    Raises:
        KeyError: If the input CSV has no 'new_agent_reply' column.
    """
    # A zero-byte file is treated like a missing one (read_csv would raise on it).
    has_output = os.path.exists(output_path) and os.path.getsize(output_path) > 0
    processed_count = len(pd.read_csv(output_path)) if has_output else 0

    # Read the whole input file
    df = pd.read_csv(input_path)
    if "new_agent_reply" not in df.columns:
        raise KeyError("Input CSV must contain a 'new_agent_reply' column.")

    # Create the output directory up front so a missing folder does not
    # surface only after the first (expensive) generation.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Determine actual starting point
    start_index = max(start_from, processed_count)
    # The header is written only when starting a fresh (or empty) output file.
    write_header = not has_output

    # Process remaining rows one by one
    for idx in range(start_index, len(df)):
        raw_value = df["new_agent_reply"].iloc[idx]
        # Missing cells would otherwise be stringified to "nan" and labelled.
        sentence = "" if pd.isna(raw_value) else str(raw_value).strip()
        if sentence:
            label, reason = parse_extraction(generate(sentence))
        else:
            label, reason = "", ""

        # Original row plus the two result columns
        row = df.iloc[[idx]].copy()
        row["label"] = label
        row["reason"] = reason

        # Append just this row instead of rewriting the whole file each time
        _append_row_to_csv(row, output_path, write_header)
        write_header = False

    print(f"Processing complete. Output saved to {output_path}")



In [22]:
# Absolute, machine-specific locations of the input CSV and the results CSV.
input_path="/DATA/rohan_kirti/niladri/Expert Datatset/Book1.csv"
output_path="/DATA/rohan_kirti/niladri/Expert Datatset/Engagement/results.csv"
# Request a start at input row 4 (0-based); process_csv_with_pandas starts
# later than that if the output file already holds more than 4 rows.
process_csv_with_pandas(input_path, output_path, start_from=4)

Processing complete. Output saved to /DATA/rohan_kirti/niladri/Expert Datatset/Engagement/results.csv
