In [1]:
from transformers import pipeline
import torch


pipe = pipeline(
    "text2text-generation", 
    model="/scratch/rohank__iitp/gemma3_4b_it",
    device_map="auto",
    torch_dtype=torch.bfloat16
)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0
The model 'Gemma3ForConditionalGeneration' is not supported for text2text-generation. Supported models are ['PeftModelForSeq2SeqLM', 'BartForConditionalGeneration', 'BigBirdPegasusForConditionalGeneration', 'BlenderbotForConditionalGeneration', 'BlenderbotSmallForConditionalGeneration', 'EncoderDecoderModel', 'FSMTForConditionalGeneration', 'GPTSanJapaneseForConditionalGeneration', 'GraniteSpeechForConditionalGeneration', 'LEDForConditionalGeneration', 'LongT5ForConditionalGeneration', 'M2M100ForConditionalGeneration', 'MarianMTModel', 'MBartForConditionalGeneration', 'MT5ForConditionalGeneration', 'MvpForConditionalGeneration', 'NllbMoeForConditionalGeneration', 'PegasusForConditionalGeneration', 'PegasusXForConditionalGeneration', 'PLBartForConditionalGeneration', 'ProphetNetForConditionalGeneration', 'Qwen2AudioForConditionalGeneration', 'SeamlessM4TForTextToText', 'SeamlessM4Tv2ForTextToText', 'SwitchTransformersForConditionalGeneration', 'T5ForConditionalG

In [2]:
def generate(prompt:str) ->str:
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ]

    # Build prompt
    formatted_input = ""
    for message in messages:
        if isinstance(message["content"], list): 
            for item in message["content"]:
                if item["type"] == "text":
                    formatted_input += f"{message['role']}: {item['text']}\n"
        else:
            formatted_input += f"{message['role']}: {message['content']}\n"
    formatted_input += "assistant:"

    # Generate response
    output = pipe(formatted_input, max_new_tokens=100)[0]["generated_text"]

    # Extract only the assistant's response
    assistant_response = output[len(formatted_input):].strip()

    return assistant_response.strip()


generate("Tell about finance")  # Example usage

"Finance is a broad field that encompasses the management of money and investments. Here's a breakdown of key areas within finance:\n\n**1. Personal Finance:**\n\n*   **Budgeting:** Creating a plan for how to spend your money.\n*   **Saving:** Setting aside money for future goals.\n*   **Investing:** Growing your money through assets like stocks, bonds, and real estate.\n*   **Debt Management:** Paying off loans and avoiding excessive debt.\n*"

In [6]:
import json
import re

def convert_structured_to_jsonl(text_block: str, i: int) -> str:
    data = {
        "id_json":i,

        "answer": text_block.strip()
    }

    res=json.dumps(data)
    with open("/home/rohank__iitp/Work/niladri/gemma/single/single_response.jsonl", "a") as f:
        f.write(res + "\n")
    return res



In [4]:
import pandas as pd
from typing import Optional, List

In [7]:
def csv_load(i: Optional[int] = None,
            speaker_col: str = "speaker",
            history_col: str = "history",
            convo_col: str = "conversation_id") -> List[str]:

    file_path='/home/rohank__iitp/Work/niladri/gemma/conversation.csv'
    df = pd.read_csv(file_path)

    # Optional filtering by conversation_id
    if i is not None:
        df = df[df[convo_col] == i]

    # Filter to only User rows and get the history column as list
    result = df[df[speaker_col] == 'User'][history_col].tolist()

    return result


#### Single gemma

In [20]:
def gemini_response(dialogue):

    prompt = f"""
Instruction:
Continue the conversation as the insurance agent. Respond appropriately to the latest user message. 
And please be brief.

    Give the reply for this query: {dialogue}
    Agent Reply:"""
    return generate(prompt)


In [21]:
result = list()
for i in range(1,21):
    res = csv_load(i)
    # res.pop(0)
    result.extend(res)  # Use extend to flatten the list    
    
len(result)

122

In [22]:
i=1
for sentence in result:
    final_output = gemini_response(sentence)
    res = convert_structured_to_jsonl(final_output, i)
    print(sentence)
    i += 1

conversation history:
current utterance: Hi, I'm looking to get motor insurance for my new electric vehicle. It's a 2024 Tesla Model 3.
conversation history:
User: Hi, I'm looking to get motor insurance for my new electric vehicle. It's a 2024 Tesla Model 3.
Agent: Great choice! The Tesla Model 3 is an excellent vehicle. Since you've opted for an EV, are you particularly interested in coverage specific to electric vehicles, like battery protection?
current utterance: Yes, battery protection is definitely a concern. It's a big investment, and I want to make sure it's covered.
conversation history:
User: Hi, I'm looking to get motor insurance for my new electric vehicle. It's a 2024 Tesla Model 3.
Agent: Great choice! The Tesla Model 3 is an excellent vehicle. Since you've opted for an EV, are you particularly interested in coverage specific to electric vehicles, like battery protection?
User: Yes, battery protection is definitely a concern. It's a big investment, and I want to make sure

In [24]:
import json
import re

# Function to clean markdown and formatting from text
def clean_text(text):
    # Remove markdown symbols and line breaks
    cleaned = re.sub(r'[*`_>#\\\-\r\n]+', ' ', text)
    cleaned = re.sub(r'\s+', ' ', cleaned)  # Collapse multiple spaces into one
    return cleaned.strip()

# Input and output file paths
input_file = "/home/rohank__iitp/Work/niladri/gemma/single/single_response.jsonl"   # Replace with your actual input filename
output_file = "/home/rohank__iitp/Work/niladri/gemma/single/cleaned_output.jsonl"

# Process each line
with open(input_file, "r", encoding="utf-8") as infile, open(output_file, "w", encoding="utf-8") as outfile:
    for line in infile:
        data = json.loads(line)
        data["answer"] = clean_text(data["answer"])
        outfile.write(json.dumps(data) + "\n")

print(f"Cleaned data written to {output_file}")


Cleaned data written to /home/rohank__iitp/Work/niladri/gemma/single/cleaned_output.jsonl
