**Research Context:**

In the field of language models and generative AI, generating coherent and contextually appropriate conversations has become a pivotal area of exploration. This problem statement challenges participants to create an extensive sales-conversation dataset, drawing inspiration from the research paper "Let the LLMs Talk" (arXiv:2312.02913). The paper illustrates the potential of LLMs to engage in meaningful and diverse conversations with one another.
Submitted By - Anuroop Arya

In [1]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import pandas as pd
from datetime import datetime

# Checkpoint name shared by the tokenizer and the language model
model_name = "gpt2-medium"

# Build the tokenizer and alias its padding token to the end-of-sentence token
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

# Load the pre-trained weights, then switch the model to evaluation mode
model = GPT2LMHeadModel.from_pretrained(model_name)
model.eval()

def generate_reply(prompt, max_tokens=50, temperature=0.9, top_p=0.95):
    """Sample a continuation of ``prompt`` from the module-level GPT-2 model.

    Args:
        prompt: Text the model is conditioned on.
        max_tokens: Upper bound on the number of newly sampled tokens.
        temperature: Sampling temperature forwarded to ``model.generate``.
        top_p: Nucleus-sampling threshold forwarded to ``model.generate``.

    Returns:
        The decoded first sequence returned by ``model.generate`` with
        special tokens removed. For a decoder-only model such as GPT-2 this
        is the prompt followed by the sampled continuation, so callers that
        want only the new text must strip the prompt themselves.
    """
    # Calling the tokenizer (instead of ``encode``) also returns the attention
    # mask. It has to be passed explicitly because the pad token is aliased to
    # EOS, which prevents ``generate`` from inferring the mask on its own.
    encoded = tokenizer(prompt, return_tensors="pt")

    with torch.no_grad():
        reply_ids = model.generate(
            encoded["input_ids"],
            attention_mask=encoded["attention_mask"],
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            pad_token_id=tokenizer.eos_token_id,
            do_sample=True,
        )

    return tokenizer.decode(reply_ids[0], skip_special_tokens=True)

def _extract_turn_text(generated, prompt):
    """Return only the current speaker's utterance from ``generated``."""
    if generated.startswith(prompt):
        # The generator echoes the prompt; drop it to keep the new text only.
        continuation = generated[len(prompt):]
    else:
        # Decoding may not reproduce the prompt byte-for-byte; fall back to
        # the text after the last label separator.
        continuation = generated.split(": ")[-1]

    # The model frequently keeps writing lines for the other party; cut the
    # text at the first following speaker label so each row holds one turn.
    for label in ("\nCustomer:", "\nSalesperson:"):
        continuation = continuation.split(label, 1)[0]
    return continuation.strip()


def start_conversation(prompt, num_turns=20):
    """Generate a sales conversation with a given number of turns.

    Args:
        prompt: Opening line, optionally prefixed with a ``"<label>: "``
            speaker tag. The tag is removed; a prompt without a tag is used
            as-is. The opening line is always attributed to "Salesperson".
        num_turns: Number of generated turns appended after the opening
            line. Turns alternate, starting with "Customer".

    Returns:
        A list of ``(speaker, text, timestamp)`` tuples, where ``timestamp``
        is the ISO-8601 string of ``datetime.now()`` when the turn was added.
    """
    # Split on the first separator only, so an opening line that itself
    # contains ": " is kept whole and a label-less prompt does not raise.
    _, separator, remainder = prompt.partition(": ")
    opening_line = remainder if separator else prompt
    dialogue = [("Salesperson", opening_line, datetime.now().isoformat())]

    speakers = ("Customer", "Salesperson")
    for turn in range(num_turns):
        speaker = speakers[turn % 2]
        last_reply = dialogue[-1][1]

        # Only the previous utterance is used as context for the next turn.
        next_prompt = f"{last_reply}\n{speaker}:"
        reply = generate_reply(next_prompt)
        text = _extract_turn_text(reply, next_prompt)
        dialogue.append((speaker, text, datetime.now().isoformat()))

    return dialogue

def save_dialogue_to_csv(dialogue, filename="generated_dialogue.csv"):
    """Write the dialogue rows to ``filename`` as CSV and report the path.

    Each element of ``dialogue`` becomes one row under the header
    ``Speaker,Text,Timestamp``; no index column is written.
    """
    header = ["Speaker", "Text", "Timestamp"]
    table = pd.DataFrame(dialogue, columns=header)
    table.to_csv(filename, index=False)
    print(f"Dialogue saved to {filename}")

# Opening line of the conversation. The "Salesperson: " label is stripped by
# start_conversation, which records the rest as the first dialogue entry.
initial_prompt = "Salesperson: Good morning! How can I assist you with your tech needs today?"
# Generate the dialogue with the default number of turns.
dialogue = start_conversation(initial_prompt)

# Save the dialogue to a CSV file (default name: generated_dialogue.csv)
save_dialogue_to_csv(dialogue)



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Dialogue saved to generated_dialogue.csv
