In [71]:
import pandas as pd
from transformers import GPT2LMHeadModel, GPT2Tokenizer, TextDataset, DataCollatorForLanguageModeling
from transformers import Trainer, TrainingArguments

# Load the raw customer-service exchanges from CSV
df = pd.read_csv("chatbot_data_2.csv")

# Render every row as a single "Customer: ... Agent: ..." training line.
dialogues = [
    f"Customer: {context} Agent: {agent_action}"
    for context, agent_action in zip(df["context"], df["agent_action"])
]

# Drop duplicate lines while keeping first-seen CSV order. A plain set would
# also deduplicate, but its iteration order over strings changes between
# interpreter runs, which would make the training file (and therefore the
# fine-tuning run) non-reproducible.
data = list(dict.fromkeys(dialogues))

# Save the unique dialogues, one per line. The encoding is explicit so the
# file content does not depend on the platform's default text encoding.
with open('customer_service_data_csv.txt', 'w', encoding='utf-8') as f:
    f.writelines(f"{item}\n" for item in data)


In [72]:
# Scratch inspection cell. A notebook renders only the value of a cell's last
# expression, so the two bare expressions below produce no visible output.
type(data)

data
# NOTE(review): `df.iterrows` is referenced without being called, so the cell
# displays the bound-method repr (which embeds the frame's repr) instead of
# iterating rows. `df.head()` is probably what was intended — confirm.
df.iterrows

<bound method DataFrame.iterrows of                                                context  \
0    User: Is product X included in the current pro...   
1    User inquires about loyalty program points ear...   
2          User: How do I sign up for your newsletter?   
3    User wants to dispute a charge on their credit...   
4    User inquires about the company's social respo...   
..                                                 ...   
124  User wants to know about accepted payment meth...   
125  User: I have a question about your company's p...   
126  User: Can you provide assistance with a techni...   
127  User asks about product compatibility with ano...   
128  User needs help connecting a device to their W...   

                                          agent_action  
0    Let me find out for you! What promotion are yo...  
1    Explain how points are calculated and offer op...  
2    You can sign up for our newsletter on our webs...  
3    Provide instructions on how to ini

In [73]:

# Step 3: Fine-tune the GPT-2 model
# Everything the Trainer needs is assembled here: tokenizer, training corpus,
# batch collator, base model and hyper-parameters.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

# The text file written by the data-preparation cell is the training corpus;
# it is read here, not written.
train_dataset = TextDataset(tokenizer=tokenizer, file_path="customer_service_data_csv.txt", block_size=128)

# mlm=False: plain next-token (causal) language modelling, no token masking.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Base checkpoint that will be fine-tuned.
model = GPT2LMHeadModel.from_pretrained("gpt2")

training_config = {
    "output_dir": "./gpt2-fine-tuned",   # where the fine-tuned model is written
    "overwrite_output_dir": True,
    "num_train_epochs": 5,               # tune as needed
    "per_device_train_batch_size": 4,    # lower this if memory is tight
    "save_steps": 10_000,
    "save_total_limit": 2,
}
training_args = TrainingArguments(**training_config)





In [74]:
# Wire the model, hyper-parameters, collator and dataset into a Trainer and
# run the fine-tuning loop.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_dataset,
)

trainer.train()

# Step 4: Save the fine-tuned model
# The destination is taken from `training_args.output_dir` instead of being
# spelled out a second time, so the two can never drift apart. The tokenizer
# files are written next to the weights so the directory is a self-contained
# checkpoint. `save_model` stays last so the cell still ends on an expression
# that evaluates to None (no extra Out[...] noise).
tokenizer.save_pretrained(training_args.output_dir)
trainer.save_model(training_args.output_dir)


                                              
100%|██████████| 30/30 [03:13<00:00,  6.45s/it]


{'train_runtime': 193.406, 'train_samples_per_second': 0.595, 'train_steps_per_second': 0.155, 'train_loss': 2.2226834615071613, 'epoch': 5.0}


In [84]:
import random
from transformers import GPT2LMHeadModel, GPT2Tokenizer
 
# Inference setup: the weights come from the fine-tuning output directory,
# the tokenizer from the stock "gpt2" checkpoint.
model_path = "./gpt2-fine-tuned"
tokenizer_path = "gpt2"

tokenizer = GPT2Tokenizer.from_pretrained(tokenizer_path)
model = GPT2LMHeadModel.from_pretrained(model_path)

# Fall back to the end-of-sequence id for padding when the tokenizer does not
# define a padding token of its own.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id
 
# One-shot inference function
def one_shot_inference(customer_input, max_new_tokens=5, do_sample=False):
    """Generate an agent reply for ``customer_input`` using a one-shot prompt.

    A single fixed User/Agent exchange is prepended as an example, followed by
    the new user message and an open ``Agent:`` turn for the model to complete.
    Relies on the module-level ``tokenizer`` and ``model``.

    Args:
        customer_input: The user's message, inserted verbatim into the prompt.
        max_new_tokens: Number of tokens the model may add after the prompt.
            Defaults to 5, the budget the notebook has always used; raise it
            to get replies that are not cut off mid-sentence.
        do_sample: When False (default) decoding is greedy and deterministic.
            When True the reply is sampled with temperature 0.9, top-k 50 and
            top-p 0.95.

    Returns:
        The generated reply as a stripped string, truncated at the first
        "User:" marker if the model starts writing another turn.
    """
    # Providing a single example as context
    example_context = "User: I want to update my address. Agent: I can help you with that. Please provide your new address."
    prompt = f"{example_context} User: {customer_input} Agent:"
    inputs = tokenizer(prompt, return_tensors='pt')
    prompt_length = inputs["input_ids"].shape[1]

    generation_kwargs = dict(
        # Passed explicitly: padding and EOS share an id here, so the mask
        # cannot be reliably inferred from the input ids.
        attention_mask=inputs["attention_mask"],
        max_length=prompt_length + max_new_tokens,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id,
        num_return_sequences=1,
        do_sample=do_sample,
        no_repeat_ngram_size=2,  # Prevents repeating n-grams
    )
    if do_sample:
        # Sampling knobs only take effect when sampling is enabled; passing
        # them to greedy decoding is a silent no-op, so they are added only
        # when they actually apply.
        generation_kwargs.update(temperature=0.9, top_k=50, top_p=0.95)

    output = model.generate(inputs["input_ids"], **generation_kwargs)

    # Decode only the tokens produced after the prompt. Removing the prompt by
    # string replacement on the full decoded text silently fails whenever
    # decoding does not reproduce the prompt character-for-character.
    generated_ids = output[0][prompt_length:]
    decoded_output = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    # Keep just the agent's turn if the model begins a new user turn.
    decoded_output = decoded_output.split("User:")[0].strip()
    return decoded_output
 
# Demo: send one sample message through the helper and echo both sides of
# the exchange, one per line.
customer_input = "user order is delayed by several days and its at our delivery hub."
agent_action = one_shot_inference(customer_input)
print(f'User: {customer_input}', f'Agent: {agent_action}', sep='\n')



User: user order is delayed by several days and its at our delivery hub.
Agent: Explain the delay and offer
