In [1]:
# Install the required packages

!pip uninstall -y transformers accelerate
!pip install transformers[torch] accelerate -U
!pip install datasets --upgrade  # Upgrade datasets and its dependencies, including pyarrow

# Import necessary libraries
import pandas as pd
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
import torch

# Verify the package versions
import transformers
import accelerate
import pyarrow

print("Transformers version:", transformers.__version__)
print("Accelerate version:  ", accelerate.__version__)
print("PyArrow version:     ", pyarrow.__version__)

Found existing installation: transformers 4.24.0
Uninstalling transformers-4.24.0:


ERROR: Exception:
Traceback (most recent call last):
  File "E:\Anaconda\lib\shutil.py", line 816, in move
    os.rename(src, real_dst)
PermissionError: [WinError 5] Access is denied: 'e:\\anaconda\\lib\\site-packages\\transformers-4.24.0.dist-info\\' -> 'C:\\Users\\kumar\\AppData\\Local\\Temp\\pip-uninstall-6p12hrbe'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "E:\Anaconda\lib\site-packages\pip\_internal\cli\base_command.py", line 160, in exc_logging_wrapper
    status = run_func(*args)
  File "E:\Anaconda\lib\site-packages\pip\_internal\commands\uninstall.py", line 98, in run
    uninstall_pathset = req.uninstall(
  File "E:\Anaconda\lib\site-packages\pip\_internal\req\req_install.py", line 660, in uninstall
    uninstalled_pathset.remove(auto_confirm, verbose)
  File "E:\Anaconda\lib\site-packages\pip\_internal\req\req_uninstall.py", line 373, in remove
    moved.stash(path)
  File "E:\Anaconda\lib\site-packages\pip

Defaulting to user installation because normal site-packages is not writeable
Collecting transformers[torch]
  Downloading transformers-4.41.2-py3-none-any.whl (9.1 MB)
     ---------------------------------------- 9.1/9.1 MB 3.7 MB/s eta 0:00:00
Collecting accelerate
  Downloading accelerate-0.31.0-py3-none-any.whl (309 kB)
     -------------------------------------- 309.4/309.4 kB 4.8 MB/s eta 0:00:00
Collecting safetensors>=0.4.1
  Downloading safetensors-0.4.3-cp310-none-win_amd64.whl (287 kB)
     -------------------------------------- 287.4/287.4 kB 3.5 MB/s eta 0:00:00
Collecting tokenizers<0.20,>=0.19
  Downloading tokenizers-0.19.1-cp310-none-win_amd64.whl (2.2 MB)
     ---------------------------------------- 2.2/2.2 MB 4.6 MB/s eta 0:00:00
Installing collected packages: safetensors, tokenizers, accelerate, transformers
Successfully installed accelerate-0.31.0 safetensors-0.4.3 tokenizers-0.19.1 transformers-4.41.2




Transformers version: 4.41.2
Accelerate version:   0.31.0


In [2]:
# Step 1: Load the dataset

dataset = load_dataset("Kaludi/Customer-Support-Responses")
print(dataset)
print(dataset['train'][0])

Downloading data:   0%|          | 0.00/12.4k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/74 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['query', 'response'],
        num_rows: 74
    })
})
{'query': "My order hasn't arrived yet.", 'response': 'We apologize for the inconvenience. Can you please provide your order number so we can investigate?'}


In [3]:
# Step 2: Tokenize the dataset

tokenizer = AutoTokenizer.from_pretrained("gpt2")

# Add a padding token
tokenizer.add_special_tokens({'pad_token': tokenizer.eos_token})

def preprocess_function(examples):
    inputs = examples['query']
    targets = examples['response']
    model_inputs = tokenizer(inputs, max_length=128, truncation=True, padding="max_length")
    with tokenizer.as_target_tokenizer():
        labels = tokenizer(targets, max_length=128, truncation=True, padding="max_length")
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

tokenized_datasets = dataset.map(preprocess_function, batched=True)

# Split the dataset into train and test sets
train_test_split = tokenized_datasets['train'].train_test_split(test_size=0.2)
train_dataset = train_test_split['train']
eval_dataset = train_test_split['test']


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Map:   0%|          | 0/74 [00:00<?, ? examples/s]



In [12]:
# Step 3: Load and train the model

from transformers import AutoModelForCausalLM, TrainingArguments, Trainer
import torch

# Load the pretrained GPT-2 model
model = AutoModelForCausalLM.from_pretrained("gpt2")

# Add the padding token to the tokenizer and resize the model's token embeddings
tokenizer.add_special_tokens({'pad_token': tokenizer.eos_token})
model.resize_token_embeddings(len(tokenizer))

# Define the preprocessing function
def preprocess_function(examples):
    inputs = examples['query']
    targets = examples['response']
    model_inputs = tokenizer(inputs, max_length=128, truncation=True, padding="max_length")
    with tokenizer.as_target_tokenizer():
        labels = tokenizer(targets, max_length=128, truncation=True, padding="max_length")
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

# Apply preprocessing to the dataset
tokenized_datasets = dataset.map(preprocess_function, batched=True)

# Split the dataset into train and test sets
train_test_split = tokenized_datasets['train'].train_test_split(test_size=0.2)
train_dataset = train_test_split['train']
eval_dataset = train_test_split['test']

# Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=10,  # Increase the number of epochs
    weight_decay=0.01,
    logging_dir='./logs',  # Enable logging
    logging_steps=10,
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
)

# Check for GPU availability and move model to device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Train the model
trainer.train()


Map:   0%|          | 0/74 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss
1,3.8822,1.531229
2,1.2223,1.144381
3,1.1349,1.155222
4,0.9744,1.034374
5,0.9302,1.00727
6,0.8784,1.013443
7,0.8729,0.973376
8,0.8575,0.97531
9,0.8552,0.996601
10,0.8159,0.980501


TrainOutput(global_step=150, training_loss=1.1744779777526855, metrics={'train_runtime': 1286.3002, 'train_samples_per_second': 0.459, 'train_steps_per_second': 0.117, 'total_flos': 38540574720000.0, 'train_loss': 1.1744779777526855, 'epoch': 10.0})

In [None]:
# Step 4: Evaluate the model

eval_results = trainer.evaluate()
print(f"Evaluation results: {eval_results}")

# Function to generate responses
def generate_response(query):
    inputs = tokenizer(query, return_tensors="pt", padding=True, truncation=True)
    inputs = inputs.to(device)
    outputs = model.generate(**inputs, max_length=50, pad_token_id=tokenizer.eos_token_id)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# Test the model with a sample query
test_query = "My order hasn't arrived yet."
print(f"Generated response for '{test_query}': {generate_response(test_query)}")

# Additional tests to verify model performance
additional_queries = [
    "How do I know if my order went through?",
    "How do I reset my password?",
    "My account has been locked.",
    "Can I get a copy of my receipt?"
]

for query in additional_queries:
    response = generate_response(query)
    print(f"Query: {query}\nResponse: {response}\n")


In [26]:
# Step 5: Interactive demo
from IPython.display import display, HTML
import ipywidgets as widgets

# Define the input and output text areas with additional styling
input_box = widgets.Textarea(
    value='',
    placeholder='Type your query here...',
    description='Query:',
    layout=widgets.Layout(width='50%', height='90px'),
    style={'description_width': 'initial'},
    disabled=False
)

output_box = widgets.Textarea(
    value='',
    placeholder='Response will be shown here...',
    description='Response:',
    layout=widgets.Layout(width='50%', height='90px'),
    style={'description_width': 'initial'},
    disabled=True
)

# Define a button with custom styling
button = widgets.Button(
    description="Generate Response",
    button_style='success',
    tooltip='Click to generate a response',
    icon='robot'
)

# Define a function to handle button clicks
def on_button_click(b):
    query = input_box.value
    response = generate_response(query)
    output_box.value = response

button.on_click(on_button_click)

# Display title
title = widgets.HTML(value="<h1 style='color: #4CAF50;'>Customer Support Chatbot</h1>")

# Create a vertical box layout for all components
ui = widgets.VBox([title, input_box, button, output_box])

display(ui)

VBox(children=(HTML(value="<h1 style='color: #4CAF50;'>Customer Support Chatbot</h1>"), Textarea(value='', des…