<a href="https://colab.research.google.com/github/jyotidabass/Building-conversational-agents-and-chatbots-using-Large-Language-Models-LLMs-/blob/main/Building_conversational_agents_and_chatbots_using_Large_Language_Models%C2%A0(LLMs).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Conversational Agent:**

A conversational agent is a type of AI model that can engage in natural-sounding conversations with humans. It can understand the context of the conversation and respond accordingly.

In [6]:
# Import necessary libraries
import pandas as pd
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Define dummy data
dummy_data = {
    "input_text": ["Hello, how are you?", "What is your name?", "I love reading books."],
    "output_text": ["I'm doing well, thanks!", "My name is AI Assistant.", "That's great! What kind of books do you like to read?"]
}

# Create a pandas dataframe from the dummy data
df = pd.DataFrame(dummy_data)

# Load the pre-trained LLM model and tokenizer
model_name = "t5-base"
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Define a function to generate responses using the LLM model
def generate_response(input_text):
    inputs = tokenizer(input_text, return_tensors="pt")
    outputs = model.generate(**inputs)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# Test the conversational agent
for input_text, output_text in zip(df["input_text"], df["output_text"]):
    response = generate_response(input_text)
    print(f"Input: {input_text}")
    print(f"Expected Output: {output_text}")
    print(f"Generated Response: {response}")
    print()

Input: Hello, how are you?
Expected Output: I'm doing well, thanks!
Generated Response: , how are you? Hello, how are you?

Input: What is your name?
Expected Output: My name is AI Assistant.
Generated Response: What is your name?

Input: I love reading books.
Expected Output: That's great! What kind of books do you like to read?
Generated Response: reading books.



# **Chatbot:**

A chatbot is a type of AI model that can have a conversation with a human, but it's more focused on providing specific information or answering frequently asked questions.

In [7]:
# Import necessary libraries
import pandas as pd
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Define dummy data
dummy_data = {
    "input_text": ["What is the capital of France?", "What is the weather like today?", "How do I book a flight?"],
    "output_text": ["The capital of France is Paris.", "I'm not sure, but you can check online for the current weather.", "You can book a flight by visiting a travel website or contacting an airline directly."]
}

# Create a pandas dataframe from the dummy data
df = pd.DataFrame(dummy_data)

# Load the pre-trained LLM model and tokenizer
model_name = "t5-base"
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Define a function to generate responses using the LLM model
def generate_response(input_text):
    inputs = tokenizer(input_text, return_tensors="pt")
    outputs = model.generate(**inputs)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# Test the chatbot
for input_text, output_text in zip(df["input_text"], df["output_text"]):
    response = generate_response(input_text)
    print(f"Input: {input_text}")
    print(f"Expected Output: {output_text}")
    print(f"Generated Response: {response}")
    print()

Input: What is the capital of France?
Expected Output: The capital of France is Paris.
Generated Response: a á á á á á 

Input: What is the weather like today?
Expected Output: I'm not sure, but you can check online for the current weather.
Generated Response: What is the weather like today?

Input: How do I book a flight?
Expected Output: You can book a flight by visiting a travel website or contacting an airline directly.
Generated Response: How do I book a flight?



# **Fine-tuning the LLM model:**

To improve the performance of the conversational agent and chatbot, you can fine-tune the pre-trained LLM model on your own dataset. Here's an example of how you can do this:

In [9]:
!pip install transformers datasets accelerate
import pandas as pd
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from sklearn.model_selection import train_test_split

# Load the pre-trained LLM model and tokenizer
model_name = "t5-base"
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Define dummy data
dummy_data = {
    "input_text": ["Hello, how are you?", "What is your name?", "I love reading books."],
    "output_text": ["I'm doing well, thanks!", "My name is AI Assistant.", "That's great! What kind of books do you like to read?"]
}

# Create a pandas dataframe from the dummy data
df = pd.DataFrame(dummy_data)

# Split the data into training and validation sets
train_text, val_text, train_labels, val_labels = train_test_split(df["input_text"], df["output_text"], random_state=42, test_size=0.2)

# Create a custom dataset class for our data
class ConversationalDataset(torch.utils.data.Dataset):
    def __init__(self, input_text, output_text, tokenizer):
        self.input_text = input_text
        self.output_text = output_text
        self.tokenizer = tokenizer

    def __getitem__(self, idx):
        # Use .iloc to access elements by integer location
        input_text = self.input_text.iloc[idx]
        output_text = self.output_text.iloc[idx]

        encoding = self.tokenizer.encode_plus(
            input_text,
            max_length=512,
            padding="max_length",
            truncation=True,
            return_attention_mask=True,
            return_tensors="pt",
        )

        decoding = self.tokenizer.encode_plus(
            output_text,
            max_length=512,
            padding="max_length",
            truncation=True,
            return_attention_mask=True,
            return_tensors="pt",
        )

        return {
            "input_ids": encoding["input_ids"].flatten(),
            "attention_mask": encoding["attention_mask"].flatten(),
            "labels": decoding["input_ids"].flatten(),
        }

    def __len__(self):
        return len(self.input_text)

# Create dataset instances for training and validation
train_dataset = ConversationalDataset(train_text, train_labels, tokenizer)
val_dataset = ConversationalDataset(val_text, val_labels, tokenizer)

# Create data loaders for training and validation
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=16, shuffle=True)
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=16)

# Set the device (GPU or CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Define the optimizer and scheduler
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

# Train the model
for epoch in range(5):
    model.train()
    total_loss = 0
    for batch in train_dataloader:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)

        optimizer.zero_grad()

        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss

        loss.backward()
        optimizer.step()
        scheduler.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_dataloader)}")

    model.eval()
    with torch.no_grad():
        total_loss = 0
        for batch in val_dataloader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)

            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss

            total_loss += loss.item()

        print(f"Validation Loss: {total_loss / len(val_dataloader)}")

Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.2.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m17.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl 

Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


Epoch 1, Loss: 18.28537940979004
Validation Loss: 19.83492660522461
Epoch 2, Loss: 19.13490104675293
Validation Loss: 19.34803009033203
Epoch 3, Loss: 16.941274642944336
Validation Loss: 18.874942779541016
Epoch 4, Loss: 16.27061653137207
Validation Loss: 18.46263885498047
Epoch 5, Loss: 15.144869804382324
Validation Loss: 18.14051055908203
