<a href="https://colab.research.google.com/github/mathu3004/Pearl_Path/blob/Chatbot/ChatbotModelTesting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
import torch
from transformers import DPRQuestionEncoder, DPRQuestionEncoderTokenizer, DPRContextEncoder, DPRContextEncoderTokenizer
from transformers import T5ForConditionalGeneration, T5Tokenizer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Initialize DPR models and tokenizers
question_encoder = DPRQuestionEncoder.from_pretrained('facebook/dpr-question_encoder-single-nq-base')
question_tokenizer = DPRQuestionEncoderTokenizer.from_pretrained('facebook/dpr-question_encoder-single-nq-base')
context_encoder = DPRContextEncoder.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base')
context_tokenizer = DPRContextEncoderTokenizer.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base')

# Initialize T5 model and tokenizer
t5_model = T5ForConditionalGeneration.from_pretrained('t5-base')
t5_tokenizer = T5Tokenizer.from_pretrained('t5-base')

# Load the Excel sheets
restaurant_data = pd.read_csv('/content/Restaurants.csv')
attraction_data = pd.read_excel('Attractions.xlsx')
hotel_data = pd.read_csv('/content/Hotels.csv')

# Preprocess the data to create questions, contexts, and answers
def preprocess_data(data, category):
    questions = []
    contexts = []
    answers = []

    for index, row in data.iterrows():
        if category == 'restaurant':
            question = f"What type of cuisine does {row['name']} serve?"
            context = f"{row['name']} is a restaurant located at {row['address']} that serves {row['cuisines']} cuisine."
            answer = row['cuisines']
        elif category == 'attraction':
            question = f"What is the main attraction of {row['Name']}?"
            context = f"{row['Name']} is located at {row['Address']} and is known for its {row['Description']}."
            answer = row['Description']
        elif category == 'hotel':
            question = f"What amenities does {row['name']} offer?"
            context = f"{row['name']} is a hotel located at {row['address']} that offers amenities such as {row['all_amenities']}."
            answer = row['all_amenities']

        questions.append(question)
        contexts.append(context)
        answers.append(answer)

    return questions, contexts, answers

restaurant_questions, restaurant_contexts, restaurant_answers = preprocess_data(restaurant_data, 'restaurant')
attraction_questions, attraction_contexts, attraction_answers = preprocess_data(attraction_data, 'attraction')
hotel_questions, hotel_contexts, hotel_answers = preprocess_data(hotel_data, 'hotel')

# Combine the data
questions = restaurant_questions + attraction_questions + hotel_questions
contexts = restaurant_contexts + attraction_contexts + hotel_contexts
answers = restaurant_answers + attraction_answers + hotel_answers

# Split the data into training and testing sets
train_questions, test_questions, train_contexts, test_contexts, train_answers, test_answers = train_test_split(
    questions, contexts, answers, test_size=0.2, random_state=42
)

# Encode questions and contexts for training data
train_question_embeddings = []
train_context_embeddings = []

for question in train_questions:
    inputs = question_tokenizer(question, return_tensors='pt')
    outputs = question_encoder(**inputs)
    train_question_embeddings.append(outputs.pooler_output.detach().numpy())

for context in train_contexts:
    inputs = context_tokenizer(context, return_tensors='pt')
    outputs = context_encoder(**inputs)
    train_context_embeddings.append(outputs.pooler_output.detach().numpy())

train_question_embeddings = np.array(train_question_embeddings).squeeze()
train_context_embeddings = np.array(train_context_embeddings).squeeze()

# Compute similarity scores for training data
train_similarity_scores = np.dot(train_question_embeddings, train_context_embeddings.T)

# Retrieve top-k contexts for each question in training data
top_k = 1
train_retrieved_contexts = []
for i in range(len(train_questions)):
    top_context_indices = train_similarity_scores[i].argsort()[-top_k:][::-1]
    train_retrieved_contexts.append([train_contexts[idx] for idx in top_context_indices])

# Generate answers using T5 model for training data
train_generated_answers = []
for i, context in enumerate(train_retrieved_contexts):
    input_text = f"question: {train_questions[i]} context: {' '.join(context)}"
    inputs = t5_tokenizer(input_text, return_tensors='pt')
    outputs = t5_model.generate(**inputs)
    generated_answer = t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
    train_generated_answers.append(generated_answer)

# Evaluate accuracy on training data
train_exact_match = accuracy_score(train_answers, train_generated_answers)
train_precision = precision_score(train_answers, train_generated_answers, average='weighted')
train_recall = recall_score(train_answers, train_generated_answers, average='weighted')
train_f1 = f1_score(train_answers, train_generated_answers, average='weighted')
train_conf_matrix = confusion_matrix(train_answers, train_generated_answers, labels=np.unique(train_answers))

print(f"Training Exact Match: {train_exact_match}")
print(f"Training Precision: {train_precision}")
print(f"Training Recall: {train_recall}")
print(f"Training F1 Score: {train_f1}")
print(f"Training Confusion Matrix:\n{train_conf_matrix}")

# Encode questions and contexts for testing data
test_question_embeddings = []
test_context_embeddings = []

for question in test_questions:
    inputs = question_tokenizer(question, return_tensors='pt')
    outputs = question_encoder(**inputs)
    test_question_embeddings.append(outputs.pooler_output.detach().numpy())

for context in test_contexts:
    inputs = context_tokenizer(context, return_tensors='pt')
    outputs = context_encoder(**inputs)
    test_context_embeddings.append(outputs.pooler_output.detach().numpy())

test_question_embeddings = np.array(test_question_embeddings).squeeze()
test_context_embeddings = np.array(test_context_embeddings).squeeze()

# Compute similarity scores for testing data
test_similarity_scores = np.dot(test_question_embeddings, test_context_embeddings.T)

# Retrieve top-k contexts for each question in testing data
test_retrieved_contexts = []
for i in range(len(test_questions)):
    top_context_indices = test_similarity_scores[i].argsort()[-top_k:][::-1]
    test_retrieved_contexts.append([test_contexts[idx] for idx in top_context_indices])

# Generate answers using T5 model for testing data
test_generated_answers = []
for i, context in enumerate(test_retrieved_contexts):
    input_text = f"question: {test_questions[i]} context: {' '.join(context)}"
    inputs = t5_tokenizer(input_text, return_tensors='pt')
    outputs = t5_model.generate(**inputs)
    generated_answer = t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
    test_generated_answers.append(generated_answer)

# Evaluate accuracy on testing data
test_exact_match = accuracy_score(test_answers, test_generated_answers)
test_precision = precision_score(test_answers, test_generated_answers, average='weighted')
test_recall = recall_score(test_answers, test_generated_answers, average='weighted')
test_f1 = f1_score(test_answers, test_generated_answers, average='weighted')
test_conf_matrix = confusion_matrix(test_answers, test_generated_answers, labels=np.unique(test_answers))

print(f"Testing Exact Match: {test_exact_match}")
print(f"Testing Precision: {test_precision}")
print(f"Testing Recall: {test_recall}")
print(f"Testing F1 Score: {test_f1}")
print(f"Testing Confusion Matrix:\n{test_conf_matrix}")

# Visualization of Accuracy Metrics for Testing Data
metrics = ['Exact Match', 'Precision', 'Recall', 'F1 Score']
test_scores = [test_exact_match, test_precision, test_recall, test_f1]

plt.figure(figsize=(10, 6))
plt.bar(metrics, test_scores, color=['blue', 'green', 'red', 'purple'])
plt.ylim(0, 1)
plt.ylabel('Score')
plt.title('Accuracy Metrics for DPR and T5 Models on Testing Data')
plt.xticks(rotation=45)

for i, score in enumerate(test_scores):
    plt.text(i, score + 0.01, f'{score:.2f}', ha='center', va='bottom')

plt.tight_layout()
plt.show()

# Visualization of Confusion Matrix for Testing Data
plt.figure(figsize=(8, 6))
sns.heatmap(test_conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=np.unique(test_answers), yticklabels=np.unique(test_answers))
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix for Testing Data')
plt.show()


Some weights of the model checkpoint at facebook/dpr-question_encoder-single-nq-base were not used when initializing DPRQuestionEncoder: ['question_encoder.bert_model.pooler.dense.bias', 'question_encoder.bert_model.pooler.dense.weight']
- This IS expected if you are initializing DPRQuestionEncoder from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DPRQuestionEncoder from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at facebook/dpr-ctx_encoder-single-nq-base were not used when initializing DPRContextEncoder: ['ctx_encoder.bert_model.pooler.dense.bias', 'ctx_encoder.bert_model.pooler.dense.weight']
- This IS expected if you are initializing DPRContextEncoder from the

RuntimeError: The size of tensor a (1105) must match the size of tensor b (512) at non-singleton dimension 1