<a href="https://colab.research.google.com/github/mathu3004/Pearl_Path/blob/Chatbot/Chatbot_code_v3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install pymongo
!pip install requests
!pip install transformers
!pip install torch
!pip install google-genai
!pip install scikit-learn
!pip install matplotlib
!pip install seaborn
!pip install nltk
!pip install rouge

Collecting pymongo
  Downloading pymongo-4.11.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (22 kB)
Collecting dnspython<3.0.0,>=1.16.0 (from pymongo)
  Downloading dnspython-2.7.0-py3-none-any.whl.metadata (5.8 kB)
Downloading pymongo-4.11.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m21.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dnspython-2.7.0-py3-none-any.whl (313 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m313.6/313.6 kB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: dnspython, pymongo
Successfully installed dnspython-2.7.0 pymongo-4.11.2
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4

In [4]:
import re
import os
from pymongo import MongoClient
from google import genai
from google.genai import types
import requests
from transformers import DPRQuestionEncoder, DPRContextEncoder, DPRQuestionEncoderTokenizer, DPRContextEncoderTokenizer
from transformers import T5ForConditionalGeneration, T5Tokenizer
import torch
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from rouge import Rouge

# Initialize MongoDB client
client = MongoClient('mongodb+srv://Pearlpath:DMEN2425@pearlpath.lq9jq.mongodb.net/?retryWrites=true&w=majority&appName=PearlPath')
plan_db = client['itineraries']
hotel_db = client['Hotels']
attraction_db = client['Attractions']
restaurant_db = client['Restaurants']

# Initialize DPR models and tokenizers
question_encoder = DPRQuestionEncoder.from_pretrained('facebook/dpr-question_encoder-single-nq-base')
question_tokenizer = DPRQuestionEncoderTokenizer.from_pretrained('facebook/dpr-question_encoder-single-nq-base')
context_encoder = DPRContextEncoder.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base')
context_tokenizer = DPRContextEncoderTokenizer.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base')

# Initialize T5 model and tokenizer
t5_model = T5ForConditionalGeneration.from_pretrained('t5-base')
t5_tokenizer = T5Tokenizer.from_pretrained('t5-base')

def greet_user():
    return "Hello! Welcome to the trip itinerary chatbot. Please provide your unique user ID to get started."

def get_user_id():
    user_id = input("Please enter your unique user ID: ")
    return user_id

def detect_keywords(user_input):
    keywords = {
        'action': None,
        'item_type': None,
        'similarity': False,
        'info_request': False,
        'emergency': False,
        'display_itinerary': False
    }
    if 'remove' in user_input.lower():
        keywords['action'] = 'remove'
    if 'add' in user_input.lower():
        keywords['action'] = 'add'
    if 'hotel' in user_input.lower():
        keywords['item_type'] = 'hotel'
    if 'attraction' in user_input.lower():
        keywords['item_type'] = 'attraction'
    if 'restaurant' in user_input.lower():
        keywords['item_type'] = 'restaurant'
    if 'similar' in user_input.lower():
        keywords['similarity'] = True
    if any(keyword in user_input.lower() for keyword in ['contact', 'email', 'phone', 'website', 'information']):
        keywords['info_request'] = True
    if any(keyword in user_input.lower() for keyword in ['emergency', 'hospital', 'police', 'embassy', 'cpr']):
        keywords['emergency'] = True
    if 'display' in user_input.lower() or 'show' in user_input.lower():
        keywords['display_itinerary'] = True
    return keywords

def retrieve_itinerary(user_id, plan_id):
    try:
        itinerary = plan_db.find_one({'user_id': user_id, 'plan_id': plan_id})
        if not itinerary:
            return None
        return itinerary
    except Exception as e:
        return f"Error retrieving itinerary: {e}"

def find_similar_item(item_type, current_item):
    try:
        if item_type == 'hotel':
            similar_items = hotel_db.find({'location': current_item['location'], 'price_range': current_item['price_range']})
        elif item_type == 'attraction':
            similar_items = attraction_db.find({'location': current_item['location'], 'type': current_item['type']})
        elif item_type == 'restaurant':
            similar_items = restaurant_db.find({'location': current_item['location'], 'cuisine': current_item['cuisine']})
        return list(similar_items)
    except Exception as e:
        return f"Error finding similar item: {e}"

def update_itinerary(user_id, plan_id, action, item_type, similar_item=None):
    try:
        itinerary = retrieve_itinerary(user_id, plan_id)
        if not itinerary:
            return "No itinerary found for the given user ID and plan ID."

        if action == 'remove':
            if item_type in itinerary:
                del itinerary[item_type]
                plan_db.update_one({'user_id': user_id, 'plan_id': plan_id}, {'$set': itinerary})
                return f"{item_type.capitalize()} removed from your itinerary."
            else:
                return f"No {item_type} found in your itinerary."

        if action == 'add' and similar_item:
            itinerary[item_type] = similar_item
            plan_db.update_one({'user_id': user_id, 'plan_id': plan_id}, {'$set': itinerary})
            return f"Similar {item_type} added to your itinerary."

        return "Unable to update itinerary."
    except Exception as e:
        return f"Error updating itinerary: {e}"

def search_database(item_type, item_name):
    try:
        if item_type == 'hotel':
            return hotel_db.find_one({'name': item_name})
        elif item_type == 'attraction':
            return attraction_db.find_one({'name': item_name})
        elif item_type == 'restaurant':
            return restaurant_db.find_one({'name': item_name})
        return None
    except Exception as e:
        return f"Error searching database: {e}"

def web_search(query):
    try:
        # Perform a web search using the Gemini API
        client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY"))
        model = "gemini-2.0-flash"
        contents = [
            types.Content(
                role="user",
                parts=[
                    types.Part.from_text(text=query),
                ],
            ),
        ]
        generate_content_config = types.GenerateContentConfig(
            temperature=1,
            top_p=0.95,
            top_k=40,
            max_output_tokens=8192,
            response_mime_type="text/plain",
        )

        response = ""
        for chunk in client.models.generate_content_stream(
            model=model,
            contents=contents,
            config=generate_content_config,
        ):
            response += chunk.text

        return response
    except Exception as e:
        return f"Error performing web search: {e}"

def handle_info_request(item_type, item_name):
    try:
        item = search_database(item_type, item_name)
        if item:
            return f"Contact information for {item_name}: Email: {item.get('email')}, Phone: {item.get('phone')}, Website: {item.get('website')}"
        else:
            query = f"contact information for {item_name} {item_type}"
            return web_search(query)
    except Exception as e:
        return f"Error handling information request: {e}"

def handle_emergency_request(user_input):
    try:
        location = get_user_location()
        if not location:
            return "Unable to determine your location. Please try again later."

        if 'hospital' in user_input.lower():
            query = f"nearest hospital in {location}"
        elif 'police' in user_input.lower():
            query = f"nearest police station in {location}"
        elif 'embassy' in user_input.lower():
            query = f"nearest embassy in {location}"
        elif 'cpr' in user_input.lower():
            query = "how to perform CPR"
        else:
            return "Please specify the type of emergency assistance you need."

        return web_search(query)
    except Exception as e:
        return f"Error handling emergency request: {e}"

def get_user_location():
    try:
        # Placeholder for actual location retrieval logic
        response = requests.get('https://api.ipgeolocation.io/ipgeo', params={'apiKey': 'your_actual_location_api_key_here'})
        data = response.json()
        return f"{data['city']}, {data['country_name']}"
    except Exception as e:
        return f"Error retrieving location: {e}"

def retrieve_documents(query, documents):
    # Encode the query and documents using DPR
    query_embedding = question_encoder(**question_tokenizer(query, return_tensors='pt'))[0]
    document_embeddings = context_encoder(**context_tokenizer(documents, return_tensors='pt', padding=True, truncation=True))[0]

    # Compute similarity scores
    similarity_scores = torch.matmul(query_embedding, document_embeddings.T)
    k = min(5, len(documents))  # Ensure k does not exceed the number of documents
    top_documents = similarity_scores.topk(k).indices

    return [documents[i] for i in top_documents]

def generate_response(query, retrieved_documents):
    # Concatenate the query and retrieved documents
    input_text = query + " ".join(retrieved_documents)
    input_ids = t5_tokenizer.encode(input_text, return_tensors='pt')

    # Generate the response using T5
    output_ids = t5_model.generate(input_ids)
    response = t5_tokenizer.decode(output_ids[0], skip_special_tokens=True)

    return response

def handle_user_request(user_input, user_id, plan_id):
    try:
        keywords = detect_keywords(user_input)
        itinerary = retrieve_itinerary(user_id, plan_id)

        if not itinerary:
            return "No itinerary found for the given user ID and plan ID."

        if keywords['display_itinerary']:
            return f"Your itinerary: {itinerary}"

        if keywords['action'] == 'remove':
            return update_itinerary(user_id, plan_id, 'remove', keywords['item_type'])

        if keywords['action'] == 'add' and keywords['similarity']:
            current_item = itinerary.get(keywords['item_type'])
            if current_item:
                similar_items = find_similar_item(keywords['item_type'], current_item)
                if similar_items:
                    similar_item = similar_items[0]  # For simplicity, taking the first similar item
                    return update_itinerary(user_id, plan_id, 'add', keywords['item_type'], similar_item)
                else:
                    return f"No similar {keywords['item_type']} found."
            else:
                return f"No current {keywords['item_type']} found in your itinerary."

        if keywords['info_request']:
            item_name = re.search(r'for (.+?) (hotel|attraction|restaurant)', user_input.lower())
            if item_name:
                item_name = item_name.group(1)
                item_type = item_name.group(2)
                return handle_info_request(item_type, item_name)
            else:
                return "Please specify the name of the hotel, attraction, or restaurant you are looking for."

        if keywords['emergency']:
            return handle_emergency_request(user_input)

        return "Unable to process your request."
    except Exception as e:
        return f"Error handling user request: {e}"

def evaluate_retrieval(query, relevant_docs, retrieved_docs, k=5):
    # Calculate Precision@k, Recall@k, and F1 Score
    relevant_set = set(relevant_docs)
    retrieved_set = set(retrieved_docs[:k])

    precision = len(relevant_set.intersection(retrieved_set)) / len(retrieved_set)
    recall = len(relevant_set.intersection(retrieved_set)) / len(relevant_set)
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

    return precision, recall, f1

def evaluate_generation(reference, generated):
    # Calculate BLEU Score
    smooth = SmoothingFunction().method4
    bleu_score = sentence_bleu([reference.split()], generated.split(), smoothing_function=smooth)

    # Calculate ROUGE Score
    rouge = Rouge()
    rouge_scores = rouge.get_scores(generated, reference)[0]

    return bleu_score, rouge_scores

def visualize_metrics(precision, recall, f1, bleu, rouge):
    # Plot Precision, Recall, and F1 Score
    metrics = {'Precision': precision, 'Recall': recall, 'F1 Score': f1}
    sns.barplot(x=list(metrics.keys()), y=list(metrics.values()))
    plt.title('Retrieval Metrics')
    plt.show()

    # Plot BLEU and ROUGE Scores
    generation_metrics = {'BLEU Score': bleu, 'ROUGE-1': rouge['rouge-1']['f'], 'ROUGE-2': rouge['rouge-2']['f'], 'ROUGE-L': rouge['rouge-l']['f']}
    sns.barplot(x=list(generation_metrics.keys()), y=list(generation_metrics.values()))
    plt.title('Generation Metrics')
    plt.show()

def main():
    print(greet_user())
    user_id = get_user_id()
    plan_id = input("Please enter your plan ID: ")

    while True:
        user_input = input("How can I assist you with your itinerary? ")
        response = handle_user_request(user_input, user_id, plan_id)
        print(response)

        # Example evaluation (replace with actual data)
        relevant_docs = ["doc1", "doc2", "doc3"]
        retrieved_docs = retrieve_documents(user_input, relevant_docs)
        precision, recall, f1 = evaluate_retrieval(user_input, relevant_docs, retrieved_docs)

        reference = "This is a reference response."
        bleu, rouge = evaluate_generation(reference, response)

        visualize_metrics(precision, recall, f1, bleu, rouge)

if __name__ == "__main__":
    main()


Some weights of the model checkpoint at facebook/dpr-question_encoder-single-nq-base were not used when initializing DPRQuestionEncoder: ['question_encoder.bert_model.pooler.dense.bias', 'question_encoder.bert_model.pooler.dense.weight']
- This IS expected if you are initializing DPRQuestionEncoder from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DPRQuestionEncoder from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at facebook/dpr-ctx_encoder-single-nq-base were not used when initializing DPRContextEncoder: ['ctx_encoder.bert_model.pooler.dense.bias', 'ctx_encoder.bert_model.pooler.dense.weight']
- This IS expected if you are initializing DPRContextEncoder from the

Hello! Welcome to the trip itinerary chatbot. Please provide your unique user ID to get started.
Please enter your unique user ID: 123
Please enter your plan ID: 123
How can I assist you with your itinerary? how to do cpr


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Error performing web search: Missing key inputs argument! To use the Google AI API,provide (`api_key`) arguments. To use the Google Cloud API, provide (`vertexai`, `project` & `location`) arguments.


TypeError: only integer tensors of a single element can be converted to an index