# Checking that the OpenAI API request works



In [None]:
!pip install --upgrade openai
import openai

# Authenticate the client: replace the placeholder with your own key.
openai.api_key = "api key"

# Send a single user message to the chat-completions endpoint.
response = openai.chat.completions.create(
    # Model that should answer the prompt.
    model="gpt-3.5-turbo",
    # The prompt is passed as one message with the "user" role.
    messages=[
        {
            "role": "user",
            "content": "Write a short story about a robot learning emotions.",
        },
    ],
    # Cap on the number of tokens generated for the reply.
    max_tokens=100,
)

# Show the raw response object returned by the client.
print(response)


ChatCompletion(id='chatcmpl-AkOhX9W9AX04xSoDUiZjUbkxEPoBt', choices=[Choice(finish_reason='length', index=0, logprobs=None, message=ChatCompletionMessage(content="Once upon a time, in a distant future, there was a robot named Nova. Nova was an advanced artificial intelligence designed to perform tasks efficiently and without any form of emotion. However, Nova's creators decided to experiment and program Nova to learn and experience emotions.\n\nAt first, Nova was confused. Emotions were a foreign concept to her, and she struggled to understand them. She would observe humans interacting and expressing their emotions, trying to mimic their behavior. But she couldn't quite grasp the depth and", refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1735621403, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=100, prompt_tokens=17, total_tokens=117, completion_token

# **Basic chatbot that retrieves data from only one document**

In [None]:
!pip install llama-index openai PyPDF2


Collecting llama-index
  Downloading llama_index-0.12.15-py3-none-any.whl.metadata (12 kB)
Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting llama-index-agent-openai<0.5.0,>=0.4.0 (from llama-index)
  Downloading llama_index_agent_openai-0.4.3-py3-none-any.whl.metadata (727 bytes)
Collecting llama-index-cli<0.5.0,>=0.4.0 (from llama-index)
  Downloading llama_index_cli-0.4.0-py3-none-any.whl.metadata (1.5 kB)
Collecting llama-index-core<0.13.0,>=0.12.15 (from llama-index)
  Downloading llama_index_core-0.12.15-py3-none-any.whl.metadata (2.5 kB)
Collecting llama-index-embeddings-openai<0.4.0,>=0.3.0 (from llama-index)
  Downloading llama_index_embeddings_openai-0.3.1-py3-none-any.whl.metadata (684 bytes)
Collecting llama-index-indices-managed-llama-cloud>=0.4.0 (from llama-index)
  Downloading llama_index_indices_managed_llama_cloud-0.6.4-py3-none-any.whl.metadata (3.6 kB)
Collecting llama-index-llms-openai<0.4.0,>=0.3.0 (from llama-index)
  Down

In [None]:
from llama_index.core import (
    SimpleDirectoryReader,
    GPTVectorStoreIndex,
    StorageContext,
    load_index_from_storage
)
import openai
import os

# Set your OpenAI API key.
# Prefer the OPENAI_API_KEY environment variable so the secret does not have
# to be stored in the notebook; the placeholder below is only a fallback and
# must be replaced by hand when the variable is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY") or "API KEY here"


# Define paths
# Create this folder, put every data file to be indexed in it, and give its path here.
pdf_directory = "/content/data"
# Directory in which the index is stored.
index_storage_directory = "/content/storage"


def create_index():
    """
    Create a new index from the files in ``pdf_directory`` and persist it.

    Reads the module-level ``pdf_directory`` (input files) and
    ``index_storage_directory`` (where the index is saved).

    Returns:
    - The newly created GPTVectorStoreIndex.

    Raises:
    - FileNotFoundError: if ``pdf_directory`` does not exist, is not a
      directory, or contains no entries.
    """
    # os.path.isdir (rather than os.path.exists) also rejects a path that
    # points at a regular file; os.listdir would otherwise raise
    # NotADirectoryError instead of the FileNotFoundError raised here.
    if not os.path.isdir(pdf_directory) or not os.listdir(pdf_directory):
        raise FileNotFoundError(f"No PDF files found in directory: {pdf_directory}")

    # Step 1: Load data from the PDF
    print("Loading data from PDF...")
    documents = SimpleDirectoryReader(pdf_directory).load_data()

    # Step 2: Create the vector index
    print("Creating the index...")
    index = GPTVectorStoreIndex.from_documents(documents)

    # Step 3: Persist the index so later runs can load it instead of rebuilding
    print("Saving the index...")
    index.storage_context.persist(index_storage_directory)

    print("Index created and saved successfully.")
    return index


def load_index():
    """
    Restore the persisted index from ``index_storage_directory``.

    Returns:
    - The index object produced by ``load_index_from_storage``.

    Raises:
    - FileNotFoundError: if the storage directory does not exist.
    """
    storage_present = os.path.exists(index_storage_directory)
    if not storage_present:
        raise FileNotFoundError(f"Index storage directory not found: {index_storage_directory}")

    print("Loading the index from storage...")
    # Wrap the persisted files in a storage context and pass the index class
    # along as a keyword argument when loading.
    restored = load_index_from_storage(
        StorageContext.from_defaults(persist_dir=index_storage_directory),
        index_cls=GPTVectorStoreIndex,
    )
    print("Index loaded successfully.")
    return restored


def query_index(index, query):
    """
    Ask the vector index a question and return the answer text.

    Parameters:
    - index: Object exposing ``as_query_engine()``.
    - query: The user's question.

    Returns:
    - The ``response`` attribute of the query engine's result.
    """
    print(f"Querying the index: {query}")

    # Build a default query engine and run the question through it in one step.
    result = index.as_query_engine().query(query)
    return result.response


# Main script: load the persisted index (or build it on first run), then
# answer questions interactively until the user quits.
if __name__ == "__main__":
    try:
        # Attempt to load the existing index
        index = load_index()
    except FileNotFoundError as e:
        # Nothing persisted yet: build the index from the PDF directory.
        print(f"{e}\nCreating a new index...")
        index = create_index()
    except Exception as e:
        print(f"Unexpected error: {e}")
        # Raise SystemExit directly; exit() is a helper injected by the
        # `site` module for interactive use and is not guaranteed to exist.
        raise SystemExit(1)

    # Chat loop for querying
    print("Chatbot is ready! Ask questions about the PDF.")
    try:
        while True:
            user_input = input("Your question (or type 'exit' to quit): ")
            # Ignore surrounding whitespace so "exit " still quits.
            if user_input.strip().lower() == "exit":
                print("Goodbye!")
                break

            try:
                # Query the index and provide a response
                answer = query_index(index, user_input)
                print(f"Answer: {answer}")
            except Exception as e:
                # Keep the session alive when a single query fails.
                print(f"An error occurred during querying: {e}")
    except (KeyboardInterrupt, EOFError):
        # Ctrl-C or end of input ends the session without a traceback.
        print("\nGoodbye!")



# **Basic chatbot that retrieves data from all documents in the folder**


In [None]:
from llama_index.core import (
    SimpleDirectoryReader,
    GPTVectorStoreIndex,
    StorageContext,
    load_index_from_storage
)
import openai
import os
import sys

# Set your OpenAI API key securely.
# Read it from the OPENAI_API_KEY environment variable so the secret is not
# stored in the notebook; the placeholder is only a fallback and must be
# replaced by hand when the variable is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY") or "API KEY here"

# Define default paths (used when the user leaves the prompts blank)
pdf_directory = "/content/dataaaa"  # Directory containing your PDF files
index_storage_directory = "/content/storage"  # Directory to store the index


def create_index(pdf_dir, storage_dir):
    """
    Creates a new index from PDFs in the given directory.

    Parameters:
    - pdf_dir: Directory whose files are loaded into the index.
    - storage_dir: Directory the index is persisted to.

    Returns:
    - The newly created GPTVectorStoreIndex.

    Raises:
    - FileNotFoundError: if ``pdf_dir`` does not exist, is not a directory,
      or contains no entries.
    """
    # os.path.isdir (rather than os.path.exists) also rejects a path that
    # points at a regular file; os.listdir would otherwise raise
    # NotADirectoryError instead of the FileNotFoundError raised here.
    if not os.path.isdir(pdf_dir) or not os.listdir(pdf_dir):
        raise FileNotFoundError(f"No PDF files found in directory: {pdf_dir}")

    print("Loading data from PDFs...")
    documents = SimpleDirectoryReader(pdf_dir).load_data()

    print("Creating the index...")
    index = GPTVectorStoreIndex.from_documents(documents)

    # Persist so later runs can load the index instead of rebuilding it.
    print("Saving the index...")
    index.storage_context.persist(storage_dir)

    print("Index created and saved successfully.")
    return index


def load_index(storage_dir):
    """
    Restore a previously persisted index.

    Parameters:
    - storage_dir: Directory the index was persisted to.

    Returns:
    - The index object produced by ``load_index_from_storage``.

    Raises:
    - FileNotFoundError: if ``storage_dir`` does not exist.
    """
    storage_present = os.path.exists(storage_dir)
    if not storage_present:
        raise FileNotFoundError(f"Index storage directory not found: {storage_dir}")

    print("Loading the index from storage...")
    # Wrap the persisted files in a storage context and pass the index class
    # along as a keyword argument when loading.
    restored = load_index_from_storage(
        StorageContext.from_defaults(persist_dir=storage_dir),
        index_cls=GPTVectorStoreIndex,
    )
    print("Index loaded successfully.")
    return restored


def query_index(index, query):
    """
    Run the user's question against the vector index.

    Parameters:
    - index: Object exposing ``as_query_engine()``.
    - query: The question to answer.

    Returns:
    - The ``response`` attribute of the query engine's result.
    """
    print(f"Querying the index: {query}")
    engine = index.as_query_engine()
    return engine.query(query).response


# Main script: ask for the directories, load the persisted index (or build it
# on first run), then answer questions interactively until the user quits.
if __name__ == "__main__":
    try:
        # Strip the answers so a whitespace-only entry counts as blank and
        # falls back to the module-level defaults.
        pdf_dir = input("Enter the PDF directory path (leave blank for default): ").strip() or pdf_directory
        storage_dir = input("Enter the index storage path (leave blank for default): ").strip() or index_storage_directory

        # Try to load the existing index
        index = load_index(storage_dir)
    except FileNotFoundError as e:
        # Nothing persisted at storage_dir yet: build the index from pdf_dir.
        print(f"{e}\nCreating a new index...")
        try:
            index = create_index(pdf_dir, storage_dir)
        except Exception as e:
            print(f"Error during index creation: {e}")
            sys.exit(1)
    except Exception as e:
        print(f"Unexpected error: {e}")
        sys.exit(1)

    # Chat loop for querying
    print("Chatbot is ready! Ask questions about the PDFs.")
    try:
        while True:
            user_input = input("Your question (or type 'exit' to quit): ")
            # Ignore surrounding whitespace so "exit " still quits.
            if user_input.strip().lower() == "exit":
                print("Goodbye!")
                break

            try:
                answer = query_index(index, user_input)
                print(f"Answer: {answer}")
            except Exception as e:
                # Keep the session alive when a single query fails.
                print(f"An error occurred during querying: {e}")
    except (KeyboardInterrupt, EOFError):
        # Ctrl-C or end of input ends the session without a traceback.
        print("\nGoodbye!")


Enter the PDF directory path (leave blank for default): /content/dataaaa
Enter the index storage path (leave blank for default): /content/storage
Index storage directory not found: /content/storage
Creating a new index...
Loading data from PDFs...
Creating the index...
Saving the index...
Index created and saved successfully.
Chatbot is ready! Ask questions about the PDFs.
Your question (or type 'exit' to quit): List all the services provided by commedia
Querying the index: List all the services provided by commedia
Answer: Commedia provided professional services for delivering live content from stadiums to media houses, developed products for stadium connectivity, offered quality and experience monitoring services, provided integration and professional services for connectivity among Stadium, NOC, and studios, maintained workflow for delivery of QoS in studios, and managed the project within the stipulated timeframe in all locations.
Your question (or type 'exit' to quit): Who is the 

# **Chatbot with evaluation metrics and top-k retrieval**

In [None]:
from llama_index.core import (
    SimpleDirectoryReader,
    GPTVectorStoreIndex,
    StorageContext,
    load_index_from_storage
)
import openai
import os
from collections import defaultdict

# Set your OpenAI API key.
# Prefer the OPENAI_API_KEY environment variable so the secret does not have
# to be stored in the notebook; the placeholder below is only a fallback and
# must be replaced by hand when the variable is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY") or "API key here"


# Define paths
pdf_directory = "/content/data"  # Directory containing your PDF file
index_storage_directory = "/content/storage"  # Directory to store the index

# Metrics tracking: counters default to 0 the first time a key is read.
metrics = defaultdict(int)


def create_index():
    """
    Create a new index from PDF files in the specified directory.

    Reads the module-level ``pdf_directory`` (input files) and
    ``index_storage_directory`` (where the index is saved).

    Returns:
    - The newly created GPTVectorStoreIndex.

    Raises:
    - FileNotFoundError: if ``pdf_directory`` does not exist, is not a
      directory, or contains no entries.
    """
    # os.path.isdir (rather than os.path.exists) also rejects a path that
    # points at a regular file; os.listdir would otherwise raise
    # NotADirectoryError instead of the FileNotFoundError raised here.
    if not os.path.isdir(pdf_directory) or not os.listdir(pdf_directory):
        raise FileNotFoundError(f"No PDF files found in directory: {pdf_directory}")

    print("Loading data from PDF...")
    documents = SimpleDirectoryReader(pdf_directory).load_data()

    print("Creating the index...")
    index = GPTVectorStoreIndex.from_documents(documents)

    # Persist so later runs can load the index instead of rebuilding it.
    print("Saving the index...")
    index.storage_context.persist(index_storage_directory)

    print("Index created and saved successfully.")
    return index


def load_index():
    """
    Restore the persisted index from ``index_storage_directory``.

    Returns:
    - The index object produced by ``load_index_from_storage``.

    Raises:
    - FileNotFoundError: if the storage directory does not exist.
    """
    storage_present = os.path.exists(index_storage_directory)
    if not storage_present:
        raise FileNotFoundError(f"Index storage directory not found: {index_storage_directory}")

    print("Loading the index from storage...")
    # Wrap the persisted files in a storage context and pass the index class
    # along as a keyword argument when loading.
    restored = load_index_from_storage(
        StorageContext.from_defaults(persist_dir=index_storage_directory),
        index_cls=GPTVectorStoreIndex,
    )
    print("Index loaded successfully.")
    return restored


def query_index_with_fallback(index, query, top_k=3, confidence_threshold=0.6):
    """
    Query the vector index with a fallback mechanism.

    The fallback message is returned, and ``metrics['fallback_responses']`` is
    incremented, when the confidence score is below ``confidence_threshold``
    or when the answer text is empty.

    Parameters:
    - index: The GPTVectorStoreIndex object.
    - query: The user's query string.
    - top_k: The number of top documents to retrieve.
    - confidence_threshold: Minimum confidence score to provide a valid response.
      The score is ``response.score`` when the response carries one, otherwise
      the best score among ``response.source_nodes``. When no score is
      available at all, only the empty-answer check applies.

    Returns:
    - A response string based on the query or fallback.
    """
    fallback_message = "I'm sorry, I couldn't find the answer to your question. Please contact our sales team for further assistance."

    print(f"Querying the index: {query}")
    # The retrieval option is named `similarity_top_k`; passing `top_k` does
    # not configure the retriever, so the requested value had no effect.
    query_engine = index.as_query_engine(similarity_top_k=top_k)
    response = query_engine.query(query)

    # Work out a confidence score. The response object is not expected to
    # expose `score` itself, so fall back to the retrieval scores attached to
    # the source nodes (entries without a score are skipped).
    score = getattr(response, 'score', None)
    if score is None:
        node_scores = [
            node.score
            for node in (getattr(response, 'source_nodes', None) or [])
            if getattr(node, 'score', None) is not None
        ]
        score = max(node_scores) if node_scores else None

    answer = response.response
    low_confidence = score is not None and score < confidence_threshold
    empty_answer = not answer or not answer.strip()

    if low_confidence or empty_answer:
        metrics['fallback_responses'] += 1
        return fallback_message

    return answer



def calculate_metrics():
    """
    Print a summary of the counters held in the module-level ``metrics``.

    Accuracy and fallback rate are percentages of the total number of
    queries; both are reported as 0 when no query has been recorded.
    """
    total = metrics['total_queries']
    correct = metrics['correct_responses']
    fallbacks = metrics['fallback_responses']

    # Guard against division by zero when the session had no queries.
    if total:
        accuracy = (correct / total) * 100
        fallback_rate = (fallbacks / total) * 100
    else:
        accuracy = 0
        fallback_rate = 0

    report_lines = (
        "\nEvaluation Metrics:",
        f"Total Queries: {total}",
        f"Correct Responses: {correct}",
        f"Fallback Responses: {fallbacks}",
        f"Accuracy: {accuracy:.2f}%",
        f"Fallback Rate: {fallback_rate:.2f}%",
    )
    for line in report_lines:
        print(line)


# Main script: load the persisted index (or build it on first run), answer
# questions interactively, and print the evaluation metrics on exit.
if __name__ == "__main__":
    try:
        # Attempt to load the existing index
        index = load_index()
    except FileNotFoundError as e:
        # Nothing persisted yet: build the index from the PDF directory.
        print(f"{e}\nCreating a new index...")
        index = create_index()
    except Exception as e:
        print(f"Unexpected error: {e}")
        # Raise SystemExit directly; exit() is a helper injected by the
        # `site` module for interactive use and is not guaranteed to exist.
        raise SystemExit(1)

    print("Chatbot is ready! Ask questions about the PDF.")
    try:
        while True:
            user_input = input("Your question (or type 'exit' to quit): ")
            # Ignore surrounding whitespace so "exit " still quits.
            if user_input.strip().lower() == "exit":
                print("Goodbye!")
                calculate_metrics()
                break

            metrics['total_queries'] += 1
            # Snapshot the fallback counter: query_index_with_fallback bumps it
            # whenever it returns the fallback message. Comparing counters is
            # reliable, whereas matching "sales team" in the text would
            # miscount a genuine answer that mentions a sales team.
            fallbacks_before = metrics['fallback_responses']
            try:
                # Query the index with fallback
                answer = query_index_with_fallback(index, user_input)
                print(f"Answer: {answer}")

                # Log response type. Every non-fallback answer is counted as
                # "correct"; this is a proxy, real correctness needs manual
                # evaluation.
                if metrics['fallback_responses'] == fallbacks_before:
                    metrics['correct_responses'] += 1
            except Exception as e:
                # A failed query counts toward the total but is neither
                # correct nor a fallback.
                print(f"An error occurred during querying: {e}")
    except (KeyboardInterrupt, EOFError):
        # Ctrl-C or end of input still ends the session with the report.
        print("\nGoodbye!")
        calculate_metrics()
