In [None]:
import os
import re

import numpy as np
import torch
from pinecone import Pinecone, ServerlessSpec
from transformers import AutoTokenizer, AutoModelForQuestionAnswering

# The Pinecone API key is read from the environment so that no credential is
# stored in the source file.
# NOTE(review): any key that was previously committed in this file should be
# treated as leaked and revoked/rotated in the Pinecone console.
_pinecone_api_key = os.environ.get("PINECONE_API_KEY")
if not _pinecone_api_key:
    raise RuntimeError(
        "Set the PINECONE_API_KEY environment variable before running this script."
    )

pc = Pinecone(api_key=_pinecone_api_key)

# Create the serverless index on first run only; later runs reuse it.
index_name = "business-qa-index"
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        # Must equal the length of the vectors produced by generate_vector().
        dimension=768,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

# Tokenizer and model are loaded once at import time and shared by every
# generate_vector() call.
tokenizer = AutoTokenizer.from_pretrained("deepset/roberta-base-squad2")
model = AutoModelForQuestionAnswering.from_pretrained("deepset/roberta-base-squad2")

# Handle used for all upsert/query calls against the index.
index = pc.Index(index_name)

def generate_vector(text):
    """Embed *text* as a flat list of floats.

    The text is tokenized (truncated to the tokenizer's limit), passed through
    the RoBERTa encoder of the module-level ``model`` without tracking
    gradients, and the final hidden states are averaged over the token axis.

    Returns:
        The mean-pooled hidden state converted to a plain Python list.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        encoder_out = model.roberta(**encoded)

    # Average across tokens (dim=1), then drop the batch axis.
    pooled = encoder_out.last_hidden_state.mean(dim=1)
    return pooled.squeeze().numpy().tolist()

# Seed corpus of question/answer pairs for the bot.
# Each entry is a dict with a "question" and an "answer" string. The list is
# passed to index_data(), which stores entry i under the id "qa-<i>", and
# get_answer() maps such ids back to positions in this list, so the order of
# the entries matters.
predefined_qa = [
    {"question": "What services does Yardstick Software Solutions provide?", "answer": "Yardstick Software Solutions offers customized software solutions including app development, data analytics, and cloud services."},
    {"question": "What industries do you serve?", "answer": "We serve healthcare, retail, finance, logistics, education, and manufacturing sectors."},
    {"question": "Do you offer support after development?", "answer": "Yes, we provide ongoing support and maintenance for our solutions."},
    {"question": "What is the timeline for custom software development?", "answer": "The timeline depends on the project complexity, ranging from weeks to months."},
    {"question": "How can I get started with your services?", "answer": "To get started, contact us via our website, schedule a consultation, and we'll provide a proposal."},
    {"question": "Do you offer ERP solutions?", "answer": "Yes, we offer ERP solutions tailored to your business needs."},
    {"question": "What makes Yardstick Software Solutions different?", "answer": "What sets us apart is our client-focused approach, industry expertise, and commitment to delivering scalable, high-quality solutions."},
    {"question": "What are your cloud service offerings?", "answer": "We offer cloud services with platforms like AWS and Azure, ensuring scalability and secure infrastructure."},
    {"question": "Do you develop mobile apps?", "answer": "Yes, we develop mobile applications for both iOS and Android, tailored to your business requirements."},
    {"question": "How do you ensure software quality?", "answer": "We ensure software quality through rigorous testing (unit, integration, UAT) and agile development for iterative improvements."}
]

def index_data(corpus, batch_size=100):
    """Embed every question in *corpus* and upsert it into the Pinecone index.

    Record ``i`` is stored under the id ``"qa-<i>"`` with its question and
    answer attached as metadata. Records are sent in batches rather than one
    request per record.

    Args:
        corpus: Iterable of dicts with ``"question"`` and ``"answer"`` keys.
        batch_size: Maximum number of records per upsert request.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
        TypeError: If ``generate_vector`` does not return a list of floats.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    pending = []
    for i, doc in enumerate(corpus):
        vector = generate_vector(doc["question"])

        # Explicit checks instead of ``assert`` so validation still runs
        # when Python is started with -O.
        if not isinstance(vector, list):
            raise TypeError("Vector should be a list")
        if not all(isinstance(val, float) for val in vector):
            raise TypeError("All values in the vector should be floats")

        metadata = {"question": doc["question"], "answer": doc["answer"]}
        pending.append((f"qa-{i}", vector, metadata))

        if len(pending) >= batch_size:
            index.upsert(pending)
            pending = []

    # Flush the final partial batch; nothing is sent for an empty corpus.
    if pending:
        index.upsert(pending)

def get_answer(query):
    """Return the stored answer whose question is closest to *query*.

    The query is embedded with ``generate_vector`` and the single nearest
    record is fetched from the index together with its metadata. The answer
    is taken from that metadata; if the metadata is unavailable, the record id
    (``"qa-<i>"``) is resolved against ``predefined_qa`` instead.

    Returns:
        The answer string, or an apology message when no usable match exists.
    """
    not_found = "Sorry, I couldn't find an answer to your question."

    query_vector = generate_vector(query)
    result = index.query(vector=query_vector, top_k=1, include_metadata=True)

    matches = result["matches"]
    if not matches:
        return not_found
    best = matches[0]

    # Preferred source: the answer stored alongside the vector at index time.
    try:
        metadata = best["metadata"]
        if metadata and metadata["answer"]:
            return metadata["answer"]
    except (KeyError, AttributeError, TypeError):
        # Metadata missing or not shaped as expected; fall back to the id.
        pass

    # Fallback: ids have the form "qa-<position in predefined_qa>". Guard
    # against ids that do not follow that form or point past the list.
    try:
        position = int(best["id"].split("-")[1])
        return predefined_qa[position]["answer"]
    except (IndexError, ValueError):
        return not_found

# Populate the Pinecone index with the predefined Q&A pairs. This runs at
# module level, before the chat loop is started.
index_data(predefined_qa)

# Greeting words/phrases matched as whole words. Word boundaries prevent
# false positives such as "hi" inside "which" or "hey" inside "they".
_GREETING_PATTERN = re.compile(
    r"\b(?:hello|hi|hey|greetings|morning|evening|good\s+day)\b"
)


def is_greeting(user_input):
    """Return True if *user_input* contains a greeting word or phrase.

    Matching is case-insensitive and only considers whole words, so a
    question that merely contains the letters of a greeting is not
    misclassified.
    """
    return _GREETING_PATTERN.search(user_input.lower()) is not None

def get_phone_number():
    """Prompt until the user enters a valid 10-digit phone number.

    Surrounding whitespace is ignored, and only ASCII digits are accepted
    (``str.isdigit`` alone would also accept characters such as superscript
    digits).

    Returns:
        The validated phone number as a 10-character string. Callers that
        ignore the return value keep working unchanged.
    """
    while True:
        phone_number = input("Please enter your phone number: ").strip()
        is_valid = (
            len(phone_number) == 10
            and phone_number.isascii()
            and phone_number.isdigit()
        )
        if is_valid:
            print("Thank you for providing your phone number.")
            return phone_number
        print("Please enter a valid 10-digit phone number.")

def ask_question():
    """Run the interactive chat loop until the user leaves.

    Each turn reads one line from the user:

    * blank input is ignored and the prompt is shown again;
    * ``exit`` (any case, surrounding whitespace ignored) ends the session;
    * a greeting is acknowledged and followed by a phone-number prompt;
    * anything else is answered via ``get_answer``.

    End-of-input or Ctrl-C also ends the session cleanly instead of
    surfacing a traceback.
    """
    print("Welcome to Yardstick Software Solutions.")
    try:
        while True:
            user_input = input(
                "You are connected to a Bot Agent, Type Your Query! "
            ).strip()

            # Nothing to answer; do not send an empty query to the index.
            if not user_input:
                continue

            if user_input.lower() == "exit":
                print("Goodbye!")
                break

            if is_greeting(user_input):
                print("Hello! How can I assist you today?")
                get_phone_number()
                continue

            print(f"\nYou asked: {user_input}")
            answer = get_answer(user_input)
            print(f"Answer: {answer}")
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or user interrupted: leave quietly.
        print("\nGoodbye!")

# Start the interactive chat session.
ask_question()


Welcome to Yardstick Software Solutions.
You are connected to a Bot Agent, Type Your Query! Hello
Hello! How can I assist you today?
Please enter your phone number: 8558074708
Thank you for providing your phone number.
You are connected to a Bot Agent, Type Your Query! What services does Yardstick Software Solutions provide?

You asked: What services does Yardstick Software Solutions provide?
Answer: Yardstick Software Solutions offers customized software solutions including app development, data analytics, and cloud services.
You are connected to a Bot Agent, Type Your Query! How can I get started with your services?

You asked: How can I get started with your services?
Answer: To get started, contact us via our website, schedule a consultation, and we'll provide a proposal.
You are connected to a Bot Agent, Type Your Query! How do you ensure software quality?

You asked: How do you ensure software quality?
Answer: We ensure software quality through rigorous testing (unit, integration

I was tasked with integrating the OpenAI API for this project. However, upon attempting to use the API, I encountered access restrictions due to the paid nature of the service and was unable to obtain the necessary API key. Given these constraints, I proceeded with an alternative technique that involved utilizing Pinecone for vector indexing and Hugging Face's RoBERTa model for question answering. This approach allows me to generate vector embeddings for questions and store them in Pinecone, which is then queried to retrieve the most relevant answer.

By using this method, I was able to continue developing the Q&A bot while meeting the project’s core objectives without relying on paid API services.