In [27]:
#!pip install pinecone
#!pip install cohere

In [13]:
import cohere
import pinecone
from pinecone import Pinecone, ServerlessSpec

# Set up API keys
cohere_api_key = "Ur2O1JW9qQfbjO4c2yO0wGdwSdVdAXAQVn7FJ2Fk"
pinecone_api_key = "b7424a2d-a7fc-4be3-b1a6-fadeccc1953d"
pinecone_env = "us-east-1"

# Initialize Cohere
co = cohere.Client(cohere_api_key)

# Initialize Pinecone
pc = Pinecone(api_key=pinecone_api_key)

# Define index parameters
index_name = 'qa-chatbot'
dimension = 4096  # Ensure this matches your Cohere embedding dimension
metric = 'cosine'

# Check if the index already exists
if index_name not in pc.list_indexes().names():
    # Create the index
    pc.create_index(
        name=index_name,
        dimension=dimension,
        metric=metric,
        spec=ServerlessSpec(
            cloud='aws',
            region=pinecone_env
        )
    )
    print(f"Index '{index_name}' created.")
else:
    print(f"Index '{index_name}' already exists.")

# Connect to the index
index = pc.Index(index_name)

# Prepare data for indexing
general_documents = [
    "Our HR policies include flexible working hours for all employees.",
    "The IT department is responsible for hardware and software maintenance.",
    "We offer health insurance to all full-time employees.",
    "The marketing team handles social media, advertising, and customer engagement.",
    "The company offers a 30-day return policy for all electronic items.",
    "Our finance department manages budgeting, accounting, and payroll.",
    "The customer service team provides support through phone, email, and chat.",
    "We have a dedicated legal team to handle contracts and compliance issues.",
    "Our procurement department sources materials and negotiates supplier contracts.",
    "The R&D team focuses on product innovation and development.",
    "We conduct annual performance reviews to assess employee progress and development.",
    "The company offers professional development opportunities and training programs.",
    "We have a robust data security policy to protect company and customer information.",
    "Our sales team is responsible for driving revenue through client acquisition and retention.",
    "The logistics department manages inventory, warehousing, and distribution.",
    "We have a corporate social responsibility program that supports community initiatives.",
    "The business development team identifies new market opportunities and partnerships.",
    "Our operations team ensures the efficiency of day-to-day business processes.",
    "We offer employee wellness programs, including fitness memberships and mental health resources.",
    "The administrative staff handles office management, scheduling, and administrative support.",
    "We follow a strict confidentiality policy for handling sensitive company information.",
    "The project management office oversees project planning, execution, and delivery.",
    "We have a customer feedback system to continuously improve our products and services.",
    "The quality assurance team ensures that all products meet our quality standards.",
    "Our supply chain management team coordinates the flow of goods from suppliers to customers.",
    "We have a business continuity plan to ensure operations can continue during disruptions.",
    "The risk management team identifies and mitigates potential business risks.",
    "Our IT security team implements measures to protect against cyber threats and data breaches.",
    "We offer a range of employee benefits, including retirement plans and bonuses.",
    "The legal department ensures compliance with industry regulations and laws.",
    "We conduct market research to understand customer needs and industry trends.",
    "The product management team oversees product development and lifecycle management.",
    "We provide training programs for employees to enhance their skills and knowledge.",
    "The business strategy team develops and implements long-term business goals and plans.",
    "Our customer support team handles inquiries, complaints, and service requests.",
    "We maintain partnerships with key stakeholders and industry organizations.",
    "The IT infrastructure team manages the company's network, servers, and hardware.",
    "We have a protocol for managing and responding to customer complaints and issues.",
    "Our data analytics team provides insights to support decision-making and strategy.",
    "We have a code of conduct outlining ethical standards and behavior expectations."
]
queries_and_responses = [
    ("Hello", "Hi there! How can I help you today?"),
    ("How can I start using the service?", "To start using the service, follow these steps: ..."),
    # Add more queries and responses as needed
]

documents = general_documents + [q for q, _ in queries_and_responses] + [r for _, r in queries_and_responses]

chunk_size = 1536  # Adjust as necessary

# Function to chunk large documents into smaller ones
def chunk_text(text, chunk_size=1536):
    return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]

chunks = []
for document in documents:
    chunks.extend(chunk_text(document, chunk_size))

# Generate the embeddings
response = co.embed(texts=chunks, model="embed-english-v2.0")
print(f"Number of embeddings: {len(response.embeddings)}")

# Print vectors for debugging
for vector in response.embeddings[:5]:  # Print first 5 embeddings for verification
    print(vector)

metadata = [{"text": chunk} for chunk in chunks]

# Create vectors list with metadata
vectors = [(f"doc_{i}", response.embeddings[i], metadata[i]) for i in range(len(chunks))]

# Upsert vectors into Pinecone index
try:
    upsert_response = index.upsert(vectors=vectors)
    print("Upsert response:", upsert_response)
except Exception as e:
    print("Error during upsert:", e)

# Verify index status
info = index.describe_index_stats()
print("Index status:", info)

# Define search query function
def search_query(query, top_k=3):
    # Generate the query embedding
    query_embedding = co.embed(texts=[query], model="embed-english-v2.0").embeddings[0]
    
    # Perform the query on Pinecone index
    try:
        result = index.query(vector=query_embedding, top_k=top_k, include_metadata=True)
        
        # Print the result for debugging
        print("Query result:", result)
        
        # Extract and return relevant results
        matches = result['matches']
        results = [(match['id'], match['score'], match.get('metadata', {}).get('text', 'No text available')) for match in matches]
        return results
    except Exception as e:
        print("Error during query:", e)
        return []

# Generate a response based on the retrieved text
def generate_response(query, retrieved_texts):
    # Combine retrieved texts into a single context
    context = " ".join(text for _, _, text in retrieved_texts)
    
    # Generate response using Cohere
    try:
        response = co.generate(
            prompt=f"Based on the following information, answer the question: {query}\n\n{context}",
            model="command-xlarge-nightly"  # Update to a valid model ID
        )
        return response.generations[0].text.strip()
    except Exception as e:
        print("Error during response generation:", e)
        return "Sorry, I couldn't generate a response at this time."

# Example query
query = "What is the HR policy for working hours?"
retrieved_texts = search_query(query)
response_text = generate_response(query, retrieved_texts)

# Print results
print("Chatbot Response:", response_text)


Index 'qa-chatbot' created.
Number of embeddings: 44
[-2.0859375, 0.32348633, 1.9306641, -2.0292969, 0.14501953, 0.21020508, 0.01184845, -1.3164062, -0.31933594, -1.1884766, 0.6503906, -0.17736816, -1.9912109, 1.7158203, 0.0463562, -1.7333984, -0.11816406, -0.38989258, -0.2932129, -2.0390625, 0.8964844, -0.40771484, 1.7353516, 1.109375, 2.8945312, 0.8432617, 0.34204102, -2.7050781, -0.88427734, 1.3427734, 0.001953125, 1.9873047, -1.3037109, -0.6508789, -1.2978516, 0.14074707, -1.1962891, 0.63427734, 0.35351562, 2.8144531, 0.9033203, 0.9326172, 0.50146484, -1.5791016, 0.6538086, -0.65283203, -0.9760742, 0.23510742, -1.4833984, -1.5654297, 1.7871094, -2.0703125, -1.4423828, -1.7148438, -1.2402344, 0.09161377, -1.5839844, 2.2167969, 0.04385376, -1.5478516, 0.3461914, 0.66845703, 1.7724609, -1.1289062, -0.9267578, -2.1269531, 1.8369141, -3.3125, -2.6757812, 1.0097656, 1.8623047, 0.5708008, 1.4716797, 0.34179688, 1.5693359, -0.6713867, 3.7578125, -0.56396484, -0.30786133, 0.5839844, -0.4345