In [22]:
!pip install openai==0.28.0 pinecone-client pandas



In [23]:
import pinecone
import openai
import os
import pandas as pd

In [24]:
# Read the Pinecone key from the environment so no credential is stored in the notebook.
# A missing PINECONE_API_KEY fails here with a KeyError instead of later with an auth error.
pc = pinecone.Pinecone(api_key=os.environ["PINECONE_API_KEY"])

In [25]:
index_name = 'business-docs'

# Names of the indexes that already exist for this Pinecone client.
indexes = [entry['name'] for entry in pc.list_indexes().indexes]

if index_name in indexes:
    print('Index already exists.')
else:
    # dimension must equal the length of the vectors stored and queried;
    # text-embedding-ada-002 (the embedding model used in this notebook) produces 1536-dim vectors.
    pc.create_index(
        name=index_name,
        dimension=1536,
        metric='cosine',
        spec=pinecone.ServerlessSpec(cloud='aws', region='us-east-1'),
    )

# Last expression of the cell: display the index description.
pc.describe_index(index_name)


Index already exists.


{'deletion_protection': 'disabled',
 'dimension': 1536,
 'host': 'business-docs-gs93c9r.svc.aped-4627-b74a.pinecone.io',
 'metric': 'cosine',
 'name': 'business-docs',
 'spec': {'serverless': {'cloud': 'aws', 'region': 'us-east-1'}},
 'status': {'ready': True, 'state': 'Ready'}}

In [26]:
# Connect to the index that was checked/created in the previous cell.
# Reuse index_name (instead of repeating the literal) so both cells always target the same index.
index = pc.Index(index_name)

In [27]:
# Load the business documents; generate_answer() later looks up the 'text' column of this frame.
docs = pd.read_csv(filepath_or_buffer="business-docs.csv")
docs.head()

Unnamed: 0,id,text
0,1,Our all-natural skincare line is perfect for s...
1,2,The new electric vehicle offers enhanced range...
2,3,Discussed quarterly sales and performance metr...
3,4,Reviewed project timelines and set new milesto...
4,5,Reviewed marketing campaigns and their outcomes.


In [28]:
from openai import Embedding
# Take the OpenAI key from the environment rather than hard-coding it in the notebook.
# A missing OPENAI_API_KEY fails here with a KeyError instead of on the first API call.
openai.api_key = os.environ["OPENAI_API_KEY"]

In [29]:

import time
from tenacity import retry, stop_after_attempt, wait_random_exponential
# Ride out rate-limit (and other) errors by retrying with randomized exponential backoff:
# waits are bounded by min=1 / max=60 and the call is abandoned after 6 attempts.
@retry(stop=stop_after_attempt(6), wait=wait_random_exponential(min=1, max=60))
def embed_text_with_backoff(text):
    """Return the text-embedding-ada-002 embedding of ``text``.

    The request goes through ``openai.Embedding.create``; the embedding of the
    first item in the response's ``data`` list is returned.
    """
    embedding_response = openai.Embedding.create(
        model="text-embedding-ada-002",
        input=text,
    )
    first_item = embedding_response['data'][0]
    return first_item['embedding']

# ******* Uncomment the loop below if you need to load the business-docs embeddings into the Pinecone index *******

# for idx, row in docs.iterrows():
#     doc_embedding = embed_text_with_backoff(row['text'])
#     index.upsert([(str(idx), doc_embedding)])
#     time.sleep(5)


In [30]:
def retrieve_documents(query, top_k=3):
    """Find the index entries most similar to ``query``.

    The query is embedded with ``embed_text_with_backoff`` and the module-level
    ``index`` is searched with that vector, using whatever similarity metric the
    index was configured with (cosine when created by this notebook).

    Args:
        query: Text to search for.
        top_k: Number of matches requested from the index.

    Returns:
        A list of ``(id, score)`` tuples, in the order the index returned them.
    """
    query_vector = embed_text_with_backoff(query)
    search_result = index.query(vector=query_vector, top_k=top_k)
    return [(hit['id'], hit['score']) for hit in search_result['matches']]


In [31]:
def generate_answer(query, retrieved_docs):
    """Ask gpt-3.5-turbo to answer ``query`` using the retrieved documents as context.

    Args:
        query: The user's question.
        retrieved_docs: Iterable of ``(doc_id, score)`` pairs. Each ``doc_id`` is
            converted to ``int`` and used as a row label in the module-level
            ``docs`` frame; the score is not used here.

    Returns:
        The content of the first completion choice, stripped of surrounding whitespace.
    """
    # Collect the text of every retrieved document, in retrieval order.
    passages = []
    for doc_id, _score in retrieved_docs:
        passages.append(docs.loc[int(doc_id), 'text'])
    context = "\n\n".join(passages)

    prompt = f"Answer the following question using the context below:\n\nContext: {context}\n\nQuestion: {query}\nAnswer:"
    completion = openai.ChatCompletion.create(
        model='gpt-3.5-turbo',
        messages=[{"role": "user", "content": prompt}],
        max_tokens=200,
        temperature=0.7,
    )

    reply = completion['choices'][0]['message']['content']
    return reply.strip()


In [32]:
def qa_bot(query):
    """Answer ``query`` by retrieving matching documents and passing them to the answer generator.

    The retrieved ``(id, score)`` pairs are printed before the answer is generated.
    Returns the generated answer text.
    """
    matches = retrieve_documents(query)
    print(matches)
    return generate_answer(query, matches)

## ***Example*** Queries

In [33]:
# Example: marketing trends. Show the question next to the bot's answer.
query = "What are the marketing trends in 2024?"
response = qa_bot(query)
print('Query:', query)
print('Response By QA Bot:', response)

[('57', 0.811000347), ('69', 0.811000347), ('75', 0.811000347)]
Query: What are the marketing trends in 2024?
Response By QA Bot: The marketing trends in 2024 are likely to focus heavily on digital marketing strategies to boost online sales. Businesses are expected to increase their efforts in areas such as social media marketing, influencer partnerships, personalized advertising, and data-driven campaigns to drive revenue through online channels. Additionally, the use of emerging technologies such as artificial intelligence and virtual reality may play a significant role in the marketing landscape in 2024.


In [34]:
# Example: sustainability practices. Show the question next to the bot's answer.
query = "What sustainable practices are companies employing these days?"
response = qa_bot(query)
print('Query:', query)
print('Response By QA Bot:', response)

[('59', 0.863239586), ('50', 0.801693201), ('14', 0.801693201)]
Query: What sustainable practices are companies employing these days?
Response By QA Bot: Companies are employing sustainable practices such as reducing waste, using renewable energy sources, implementing recycling programs, and promoting eco-friendly products and packaging. Additionally, they are focusing on improving energy efficiency, reducing carbon emissions, and supporting environmental conservation efforts.


In [35]:
# Example: the requested output format (bullets) is part of the question itself.
query = "List growth strategies used by Businesses in 2024 in bullet form."
response = qa_bot(query)
print('Query:', query)
print('Response By QA Bot:', response)

[('59', 0.805804372), ('49', 0.804618537), ('61', 0.804618537)]
Query: List growth strategies used by Businesses in 2024 in bullet form.
Response By QA Bot: - Focus on sustainability initiatives
- Appeal to eco-conscious consumers
- Develop action plans for entering new markets
- Implement green marketing strategies
- Partner with environmentally-friendly suppliers
- Offer eco-friendly products and services
- Invest in renewable energy sources
- Reduce carbon footprint
- Leverage technology for sustainable practices
- Collaborate with other eco-conscious businesses


In [36]:
# Example: electric vehicles. Show the question next to the bot's answer.
query = "What are EV Vehicles used for?"
response = qa_bot(query)
print('Query:', query)
print('Response By QA Bot:', response)

[('62', 0.842327), ('1', 0.842327), ('68', 0.842327)]
Query: What are EV Vehicles used for?
Response By QA Bot: EV vehicles are used for transportation, offering enhanced range and quick-charging capabilities.
