In [1]:
from dotenv import load_dotenv, dotenv_values
import os

# Load environment variables from .env file
config = dotenv_values("C:/Users/SACHENDRA/Documents/Activeloop/.env")
load_dotenv("C:/Users/SACHENDRA/Documents/Activeloop/.env")

True

In [25]:
import os
import re
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import DeepLake

class DeepLakeLoader:
    def __init__(self, source_data_path):
        self.source_data_path = source_data_path
        self.file_name = os.path.basename(source_data_path) # What we'll name our database 
        self.data = self.split_data()
        if False:
            self.db = self.load_db()
        else:
            self.db = self.create_db()
    def split_data(self):  
        """  
        Preprocess the data by splitting it into passages.  
    
        If using a different data source, this function will need to be modified.  
    
        Returns:  
            split_data (list): List of passages.  
        """  
        with open(self.source_data_path, 'r') as f:  
            content = f.read()  
        split_data = re.split(r'(?=\d+\. )', content)
        if split_data[0] == '':  
            split_data.pop(0)  
        split_data = [entry for entry in split_data if len(entry) >= 30]  
        return split_data

# Their is a method that Splits data called split_data()
# Since we know the structure of our knowledge base, we use this method to split it into individual entries, 
# each representing an example of a customer objection. When we run our similarity search using the detected customer objection.

# After preprocessing the data, we check if we’ve already created a database for this data.
# One of the great things about Deep Lake is that it provides us with persistent storage, so we only need to create the database once.
# If you restart the app, the database doesn’t disappear!

# Creating and loading the database is super easy: 

    def load_db(self):  
        """  
        Load the database if it already exists.  
    
        Returns:  
            DeepLake: DeepLake object.  
        """  
        return DeepLake(dataset_path=f'deeplake/{self.file_name}', embedding_function=OpenAIEmbeddings(), read_only=True)  

    def create_db(self):  
        """  
        Create the database if it does not already exist.  
    
        Databases are stored in the deeplake directory.  
    
        Returns:  
            DeepLake: DeepLake object.  
        """  
        return DeepLake.from_texts(self.data, OpenAIEmbeddings(), dataset_path=f'deeplake/{self.file_name}')

    def query_db(self, query):  
        """  
        Query the database for passages that are similar to the query.  
    
        Args:  
            query (str): Query string.  
    
        Returns:  
            content (list): List of passages that are similar to the query.  
        """  
        results = self.db.similarity_search(query, k=3)  
        content = []  
        for result in results:  
            content.append(result.page_content)  
        return content
        


Just like that, our knowledge base becomes a vector database that we can now query.

We don’t want the metadata to be passed to the LLM, so we take the results of our similarity search and pull just the content from them. And that’s it! We now have our custom knowledge base stored in a Deep Lake vector database and ready to be queried!

Connecting Our Database to GPT-4
Now, all we need to do is connect our LLM to the database. First, we need to create a DeepLakeLoader instance with the path to our data

In [26]:
db = DeepLakeLoader("salestesting.txt")

deeplake/salestesting.txt loaded successfully.


Evaluating ingest: 100%|█████████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00

Dataset(path='deeplake/salestesting.txt', tensors=['embedding', 'ids', 'metadata', 'text'])

  tensor     htype     shape     dtype  compression
  -------   -------   -------   -------  ------- 
 embedding  generic  (1, 1536)  float32   None   
    ids      text     (1, 1)      str     None   
 metadata    json     (1, 1)      str     None   
   text      text     (1, 1)      str     None   





In [28]:
text = """
Objection: "There's no money."
It could be that your prospect's business simply isn't big enough or generating enough cash right now to afford a product like yours. Track their growth and see how you can help your prospect get to a place where your offering would fit into their business.

Objection: "We don't have any budget left this year."
A variation of the "no money" objection, what your prospect's telling you here is that they're having cash flow issues. But if there's a pressing problem, it needs to get solved eventually. Either help your prospect secure a budget from executives to buy now or arrange a follow-up call for when they expect funding to return.

Objection: "We need to use that budget somewhere else."
Prospects sometimes try to earmark resources for other uses. It's your job to make your product/service a priority that deserves budget allocation now. Share case studies of similar companies that have saved money, increased efficiency, or had a massive ROI with you.
"""

# Split the text into a list using the keyword "Objection: "
objections_list = text.split("Objection: ")[1:]  # We ignore the first split as it is empty

# Now, prepend "Objection: " to each item as splitting removed it
objections_list = ["Objection: " + objection for objection in objections_list]



In [29]:
detected_objection = "We need to use that budget somewhere else"

results = db.query_db(detected_objection)


In [35]:
from langchain.chat_models import ChatOpenAI
from langchain.schema import SystemMessage, HumanMessage, AIMessage

chat = ChatOpenAI()

system_message = SystemMessage(content= """Reminder: You're SalesCopilot.Your goal is to help the user in their sales call with the customer. 
Using conversation transcripts, you'll help create responses and guide the user (labeled You).
Keep your responses helpful, concise, and relevant to the conversation.  
The transcripts may be fragmented, incomplete, or even incorrect. Do not ask for clarification, do your best to understand what
the transcripts say based on context. Be sure of everything you say.
Keep responses concise and to the point. Starting now, answer the user's question based on the transcript:""")

human_message = HumanMessage(content=f'Customer objection: {detected_objection} | Relevant guidelines: {results}')

response = chat([system_message, human_message])

In [36]:
print(response.content)

Response to customer objection: "We need to use that budget somewhere else": 
Acknowledge the customer's perspective and emphasize the value and benefits your product/service can bring to their business. Share relevant case studies of companies that have achieved cost savings, increased efficiency, or significant ROI by using your offering. Show how prioritizing your solution now can lead to long-term benefits for their business.
