In [4]:
from dotenv import load_dotenv
import os

from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_community.llms.octoai_endpoint import OctoAIEndpoint
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document

from langchain_community.document_loaders import PyPDFLoader

from langchain_community.embeddings import OctoAIEmbeddings
from langchain_community.vectorstores import Milvus

import pandas as pd

# Set the maximum number of rows and columns to display
pd.set_option('display.max_rows', None)  # None means show all rows
pd.set_option('display.max_columns', None)  # None means show all columns
# Adjust the overall width for displaying DataFrames in the notebook
pd.set_option('display.width', 1000)


# Optionally, set the maximum width of each column to avoid truncation
pd.set_option('display.max_colwidth', None)  # None means no truncation


load_dotenv()
# print(os.environ["OCTOAI_API_TOKEN"])
os.environ["OCTOAI_API_TOKEN"] = os.getenv("OCTOAI_API_TOKEN", "default_value")

In [5]:
embeddings = OctoAIEmbeddings(
    endpoint_url="https://text.octoai.run/v1/embeddings")

In [7]:
pdf_file = "data/TheNewStrategicSelling.pdf"
pdf_loader = PyPDFLoader(pdf_file)
data = pdf_loader.load()

data_df = pd.DataFrame(data)
data_df.describe()

Unnamed: 0,0,1,2
count,334,334,334
unique,333,334,1
top,"(page_content, )","(metadata, {'source': 'data/TheNewStrategicSelling.pdf', 'page': 333})","(type, Document)"
freq,2,1,334


In [8]:
data_df.head(5)

Unnamed: 0,0,1,2
0,"(page_content, )","(metadata, {'source': 'data/TheNewStrategicSelling.pdf', 'page': 0})","(type, Document)"
1,"(page_content, STRATEGIC\tSELLING®\tis\ta\tService\tMark\tof\tMiller\tHeiman,\tInc.,\t1595\nMeadow\tWood\tLane,\tSuite\t2,\tReno,\tNV\t89502\nAll\trights\treserved.\tWithout\tthe\twritten\tauthorization\tof\tMiller\tHeiman,\tInc.,\tno\npart\tof\tthis\tbook,\twhether\ttext,\tdiagram,\tchart\tor\tother\tillustration,\tand\twhether\nor\tnot\tit\tcarries\ta\tfurther\tcopyright\tnotice,\tmay\tbe\t(i)\treproduced\tin\tany\tform\tor\nby\tany\tmeans,\telectronic\tor\tmechanical,\tincluding\tphotocopying,\trecording,\tor\nby\tany\tinformation\tstorage\tand\tretrieval\tsystem;\tor\t(ii)\tutilized\tfor\tany\tbusiness\npurpose\tincluding,\tbut\tnot\tlimited\tto,\tconducting\tany\ttraining\tseminar.\nGrand\tCentral\tPublishing\tEdition\nCopyright\t©\t1998\tby\tMiller\tHeiman,\tInc.\nCopyright\t©\t1995\tby\tMiller\tHeiman\t&\tAssociates,\tInc.\nCharts\tcopyright\t©\t1998\tby\tMiller\tHeiman\t&\tAssociates,\tInc.\nAll\trights\treserved.\tExcept\tas\tpermitted\tunder\tthe\tU.S.\tCopyright\tAct\tof\t1976,\nno\tpart\tof\tthis\tpublication\tmay\tbe\treproduced,\tdistributed,\tor\ttransmitted\tin\tany\nform\tor\tby\tany\tmeans,\tor\tstored\tin\ta\tdatabase\tor\tretrieval\tsystem,\twithout\tthe\nprior\twritten\tpermission\tof\tthe\tpublisher.\nThis\tGrand\tCentral\tPublishing\tedition\tis\tpublished\tby\tarrangement\twith\tWilliam\nMorrow\tand\tCompany,\t1350\tAvenue\tof\tthe\tAmericas,\tNew\tYork,\tNY\t10019\nBusiness\tPlus\nHachette\tBook\tGroup\n237\tPark\tAvenue\nNew\tYork,\tNY\t10017\nVisit\tour\tWeb\tsite\tat\t\nwww.HachetteBookGroup.com\n.\nThe\tBusiness\tPlus\tname\tand\tlogo\tis\ta\ttrademark\tof\tHachette\tBook\tGroup,\tInc.\nFirst\teBook\tEdition:\tMarch\t1986\nISBN:\t978-0-446-54878-6)","(metadata, {'source': 'data/TheNewStrategicSelling.pdf', 'page': 1})","(type, Document)"
2,"(page_content, Contents\n\t\nPREFACE\n\t\nFOREWORD\n\t\nIF\tIT\tAIN’T\tBROKE:\tTHE\t“WHY”\tBEHIND\tTHE\tNEW\tSTRATEGIC\nSELLING\n\t\nPART\t1:\tSTRATEGIC\tSELLING\n\t\nCHAPTER\t1:\tSUCCESSFUL\tSELLING\tIN\tA\tWORLD\tOF\tCONSTANT\nCHANGE\n\t\nCHAPTER\t2:\tSTRATEGY\tAND\tTACTICS\tDEFINED\n\t\nCHAPTER\t3:\tYOUR\tSTARTING\tPOINT:\tPOSITION\n\t\nCHAPTER\t4:\tA\tGLANCE\tAT\tTHE\tSTRATEGY\tBLUEPRINT:\tTHE\tSIX\tKEY\nELEMENTS\tOF\tSTRATEGIC\tSELLING\n\t\nPART\t2:\tBUILDING\tON\tBEDROCK:\tLAYING\tTHE\tFOUNDATION\tOF\nSTRATEGIC\tANALYSIS\n\t\nCHAPTER\t5:\tKEY\tELEMENT\t1:\tBUYING\tINFLUENCES\n\t)","(metadata, {'source': 'data/TheNewStrategicSelling.pdf', 'page': 2})","(type, Document)"
3,"(page_content, CHAPTER\t6:\tKEY\tELEMENT\t2:\tRED\tFLAGS/LEVERAGE\tFROM\nSTRENGTH\n\t\nCHAPTER\t7:\tBUYER\tLEVEL\tOF\tRECEPTIVITY\n\t\nCHAPTER\t8:\tKEY\tELEMENT\t3:\tTHE\tFOUR\tRESPONSE\tMODES\n\t\nCHAPTER\t9:\tTHE\tIMPORTANCE\tOF\tWINNING\n\t\nCHAPTER\t10:\tKEY\tELEMENT\t4:\tWIN-RESULTS\n\t\nPART\t3:\tCOMMON\tPROBLEMS,\tUNCOMMON\tSOLUTIONS\n\t\nCHAPTER\t11:\tGETTING\tTO\tTHE\tECONOMIC\tBUYING\tINFLUENCE:\nSTRATEGIES\tAND\tTACTICS\n\t\nCHAPTER\t12:\tTHE\tCOACH:\tDEVELOPING\tYOUR\tPRIME\nINFORMATION\tRESOURCE\n\t\nCHAPTER\t13:\tWHAT\tABOUT\tTHE\tCOMPETITION?\n\t\nPART\t4:\tSTRATEGY\tAND\tTERRITORY:\tFOCUSING\tON\tYOUR\tWIN-\nWIN\tCUSTOMERS\n\t\nCHAPTER\t14:\tKEY\tELEMENT\t5:\tIDEAL\tCUSTOMER\n\t\nCHAPTER\t15:\tYOUR\tIDEAL\tCUSTOMER\tPROFILE:\tDEMOGRAPHICS\nAND\tPSYCHOGRAPHICS\n\t\nPART\t5:\tSTRATEGY\tAND\tTERRITORY:\tMANAGING\tYOUR\nSELLING\tTIME\n\t)","(metadata, {'source': 'data/TheNewStrategicSelling.pdf', 'page': 3})","(type, Document)"
4,"(page_content, CHAPTER\t16:\tOF\tTIME,\tTERRITORY,\tAND\tMONEY\n\t\nCHAPTER\t17:\tKEY\tELEMENT\t6:\tTHE\tSALES\tFUNNEL\n\t\nCHAPTER\t18:\tPRIORITIES\tAND\tALLOCATION:\tWORKING\tTHE\nFUNNEL\n\t\nPART\t6:\tFROM\tANALYSIS\tTO\tACTION\n\t\nCHAPTER\t19:\tYOUR\tACTION\tPLAN\n\t\nCHAPTER\t20:\tSTRATEGY\tWHEN\tYOU\tHAVE\tNO\tTIME\n\t\nCHAPTER\t21:\tSTRATEGIC\tSELLING:\tA\tLIFETIME\tAPPROACH\n\t\nAFTER\tTWENTY\tYEARS:\tRESPONDING\tTO\tOUR\tCLIENTS’\tMOST\nCHALLENGING\tQUESTIONS\n\t\nAbout\tMiller\tHeiman\n\t\n\t\n\t)","(metadata, {'source': 'data/TheNewStrategicSelling.pdf', 'page': 4})","(type, Document)"


In [9]:
data_df['page_content'] = data_df[0].apply(lambda x: x[1])
data_df['metadata'] = data_df[1].apply(lambda x: x[1])

print(data_df['metadata'].head(5))

0    {'source': 'data/TheNewStrategicSelling.pdf', 'page': 0}
1    {'source': 'data/TheNewStrategicSelling.pdf', 'page': 1}
2    {'source': 'data/TheNewStrategicSelling.pdf', 'page': 2}
3    {'source': 'data/TheNewStrategicSelling.pdf', 'page': 3}
4    {'source': 'data/TheNewStrategicSelling.pdf', 'page': 4}
Name: metadata, dtype: object


In [10]:
# Iterate through dataframe and create a document for each row
documents = []
for i, row in data_df.iterrows():
    document = Document(
        page_content=row["page_content"],
        metadata={"page": row["metadata"]["page"]}
    )
    documents.append(document)

In [11]:
print(f"Loaded {len(documents)} documents")

Loaded 334 documents


In [12]:
print(f"Document: {documents[1]}")

Document: page_content='STRATEGIC\tSELLING®\tis\ta\tService\tMark\tof\tMiller\tHeiman,\tInc.,\t1595\nMeadow\tWood\tLane,\tSuite\t2,\tReno,\tNV\t89502\nAll\trights\treserved.\tWithout\tthe\twritten\tauthorization\tof\tMiller\tHeiman,\tInc.,\tno\npart\tof\tthis\tbook,\twhether\ttext,\tdiagram,\tchart\tor\tother\tillustration,\tand\twhether\nor\tnot\tit\tcarries\ta\tfurther\tcopyright\tnotice,\tmay\tbe\t(i)\treproduced\tin\tany\tform\tor\nby\tany\tmeans,\telectronic\tor\tmechanical,\tincluding\tphotocopying,\trecording,\tor\nby\tany\tinformation\tstorage\tand\tretrieval\tsystem;\tor\t(ii)\tutilized\tfor\tany\tbusiness\npurpose\tincluding,\tbut\tnot\tlimited\tto,\tconducting\tany\ttraining\tseminar.\nGrand\tCentral\tPublishing\tEdition\nCopyright\t©\t1998\tby\tMiller\tHeiman,\tInc.\nCopyright\t©\t1995\tby\tMiller\tHeiman\t&\tAssociates,\tInc.\nCharts\tcopyright\t©\t1998\tby\tMiller\tHeiman\t&\tAssociates,\tInc.\nAll\trights\treserved.\tExcept\tas\tpermitted\tunder\tthe\tU.S.\tCopyright\tAc

In [13]:
vector_store = Milvus.from_documents(
    documents=documents,
    embedding=embeddings,
    connection_args={"host": "localhost", "port": 19530},
    collection_name="millertime"
)

In [14]:
llm = OctoAIEndpoint(
    endpoint_url="https://text.octoai.run/v1/chat/completions",
    model_kwargs={
        "model": "mixtral-8x7b-instruct-fp16",
        "max_tokens": 1024,
        "presence_penalty": 0,
        "temperature": 0.01,
        "top_p": 0.9,
        "messages": [
            {
                "role": "system",
                "content": "You are a helpful assistant. Keep your responses limited to one short paragraph if possible.",
            },
        ],
    },
)

In [15]:
retriever = vector_store.as_retriever()

In [16]:
template = """Answer the question based only on the following context:
{context}

You have knowledge around Miller Heiman sales methodologies and overall business development strategies. Below is an instruction that describes a request from a sales agent looking for helpful information to provide a customer or help close a deal.
The target customer that the agent is speaking with has a job title of {job_title}. Apply the 6 core concepts of solution selling that can benefit a sales agent with these additional details:
- Decision Maker: {is_decision_maker}
- Sales Stage: {stage_of_sales}
- Main Challenges: {challenges}
- Deal Risk: {deal_risk}
- Deal Size: {deal_size}
- Custom Notes: {custom_notes}
- Potential Solution Features: {solution_features}
- Key Benefits: {key_benefits}
- Customization Options: {customization_options}
- Integration Points: {integration_points}
Write a response that appropriately completes the request.
Instruction:
{question}
Response: """

prompt = PromptTemplate.from_template(template)

In [17]:
data = {
    'context': retriever,
    'job_title': "VP of Marketing",
    'is_decision_maker': "Yes",
    'stage_of_sales': "Qualification",
    'challenges': "Competitor has a better product",
    'deal_risk': "High",
    'deal_size': "Large",
    'custom_notes': "Customer is interested in a long term partnership",
    'question': "What is the best introduction to a VP of Marketing at coca cola for our Tableau Server product?",
    'solution_features': "Advanced data visualization, real-time analytics",
    'key_benefits': "Enhanced decision-making capabilities, increased ROI",
    'customization_options': "Custom dashboards, branded reports",
    'integration_points': "CRM integration, social media analytics"
}

In [18]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser


chain = (
    RunnablePassthrough()
    | prompt
    | llm
    | StrOutputParser()
)

In [19]:
output = chain.invoke(data)

display(print(output))

 When introducing our Tableau Server product to the VP of Marketing at Coca Cola, it's crucial to focus on how our solution can address their main challenges, such as the competitor's better product. Begin by emphasizing the key benefits of our solution, like enhanced decision-making capabilities and increased ROI, which directly address their concerns. Mention the potential solution features like advanced data visualization and real-time analytics, which can provide a competitive edge. Highlight the customization options, such as custom dashboards and branded reports, to show that our solution can be tailored to their specific needs. Lastly, discuss integration points, like CRM integration and social media analytics, to demonstrate how our product can seamlessly fit into their existing marketing technology stack. Since they are interested in a long-term partnership, reassure them of our commitment to their success and our ability to support their marketing strategies in the long run.


None