In [22]:
# LangChain components to use
from langchain import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter


# With CassIO, the engine powering the Astra DB integration in LangChain,
# you will also initialize the DB connection:
from PyPDF2 import PdfReader
from dotenv import load_dotenv
import os

In [23]:
#pdfs=['Rocketium - Banco_Pann_Discovery_transcript.pdf','Rocketium - Tesco_Transcript.pdf','Rocketium - Zenni_Process_Discovery_Transcript.pdf']
pdfs=['6sense - Alert_Media_Discovery.pdf','6sense - ComboCurve_Disco.pdf','6sense - disco_script_intradiem.pdf','6sense - IntelePeer Discover Call.pdf']
load_dotenv()

True

In [24]:
from typing_extensions import Concatenate
# read text from pdf
raw_text = ''
for pdf in pdfs:
    for i, page in enumerate(PdfReader(pdf).pages):
        content = page.extract_text()
        if content:
            raw_text += content


In [25]:
raw_text

"0:00\n|\nIsabel\ngood\nmorning.\n0:03\n|\nGlenn\nHey,\nhow's\nit\ngoing?\n0:06\n|\nIsabel\nIn\nmy\ngod,\nwe\nmatch\nin\nnow.\n0:09\n|\nGlenn\nAll\nright.\nI\nknow\nit\nfeels\nlike\nit's\nstill\nthere.\nHi,\nAllison.\n0:19\n|\nIsabel\nHow\nare\nyou?\nGood\nmorning?\n0:21\n|\nAlison\nGood\nmorning.\nHappy\nMonday.\n0:24\n|\nIsabel\nHappy\nMonday.\nAnd,\nwe\nwere\nlaughing.\nWe\nboth\nshowed\nup\nin\ngreen.\nWe're\nstill\nin\nthe\nsaint\npatrick's\nspirit\napparently.\n0:31\n|\nAlison\nNice.\n0:32\n|\nIsabel\nHow\nwas\nyour\nweekend?\n0:34\n|\nAlison\nIt\nwas\npretty\ngood.\nAlthough\nwith,\nyou\nknow,\nelementary\nschool\nchildren,\nit\nwas\nnot\nwild\nand\ncrazy\nby\nany\nmeans.\n0:40\n|\nGlenn\nYeah.\n0:41\n|\nIsabel\nTotally.\nThat\nis\nfair.\nThat\nis\nfair\nfor\nsure.\nWell,\nwhere\nare\nyou?\n0:46\n|\nAlison\nA\na.\n0:48\n|\nIsabel\nI'm\nin\nlos\nangeles.0:49\n|\nGlenn\nBut\nI'm\nactually\nin\nAustin.\n0:51\n|\nAlison\nNice.\nOkay.\nI\nam\nnorth\nof\nBoston.\n0:57\n|\nGlenn\nI\nlo

In [42]:
#llm = ChatOpenAI(temperature=0, model_name='gpt-3.5-turbo')
llm = ChatOpenAI(temperature=0, model_name='gpt-4o-mini')

In [43]:
llm.get_num_tokens(raw_text)

46784

In [44]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=20)
chunks = text_splitter.create_documents([raw_text])

In [45]:
len(chunks)

13

In [46]:
chunks_prompt="""
This is a call recording between 2 businesses sales people (buyer and seller), generate a summary of the things discussed in the call 
Recording:`{text}'
Summary:
"""
map_prompt_template=PromptTemplate(input_variables=['text'],
                                    template=chunks_prompt)

In [47]:
final_combine_prompt='''
Generate a recap of the things discussed in the call in 4 bullet points (not in a paragraph)
Summary: `{text}`
Bullet Points:
'''
final_combine_prompt_template=PromptTemplate(input_variables=['text'],
                                             template=final_combine_prompt)

In [48]:
summary_chain = load_summarize_chain(
    llm=llm,
    chain_type='map_reduce',
    map_prompt=map_prompt_template,
    combine_prompt=final_combine_prompt_template,
    verbose=False
)
output = summary_chain.run(chunks)

In [49]:
print(output)

- **Casual Introductions**: Isabel, Glenn, and Alison engaged in friendly greetings, sharing personal backgrounds and discussing Isabel's interest in opportunities at Alert Media.

- **6sense Demo Focus**: The call centered on preparing for a demo of 6sense, addressing Alison's team's needs and the potential of 6sense to replace ZoomInfo for account data enrichment.

- **Data Quality Concerns**: Alison raised issues regarding data trustworthiness from ZoomInfo and emphasized the need for seamless integration, while Isabel highlighted 6sense's superior data quality.

- **Sales and Marketing Insights**: The discussion included Alison's marketing strategies, the importance of account prioritization, and the need for tools to enhance their inbound and outbound sales efforts.


In [50]:
chunks_prompt_qa="""
This is a call recording between 2 businesses sales people (buyer and seller), understand the pain points of the buyer and generate a question answer pair from the perpective of the seller (which the selller needs to ask the buyer)
Recording:`{text}'
Question-Answer Pair:
"""
map_prompt_template_qa=PromptTemplate(input_variables=['text'],
                                    template=chunks_prompt_qa)

In [51]:
final_combine_prompt_qa='''
Generate 10 question answer pairs from the perpective of the seller (which the selller needs to ask the buyer)
Question-Answer Pair: `{text}`
Question-Answer Pairs:
'''
final_combine_prompt_template_qa=PromptTemplate(input_variables=['text'],
                                             template=final_combine_prompt_qa)

In [52]:
summary_chain_qa = load_summarize_chain(
    llm=llm,
    chain_type='map_reduce',
    map_prompt=map_prompt_template_qa,
    combine_prompt=final_combine_prompt_template_qa,
    verbose=False
)
output_qa = summary_chain_qa.run(chunks)

In [53]:
print(output_qa)

Here are 10 question-answer pairs from the perspective of the seller, aimed at understanding the buyer's needs and challenges:

1. **Question:** What specific goals are you hoping to achieve with a new data solution, and how do you see our platform aligning with those objectives?
   **Answer:** We're aiming to improve our lead generation and account prioritization. We believe your platform can help us better identify in-market accounts and provide actionable insights to drive our sales efforts.

2. **Question:** Can you describe the current process your team uses to evaluate leads, and what improvements are you looking for in that process?
   **Answer:** Currently, we rely heavily on manual processes to evaluate leads, which is time-consuming and often leads to missed opportunities. We're looking for a more automated solution that can provide real-time insights and streamline our lead evaluation.

3. **Question:** How do you currently measure the success of your sales and marketing eff

In [54]:
chunks_prompt_objections="""
This is a call recording between 2 businesses sales people (buyer and seller), an objection is defined as when the buyer has a doubt regarding the product (specifically about the product that the seller is selling (something related to technology and not a hygiene objection like "i dont think so")) and he stops the seller and ask him about it.Identify where did the buyer had an objection with the seller (output the exact words of the buyer)
Recording:`{text}'
Objection:
"""
map_prompt_template_objections=PromptTemplate(input_variables=['text'],
                                    template=chunks_prompt_objections)

In [55]:
final_combine_prompt_objections='''
An objection is defined as when the buyer has a doubt regarding the product that the seller is selling (specifically about the product that the seller is selling (something related to technology and not a hygiene objection like "i dont think so")) and he stops the seller and ask him about it.Identify where did the buyer had an objection with the seller (output the exact words of the buyer). Give the topmost 4 objections
Objection: `{text}`
Objections:
'''
final_combine_prompt_template_objections=PromptTemplate(input_variables=['text'],
                                             template=final_combine_prompt_objections)

In [56]:
summary_chain_objections = load_summarize_chain(
    llm=llm,
    chain_type='map_reduce',
    map_prompt=map_prompt_template_objections,
    combine_prompt=final_combine_prompt_template_objections,
    verbose=False
)
output_objections = summary_chain_objections.run(chunks)

In [57]:
print(output_objections)

1. "I guess I don’t even know which company you bought, but like who did you buy, and to what extent can you, could you potentially serve as like a Zoom info replacement?"

2. "can we import our custom fields into your system so that we can filter like there’s a number of vendors that don’t allow you, like I just told you about, we do everything based on our account grading and our people grading. And there are a number of vendors who don’t actually let you import your fields and then filter that way. And so that’s basically a nonstarter for us."

3. "No, I think I'm just really trying to understand the difference between what's included in the ala carte sales versus marketing piece, and then sort of what licensing looks like. And then obviously like just rough pricing."

4. "what sort of things that are you thinking about to like grow pipeline with intent data? Like are you trying to just get a sense of like who's researching your category? Because they're so busy and has so many play