In [155]:
import dotenv
from transformers import LongformerTokenizer, LongformerModel
import torch
from langchain_pinecone import PineconeVectorStore
from langchain_anthropic import ChatAnthropic
from langchain.chains import RetrievalQA

# Load environment variables from a .env file into the process environment.
# NOTE(review): presumably this supplies the Anthropic and Pinecone API keys
# that the later cells rely on — confirm against the .env file.
dotenv.load_dotenv()

True

In [156]:
# Single checkpoint name shared by the tokenizer and the encoder, so the two
# are always loaded from the same pretrained weights.
LONGFORMER_CHECKPOINT = 'allenai/longformer-large-4096'

tokenizer = LongformerTokenizer.from_pretrained(LONGFORMER_CHECKPOINT)
model = LongformerModel.from_pretrained(LONGFORMER_CHECKPOINT)

In [158]:
# Define custom embedding class exposing both embed_query and embed_documents,
# the two methods LangChain vector stores call on their embedding object
class LongformerEmbedding:
    """Turn text into fixed-size vectors by mean-pooling encoder hidden states.

    Args:
        tokenizer: Callable invoked as
            ``tokenizer(text, return_tensors="pt", max_length=..., truncation=True)``;
            its result is unpacked as keyword arguments into ``model``.
        model: Callable whose return value exposes ``last_hidden_state``.
        max_length: Token limit handed to the tokenizer; longer inputs are
            truncated. Defaults to 4096, the value that used to be hard-coded.
    """

    def __init__(self, tokenizer, model, max_length=4096):
        self.tokenizer = tokenizer
        self.model = model
        self.max_length = max_length

    def embed_query(self, text):
        """Embed a single string.

        Args:
            text: The string to embed.

        Returns:
            list[float]: ``last_hidden_state`` averaged over dim 1.
        """
        inputs = self.tokenizer(
            text, return_tensors="pt", max_length=self.max_length, truncation=True
        )
        with torch.no_grad():  # inference only, no autograd graph needed
            outputs = self.model(**inputs)
        # Mean pooling. Assumes last_hidden_state is (1, tokens, hidden) for a
        # single input string. Indexing [0] removes only the leading batch
        # axis; a bare .squeeze() would also collapse a size-1 hidden axis and
        # yield a scalar instead of a list.
        return outputs.last_hidden_state.mean(dim=1)[0].tolist()

    def embed_documents(self, texts):
        """Embed several strings, returning one vector per input in order.

        Each text is encoded on its own through ``embed_query``, so no padding
        tokens are introduced into the mean.

        Args:
            texts: Iterable of strings to embed.

        Returns:
            list[list[float]]: One embedding per input; empty for no inputs.
        """
        return [self.embed_query(text) for text in texts]

# Build the embedder around the Longformer tokenizer and model loaded above
longformer_embedder = LongformerEmbedding(tokenizer=tokenizer, model=model)

In [161]:
# Name of the already-populated Pinecone index to query
index_name = "hubspot-crm-txts"

# Metadata field the vector store reads each document's text from. "text" is
# the library default, so passing it explicitly does not change behaviour.
# NOTE(review): the recorded run of the query cell logged
# "Found document with no `text` key. Skipping." for every match, so the index
# was presumably written with a different metadata field — confirm and set it here.
TEXT_KEY = "text"

# Attach to the existing index; queries are embedded with the Longformer embedder
vector_store = PineconeVectorStore.from_existing_index(
    embedding=longformer_embedder,
    index_name=index_name,
    text_key=TEXT_KEY,
)

In [162]:
# Chat model that the retrieval chain uses to generate the final answer
llm = ChatAnthropic(model='claude-3-opus-20240229', temperature=0.0)

In [163]:
# Retriever view over the Pinecone vector store, with its default search settings
retriever = vector_store.as_retriever()

# Retrieval-Augmented Generation (RAG) chain: retrieved documents are handed
# to the LLM using the "stuff" chain type
rag_chain = RetrievalQA.from_chain_type(
    chain_type="stuff",
    retriever=retriever,
    llm=llm,
)

In [164]:
# Query the RAG chain; the printed response is a dict with 'query' and 'result' keys.
# NOTE(review): the recorded run logged "Found document with no `text` key. Skipping."
# four times, so this answer was presumably generated without any retrieved
# context — verify the index's metadata text field before trusting the output.
query = "What are HubSpot Contacts?"
response = rag_chain.invoke(query)
print(response)

Found document with no `text` key. Skipping.
Found document with no `text` key. Skipping.
Found document with no `text` key. Skipping.
Found document with no `text` key. Skipping.


{'query': 'What are HubSpot Contacts?', 'result': "HubSpot Contacts are the individual people and companies that you store and manage within your HubSpot CRM. Some key things to know about HubSpot Contacts:\n\n- Each Contact record contains fields to store information like name, email address, phone number, company, lifecycle stage, and any custom properties you set up.\n\n- Contacts can be added manually, imported from files, or automatically synced from form submissions, email conversations, website activity, and integrations. \n\n- You can view a timeline of all interactions and activities for each Contact, including form submissions, page views, sales emails, calls, meetings, and notes.\n\n- Contacts can be segmented into lists based on their properties and activities for targeted marketing and sales outreach.\n\n- Contact records help track and personalize interactions through the buyer's journey and customer lifecycle in HubSpot.\n\nSo in summary, Contacts are the core records th