In [1]:
# Testing LangChain v1.0

In [2]:
# https://docs.langchain.com/oss/python/langchain/rag

In [2]:
from dotenv import dotenv_values
import os
from openai import OpenAI

# Load key/value settings from the local .env file; the API keys used below are read from it.
config = dotenv_values(".env")

# Module-level OpenAI client; get_embedding() calls client.embeddings.create on it.
client = OpenAI(api_key=config["OPEN_AI_KEY"])

In [5]:
from langchain_openai import OpenAIEmbeddings  

# LangChain embeddings object using the same model name that get_embedding() defaults to.
# NOTE(review): `embeddings` is not referenced by any other cell visible here — confirm it is still needed.
model_name = 'text-embedding-3-large'  
embeddings = OpenAIEmbeddings(  
    model=model_name,  
    openai_api_key=config["OPEN_AI_KEY"]  
)

In [3]:
from pinecone import Pinecone, ServerlessSpec

# Pinecone client; the API key comes from the .env-backed `config` mapping.
pc = Pinecone(api_key=config["PINECONE_API_KEY"])

# Alternative that reads the key from the process environment instead:
# pc = Pinecone(api_key=os.environ.get('PINECONE_API_KEY'))

In [4]:
# Helper that embeds text so the resulting vector can be passed to Pinecone's index.query

from openai import OpenAI

def get_embedding(text, model="text-embedding-3-large"):
    """
    Generate a vector embedding for ``text`` using an OpenAI embedding model.

    Args:
        text: The string to embed. Newlines are replaced with spaces before
            the request is sent.
        model: Name of the embedding model passed to the embeddings endpoint.

    Returns:
        The embedding stored on the first item of the response's ``data``.

    Raises:
        ValueError: If ``text`` is empty or ``None``. Raised before any
            request is made.
    """
    if not text:
        # Fail fast with a clear message instead of spending a request on
        # input the embeddings endpoint documents as invalid.
        raise ValueError("text must be a non-empty string")

    text = text.replace("\n", " ")  # Best practice is to replace newlines
    response = client.embeddings.create(
        input=[text],
        model=model
    )
    # The embedding is in the 'data' array of the response
    return response.data[0].embedding

# Example usage:
# Embed one query sentence; `embedding_vector` is reused as the query vector
# by the Pinecone index.query cell further down.
sentence = "What does the knowledge source say about the speed of light?"
embedding_vector = get_embedding(sentence)

# Print only a 50-element prefix and the length to keep the output short.
print(f"Original sentence: {sentence}")
print(f"Embedding vector (first 50 dimensions): {embedding_vector[:50]}...")
print(f"Vector dimension: {len(embedding_vector)}")


Original sentence: What does the knowledge source say about the speed of light?
Embedding vector (first 50 dimensions): [0.0032393874134868383, -0.015876037999987602, -0.0058910236693918705, 0.02307767979800701, -0.02295607700943947, -0.019956517964601517, 0.04064266011118889, 0.03507591038942337, 0.020929347723722458, 0.028860611841082573, 0.011248341761529446, 0.057883359491825104, 0.024172112345695496, -0.005850489251315594, -0.01335613988339901, 0.0789613351225853, 0.013254803605377674, 0.00015327133587561548, -0.016078710556030273, 0.02306416817009449, -0.019172849133610725, -0.009437798522412777, 0.018713457509875298, -0.020726675167679787, -0.02545570768415928, 0.0070732817985117435, 0.009768830612301826, 0.003894696244969964, -0.002193933352828026, 0.009755318984389305, 0.03245467692613602, -0.0036413553170859814, -0.005739019252359867, -0.015578784979879856, 0.0014896453358232975, -0.05180317535996437, -0.02125362493097782, -0.022266987711191177, -0.02749594673514366, -0.00792

In [5]:
# Pinecone - Debug

from pinecone import Pinecone

# pc = Pinecone(api_key=os.environ.get('PINECONE_API_KEY'))

# To get the unique host for an index, 
# see https://docs.pinecone.io/guides/manage-data/target-an-index
# Target the index by host URL, using the `pc` client created in an earlier cell.
index = pc.Index(host="https://test-index-3-kuv1rfi.svc.aped-4627-b74a.pinecone.io")

# Index.search will not work with this index
# Integrated inference is not configured for this index

# Disabled attempt at a text-input query, kept for reference:
# results = index.query(
#     namespace="__default__", 
#     query={
#         "inputs": {"text": "Disease prevention"}
#     },
#     top_k=3,
#     include_metadata=True,
#     include_values=False
# )

# print(results)

# Pinecone index.query
# Query the "__default__" namespace with the precomputed `embedding_vector`;
# top_k=3, with metadata included and vector values omitted.

response = index.query(
    namespace="__default__",
    vector=embedding_vector, 
    top_k=3,
    include_metadata=True,
    include_values=False
)

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# Pull the list of matches out of the query response.
# NOTE(review): `matches` is not used by any later cell visible here.
matches = response['matches']

In [7]:
# Display the whole query response converted to a plain dict.
response.to_dict()

{'matches': [{'id': '4ba7ae68-92db-42e2-b5c7-c18141c84da3',
   'score': 0.578466415,
   'values': [],
   'metadata': {'_id': 'rec9',
    'category': 'physics',
    'text': 'The speed of light in a vacuum is approximately 299,792 km/s.'}},
  {'id': '01cf5e66-b57b-42c3-a46e-ef66cc490e86',
   'score': 0.348688155,
   'values': [],
   'metadata': {'_id': 'rec42',
    'category': 'physics',
    'text': 'The speed of sound is around 343 meters per second in air.'}},
  {'id': '45277ff9-a7b7-43e3-a480-6db9d2978300',
   'score': 0.290880233,
   'values': [],
   'metadata': {'_id': 'rec31',
    'category': 'astronomy',
    'text': 'The universe is expanding, according to the Big Bang theory.'}}],
 'namespace': '__default__',
 'usage': {'read_units': 1}}

In [58]:
import langchain
# Assign False to the module-level `verbose` attribute.
# NOTE(review): confirm this attribute is still honored by LangChain v1.
langchain.verbose = False
# langchain.debug = False
# langchain.llm_cache = False
from langchain.chat_models import init_chat_model

# Chat model passed to every create_agent(...) call in this notebook.
model = init_chat_model("gpt-4.1-mini", model_provider="openai", api_key=config["OPEN_AI_KEY"])

In [9]:
# Original

from langchain.agents.middleware import dynamic_prompt, ModelRequest
from langchain.agents import create_agent


@dynamic_prompt
def prompt_with_context(request: ModelRequest) -> str:
    """Build a system prompt containing documents retrieved for the latest message.

    The text of the last message in ``request.state["messages"]`` is used as
    the similarity-search query. The ``page_content`` of each returned
    document is joined with blank lines and appended to a fixed instruction.
    """
    # NOTE(review): `vector_store` is not defined in any cell visible in this
    # notebook — confirm where it comes from before running this version.
    query_text = request.state["messages"][-1].text
    hits = vector_store.similarity_search(query_text)
    context_block = "\n\n".join([hit.page_content for hit in hits])

    return (
        "You are a helpful assistant. Use the following context in your response:"
        f"\n\n{context_block}"
    )


# Agent with no tools; the dynamic prompt above is registered as its only middleware.
agent = create_agent(
    model,
    tools=[],
    middleware=[prompt_with_context],
)

In [19]:
# v2

from langchain.agents.middleware import dynamic_prompt, ModelRequest
from langchain.agents import create_agent


@dynamic_prompt
def prompt_with_context(request: ModelRequest) -> str:
    """Build a system prompt from Pinecone matches for the latest message.

    The text of the last message in ``request.state["messages"]`` is embedded
    with ``get_embedding`` and used to query the "__default__" namespace of
    ``index``. The ``matches`` entry of the response dict is interpolated
    as-is into the prompt after a fixed instruction.
    """
    query_text = request.state["messages"][-1].text

    # Embed the message, then query the Pinecone index with that vector.
    query_vector = get_embedding(query_text)
    search_kwargs = dict(
        namespace="__default__",
        vector=query_vector,
        top_k=3,
        include_metadata=True,
        include_values=False,
    )
    matches = index.query(**search_kwargs).to_dict()["matches"]

    return (
        "You are a helpful assistant. Use the following context in your response:"
        f"\n\n{matches}"
    )


# Agent with no tools; the dynamic prompt above is registered as its only middleware.
agent = create_agent(
    model,
    tools=[],
    middleware=[prompt_with_context],
)

In [20]:
from langchain.messages import SystemMessage, HumanMessage, AIMessage
from langchain_core.output_parsers import StrOutputParser

# Single-turn input: one user message in the {"messages": [...]} shape passed to agent.invoke.
messages = {
    "messages": [
        {"role": "user", "content": "What is Moncler?"}
    ]
}


# Run the agent that was created with the prompt_with_context middleware.
result = agent.invoke(messages)

In [21]:
# Show only the content of the final message in the result.
result['messages'][-1].content

'The provided context does not include information about Moncler. However, Moncler is a luxury fashion brand known for its high-end outerwear, particularly its down jackets and skiwear. It was founded in 1952 in Italy and has become popular worldwide for its stylish and functional clothing. If you would like, I can help find more detailed information about Moncler.'

In [93]:
# Short-term Memory

In [94]:
from langgraph.checkpoint.memory import InMemorySaver

# Rebuild the agent with an in-memory checkpointer; the invoke call in the next cell passes a thread_id.
agent = create_agent(model, tools=[], middleware=[prompt_with_context], checkpointer=InMemorySaver())

In [104]:
# One user message for the checkpointer-backed agent.
messages = {
    "messages": [
        {"role": "user", "content": "I live in California"}
    ]
}


# The second argument passes thread_id "1" under the "configurable" key.
result = agent.invoke(messages, {"configurable": {"thread_id": "1"}})

In [105]:
# Print the whole final message object (content plus its metadata fields), not just the text.
print(result['messages'][-1])

content='Thank you for sharing that you live in California! Is there anything specific you would like to know or discuss about California?' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 24, 'prompt_tokens': 386, 'total_tokens': 410, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4.1-mini-2025-04-14', 'system_fingerprint': 'fp_4c2851f862', 'id': 'chatcmpl-CWGhEwx16QtacanFaEFGqkG0WoF57', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None} id='lc_run--9a450319-21d7-4f6c-89b5-af42e1378bd7-0' usage_metadata={'input_tokens': 386, 'output_tokens': 24, 'total_tokens': 410, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}


In [22]:
# Testing chat using multiple namespaces

In [1]:
# Add record to new namespace

# Create a sample dataset

# NOTE(review): `records` is not referenced by any later cell visible here, and it stores the
# text under "chunk_text" whereas the upsert cell writes metadata["text"] — confirm intended usage.
records = [
    { "_id": "rec1", "chunk_text": "Palantir Technologies Inc. is an American publicly traded company specializing in software platforms for data. Headquartered in Denver, Colorado, it was founded in 2003 by Peter Thiel, Stephen Cohen, Joe Lonsdale, Alex Karp, and Nathan Gettings. The current revenue for Palantir in 2024 is $2.87 billion, and their total net income is $462 million (2024). They currently have 3,936 employees (2024).", "category": "business" }
]

In [6]:
# Re-creates the `embeddings` object from an earlier cell with the same model name,
# this time passing the key through the `api_key` keyword.
model_name = "text-embedding-3-large"

embeddings = OpenAIEmbeddings(model=model_name, api_key=config['OPEN_AI_KEY'])

In [5]:
# Helper that embeds text so the resulting vector can be passed to Pinecone's index.query

from openai import OpenAI

# NOTE(review): this re-defines get_embedding, which is already defined in an
# earlier cell of this notebook; consider keeping a single definition.
def get_embedding(text, model="text-embedding-3-large"):
    """
    Generate a vector embedding for ``text`` using an OpenAI embedding model.

    Args:
        text: The string to embed. Newlines are replaced with spaces before
            the request is sent.
        model: Name of the embedding model passed to the embeddings endpoint.

    Returns:
        The embedding stored on the first item of the response's ``data``.

    Raises:
        ValueError: If ``text`` is empty or ``None``. Raised before any
            request is made.
    """
    if not text:
        # Fail fast with a clear message instead of spending a request on
        # input the embeddings endpoint documents as invalid.
        raise ValueError("text must be a non-empty string")

    text = text.replace("\n", " ")  # Best practice is to replace newlines
    response = client.embeddings.create(
        input=[text],
        model=model
    )
    # The embedding is in the 'data' array of the response
    return response.data[0].embedding

# Example usage:
# This sentence and its embedding are what the index.upsert cell below writes to the "text" namespace.
sentence = "The Rolex Yachtmaster 40 was first released in 1992. The current reference number 126622 is $12,500 USD. It features a 40 mm oyster case in oystersteel and platinum."
embedding_vector = get_embedding(sentence)

# Print only a 50-element prefix and the length to keep the output short.
print(f"Original sentence: {sentence}")
print(f"Embedding vector (first 50 dimensions): {embedding_vector[:50]}...")
print(f"Vector dimension: {len(embedding_vector)}")


Original sentence: The Rolex Yachtmaster 40 was first released in 1992. The current reference number 126622 is $12,500 USD. It features a 40 mm oyster case in oystersteel and platinum.
Embedding vector (first 50 dimensions): [0.006935261655598879, 0.037173446267843246, -0.013058535754680634, 0.03287992626428604, -0.036461565643548965, -0.048808224499225616, -0.016795901581645012, 0.03497106954455376, -0.029454005882143974, -0.01055583544075489, -0.00029424112290143967, 0.0012916716514155269, 0.03216804563999176, 0.002591685624793172, 0.037039969116449356, -0.017574520781636238, -0.008014203980565071, -0.0106058893725276, -0.09645964205265045, -5.852669710293412e-05, -0.03975400701165199, -0.022457566112279892, -0.041266750544309616, 0.009348977357149124, -0.02776329219341278, -0.008164365775883198, -0.006896330509334803, -0.002861421089619398, 0.005124974530190229, -0.018219660967588425, 0.0003378645924385637, -0.009421277791261673, -0.04707301780581474, 0.03474860638380051, -0.0082311

In [4]:
# Target the same index host URL that other cells in this notebook use.
index = pc.Index(host="https://test-index-3-kuv1rfi.svc.aped-4627-b74a.pinecone.io")

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# Write one vector into the "text" namespace, storing the source sentence in its metadata.
# NOTE(review): the id is hard-coded to "rec1"; presumably upserting an existing id replaces
# that record — confirm this is intended when re-running with a different sentence.
index.upsert(
    vectors=[{
        "id": "rec1",
        "values": embedding_vector,
        "metadata": {
            "category": "business",
            "text": sentence
        }
    }],
    namespace="text"
)

{'upserted_count': 1}

In [3]:
# Use context from multiple namespaces in the chat model

In [7]:
# Overwrites `sentence` with a question and embeds it; the result is the query vector for the next cell.
sentence = "What is the Rolex Yachtmaster?"

sentence_embedding = get_embedding(sentence)

In [8]:
# Pinecone - Debug

from pinecone import Pinecone

# pc = Pinecone(api_key=os.environ.get('PINECONE_API_KEY'))

# To get the unique host for an index, 
# see https://docs.pinecone.io/guides/manage-data/target-an-index
index = pc.Index(host="https://test-index-3-kuv1rfi.svc.aped-4627-b74a.pinecone.io")

# Query only the "text" namespace with `sentence_embedding`;
# top_k=3, with metadata included and vector values omitted.
response = index.query(
    namespace="text",
    vector=sentence_embedding, 
    top_k=3,
    include_metadata=True,
    include_values=False
)

In [38]:
# Alias whatever `response` currently holds; its matches are concatenated with response_1's in a later cell.
response_2 = response

In [50]:
# NOTE(review): `response_1` is never assigned at top level in any cell visible here
# (only as a local inside prompt_with_context) — this cell relies on hidden kernel state.
response_1['matches']

[{'id': 'ba5eac1d-b1b3-42a2-a6cf-4803afe2fddd',
  'metadata': {'_id': 'rec52',
               'category': 'business',
               'text': 'Palantir is a software company specializing in data '
                       'analytics and integration platforms, primarily serving '
                       'government and commercial clients. Its technology, '
                       'including the platforms Gotham and Foundry, helps '
                       'organizations analyze and integrate data from disparate '
                       'sources to improve decision-making.'},
  'score': 0.468770981,
  'values': []},
 {'id': '34e9e4b6-51ee-4de0-b080-399104d92073',
  'metadata': {'_id': 'rec11',
               'category': 'biology',
               'text': 'The human brain has approximately 86 billion neurons.'},
  'score': 0.175664902,
  'values': []},
 {'id': 'aeba9edb-e734-4e94-a281-cb3cc5213e34',
  'metadata': {'_id': 'rec51',
               'category': 'business',
               'text': 'Mak

In [51]:
# Inspect the matches held by the aliased response.
response_2['matches']

[{'id': 'rec1',
  'metadata': {'category': 'business',
               'text': 'Palantir Technologies Inc. is an American publicly '
                       'traded company specializing in software platforms for '
                       'data. Headquartered in Denver, Colorado, it was founded '
                       'in 2003 by Peter Thiel, Stephen Cohen, Joe Lonsdale, '
                       'Alex Karp, and Nathan Gettings. The current revenue for '
                       'Palantir in 2024 is $2.87 billion, and their total net '
                       'income is $462 million (2024). They currently have '
                       '3,936 employees (2024).'},
  'score': 0.661186218,
  'values': []}]

In [54]:
# Concatenate the two match lists (response_1's first, then response_2's);
# no re-sorting by score or de-duplication is done here.
response_3 = response_1['matches'] + response_2['matches']

In [56]:
# Display the combined match list.
response_3

[{'id': 'ba5eac1d-b1b3-42a2-a6cf-4803afe2fddd',
  'metadata': {'_id': 'rec52',
               'category': 'business',
               'text': 'Palantir is a software company specializing in data '
                       'analytics and integration platforms, primarily serving '
                       'government and commercial clients. Its technology, '
                       'including the platforms Gotham and Foundry, helps '
                       'organizations analyze and integrate data from disparate '
                       'sources to improve decision-making.'},
  'score': 0.468770981,
  'values': []},
 {'id': '34e9e4b6-51ee-4de0-b080-399104d92073',
  'metadata': {'_id': 'rec11',
               'category': 'biology',
               'text': 'The human brain has approximately 86 billion neurons.'},
  'score': 0.175664902,
  'values': []},
 {'id': 'aeba9edb-e734-4e94-a281-cb3cc5213e34',
  'metadata': {'_id': 'rec51',
               'category': 'business',
               'text': 'Mak

In [59]:
# v3

from langchain.agents.middleware import dynamic_prompt, ModelRequest
from langchain.agents import create_agent


@dynamic_prompt
def prompt_with_context(request: ModelRequest) -> str:
    """Build a system prompt from Pinecone matches found in two namespaces.

    The text of the last message in ``request.state["messages"]`` is embedded
    with ``get_embedding``. The same query is run against the "__default__"
    and "text" namespaces of ``index``, the two ``matches`` lists are
    concatenated in that order, and the combined list is interpolated as-is
    into the prompt after a fixed instruction.
    """
    query_text = request.state["messages"][-1].text
    query_vector = get_embedding(query_text)

    # Run the identical search once per namespace, preserving the original order.
    default_hits, text_hits = [
        index.query(
            namespace=namespace,
            vector=query_vector,
            top_k=3,
            include_metadata=True,
            include_values=False,
        )
        for namespace in ("__default__", "text")
    ]
    combined_matches = default_hits["matches"] + text_hits["matches"]

    return (
        "You are a helpful assistant. Use the following context in your response:"
        f"\n\n{combined_matches}"
    )


# Agent with no tools; the dynamic prompt above is registered as its only middleware.
agent = create_agent(
    model,
    tools=[],
    middleware=[prompt_with_context],
)

In [62]:
from langchain.messages import SystemMessage, HumanMessage, AIMessage
from langchain_core.output_parsers import StrOutputParser

# Single-turn question about Palantir, in the {"messages": [...]} shape passed to agent.invoke.
messages = {
    "messages": [
        {"role": "user", "content": "What is Palantir and as of 2024, how many employees did they employ?"}
    ]
}


# Run the agent that was created with the prompt_with_context middleware.
result = agent.invoke(messages)

In [63]:
# Display the full result dict, including every message object.
result

{'messages': [HumanMessage(content='What is Palantir and as of 2024, how many employees did they employ?', additional_kwargs={}, response_metadata={}, id='79268b22-c269-49c6-bb61-1ca2fe31d695'),
  AIMessage(content='Palantir is a software company specializing in data analytics and integration platforms, primarily serving government and commercial clients. Its technology, including the platforms Gotham and Foundry, helps organizations analyze and integrate data from disparate sources to improve decision-making. As of 2024, Palantir employed 3,936 employees.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 63, 'prompt_tokens': 504, 'total_tokens': 567, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4.1-mini-2025-04-14', 'system_fingerprint': 'fp_4c2851f862', 'id': 'ch