In [2]:
# Testing LangChain v1.0

In [2]:
# https://docs.langchain.com/oss/python/langchain/rag

In [1]:
from dotenv import dotenv_values
import os
from openai import OpenAI

config = dotenv_values(".env")

client = OpenAI(api_key=config["OPEN_AI_KEY"])

In [2]:
from langchain_openai import OpenAIEmbeddings  

model_name = 'text-embedding-3-large'  
embeddings = OpenAIEmbeddings(  
    model=model_name,  
    openai_api_key=config["OPEN_AI_KEY"]  
)

In [3]:
from pinecone import Pinecone, ServerlessSpec

pc = Pinecone(api_key=config["PINECONE_API_KEY"])

# pc = Pinecone(api_key=os.environ.get('PINECONE_API_KEY'))

In [49]:
# Function to embed text > to feed to Pinecone index.query

from openai import OpenAI

def get_embedding(text, model="text-embedding-3-large"):
    """
    Generates a vector embedding for the given text using OpenAI's model.
    """
    text = text.replace("\n", " ") # Best practice is to replace newlines
    response = client.embeddings.create(
        input=[text],
        model=model
    )
    # The embedding is in the 'data' array of the response
    return response.data[0].embedding

# Example usage:
sentence = "What does the knowledge source say about the speed of light?"
embedding_vector = get_embedding(sentence)

print(f"Original sentence: {sentence}")
print(f"Embedding vector (first 50 dimensions): {embedding_vector[:50]}...")
print(f"Vector dimension: {len(embedding_vector)}")


Original sentence: What does the knowledge source say about the speed of light?
Embedding vector (first 50 dimensions): [0.003222345607355237, -0.015915820375084877, -0.005860345438122749, 0.023022545501589775, -0.02298201248049736, -0.01999610662460327, 0.04061371460556984, 0.03504722937941551, 0.020901337265968323, 0.028859246522188187, 0.011187011376023293, 0.05788062512874603, 0.02419799193739891, -0.0058299461379647255, -0.013321731239557266, 0.07906568795442581, 0.013281198218464851, 0.0001265589817194268, -0.016037417575716972, 0.023117121309041977, -0.019198965281248093, -0.009450863115489483, 0.018739594146609306, -0.02071218378841877, -0.02549503743648529, 0.007086458150297403, 0.009727835655212402, 0.003914778586477041, -0.002171874511986971, 0.009781879372894764, 0.03245314210653305, -0.003654693951830268, -0.005694837309420109, -0.015578048303723335, 0.0014760639751330018, -0.05177370458841324, -0.02119857631623745, -0.02225242555141449, -0.02750815823674202, -0.0078971115

In [50]:
# Pinecone - Debug

from pinecone import Pinecone

# pc = Pinecone(api_key=os.environ.get('PINECONE_API_KEY'))

# To get the unique host for an index, 
# see https://docs.pinecone.io/guides/manage-data/target-an-index
index = pc.Index(host="https://test-index-3-kuv1rfi.svc.aped-4627-b74a.pinecone.io")

# Index.search will not work with this index
# Integrated inference is not configured for this index

# results = index.query(
#     namespace="__default__", 
#     query={
#         "inputs": {"text": "Disease prevention"}
#     },
#     top_k=3,
#     include_metadata=True,
#     include_values=False
# )

# print(results)

# Pinecone index.query

response = index.query(
    namespace="__default__",
    vector=embedding_vector, 
    top_k=3,
    include_metadata=True,
    include_values=False
)

In [51]:
matches = response['matches']

In [52]:
response.to_dict()

{'matches': [{'id': '4ba7ae68-92db-42e2-b5c7-c18141c84da3',
   'score': 0.578722,
   'values': [],
   'metadata': {'_id': 'rec9',
    'category': 'physics',
    'text': 'The speed of light in a vacuum is approximately 299,792 km/s.'}},
  {'id': '01cf5e66-b57b-42c3-a46e-ef66cc490e86',
   'score': 0.349006683,
   'values': [],
   'metadata': {'_id': 'rec42',
    'category': 'physics',
    'text': 'The speed of sound is around 343 meters per second in air.'}},
  {'id': '45277ff9-a7b7-43e3-a480-6db9d2978300',
   'score': 0.29104045,
   'values': [],
   'metadata': {'_id': 'rec31',
    'category': 'astronomy',
    'text': 'The universe is expanding, according to the Big Bang theory.'}}],
 'namespace': '__default__',
 'usage': {'read_units': 1}}

In [15]:
import langchain
langchain.verbose = False
# langchain.debug = False
# langchain.llm_cache = False
from langchain.chat_models import init_chat_model

model = init_chat_model("gpt-4.1-mini", model_provider="openai", api_key=config["OPEN_AI_KEY"])

In [16]:
# Original

from langchain.agents.middleware import dynamic_prompt, ModelRequest
from langchain.agents import create_agent


@dynamic_prompt
def prompt_with_context(request: ModelRequest) -> str:
    """Inject context into state messages."""
    last_query = request.state["messages"][-1].text
    retrieved_docs = vector_store.similarity_search(last_query)

    docs_content = "\n\n".join(doc.page_content for doc in retrieved_docs)

    system_message = (
        "You are a helpful assistant. Use the following context in your response:"
        f"\n\n{docs_content}"
    )

    return system_message


agent = create_agent(model, tools=[], middleware=[prompt_with_context])

In [71]:
# v2

from langchain.agents.middleware import dynamic_prompt, ModelRequest
from langchain.agents import create_agent


@dynamic_prompt
def prompt_with_context(request: ModelRequest) -> str:
    """Inject context into state messages."""
    last_query = request.state["messages"][-1].text
    
    # Get embedding for message
    embedding = get_embedding(last_query)

    # Perform search against Pinecone
    response = index.query(
        namespace="__default__",
        vector=embedding, 
        top_k=3,
        include_metadata=True,
        include_values=False
    )

    response = response.to_dict()
    
    system_message = (
        "You are a helpful assistant. Use the following context in your response:"
        f"\n\n{response['matches']}"
    )

    return system_message


agent = create_agent(model, tools=[], middleware=[prompt_with_context])

In [88]:
from langchain.messages import SystemMessage, HumanMessage, AIMessage
from langchain_core.output_parsers import StrOutputParser

messages = {
    "messages": [
        {"role": "user", "content": "What is Maka Projects?"}
    ]
}


result = agent.invoke(messages)

In [87]:
result['messages'][-1].content

'Maka Projects is a holding company that owns multiple multi-billion dollar companies including Chedr, Gymogul, and Coach Central.'

In [93]:
# Short-term Memory

In [94]:
from langgraph.checkpoint.memory import InMemorySaver

agent = create_agent(model, tools=[], middleware=[prompt_with_context], checkpointer=InMemorySaver())

In [104]:
messages = {
    "messages": [
        {"role": "user", "content": "I live in California"}
    ]
}


result = agent.invoke(messages, {"configurable": {"thread_id": "1"}})

In [105]:
print(result['messages'][-1])

content='Thank you for sharing that you live in California! Is there anything specific you would like to know or discuss about California?' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 24, 'prompt_tokens': 386, 'total_tokens': 410, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4.1-mini-2025-04-14', 'system_fingerprint': 'fp_4c2851f862', 'id': 'chatcmpl-CWGhEwx16QtacanFaEFGqkG0WoF57', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None} id='lc_run--9a450319-21d7-4f6c-89b5-af42e1378bd7-0' usage_metadata={'input_tokens': 386, 'output_tokens': 24, 'total_tokens': 410, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}
