# Prompt Engineering Lifecycle

### Setup

In [1]:
# You can set them inline
import os
os.environ["MISTRAL_API_KEY"] = ""
os.environ["LANGSMITH_API_KEY"] = ""
os.environ["LANGSMITH_TRACING"] = "true"
os.environ["LANGSMITH_PROJECT"] = "langsmith-academy-mistral"

In [2]:
# Or you can use a .env file
from dotenv import load_dotenv
load_dotenv(dotenv_path="../../.env", override=True)

True

### Log a trace

In [5]:
from app import langsmith_rag

question = "What iis the meaning of speed of light as physics major"
langsmith_rag(question)

'The speed of light in a vacuum, denoted as **c**, is a fundamental constant of nature approximately equal to **299,792,458 meters per second** (≈ 3 × 10⁸ m/s).\n\nIn physics, it represents the **maximum speed at which all energy, matter, and information can travel** in the universe, as described by Einstein’s theory of relativity.\nIt also serves as a **cosmic speed limit**, linking space and time in the spacetime continuum.'

### Create a Dataset

Let's create a dataset to evaluate this particular step of our application

In [6]:
from langsmith import Client

example_dataset = [
    (
        "What is the speed of light in a vacuum?",
        """The speed of light in a vacuum is a fundamental physical constant denoted by c. It is exactly 299,792,458 meters per second (approximately 3.00 × 10^8 m/s). This speed represents the maximum speed at which all energy, matter, and physical information in the universe can travel. According to Einstein's special theory of relativity, nothing with mass can reach or exceed this speed.""",
        "The speed of light in a vacuum is exactly 299,792,458 meters per second (approximately 3.00 × 10^8 m/s), which is a fundamental physical constant denoted by c."
    ),
    (
        "What is Newton's second law of motion?",
        """Newton's second law of motion states that the acceleration of an object is directly proportional to the net force acting on it and inversely proportional to its mass. Mathematically, this is expressed as F = ma, where F is the net force, m is the mass, and a is the acceleration. This law explains how the velocity of an object changes when it is subjected to an external force.""",
        "Newton's second law states that F = ma, where the force (F) equals mass (m) times acceleration (a). This means acceleration is directly proportional to force and inversely proportional to mass."
    ),
    (
        "What is quantum entanglement?",
        """Quantum entanglement is a phenomenon in quantum physics where two or more particles become interconnected in such a way that the quantum state of each particle cannot be described independently. When particles are entangled, measuring the state of one particle instantly affects the state of the other, regardless of the distance separating them. Einstein famously called this 'spooky action at a distance' and it forms the basis for quantum computing and quantum communication technologies.""",
        "Quantum entanglement is a quantum physics phenomenon where particles become interconnected so that measuring one instantly affects the other, regardless of distance. Einstein called this 'spooky action at a distance'."
    ),
]

client = Client()
dataset_name = "Physics Questions"

# Create dataset
dataset = client.create_dataset(
    dataset_name=dataset_name, description="Physics questions and concepts"
)

# Prepare inputs and outputs
inputs = [{"question": q, "context": c} for q, c, _ in example_dataset]
outputs = [{"output": o} for _, _, o in example_dataset]

# Create examples in the dataset
client.create_examples(
    inputs=inputs,
    outputs=outputs,
    dataset_id=dataset.id,
)

{'example_ids': ['1f1818a5-5b0e-43df-8b14-d13f9a13ba43',
  '7fe712f6-d049-4cbc-b217-3a9c1148edb9',
  'a7e1c594-18d2-4be8-a7d9-2fc9f7f22d08'],
 'count': 3}

### Update our Application to use Prompt Hub

We're going to pretty much define the same RAG application as before - with one crucial improvement.

Instead of pulling our `RAG_PROMPT` from utils.py, we're going to connect to the Prompt Hub in LangSmith.

Let's add the code snippet that will pull down our prompt that we just iterated on!

In [11]:
from langchain.prompts.chat import ChatPromptTemplate
from langsmith import Client

client=Client()

prompt = """You are a polymath who knows everything about different subjects .

Context: {context}
Question: {question}
Answer:"""

template = ChatPromptTemplate.from_template(prompt)
client.push_prompt("rag-prompt", object=template)

'https://smith.langchain.com/prompts/rag-prompt/70ece6d7?organizationId=bd531ccf-4286-4467-99ba-7eab707122af'

In [12]:
# Pull a prompt from the LangSmith Prompt Hub
from langsmith import Client

client = Client()

# Physics-oriented RAG prompt for answering physics questions
prompt = client.pull_prompt("rag-prompt")

print("Successfully pulled prompt from Prompt Hub:")
print(prompt)

Successfully pulled prompt from Prompt Hub:
input_variables=['context', 'question'] input_types={} partial_variables={} metadata={'lc_hub_owner': '-', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '70ece6d7877d2fdac5a8e2143d8baf05bce53f1e9b5fe50f63d48cbf47f63d99'} messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='You are a polymath who knows everything about different subjects .\n\nContext: {context}\nQuestion: {question}\nAnswer:'), additional_kwargs={})]


In [14]:
import os
import tempfile
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders.sitemap import SitemapLoader
from langchain_community.vectorstores import SKLearnVectorStore
from langchain_mistralai import ChatMistralAI, MistralAIEmbeddings
from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
from langsmith import traceable
from typing import List
import nest_asyncio

MODEL_NAME = "mistral-large-latest"
MODEL_PROVIDER = "mistral"
APP_VERSION = 1.0

# Physics-oriented system prompt
RAG_SYSTEM_PROMPT = """You are a physics expert assistant for question-answering tasks. 
Use the following pieces of retrieved context to answer physics questions accurately. 
Provide clear explanations of physical concepts and include relevant equations when appropriate.
If you don't know the answer, just say that you don't know. 
Keep your answers concise but informative.
"""

mistral_client = ChatMistralAI(model=MODEL_NAME)

def get_vector_db_retriever():
    persist_path = os.path.join(tempfile.gettempdir(), "physics_docs.parquet")
    embd = MistralAIEmbeddings(model="mistral-embed")

    # If vector store exists, then load it
    if os.path.exists(persist_path):
        vectorstore = SKLearnVectorStore(
            embedding=embd,
            persist_path=persist_path,
            serializer="parquet"
        )
        return vectorstore.as_retriever(lambda_mult=0)

    # Otherwise, index LangSmith documents and create new vector store
    ls_docs_sitemap_loader = SitemapLoader(web_path="https://docs.smith.langchain.com/sitemap.xml", continue_on_failure=True)
    ls_docs = ls_docs_sitemap_loader.load()

    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=500, chunk_overlap=0
    )
    doc_splits = text_splitter.split_documents(ls_docs)

    vectorstore = SKLearnVectorStore.from_documents(
        documents=doc_splits,
        embedding=embd,
        persist_path=persist_path,
        serializer="parquet"
    )
    vectorstore.persist()
    return vectorstore.as_retriever(lambda_mult=0)

nest_asyncio.apply()
retriever = get_vector_db_retriever()

"""
retrieve_documents
- Returns documents fetched from a vectorstore based on the user's question
"""
@traceable(run_type="chain")
def retrieve_documents(question: str):
    return retriever.invoke(question)

"""
generate_response
- Calls `call_mistral` to generate a physics-oriented model response using Prompt Hub
"""
@traceable(run_type="chain")
def generate_response(question: str, documents):
    formatted_docs = "\n\n".join(doc.page_content for doc in documents)
    
    # Use the physics prompt pulled from Prompt Hub
    try:
        formatted_prompt = prompt.invoke({"context": formatted_docs, "question": question})
        
        # Convert to LangChain message format
        messages = []
        if isinstance(formatted_prompt, dict) and "messages" in formatted_prompt:
            for msg in formatted_prompt["messages"]:
                if msg["role"] == "system":
                    messages.append(SystemMessage(content=msg["content"]))
                elif msg["role"] == "user":
                    messages.append(HumanMessage(content=msg["content"]))
                elif msg["role"] == "assistant":
                    messages.append(AIMessage(content=msg["content"]))
        else:
            # Fallback to physics-oriented manual formatting
            messages = [
                SystemMessage(content=RAG_SYSTEM_PROMPT),
                HumanMessage(content=f"Context: {formatted_docs}\n\nPhysics Question: {question}")
            ]
    except Exception as e:
        print(f"Error using Prompt Hub: {e}")
        # Fallback to physics-oriented manual formatting
        messages = [
            SystemMessage(content=RAG_SYSTEM_PROMPT),
            HumanMessage(content=f"Context: {formatted_docs}\n\nPhysics Question: {question}")
        ]
    
    return call_mistral(messages)

"""
call_mistral
- Returns the chat completion output from Mistral AI
"""
@traceable(
    run_type="llm",
    metadata={
        "ls_provider": MODEL_PROVIDER,
        "ls_model_name": MODEL_NAME
    }
)
def call_mistral(messages: List[dict]) -> str:
    return mistral_client.invoke(messages)

"""
langsmith_rag
- Calls `retrieve_documents` to fetch documents
- Calls `generate_response` to generate a response based on the fetched documents
- Returns the model response
"""

'(MaxRetryError('HTTPSConnectionPool(host=\'huggingface.co\', port=443): Max retries exceeded with url: /mistralai/Mixtral-8x7B-v0.1/resolve/main/tokenizer.json (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x7f0849782cf0>: Failed to resolve \'huggingface.co\' ([Errno -3] Temporary failure in name resolution)"))'), '(Request ID: 880b5998-c521-4509-a290-6a87725d967e)')' thrown while requesting HEAD https://huggingface.co/mistralai/Mixtral-8x7B-v0.1/resolve/main/tokenizer.json
Retrying in 1s [Retry 1/5].
'(MaxRetryError('HTTPSConnectionPool(host=\'huggingface.co\', port=443): Max retries exceeded with url: /mistralai/Mixtral-8x7B-v0.1/resolve/main/tokenizer.json (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x7f083347d450>: Failed to resolve \'huggingface.co\' ([Errno -3] Temporary failure in name resolution)"))'), '(Request ID: 18dbe88e-af72-4031-a7bf-efd3ee3555cb)')' thrown while requesting HEAD https://huggingface.co/mist

'\nlangsmith_rag\n- Calls `retrieve_documents` to fetch documents\n- Calls `generate_response` to generate a response based on the fetched documents\n- Returns the model response\n'

In [15]:
question = "What is the relationship between energy and mass according to Einstein?"
langsmith_rag(question)

"According to Einstein's theory of relativity, energy and mass are related by the equation **E=mc²**, where **E** is energy, **m** is mass, and **c** is the speed of light. This means mass can be converted into energy and vice versa. The equation shows that a small amount of mass can produce an enormous amount of energy."