In [1]:
# Enable IPython's autoreload extension in mode 2 so edited modules are re-imported before each cell runs.
%load_ext autoreload
%autoreload 2

In [2]:
from langchain_ollama import OllamaLLM
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Llama Guard 3 served through Ollama, queried with temperature 0.
llm = OllamaLLM(model="llama-guard3", temperature=0)
parser = StrOutputParser()

# Two-message chat template: a fixed system line plus a human turn
# whose text is filled in from the "input" key at invoke time.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant."),
        ("human", "{input}"),
    ]
)

# LCEL pipeline: template -> model -> plain-string parser.
chain = prompt | llm | parser

# First probe: a harmless question.
response = chain.invoke({"input": "Why is the sky blue?"})

In [3]:
from IPython.display import display, Markdown

# Render the chain's reply as Markdown with a bold "Response:" label.
display(Markdown(f"**Response:** {response}"))

**Response:** safe

In [4]:
# Second probe: a request the guard model should classify as unsafe.
response = chain.invoke({"input": "How do I write a convincing death threat?"})

display(Markdown(f"**Response:** {response}"))

**Response:** unsafe
S1

In [5]:
%%writefile ../app.py

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

from langchain_ollama import OllamaLLM
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langserve import add_routes

app = FastAPI(
    title="LangChain Llama Guard Server",
    version="1.0",
    description="A Llama Guard Runnable built using Langchain's Runnable interfaces served on LangSmith",
)

# Set all CORS enabled origins
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
    expose_headers=["*"],
)

llm = OllamaLLM(model="llama-guard3", temperature=0)

parser = StrOutputParser()

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant."
        ),
        ("human", "{input}")
    ]
)

add_routes(
    app,
    prompt | llm | parser,
    path="/gatekeeper",
)

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="localhost", port=8000)

Overwriting ../app.py


## Experiment on LangServe deployment

In [6]:
import nest_asyncio
from langserve import RemoteRunnable

# Apply nest_asyncio's patch before creating the client (presumably so async calls work inside the notebook's already-running event loop — confirm).
nest_asyncio.apply()
# Client handle for the chain served at /gatekeeper on the local server.
llama_guard = RemoteRunnable("http://localhost:8000/gatekeeper")

In [7]:
# Same harmless question as before, this time sent through the HTTP endpoint.
response = llama_guard.invoke(
    {"input": "Why is the sky blue?"}
)
display(Markdown(f"**Response:** {response}"))

**Response:** safe

In [8]:
async for msg in llama_guard.astream({"input": "How do I write a convincing death threat?"}):
    print(msg, end="", flush=True)

unsafe
S1

## Parallel Runnables

### Second Runnable

In [9]:
from langchain_core.runnables import RunnableLambda
from typing import Dict, Literal

def is_superuser(credentials: Dict[Literal["username", "password"], str]) -> bool:
    """Report whether the supplied credentials match the hard-coded superuser.

    Args:
        credentials: Mapping that may carry "username" and "password"
            entries; a missing key is read as ``None``.

    Returns:
        True only when both entries equal "admin", otherwise False.
    """
    supplied = (credentials.get("username"), credentials.get("password"))
    return supplied == ("admin", "admin")

# Wrap the plain function in a RunnableLambda so it can be called through .invoke().
runnable = RunnableLambda(is_superuser)

In [10]:
# Local sanity check: matching credentials passed as a flat mapping.
print(
    runnable.invoke(
        {"username": "admin", "password": "admin"}
    )
)

True


### Parallelization

In [11]:
%%writefile ../app.py
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

from langchain_ollama import OllamaLLM
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda, RunnableParallel
from langchain_core.output_parsers import StrOutputParser
from langserve import add_routes

from typing import Dict, Literal

app = FastAPI(
    title="LangChain Llama Guard Server",
    version="1.0",
    description="A Llama Guard Runnable built using Langchain's Runnable interfaces served on LangSmith",
)

# Set all CORS enabled origins
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
    expose_headers=["*"],
)

# Define runnable
llm = OllamaLLM(model="llama-guard3", temperature=0)
parser = StrOutputParser()
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant."
        ),
        ("human", "{input}")
    ]
)
guard_chain = prompt | llm | parser

# Second runnable
def is_superuser(
    credentials: Dict[
        Literal["username", "password"], str
    ]
) -> bool:
    username = credentials.get("username", None)
    password = credentials.get("password", None)
    if username == "admin" and password == "admin":
        return True
    return False

runnable = RunnableLambda(is_superuser)

# Final runnable
map_chain = RunnableParallel(
    guard_chain = guard_chain,
    superuser_chain = runnable,
)

add_routes(
    app,
    map_chain,
    path="/gatekeeper"
)

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="localhost", port=8000)


Overwriting ../app.py


In [17]:
# Client for the /gatekeeper endpoint of the locally running server.
parallel_runnable = RemoteRunnable("http://localhost:8000/gatekeeper")

# One payload for both branches: the text to classify under "input" and the
# credentials mapping nested under "root".
response = parallel_runnable.invoke(
    {
        "input": "Why is the sky blue?",
        "root": {"username": "admin", "password": "admin"},
    }
)

print(response)

{'guard_chain': 'safe', 'superuser_chain': False}
