In [1]:
import requests

# List available models from the local LLM server
try:
    # requests applies no timeout by default; without one an unresponsive
    # server would block this cell indefinitely. Timeout errors are
    # RequestException subclasses, so the handler below still reports them.
    response = requests.get("http://localhost:1234/v1/models", timeout=10)
    if response.status_code == 200:
        models = response.json()
        print("Available models:")
        for model in models.get('data', []):
            print(f"- {model.get('id', 'Unknown')}")
    else:
        print(f"Failed to get models: {response.status_code} - {response.text}")
except requests.exceptions.RequestException as e:
    print(f"Error connecting to local LLM server: {e}")

Available models:
- text-embedding-nomic-embed-text-v1.5
- qwen/qwen3-4b-2507
- meta-llama-3.1-8b-instruct
- text-embedding-bge-small-en-v1.5
- text-embedding-all-minilm-l6-v2-embedding
- deepseek/deepseek-r1-0528-qwen3-8b
- qwen/qwen3-4b-thinking-2507
- llama-3.2-3b-instruct


In [None]:
# Chat model id passed as `model=` to the ChatOpenAI clients in the cells below.
model_name = "qwen/qwen3-4b-2507"
# Embedding model id sent in the "model" field of the /v1/embeddings requests.
embedding_model_name = "text-embedding-nomic-embed-text-v1.5"

In [76]:
from langchain_core.output_parsers import BaseOutputParser



class AnswerStrOutputParser(BaseOutputParser):
    """Output parser that returns only the text following a ``</think>`` tag."""

    def parse(self, text: str) -> str:
        """Return ``text`` stripped, minus everything up to the first ``</think>``.

        When the tag is absent the whole input is returned, stripped.
        """
        _, marker, answer = text.partition('</think>')
        # partition() yields an empty marker when the tag was not found.
        return (answer if marker else text).strip()

# Tell a joke based on topic

In [77]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

# Chat model served by the local OpenAI-compatible endpoint (api_key is a placeholder value)
joke_llm = ChatOpenAI(
    model=model_name,
    base_url="http://localhost:1234/v1",
    api_key="dummy-key",
    temperature=0.7,
)

# Prompt with a single {topic} placeholder
joke_prompt = ChatPromptTemplate.from_template(
    "Tell me a funny joke about {topic}. Make it clever and appropriate."
)

# Parser that keeps only the text after a </think> tag, if one is present
output_parser = AnswerStrOutputParser()

# LCEL pipeline: prompt -> model -> parser
joke_chain = joke_prompt | joke_llm | output_parser

# Run the pipeline for one topic and show the heading followed by the joke
topic = "programming"
joke = joke_chain.invoke({"topic": topic})
print(f"Joke about {topic}:", joke, sep="\n")


Joke about programming:
Sure! Here's a joke:

**Why did the programmer quit his job?**

Because he didn't get arrays!

Hehe — it's a pun, since "arrays" are both the data structure and also what you might feel like getting after a long day of debugging! 😄


# Ask questions using retrieved context

In [40]:
import requests

# Test the embedding endpoint directly
# A timeout keeps this cell from hanging if the server never answers
# (requests applies no timeout by default).
response = requests.post(
    "http://localhost:1234/v1/embeddings",
    json={
        "model": embedding_model_name,
        "input": ["test text", "testing text"]
    },
    timeout=30,
)
print(response.status_code, response.json())

200 {'object': 'list', 'data': [{'object': 'embedding', 'embedding': [0.01571960560977459, -0.03698844462633133, -0.15595324337482452, -0.015473532490432262, 0.06403752416372299, -0.04268742725253105, 0.030683154240250587, -0.00664095813408494, 0.02124324068427086, 0.003814765252172947, 0.008707347325980663, 0.06542211771011353, 0.03297475725412369, 0.020072687417268753, -0.05354273319244385, -0.05512047931551933, 0.08223888278007507, -0.05756828933954239, -0.0329284593462944, 0.034995608031749725, 0.034731585532426834, 0.012076541781425476, -0.08421074599027634, -0.00826439168304205, 0.06818415224552155, -0.01476707961410284, -0.08248645812273026, 0.019162630662322044, -0.05076124519109726, -0.03329944983124733, 0.030557412654161453, -0.04024438187479973, 0.002544557675719261, -0.0300230011343956, -0.020621653646230698, -0.007294947747141123, 0.017703073099255562, 0.06364432722330093, -0.04091324657201767, 0.01187488716095686, 0.04915614426136017, 0.008704474195837975, -0.055165745317

In [41]:
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_core.embeddings import Embeddings
# from langchain_openai import OpenAIEmbeddings

from typing import List

class CustomEmbeddings(Embeddings):
    """Embeddings backend that calls an OpenAI-style ``/embeddings`` HTTP endpoint.

    Args:
        base_url: Root URL of the API, e.g. ``http://localhost:1234/v1``. A
            trailing slash is tolerated.
        model: Embedding model identifier sent with every request.
        api_key: Sent as a ``Bearer`` token in the ``Authorization`` header
            when non-empty.
        timeout: Seconds to wait for the server before ``requests`` gives up.
    """

    def __init__(self, base_url: str, model: str, api_key: str = "dummy-key",
                 timeout: float = 30.0):
        self.base_url = base_url
        self.model = model
        self.api_key = api_key
        self.timeout = timeout

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed ``texts`` and return one vector per input, in input order.

        Raises:
            RuntimeError: If the server answers with a non-200 status.
            requests.exceptions.RequestException: On connection errors or timeout.
        """
        # Nothing to embed: skip the round trip instead of posting an empty input.
        if not texts:
            return []

        # Previously the key was stored but never transmitted.
        headers = {"Authorization": f"Bearer {self.api_key}"} if self.api_key else {}
        response = requests.post(
            # rstrip avoids a double slash when base_url ends with "/".
            f"{self.base_url.rstrip('/')}/embeddings",
            json={
                "model": self.model,
                "input": texts
            },
            headers=headers,
            timeout=self.timeout,
        )
        if response.status_code != 200:
            raise RuntimeError(f"Embedding request failed: {response.status_code} - {response.text}")

        items = response.json()['data']
        # Order by the per-item "index" when the server provides it; the sort is
        # stable, so responses without it keep their original order.
        items = sorted(items, key=lambda item: item.get('index', 0))
        return [item['embedding'] for item in items]

    def embed_query(self, text: str) -> List[float]:
        """Embed a single query string by delegating to ``embed_documents``."""
        return self.embed_documents([text])[0]


# Initialize the CustomEmbeddings client defined above, pointed at the local server's /v1 endpoint
embeddings = CustomEmbeddings(
    base_url="http://localhost:1234/v1",
    model=embedding_model_name,
    api_key="dummy-key"
)

# Build an in-memory vector store from four sample sentences, embedded via `embeddings`
vector_store = InMemoryVectorStore.from_texts([
    "LangChain is a framework for developing applications powered by language models.",
    "Vector stores are used to store and retrieve documents based on semantic similarity.",
    "LCEL (LangChain Expression Language) allows you to chain components together easily.",
    "Embeddings convert text into numerical vectors that capture semantic meaning."
], embeddings)

# Create a retriever from the vector store; k=2 is passed through as a search kwarg
retriever = vector_store.as_retriever(search_kwargs={"k": 2})

# Smoke test: as the cell's last expression, the retrieved documents are shown as its output
retriever.invoke("What is LangChain?")

[Document(id='736f3629-ca1a-4692-af67-82c4049b45cb', metadata={}, page_content='LangChain is a framework for developing applications powered by language models.'),
 Document(id='7e7fe55e-3c23-40d2-9ce4-47e64cdbecf3', metadata={}, page_content='LCEL (LangChain Expression Language) allows you to chain components together easily.')]

In [42]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# Create a prompt template for RAG (Retrieval Augmented Generation).
# The template has two placeholders, {context} and {question}; both keys are
# produced by `runnable_map` further down in this cell.
template = """
Use the following context to answer the question. If you cannot answer based on the context, say so.

Context:
{context}

Question: {question}

Answer:
"""
rag_prompt = ChatPromptTemplate.from_template(template)


from langchain_core.runnables import RunnableMap

def _format_docs(docs):
    """Join the retrieved documents' text into one plain-text context string."""
    return "\n\n".join(doc.page_content for doc in docs)


# Build the prompt inputs from {"question": ...}. The retrieved Document objects
# are flattened to their page_content so the prompt contains the passage text
# rather than the repr of a list of Document objects (ids, metadata, ...).
runnable_map = RunnableMap({
    "context": lambda x: _format_docs(retriever.invoke(x['question'])),
    "question": lambda x: x['question']
})

# Initialize the language model
rag_llm = ChatOpenAI(
    temperature=0.7,
    base_url="http://localhost:1234/v1",
    api_key="dummy-key",
    model=model_name)

# Pipeline: retrieve + pass question through -> prompt -> model -> answer text
rag_chain = runnable_map | rag_prompt | rag_llm | AnswerStrOutputParser()


# Run the chain on a first question and show the exchange
question = "What is LangChain?"
answer = rag_chain.invoke({"question": question})
print(f"Question: {question}", f"Answer: {answer}", sep="\n")

# Run it again on a second question (the leading newline separates the two exchanges)
question2 = "How do vector stores work?"
answer2 = rag_chain.invoke({"question": question2})
print(f"\nQuestion: {question2}", f"Answer: {answer2}", sep="\n")

Question: What is LangChain?
Answer: LangChain is a framework for developing applications powered by language models, and LCEL allows you to chain components together easily.

Question: How do vector stores work?
Answer: Vector stores work by converting documents into numerical vectors using embeddings. These embeddings capture semantic meaning, and the store holds a collection of these vector representations. When querying for semantic similarity, it calculates the distance between the query's embedding and those of the stored documents. Closer vectors indicate higher semantic similarity, enabling efficient retrieval based on meaning rather than exact text matching.


# Bind Tools and ask questions

In [48]:
def _function_tool(name, description, properties, required):
    """Build one function-tool spec around a JSON-schema object of parameters."""
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": {
                "type": "object",
                "properties": properties,
                "required": required,
            },
        },
    }


def _number_field(description):
    """Schema for a numeric parameter."""
    return {"type": "number", "description": description}


def _unit_field(description, **extra):
    """Schema for a string parameter restricted to the two temperature units."""
    return {
        "type": "string",
        "enum": ["celsius", "fahrenheit"],
        "description": description,
        **extra,
    }


# Tool schemas handed to the chat model: weather lookup, unit conversion, sports news.
tools = [
    _function_tool(
        "get_current_weather",
        "Get the current weather for a given location in latitude and longitude",
        {
            "latitude": _number_field("The latitude of the location"),
            "longitude": _number_field("The longitude of the location"),
            "unit": _unit_field("The temperature unit to use", default="fahrenheit"),
        },
        ["latitude", "longitude"],
    ),
    _function_tool(
        "convert_temperature",
        "Convert temperature between celsius and fahrenheit",
        {
            "temperature": _number_field("The temperature value to convert"),
            "from_unit": _unit_field("The unit to convert from"),
            "to_unit": _unit_field("The unit to convert to"),
        },
        ["temperature", "from_unit", "to_unit"],
    ),
    _function_tool(
        "get_sports_news",
        "Retrieve recent news about sports events for a specific team",
        {
            "team_name": {
                "type": "string",
                "description": "The name of the sports team to get news for",
            },
        },
        ["team_name"],
    ),
]

In [49]:
# Import ChatPromptTemplate from langchain_core, the same module the other
# cells in this notebook import it from, instead of `langchain.prompts`.
from langchain_core.prompts import ChatPromptTemplate

# Two-message chat prompt: a fixed system instruction plus the user's {input}.
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant that can answer questions and help with various tasks."),
    ("human", "{input}")
])

# Chat model with the `tools` schemas bound; temperature 0.0 is used for this call.
tool_model = ChatOpenAI(
    temperature=0.0,
    base_url="http://localhost:1234/v1",
    api_key="dummy-key",
    model=model_name).bind_tools(tools)

runnable = prompt | tool_model
ai_message = runnable.invoke({"input": "What is the weather in SF?"})
# Last expression of the cell: display the tool calls the model requested.
ai_message.tool_calls


[{'name': 'get_current_weather',
  'args': {'latitude': 37.78, 'longitude': -122.4},
  'id': '699847862',
  'type': 'tool_call'}]

# Fallback Chains

In [71]:
import json

from langchain_core.runnables import RunnableLambda

# Initialize the language model used by `failure_chain` below.
# NOTE(review): this client hard-codes a different model id than `model_name`;
# presumably chosen because its reply is not bare JSON — confirm.
error_llm = ChatOpenAI(
    temperature=0.0, 
    base_url="http://localhost:1234/v1", 
    api_key="dummy-key",
    model="llama-3.2-3b-instruct")

# Prompt text passed to `final_chain.invoke(...)` at the end of this cell.
challenge = "Give me three poems and list them in a json format. The json should have the following fields: title, author, and first line of poem. The output should contain only json"

def extract_json_from_markdown(text: str) -> str:
    """Extract JSON content from a markdown code block.

    Returns the stripped body of the first triple-backtick fenced block found
    in ``text``. An optional ``json`` language tag after the opening fence is
    matched case-insensitively, so a fence tagged ``JSON`` is handled the same
    way as one tagged ``json``. If ``text`` contains no fenced block it is
    returned unchanged.
    """
    import re
    # Opening fence, optional "json" tag, then lazily capture up to the closing fence.
    json_pattern = r'```(?:json)?\s*\n?(.*?)\n?```'
    # DOTALL lets the body span lines; IGNORECASE keeps an upper-case tag out of the body.
    match = re.search(json_pattern, text, re.DOTALL | re.IGNORECASE)
    if match:
        return match.group(1).strip()
    return text



# Primary chain: feed the model's answer text straight into json.loads.
# Run on its own this raises, because json.loads cannot parse the json code snippet:
#   failure_chain.invoke(challenge)
failure_chain = error_llm | AnswerStrOutputParser() | json.loads

# Fallback chain: a second model whose answer is unwrapped from any markdown
# fence before being parsed. It can be tried alone with fallback_chain.invoke(challenge).
poem_llm = ChatOpenAI(
    model=model_name,
    base_url="http://localhost:1234/v1",
    api_key="dummy-key",
    temperature=0.0,
)
json_extractor = (
    RunnableLambda(extract_json_from_markdown)
    | RunnableLambda(lambda payload: json.loads(payload))
)
fallback_chain = poem_llm | AnswerStrOutputParser() | json_extractor

# Attach the fallback to the primary chain and run the combined chain.
final_chain = failure_chain.with_fallbacks([fallback_chain])
final_chain.invoke(challenge)

[{'title': 'The Road Not Taken',
  'author': 'Robert Frost',
  'first_line': 'Two roads diverged in a wood, and I—'},
 {'title': 'Stopping by Woods on a Snowy Evening',
  'author': 'Robert Frost',
  'first_line': 'Whose woods these are I think I know.'},
 {'title': 'If',
  'author': 'Rudyard Kipling',
  'first_line': 'If you ask me, I am not like other men.'}]

# Explore the LangChain Runnable interface: invoke, ainvoke, batch, and stream

In [78]:
# Re-declares the same prompt text used in the earlier joke cell.
joke_prompt = ChatPromptTemplate.from_template(
    "Tell me a funny joke about {topic}. Make it clever and appropriate."
)

# Rebuild the chain, reusing `joke_llm` from the earlier cell with a new parser instance.
joke_chain = joke_prompt | joke_llm | AnswerStrOutputParser()

# invoke(): a single blocking call with one input dict; the parsed text is printed.
bear_joke = joke_chain.invoke({"topic": "bears"})
print(bear_joke)

Why did a bear ask, "Do you even lift?" 😹

Because he knew all about it because his favorite Goldilocks used to eat so much that the bears had to start a bear-fit training program! 🐻💪

Oh...


In [80]:
# ainvoke(): the coroutine is awaited directly at the top level of the cell.
# NOTE(review): top-level `await` relies on the notebook kernel's event loop; in a
# plain script this would presumably need asyncio.run(...) — confirm target environment.
program_joke = await joke_chain.ainvoke({'topic': 'programming'})
print(program_joke)

Why did the server break up with him?

Because he was too busy. 😄


In [82]:
# batch(): pass a list of input dicts in one call, then iterate over the returned results.
batch_jokes = joke_chain.batch([{'topic': 'writers'}, {'topic': 'publishers'}])
# NOTE: the loop variable rebinds the notebook-level name `joke` assigned in an earlier cell.
for joke in batch_jokes:
    print(joke)

Okay, here's one:

**Q: Why do writers get fired?**

**A:** They're too sensitive to the *pressure* – and it's a real, tangible sensation.
Okay, here's a try:

**Why did the publisher get fired?**

**They were spreading rumors too fast.**
*(Appropriate because it plays on the word "publishing" without referencing harmful or insensitive topics. Clever for its pun about distributing information rapidly, be it good news about a book release or... other things like office gossip they might have amplified to bad effect!)*


In [87]:
# stream(): iterate over the output as it is produced. Printing each piece with
# the default newline would scatter a token-streamed answer over many lines, so
# pieces are written back-to-back and flushed as they arrive.
for chunk in joke_chain.stream({"topic": "elephants"}):
    print(chunk, end="", flush=True)
print()  # end the line once the stream is exhausted

Why don't elephants ever forget anything?

Because they never forget, especially the smell of peanuts!
