### Necessary Imports & Declarations

In [1]:
from typing import Optional, TypeVar, Type

import instructor
from pydantic import BaseModel, Field
from openai import OpenAI, AsyncOpenAI
from langchain.schema.messages import BaseMessage
from langchain.schema.messages import HumanMessage
from langchain.schema.runnable import RunnableLambda
from langchain.adapters.openai import convert_message_to_dict

T = TypeVar('T', bound=BaseModel)

### Function to create an instructor-integrated `runnable`

> Supports any OpenAI-compatible API (e.g. Ollama, vLLM, ...).

In [2]:
def build_messages_map(__prompt_or_messages: str | list[BaseMessage]) -> dict[str, list[dict]]:
    """Build OpenAI API compatible messages.

    @param __prompt_or_messages: Either a plain prompt string, which becomes
        a single ``user`` message, or a list of langchain messages, each of
        which is converted with ``convert_message_to_dict``.
    @return: A mapping with a single ``'messages'`` key holding the list of
        message dicts, so it can be merged directly into the keyword
        arguments of a chat completions ``create`` call.
    """
    if isinstance(__prompt_or_messages, str):
        # A bare prompt string is treated as one user turn.
        messages = [{'role': 'user', 'content': __prompt_or_messages}]
    else:
        messages = [convert_message_to_dict(msg) for msg in __prompt_or_messages]

    # Wrapped in a dict (not returned as a bare list) so callers can merge it
    # with other request kwargs.
    return {'messages': messages}

def create_instructor_runnable(
    model: str,
    response_model: Type[T],
    base_url: Optional[str] = None,
    api_key: Optional[str] = None,
    mode: Optional[instructor.Mode] = None,
    **kwargs
) -> RunnableLambda[str | list[BaseMessage], T]:
    """
    Build a langchain runnable whose completions are parsed by instructor.

    A synchronous and an asynchronous OpenAI client are both patched with
    instructor and wrapped in one RunnableLambda, so the result supports
    sync and async invocation.

    @param model: Name of the model to request.
    @param response_model: pydantic class passed as the `response_model` of each create call.
    @param base_url: Base URL of the chat completions endpoint.
    @param api_key: API key used when requesting the base URL.
    @param mode: instructor mode to patch the clients with. [default: JSON]
    @param kwargs: Extra kwargs forwarded to every create call.
    @return: RunnableLambda accepting a prompt string or a list of messages.
    """
    patch_mode = mode if mode else instructor.Mode.JSON
    connection = {'base_url': base_url, 'api_key': api_key}
    client = instructor.patch(OpenAI(**connection), mode=patch_mode)
    aclient = instructor.patch(AsyncOpenAI(**connection), mode=patch_mode)

    def request_kwargs(prompt_or_messages):
        # Later entries win: caller kwargs may override 'messages', while
        # 'model' and 'response_model' always take the values given here.
        return {
            **build_messages_map(prompt_or_messages),
            **kwargs,
            'model': model,
            'response_model': response_model,
        }

    def func(__arg):
        return client.chat.completions.create(**request_kwargs(__arg))

    async def afunc(__arg):
        return await aclient.chat.completions.create(**request_kwargs(__arg))

    return RunnableLambda(func=func, afunc=afunc)

### Usage

- Create a Response Model that represents how the final output should be.
- Create an instructor runnable.
- Invoke the runnable

In [3]:
# Response model used in the usage example below: the fields the final output should contain.
# NOTE(review): documented with comments rather than a class docstring, since pydantic
# may include a docstring in the generated schema — confirm before adding one.
class Character(BaseModel):
    name: str
    age: int
    # `...` marks the field as required; the description is attached to the field's metadata.
    fact: str = Field(..., description="A fact about the character")

In [4]:
# Build a runnable that returns `Character` objects from a local Ollama server.
llm = create_instructor_runnable(
    'llama2',
    Character,
    api_key='ollama',  # Ollama does not require a key; this is only a placeholder
    base_url='http://localhost:11434/v1',  # Ollama default URL
)

In [5]:
# Synchronous call: a plain string prompt is sent as a single user message.
llm.invoke('Tell me about Elon Musk.')

Character(name='Elon Musk', age=49, fact='He is a billionaire entrepreneur and inventor known for his innovative ideas and technological advancements in the fields of electric cars, space exploration, and renewable energy.')

In [6]:
from langchain.schema.messages import HumanMessage
await llm.ainvoke([HumanMessage(content='Who is Ichigo Kurosaki.')])

Character(name='Ichigo Kurosaki', age=150, fact='He can transform into a Shinigami and play the drum.')