In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain_huggingface import HuggingFacePipeline

model_name = "microsoft/phi-2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
max_new_token = 1024
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype = torch.float16, device_map=None)
model_pipeline = pipeline(
    "text-generation",
    model = model,
    tokenizer = tokenizer,
    pad_token_id = tokenizer.eos_token_id,
    device = -1
)
llm = HuggingFacePipeline(
    pipeline = model_pipeline
)

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  9.04it/s]
Device set to use cpu


1. Json Parser

In [3]:
from pydantic import BaseModel, Field

class Joke(BaseModel):
    setup: str = Field(description = "question to set up a joke")
    punchline:str = Field(description = "answer to resolve a joke")

In [4]:
from langchain_core.output_parsers import JsonOutputParser
from pydantic import BaseModel, Field

class Joke(BaseModel):
    setup: str = Field(description = "question to set up a joke")
    punchline: str = Field(description = "answer to resolve a joke")

parser = JsonOutputParser(pydantic_object = Joke)


2. Prompt Template

In [5]:
from langchain_core.prompts import PromptTemplate

prompt = PromptTemplate(
    template = "Answer user query.\n{format_instructions}\n{query}\n",
    input_variables = ["query"],
    partial_variables = {"format_instructions": parser.get_format_instructions()}
)


3. Chain

In [6]:
chain = prompt | llm | parser

joke_query = "Tell me a fun joke"
output = chain.invoke({"query": joke_query})
print(output)


{'properties': {'setup': {'description': 'question to set up a joke', 'title': 'Setup', 'type': 'string'}, 'punchline': {'description': 'answer to resolve a joke', 'title': 'Punchline', 'type': 'string'}}, 'required': ['setup', 'punchline']}
