# TypedDict

In [1]:
from typing import Literal, Optional, Tuple
from typing_extensions import TypedDict

# Allowed values for each attribute that the notebook extracts from a query.
# Product series identifiers.
series_unique = [
    'type A',
    'type B',
    'type C',
]
# Product colors (Thai names).
color_unique = [
    'ม่วง',
    'คราม',
    'น้ำเงิน',
    'เขียว',
    'เหลือง',
    'แสด',
    'แดง',
]
# Product groups (Thai names).
group_unique = [
    'ครอบโค้ง',
    'หลังคา',
    'ครอบข้าง',
    'ครอบปิดปลาย',
    'กระเบื้อง',
    'ครอบสัน',
]

# Literal types built at runtime from the allowed-value lists, so each field's
# permitted values always mirror series_unique / color_unique / group_unique.
_SeriesLiteral = Literal[tuple(series_unique)]
_ColorLiteral = Literal[tuple(color_unique)]
_GroupLiteral = Literal[tuple(group_unique)]


class format_typedict(TypedDict):
    # TypedDict flavour of the extraction schema: every key holds either one
    # of the allowed values for that attribute or None.
    series: Optional[_SeriesLiteral]
    color: Optional[_ColorLiteral]
    pgroup: Optional[_GroupLiteral]


# BaseModel

In [2]:
from pydantic import BaseModel, Field, field_validator

class format_basemodel(BaseModel):
    # Pydantic flavour of the extraction schema. Every field is an optional
    # string defaulting to None; the validators below reject any non-None value
    # that is not in the corresponding allowed-value list.
    series: Optional[str] = Field(None, description="Series of product")
    color: Optional[str] = Field(None, description="color of product")
    pgroup: Optional[str] = Field(None, description="group name of product")

    @field_validator('series')
    @classmethod
    def validate_series(cls, v: Optional[str]) -> Optional[str]:
        """Return ``v`` unchanged if it is None or listed in ``series_unique``.

        Raises:
            ValueError: if ``v`` is a value outside ``series_unique``.
        """
        if v is not None and v not in series_unique:
            raise ValueError(f"Series must be one of {series_unique}")
        return v

    @field_validator('color')
    @classmethod
    def validate_color(cls, v: Optional[str]) -> Optional[str]:
        """Return ``v`` unchanged if it is None or listed in ``color_unique``.

        Raises:
            ValueError: if ``v`` is a value outside ``color_unique``.
        """
        if v is not None and v not in color_unique:
            raise ValueError(f"Color must be one of {color_unique}")
        return v

    @field_validator('pgroup')
    @classmethod
    def validate_pgroup(cls, v: Optional[str]) -> Optional[str]:
        """Return ``v`` unchanged if it is None or listed in ``group_unique``.

        Raises:
            ValueError: if ``v`` is a value outside ``group_unique``.
        """
        if v is not None and v not in group_unique:
            raise ValueError(f"Group must be one of {group_unique}")
        return v

# LLM

In [3]:
import os
from operator import itemgetter

from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain_core.utils.function_calling import convert_to_openai_function
from langchain.chains.history_aware_retriever import create_history_aware_retriever
from langchain.chains.retrieval import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

In [4]:
# System message used by every extraction chain in this notebook.
# It is an f-string, so the allowed series/color/group lists are interpolated
# once, when this cell runs; re-run the cell if those lists change.
# NOTE(review): the text is later handed to ChatPromptTemplate, so a literal
# brace inside any interpolated value would presumably be read as a template
# variable - confirm before adding such values.
system_prompt = f"""You are a knowledgeable product assistant focus on extracting the series of a product, color of a product, and group of a product.
When responding to queries:
- Focus on the series of product and group of product first.
- the color of product is secondary important.
- You are not forced to respond, if you cannot find the series, color, or group of product.

Product Series: {series_unique}
Product Color: {color_unique}
Product Group: {group_unique}"""

In [5]:
# Chat model used by the chains below: temperature 0, completions capped at
# 64 tokens, API key read from the OPENAI_API_KEY environment variable.
llm = ChatOpenAI(
    model_name="gpt-4o-mini",
    openai_api_key=os.getenv("OPENAI_API_KEY"),
    temperature=0,
    max_tokens=64,
)

# Both chains use the same system + human messages; they differ only in the
# schema passed to with_structured_output.
_single_value_messages = [("system", system_prompt), ("human", "{input}")]

chain_typedict = (
    ChatPromptTemplate.from_messages(_single_value_messages)
    | llm.with_structured_output(format_typedict)
)
chain_basemodel = (
    ChatPromptTemplate.from_messages(_single_value_messages)
    | llm.with_structured_output(format_basemodel)
)

# Test

In [6]:
# Sample Thai queries used to exercise the single-value chains.
testing_data = [
    'กระเบื้อง type A สีแดง',
    'หลังคาสีม่วง',
    'มีหลังคาเท่าไหร่',
]

In [8]:
# For each sample query print the query, then the TypedDict chain's result,
# then the BaseModel chain's result, followed by a separator line.
for query in testing_data:
    print(query)
    for chain in (chain_typedict, chain_basemodel):
        print(chain.invoke({'input': query}))
    print('---')

กระเบื้อง type A สีแดง
{'series': 'type A', 'color': 'แดง', 'pgroup': 'กระเบื้อง'}
series='type A' color='แดง' pgroup='กระเบื้อง'
---
หลังคาสีม่วง
{'series': 'type B', 'color': 'ม่วง', 'pgroup': 'หลังคา'}
series='type B' color='ม่วง' pgroup='หลังคา'
---
มีหลังคาเท่าไหร่
{'series': 'type A', 'color': 'ม่วง', 'pgroup': 'หลังคา'}
series='type B' color=None pgroup='หลังคา'
---


Note: this comparison is only about the response format; do not be concerned with the extracted values themselves.

# Responses as Lists

In [10]:
from typing import Optional, TypedDict, List
from pydantic import BaseModel, Field, field_validator

# TypedDict
# Every field of the list schema has the same shape: a list of strings or None.
_OptionalStrList = Optional[List[str]]


class format_typedict_list(TypedDict):
    # Multi-value TypedDict schema: each key may carry several values.
    # NOTE(review): unlike format_typedict, the items are plain strings and are
    # not restricted to series_unique / color_unique / group_unique.
    series: _OptionalStrList
    color: _OptionalStrList
    pgroup: _OptionalStrList

# BaseModel
class format_basemodel_list(BaseModel):
    # Multi-value pydantic schema: each field is an optional list of strings
    # defaulting to None. The validators below reject a list as soon as they
    # meet an item that is not in the corresponding allowed-value list.
    series: Optional[List[str]] = Field(None, description="Series of product")
    color: Optional[List[str]] = Field(None, description="color of product")
    pgroup: Optional[List[str]] = Field(None, description="group name of product")

    @field_validator('series')
    @classmethod
    def validate_series(cls, v: Optional[List[str]]) -> Optional[List[str]]:
        """Return ``v`` unchanged if it is None or every item is in ``series_unique``.

        Raises:
            ValueError: naming the first item that is not in ``series_unique``.
        """
        if v is not None:
            for item in v:
                if item not in series_unique:
                    raise ValueError(f"{item} must be one of {series_unique}")
        return v

    @field_validator('color')
    @classmethod
    def validate_color(cls, v: Optional[List[str]]) -> Optional[List[str]]:
        """Return ``v`` unchanged if it is None or every item is in ``color_unique``.

        Raises:
            ValueError: naming the first item that is not in ``color_unique``.
        """
        if v is not None:
            for item in v:
                if item not in color_unique:
                    raise ValueError(f"{item} must be one of {color_unique}")
        return v

    @field_validator('pgroup')
    @classmethod
    def validate_pgroup(cls, v: Optional[List[str]]) -> Optional[List[str]]:
        """Return ``v`` unchanged if it is None or every item is in ``group_unique``.

        Raises:
            ValueError: naming the first item that is not in ``group_unique``.
        """
        if v is not None:
            for item in v:
                if item not in group_unique:
                    raise ValueError(f"{item} must be one of {group_unique}")
        return v


In [13]:
# List-returning chains: same system + human messages as the single-value
# chains, paired with the list-based schemas.
_multi_value_messages = [("system", system_prompt), ("human", "{input}")]

chain_typedict_list = (
    ChatPromptTemplate.from_messages(_multi_value_messages)
    | llm.with_structured_output(format_typedict_list)
)
chain_basemodel_list = (
    ChatPromptTemplate.from_messages(_multi_value_messages)
    | llm.with_structured_output(format_basemodel_list)
)

In [15]:
# Sample Thai queries that mention more than one series, color or group.
testing_data_multiple = [
    'กระเบื้อง type A กับ Type C สีแดง',
    'หลังคาสีม่วงกับสีแดง',
    'มีหลังคาและกระเบื้องเท่าไหร่',
]

# For each query print the query, then the TypedDict-list chain's result, then
# the BaseModel-list chain's result, followed by a separator line.
for query in testing_data_multiple:
    print(query)
    for chain in (chain_typedict_list, chain_basemodel_list):
        print(chain.invoke({'input': query}))
    print('---')


กระเบื้อง type A กับ Type C สีแดง
{'series': ['type A', 'type C'], 'color': ['แดง'], 'pgroup': ['กระเบื้อง']}
series=['type A', 'type C'] color=['แดง'] pgroup=['กระเบื้อง']
---
หลังคาสีม่วงกับสีแดง
{'series': ['type A', 'type B', 'type C'], 'color': ['ม่วง', 'แดง'], 'pgroup': ['หลังคา']}
series=['type B'] color=['ม่วง', 'แดง'] pgroup=['หลังคา']
---
มีหลังคาและกระเบื้องเท่าไหร่
{'series': ['type A', 'type B', 'type C'], 'color': [], 'pgroup': ['หลังคา', 'กระเบื้อง']}
series=None color=None pgroup=['หลังคา', 'กระเบื้อง']
---
