## About
This notebook contains examples of named entity recognition (NER) for products and queries with few-shot learning

In [75]:
import os
from langchain.chat_models import ChatOpenAI, ChatGooglePalm
from langchain.prompts import ChatPromptTemplate
from langchain.llms import OpenAI
from langchain import PromptTemplate
from langchain import LLMChain
from langchain.llms import Replicate

from langchain.schema import StrOutputParser
from langchain.cache import SQLiteCache
from langchain.globals import set_llm_cache
from langchain.callbacks import get_openai_callback

from langchain.prompts import (
    ChatPromptTemplate,
    FewShotChatMessagePromptTemplate,
)
from langchain.output_parsers import CommaSeparatedListOutputParser
from dotenv import load_dotenv


In [76]:
set_llm_cache(SQLiteCache(database_path=".langchain.db"))


In [77]:
load_dotenv("../.env") 


True

In [78]:
llm_openai = ChatOpenAI(model="gpt-4")


In [79]:
from pydantic import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser
from typing import Optional
from typing_extensions import Annotated
from enum import Enum
from typing import Union
import pandas as pd

## Output Formatting
- Defining possible values for entities to LLM (gender)
- Providing output schema

In [80]:
class Gender(str, Enum):
    male = 'male'
    female = 'female'
    unisex = 'unisex'
    other = 'other'
    not_given = 'not_given'


class ProductUnderstanding(BaseModel):
    brand: Optional[str] = Field(description="brand", )
    gender: Annotated[Union[Gender, None], Field(alias='Gender')] = None

    #gender: Optional[str] = Field(description="gender")
    product_type: Optional[str] = Field(description="product_type")
    color: Optional[str] = Field(description="color")
    size: Optional[str] = Field(description="size")

In [81]:
parser = PydanticOutputParser(pydantic_object=ProductUnderstanding)


## Prompt
A few shot prompt for extracting entities

In [82]:
template = """
You are e-commerce expert. Your task is to extract the attributes from customer query. 
Possible attributes are "product type", "brand", "gender", "color", "size".

Few shot Examples:

Input:
Query: yellow 35 inch baseball bat
Output: 
brand: None
gender: None
product_type: baseball bats
color: black
size: 35 inch

Input:   
Query: MOERDENG Men's Waterproof Ski Jacket Warm Winter Snow Coat Mountain Windbreaker Hooded Raincoat
Output: 
brand: MOERDENG
gender: Men
product_type: Ski Jacket
color: None
size: None

Input:   
Query: {query}
Output: 



Format instructions:
{format_instructions}
Answer: """

In [83]:
prompt = PromptTemplate(template=template, input_variables=["query"],
                                 partial_variables={"format_instructions":  parser.get_format_instructions()}
                                 )


In [84]:
print ( prompt.format(query = "nike men shoes" ) )



You are e-commerce expert. Your task is to extract the attributes from customer query. 
Possible attributes are "product type", "brand", "gender", "color", "size".

Few shot Examples:

Input:
Query: yellow 35 inch baseball bat
Output: 
brand: None
gender: None
product_type: baseball bats
color: black
size: 35 inch

Input:   
Query: MOERDENG Men's Waterproof Ski Jacket Warm Winter Snow Coat Mountain Windbreaker Hooded Raincoat
Output: 
brand: MOERDENG
gender: Men
product_type: Ski Jacket
color: None
size: None

Input:   
Query: nike men shoes
Output: 



Format instructions:
The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

## Example

In [85]:
#sample_product_title = "nike men shoes in green"
sample_product_title = "MOERDENG Men's Waterproof Ski Jacket Warm Winter Snow Coat Mountain Windbreaker Hooded Raincoat"

chain = prompt | llm_openai 

output= chain.invoke({"query": sample_product_title})

In [86]:
output

AIMessage(content='{"brand": "MOERDENG", "Gender": "male", "product_type": "Ski Jacket", "color": null, "size": null}')

In [87]:
product_info = parser.parse(output.content)
product_info

ProductUnderstanding(brand='MOERDENG', gender=<Gender.male: 'male'>, product_type='Ski Jacket', color=None, size=None)

In [88]:
product_info.gender

<Gender.male: 'male'>