# About

In this notebook, we compare the original Llama models with our fine-tuned Llama model on the attribute extraction task.

# Imports

In [38]:
from dotenv import load_dotenv
import os

import os
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI
from langchain import PromptTemplate
from langchain import LLMChain
from langchain.llms import Replicate

from langchain.schema import StrOutputParser
from langchain.cache import SQLiteCache
from langchain.globals import set_llm_cache
from langchain.callbacks import get_openai_callback

from langchain.prompts import (
    ChatPromptTemplate,
    FewShotChatMessagePromptTemplate,
)
from langchain.output_parsers import CommaSeparatedListOutputParser
import tiktoken
from langchain.llms import CTransformers
from langchain.llms.huggingface_pipeline import HuggingFacePipeline 
from langchain.llms import HuggingFaceHub
from pydantic import BaseModel, Field


from pydantic import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser
from typing import Optional
from typing_extensions import Annotated
from enum import Enum
from typing import Union
import pandas as pd
import rich
import replicate

In [2]:
# Register a SQLite-backed LangChain LLM cache stored in the local file `.langchain.db`.
set_llm_cache(SQLiteCache(database_path=".langchain.db"))


In [3]:
# Load environment variables from the `.env` file one directory above the notebook.
# NOTE(review): presumably this provides the OpenAI / Replicate API credentials — confirm.
load_dotenv("../.env")


True

In [4]:
class Gender(str, Enum):
    """Allowed values for the ``gender`` attribute of an extracted product.

    The ``str`` mixin makes every member compare equal to its plain string value.
    """

    male = "male"
    female = "female"
    other = "other"
    not_given = "not_given"


class ProductUnderstanding(BaseModel):
    """Structured attributes extracted from an e-commerce customer query.

    Every attribute is optional and defaults to ``None``, so a query that
    mentions only some attributes (e.g. no size) still validates. The explicit
    ``default=None`` matters under pydantic v2, where ``Optional[str]`` without
    a default is a *required* (but nullable) field.
    """

    brand: Optional[str] = Field(default=None, description="brand")
    # Read from the capitalised input key "Gender" (the alias); the value must be a Gender member.
    gender: Annotated[Union[Gender, None], Field(alias='Gender')] = None
    product_type: Optional[str] = Field(default=None, description="product_type")
    color: Optional[str] = Field(default=None, description="color")
    size: Optional[str] = Field(default=None, description="size")

In [50]:
# Raw prompt text; `{query}` is the only placeholder and receives the customer query.
# Built line by line so the exact whitespace of the prompt is explicit.
prompt_template = "\n".join(
    [
        "",  # the prompt starts with a blank line
        "Extract attributes from the given e-commerce customer query.",
        "Possible attributes are 'product type', 'brand', 'gender', 'color', 'size'.",
        "",
        "Input:",
        "{query} ",  # the trailing space is part of the prompt text
        "",
        "Output:",
        "",  # the prompt ends with a newline after "Output:"
    ]
)

In [51]:
# Output parser targeting the ProductUnderstanding schema.
# NOTE(review): none of the chains visible in this notebook use it, and the
# format-instructions hook on the prompt is commented out — confirm whether it is still needed.
parser = PydanticOutputParser(pydantic_object=ProductUnderstanding)


In [52]:
# Wrap the raw template in a LangChain prompt; "query" is its single input variable.
# The parser's format instructions are currently not injected (kept for reference):
#   partial_variables={"format_instructions": parser.get_format_instructions()}
prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["query"],
)


In [8]:
# Render the prompt for an example query to check the exact text produced by the template.
print(prompt.format(query="nike men shoes"))



Extract attributes from the given e-commerce customer query.
Possible attributes are 'product type', 'brand', 'gender', 'color', 'size'.

Input:
nike men shoes 

Output:





In [9]:
# Example customer query passed to each model chain in this notebook.
sample_product_title = "nike black men shoes"


## GPT-4

In [10]:
# GPT-4 chat model; no sampling parameters are overridden here.
llm_openai = ChatOpenAI(model="gpt-4")


In [11]:
# Alternative, longer input kept for reference:
# sample_product_title = "MOERDENG Men's Waterproof Ski Jacket Warm Winter Snow Coat Mountain Windbreaker Hooded Raincoat"

# Pipe the prompt into GPT-4 and run it on the sample query.
chain = prompt | llm_openai
output = chain.invoke({"query": sample_product_title})

In [12]:
# Display the raw message object returned by the GPT-4 chain.
output

AIMessage(content="'product type': shoes,\n'brand': nike,\n'gender': men,\n'color': black")

In [58]:
# Stock Llama 2 7B (non-chat) on Replicate, pinned to a specific model version hash.
llm_llama7b = Replicate(
    model="meta/llama-2-7b:77dde5d6c56598691b9008f7d123a18d98f40e4b4978f8a72215ebfc2553ddd8",
    verbose=True
)


# Stock Llama 2 7B chat variant on Replicate, pinned to a specific model version hash.
llm_llama7b_chat = Replicate(
    model="meta/llama-2-7b-chat:13c3cdee13ee059ab779f0291d29054dab00a47dad8261375654de5540165fb0",
    verbose=True
)



# Fine-tuned e-commerce model on Replicate, pinned to a specific model version hash.
# NOTE(review): sampling parameters are set explicitly only for this model; the
# stock Llama models in this notebook run with their defaults — confirm that is intended.
llm_llama7b_finetuned = Replicate(
    model="npatta01/llama2-ecommerce-st:caf9251ac6bff5756b7e33d361af4def27aeded696bbaa45e1cc690b849c13ea",
    verbose=True,
    model_kwargs={
        "temperature": 0.75,
        "max_new_tokens": 128,
        "top_p": 0.9,
        "min_new_tokens": -1,
        "top_k": 50,
    },
)



GPT-4

In [59]:
# Run the shared prompt through GPT-4 and print the reply.
chain = prompt | llm_openai
print(chain.invoke({"query": sample_product_title}))

content="'product type': shoes, 'brand': nike, 'gender': men, 'color': black"


Llama 2 7B (base)

In [60]:
# Run the shared prompt through the stock Llama 2 7B model and print the completion.
chain = prompt | llm_llama7b
print(chain.invoke({"query": sample_product_title}))

{'type': 'product_type', 'brand': 'nike', 'gender': 'men', 'color': 'black'}
"""
def extract(customerQuery):
   """Returns dictionary of extracted attributes."""
   
   # Extract product type, brand and gender
   if "nike" in customerQuery or "adidas" in customerQuery \
       or "puma" in customerQuery or "vans" in customerQuery \
       or "converse" in customerQuery:
       return {'type': 'product_type', 'brand': 'shoes


Llama 2 7B chat

In [61]:
# Run the shared prompt through the stock Llama 2 7B chat model and print the reply.
chain = prompt | llm_llama7b_chat
print(chain.invoke({"query": sample_product_title}))

Great, I'd be happy to help! Based on the given query "nike black men shoes", here are some attributes that can be extracted:

1. Product type: Shoes
2. Brand: Nike
3. Gender: Men
4. Color: Black
5. Size: None specified (as it is not mentioned in the query)

Is there anything else you would like me to assist you with?


Fine-tuned Llama 2 7B model

In [93]:
# chain = prompt | llm_llama7b_finetuned 

# print ( chain.invoke({"query": sample_product_title}) )

In [94]:
# print ( prompt.format(query= sample_product_title) )

In [None]:

# output = replicate.run(
#   "npatta01/llama2-ecommerce-st:caf9251ac6bff5756b7e33d361af4def27aeded696bbaa45e1cc690b849c13ea",
#   input={
#     "debug": False,
#     "top_k": 50,
#     "top_p": 0.9,
#     "prompt": prompt.format(query= sample_product_title),
#     "temperature": 0.75,
#     "max_new_tokens": 128,
#     "min_new_tokens": -1
#   }
# )
# print("".join(list(output) ) )

![Fine-tuned LLM output](assets/finetuned_llm.jpg)
