# About

In this notebook, we compare the original Llama model against our fine-tuned Llama model on the attribute extraction task.

# Imports

In [1]:
from dotenv import load_dotenv
import os

import os
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI
from langchain import PromptTemplate
from langchain import LLMChain
from langchain.llms import Replicate

from langchain.schema import StrOutputParser
from langchain.cache import SQLiteCache
from langchain.globals import set_llm_cache
from langchain.callbacks import get_openai_callback

from langchain.prompts import (
    ChatPromptTemplate,
    FewShotChatMessagePromptTemplate,
)
from langchain.output_parsers import CommaSeparatedListOutputParser
import tiktoken
from langchain.llms import CTransformers
from langchain.llms.huggingface_pipeline import HuggingFacePipeline 
from langchain.llms import HuggingFaceHub
from pydantic import BaseModel, Field


from pydantic import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser
from typing import Optional
from typing_extensions import Annotated
from enum import Enum
from typing import Union
import pandas as pd
import rich

In [2]:
# Register a SQLite-backed LLM cache stored in `.langchain.db` (path is relative
# to the notebook's working directory).
set_llm_cache(
    SQLiteCache(database_path=".langchain.db"),
)


In [3]:
# Load environment variables from the `.env` file one directory above the notebook
# (presumably the API credentials used by the model clients below — confirm).
load_dotenv(dotenv_path="../.env")


True

In [4]:
class Gender(str, Enum):
    """Allowed values for the ``gender`` attribute of a parsed query.

    The ``str`` mix-in makes every member compare equal to its plain string
    value (e.g. ``Gender.male == "male"``).
    """

    male = "male"
    female = "female"
    other = "other"
    not_given = "not_given"


class ProductUnderstanding(BaseModel):
    """Structured attributes extracted from an e-commerce customer query.

    Every attribute is optional and defaults to ``None`` so that a query which
    mentions only some attributes (for example no size) still validates.
    The defaults are spelled out explicitly because pydantic v2 treats an
    ``Optional[...]`` field without a default as required; ``gender`` already
    had an explicit ``None`` default, and the other fields now match it.
    """

    brand: Optional[str] = Field(default=None, description="brand")
    # Read from the capitalised key ``Gender`` (alias); restricted to the Gender enum.
    gender: Annotated[Union[Gender, None], Field(alias='Gender')] = None
    product_type: Optional[str] = Field(default=None, description="product_type")
    color: Optional[str] = Field(default=None, description="color")
    size: Optional[str] = Field(default=None, description="size")

In [5]:
# Instruction prompt with no worked examples; `{query}` is the only placeholder.
# NOTE(review): the prompt names the attribute 'product type' (with a space) while
# the ProductUnderstanding field is `product_type` — confirm which spelling is intended.
prompt_template = """
Extract attributes from the given e-commerce customer query.
Possible attributes are 'product type', 'brand', 'gender', 'color', 'size'.

Input:
{query} 

Output:


"""

In [6]:
# Output parser targeting the ProductUnderstanding schema.
# NOTE(review): no chain in this section pipes into `parser` — confirm it is still needed.
parser = PydanticOutputParser(
    pydantic_object=ProductUnderstanding,
)


In [7]:
# Wrap the raw template string; `query` is its only input variable.
# The output parser's format instructions are not injected into this prompt.
prompt = PromptTemplate(
    input_variables=["query"],
    template=prompt_template,
)


In [8]:
# Sanity check: render the prompt for an example query to inspect the final text.
print(prompt.format(query="nike men shoes"))



Extract attributes from the given e-commerce customer query.
Possible attributes are 'product type', 'brand', 'gender', 'color', 'size'.

Input:
nike men shoes 

Output:





In [9]:
# Example customer query sent to every model below so their outputs can be compared.
sample_product_title = "nike black men shoes"


In [10]:
# OpenAI chat model (GPT-4) run on the same prompt as the Llama variants.
llm_openai = ChatOpenAI(
    model="gpt-4",
)


In [11]:
# Alternative, longer input to try instead of the short sample query:
#   "MOERDENG Men's Waterproof Ski Jacket Warm Winter Snow Coat Mountain Windbreaker Hooded Raincoat"

# Pipe the prompt into GPT-4 and keep the response for inspection in the next cell.
chain = prompt | llm_openai
output = chain.invoke({"query": sample_product_title})

In [12]:
# Show the raw response object returned by the GPT-4 chain.
output

AIMessage(content="'product type': shoes,\n'brand': nike,\n'gender': men,\n'color': black")

In [13]:
# Three Llama-2 7B variants served through Replicate, each referenced by an
# explicit `owner/model:version` identifier.

# Base `llama-2-7b` model.
llm_llama7b = Replicate(
    verbose=True,
    model="meta/llama-2-7b:77dde5d6c56598691b9008f7d123a18d98f40e4b4978f8a72215ebfc2553ddd8",
)

# Chat variant, `llama-2-7b-chat`.
llm_llama7b_chat = Replicate(
    verbose=True,
    model="meta/llama-2-7b-chat:13c3cdee13ee059ab779f0291d29054dab00a47dad8261375654de5540165fb0",
)

# Our fine-tuned e-commerce model.
llm_llama7b_finetuned = Replicate(
    verbose=True,
    model="npatta01/llama2-ecommerce-st:caf9251ac6bff5756b7e33d361af4def27aeded696bbaa45e1cc690b849c13ea",
)


In [14]:
# GPT-4 baseline: print its response to the sample query as a reference point
# for the Llama outputs that follow.
chain = prompt | llm_openai
print(chain.invoke({"query": sample_product_title}))

AIMessage(content="'product type': shoes,\n'brand': nike,\n'gender': men,\n'color': black")

In [18]:
# Base `llama-2-7b` model on the same prompt and query.
# NOTE(review): in the recorded run the attributes are mislabelled and the model
# keeps generating unrelated code afterwards — consider a stop sequence or a
# token limit before comparing outputs.
chain = prompt | llm_llama7b
print(chain.invoke({"query": sample_product_title}))

Product Type: nike
Brand: black
Gender: men
Color: shoes
Size: None

"""
import collections
from collections import defaultdict
import re
from typing import List, Optional

_REGEX = r'(\w+)(?:\s*(?:\(([a-z]+)\))?)?'

def parse(query: str) -> dict:
   product_type, brand, gender, color, size = None, None, None, None, None
   
   regex = _REGEX + r'\s+'
  


In [19]:
# Chat variant (`llama-2-7b-chat`) on the same prompt and query.
chain = prompt | llm_llama7b_chat
print(chain.invoke({"query": sample_product_title}))

Sure, I'd be happy to help! Based on the given query "nike black men shoes", here are some attributes that can be extracted:

* Product type: Shoes
* Brand: Nike
* Gender: Men
* Color: Black
* Size: Not specified (but since it's a specific color and gender, it's likely that size is not a variable in this case)

Is there anything else you would like me to help with?


In [20]:
# Fine-tuned model (`npatta01/llama2-ecommerce-st`) on the same prompt and query.
# NOTE(review): the recorded output is unrelated C++ text rather than extracted
# attributes — verify that this prompt format matches the one used for
# fine-tuning before drawing conclusions.
chain = prompt | llm_llama7b_finetuned
print(chain.invoke({"query": sample_product_title}))


*/
#include <bits/stdc++.h>
using namespace std;
#define ll long long int 
const ll mod = 1e9 + 7; // must be greater than the largest prime number   (so it is divisible by each prime and their product)   
//for this program we use a simple hashing function to find out if a string is in the dictionary or not. To do that we divide the input string into characters, take hashes of all those characters, add them together and then check if it's in dictionary or not. If it is then return
