In [None]:
from guidance import models, system, user, gen, assistant

# Load a model. guidance exposes several backends (Transformers, LlamaCpp,
# VertexAI, OpenAI, ...). Alternatives tried here: models.OpenAI("gpt-4o")
# and the "microsoft/Phi-3-mini-4k-instruct" checkpoint.
# NOTE: despite its name, `llama2` holds the WizardMath-7B model below; the
# name is kept because later cells refer to it.
llama2 = models.Transformers("WizardLM/WizardMath-7B-V1.1")

# System turn: fixes the reply style for the whole conversation.
with system():
    lm = llama2 + "You only speak in ALL CAPS."

# User turn: the actual question.
with user():
    lm += "What is the capital of Greenland?"

# Assistant turn: generate at most 20 tokens and store them under "answer"
# so they can be read back later with lm["answer"].
with assistant():
    lm += gen('answer', max_tokens=20)


In [3]:
# Show the text that gen('answer', ...) captured in the assistant turn.
lm["answer"]

'THE CAPITAL OF GREENLAND IS NUUK.'

In [25]:
# Rebind `llama2` to an OpenAI chat model. NOTE: despite the variable name,
# this is gpt-4o; the name is reused by the interaction code further down.
llama2 = models.OpenAI("gpt-4o")

# Sample with five product descriptions, each prefixed by "<product key>: ".
# Kept under its own name so it is no longer silently overwritten; switch to
# it with `txt_input = txt_input_all` for a multi-product run.
txt_input_all = """
["284367: L'Oreal Paris Matte Signature Eyeliner 11ml. Black <p>Matt signature eyeliner will help to sharpen your eyeliner girls. Not to be difficult and time consuming anymore. The special design allows long handle grip. Formula ecstasy and Matt Black paint recipe sharp flick the stick waterproof long-lasting all day.</p><p><img src=https://www.watsons.co.th/medias/New-Project.jpg?context=bWFzdGVyfHd0Y3RoL2ltYWdlc3w0OTg3OTJ8aW1hZ2UvanBlZ3xoZTAvaGZhLzExNjkxNTUzMDMwMTc0L05ldyBQcm9qZWN0LmpwZ3xkMDQ4YmJhZGNjMjBlZDE4MzUxOTUwYzJmZTJhYjljZjkyZjBmOGRjMTBiNTVjY2M3OTQ4NzA0NjE4NGY2NTU4 /></p>", 
'283906: Catrice Calligraph Pro Precise 24h Matt Liner Waterproof 010 1.2 ml 97 percent agree: Intense black strong coverage and high precision application. This waterproof eyeliner is an all-rounder. Accurate lines? No problem thanks to the ultra-fine tip. The pen glides over the eyelid the line is perfectly in place - for up to 24 hours. The extremely intensive colour payoff is also visible over eyeshadow and make-up.', 
'284839: Browit Smooth And Slim Inner Eyeliner 0.1g. Deep Brown This pressed cream eyeliner provides smooth and precise lines with its slim tip of only 2 mm and pigment-rich color as well as being waterproof and sweat-proof. In addition this item also comes with an automatic twist-up design to facilitate the application that is specially created by Nongchat a renowned and professional makeup artist in Thailand.', 
'295492: In2it Slim Perfect Gel Liner 0.15g. 02 Brunette In2it Slim Perfect Gel Liner 0.15g. Eyeliner Pencil Bestselling Eyeliner Pencil Eyeliner pencils are good gel liners write eyeliner. Gel line sells well gel liner is good in liner in liner sells well in liner is good the soil is written the eyeliner is waterproof. Eyeliner is not messy waterproof gel liner waterproof eyeliner no gel liner no mess no panda no panda eyeliner. Brown Eyeliner Pencil Brown gel liner Beautiful lines crisp easy to write easy to carry gel liner eyeliner waterproof waterproof gel liner.', 
'275447: Canmake Creamy Touch Eye Liner 0.08g.01 Deep Black You’ll adore the tantalizingly smooth texture!1.5mm hyper-slim gel eyelinerWaterproofResistant to water sweat tears and sebumContains beautifying ingredients']
"""

# Sample with a single product description (the first entry of the list above).
txt_input_single = """
["284367: L'Oreal Paris Matte Signature Eyeliner 11ml. Black <p>Matt signature eyeliner will help to sharpen your eyeliner girls. Not to be difficult and time consuming anymore. The special design allows long handle grip. Formula ecstasy and Matt Black paint recipe sharp flick the stick waterproof long-lasting all day.</p><p><img src=https://www.watsons.co.th/medias/New-Project.jpg?context=bWFzdGVyfHd0Y3RoL2ltYWdlc3w0OTg3OTJ8aW1hZ2UvanBlZ3xoZTAvaGZhLzExNjkxNTUzMDMwMTc0L05ldyBQcm9qZWN0LmpwZ3xkMDQ4YmJhZGNjMjBlZDE4MzUxOTUwYzJmZTJhYjljZjkyZjBmOGRjMTBiNTVjY2M3OTQ4NzA0NjE4NGY2NTU4 /></p>"]
"""

# Active input for the extraction prompt. The single-product sample is the
# one that was effectively in use before (the later assignment won).
txt_input = txt_input_single

# Build the extraction request. The f-string interpolates `txt_input` into
# the **Input** section; doubled braces ({{ }}) are literal braces for the
# JSON examples.
# The prompt deliberately contains no Llama-3 chat-template markers
# (<|begin_of_text|>, <|start_header_id|>, <|eot_id|>): the model bound to
# `llama2` here is an OpenAI chat model, for which those markers are just
# stray text, and the role blocks below already carry the role information.
extract_prompt = f"""
  **Task**: Extract product information from a given text.

  **Input**:
  {txt_input}

  **Instructions**:
  1. Extract the unique product key from the input text, which is typically a 6/7 digit number before the colon mark.
  2. Identify the product type with considering the state of matter (e.g., shampoo, soap, hand wash, shower gel, laundry capsules, etc.) that is mentioned in the textual description.
  3. If the product type cannot be identified or is unknown, return 'N/A'.
  4. Output the answer in JSON format, with the product key and product type as key-value pairs.

  **Output Format**:
  {{"product_key": "product_type"}}

  **Example Output**:
  {{"123456": "shampoo", "654321": "laundry capsules"}}

  **Important**:
  * If you don't know the answer, please don't share false information.
  * All output must be in valid JSON format.
  * Don't add explanations beyond the JSON output.
  * Don't repeat the instruction or the JSON output more than once.

  Answer:
"""

# Create the interaction using guidance.
# The task is sent as the user turn (it was previously placed in the system
# role with no user turn at all), matching the system/user/assistant pattern
# used in the first cell.
with user():
    lm = llama2 + extract_prompt

with assistant():
    # Generate the structured response, capped at 100 tokens, and store it
    # under "answer" so it can be read back with lm["answer"].
    lm += gen('answer', max_tokens=100)

In [26]:
import json

# Raw text captured under "answer" by the generation step. It is kept under
# its own name so `result_cleaned` only ever refers to the parsed object.
answer_raw = lm["answer"]

# Chat models sometimes wrap JSON in a Markdown code fence (```json ... ```)
# even when told to return bare JSON; remove the fence before parsing.
answer_text = answer_raw.strip()
if answer_text.startswith("```"):
    answer_text = answer_text.strip("`").strip()
    if answer_text.lower().startswith("json"):
        answer_text = answer_text[len("json"):]

# Parse the reply; json.loads raises json.JSONDecodeError on invalid JSON
# (for example a reply cut off by the max_tokens limit).
result_cleaned = json.loads(answer_text)

# The DataFrame-building cell expects a mapping, so fail here with a clear
# message rather than later with an obscure pandas error.
if not isinstance(result_cleaned, dict):
    raise TypeError(
        f"Expected a JSON object from the model, got {type(result_cleaned).__name__}: {answer_raw!r}"
    )

# Print the final output
print(result_cleaned)
print(type(result_cleaned))

{'284367': 'eyeliner'}
<class 'dict'>


In [27]:
import pandas as pd

# Turn the parsed model answer (`result_cleaned`, a dict) into a two-column
# table with columns "product_key" and "product_type".
if "product_key" in result_cleaned and "product_type" in result_cleaned:
    # Record-style answer: the dict literally uses the keys "product_key"
    # and "product_type", so it already is a single row.
    pd_data = pd.DataFrame([result_cleaned])
else:
    # Mapping-style answer ({"<key>": "<type>", ...}, the format requested in
    # the prompt): one row per item. Building from the item pairs also works
    # for an empty dict, where from_dict(...).reset_index() followed by a
    # two-name column assignment raised a length-mismatch ValueError.
    pd_data = pd.DataFrame(
        list(result_cleaned.items()),
        columns=["product_key", "product_type"],
    )

pd_data

Unnamed: 0,product_key,product_type
0,284367,eyeliner
