In [6]:
!pip install -U langchain
!pip install -U "langchain[openai]"
# Requires Python 3.10+



In [7]:
import os
from langchain.chat_models import init_chat_model
from dotenv import load_dotenv

# Read variables from a local .env file (if any) into the process environment.
load_dotenv()

# Indexing os.environ (instead of .get) raises KeyError right here when the
# key is not configured.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

# Chat model handle created with the key passed explicitly.
model = init_chat_model(
    "gpt-4.1",
    api_key=OPENAI_API_KEY,
)



In [9]:
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

# Shared building blocks for every chain defined below:
# a parser that yields the model reply as a plain string, and the chat model.
parser = StrOutputParser()
llm = ChatOpenAI(
    model="gpt-4.1",
    temperature=0,
)

# ------------------ Agent 1: ANALYSIS ------------------
# Prompt text for the extraction step. The literal JSON braces are doubled
# ({{ }}) so that only {title} and {desc1}..{desc3} act as placeholders.
_ANALYSIS_TEMPLATE = """
You are an information extraction assistant.

Product:
- Title: {title}

Descriptions:
1. {desc1}
2. {desc2}
3. {desc3}

Your tasks:
1. Identify all common details that appear in at least 2 descriptions or are clearly implied by the title.
2. Identify unique useful details that appear in only 1 description.
3. Identify all contradictions, including:
   - conflicts between descriptions,
   - conflicts between any description and the product title.

Return output in EXACTLY this JSON format:

{{
  "common_details": [...],
  "unique_details": {{
    "desc1": [...],
    "desc2": [...],
    "desc3": [...]
  }},
  "contradictions": [...]
}}

Rules:
- Do NOT invent facts.
- Only use information explicitly supported by the title or the descriptions.
- Keep each detail short and factual.
"""

analysis_prompt = PromptTemplate(
    template=_ANALYSIS_TEMPLATE,
    input_variables=["title", "desc1", "desc2", "desc3"],
)

# prompt -> chat model -> plain string (the JSON is NOT parsed here; it is
# passed downstream as text).
analysis_chain = analysis_prompt | llm | parser


# ------------------ Agent 2: REWRITE ------------------
# Prompt text for the drafting step; expects the title and the analysis text
# produced by Agent 1.
_REWRITE_TEMPLATE = """
You are a product description rewriting assistant.

Product title:
{title}

Here is a JSON analysis of this product based on three descriptions:
{analysis}

Write ONE unified product description (2–3 sentences) that:
- is clearly about the product in the title,
- keeps all correct and important details from the analysis,
- removes redundancy and contradictions,
- does NOT invent new facts.

Requirements:
- Use the exact product name from the title at the beginning of the description.
- Make the text sound natural and helpful for an online product page.

Return ONLY the fused description as plain text.
"""

rewrite_prompt = PromptTemplate(
    template=_REWRITE_TEMPLATE,
    input_variables=["title", "analysis"],
)

# prompt -> chat model -> plain string
rewrite_chain = rewrite_prompt | llm | parser


# ------------------ Agent 3: REFLECTION ------------------
# Prompt text for the verification step; the draft is checked against the
# title and the three original descriptions.
_REFLECT_TEMPLATE = """
You are a hallucination-detection assistant.

Product title:
{title}

Fused description to verify:
{draft}

Original descriptions:
[1] {desc1}
[2] {desc2}
[3] {desc3}

Your tasks:
1. Check whether the fused description contains any detail that is NOT supported by:
   - the product title, or
   - any of the three original descriptions.
2. Remove or correct any such hallucinated or conflicting details.
3. Keep all correct, useful details.
4. Rewrite the final version in 2–3 fluent sentences.

Rules:
- The description must clearly match the product in the title.
- Do NOT add new information beyond what the title and descriptions support.
- Return ONLY the final corrected description as plain text.
"""

reflect_prompt = PromptTemplate(
    template=_REFLECT_TEMPLATE,
    input_variables=["title", "draft", "desc1", "desc2", "desc3"],
)

# prompt -> chat model -> plain string
reflection_chain = reflect_prompt | llm | parser


# ------------------ FULL PIPELINE (LCEL Version 2) ------------------
# Imported next to its only use so this block is self-contained.
from langchain_core.runnables import RunnablePassthrough

# End-to-end chain: analysis -> rewrite -> reflection.
#
# Input : dict with the keys "title", "desc1", "desc2", "desc3".
# Output: the string returned by reflection_chain.
#
# The stages are built with RunnablePassthrough.assign instead of chaining two
# dict literals with `|`. With `{...} | {...} | reflection_chain`, the first
# `|` has two plain dicts as operands, so Python (3.9+) applies dict union
# before LangChain can coerce anything. The two stages would collapse into a
# single parallel step in which rewrite_chain receives the raw input, which
# has no "analysis" key.
fusion_pipeline = (
    # Step 1: keep the incoming keys and add "analysis".
    RunnablePassthrough.assign(analysis=analysis_chain)
    # Step 2: keep everything so far and add "draft"
    # (rewrite_chain reads "title" and "analysis").
    | RunnablePassthrough.assign(draft=rewrite_chain)
    # Step 3: reflection_chain reads "title", "draft", "desc1".."desc3".
    | reflection_chain
)


In [3]:
import json

# Path to your dataset
json_path = "/home/hoangnam/fusion/amazon_baby_model_qwen_type_sample_descriptions.json"

# Load JSON. The encoding is pinned to UTF-8 so that reading the file does not
# depend on the platform's default locale encoding.
with open(json_path, "r", encoding="utf-8") as f:
    data = json.load(f)

# Select the first item
item = data[0]

# Extract fields: the title plus the three candidate descriptions that are
# mapped onto the prompt variables desc1..desc3.
title = item["title"]
desc1 = item["image_based_desc"]
desc2 = item["image_title_based_desc"]
desc3 = item["llm_based_desc"]

# Quick sanity check of the inputs; only the first 120 characters of each
# description are shown to keep the output short.
print("=== INPUT CHECK ===")
print("TITLE:", title)
print("DESC1:", desc1[:120], "...")
print("DESC2:", desc2[:120], "...")
print("DESC3:", desc3[:120], "...")
print("=" * 80)

# Run Agent 1 (analysis) on its own and show its raw string output.
result1 = analysis_chain.invoke({
    "title": title,
    "desc1": desc1,
    "desc2": desc2,
    "desc3": desc3
})
print(result1)

=== INPUT CHECK ===
TITLE: Arm's Reach Original Co-sleeper 100% Cotton White Sheet
DESC1: The product is a white rectangular object with a textured surface and a smooth, clean design. It appears to be a piece o ...
DESC2: Arm's Reach Original Co-sleeper 100% Cotton White Sheet is a soft and comfortable bedding option designed to provide a c ...
DESC3: Arm's Reach Original Co-sleeper 100% Cotton White Sheet is a soft and breathable bedding option designed for comfort and ...
{
  "common_details": [
    "Product is a sheet",
    "Designed for Arm's Reach Original Co-sleeper",
    "Made from 100% cotton",
    "White color",
    "Soft and comfortable",
    "Durable",
    "Breathable",
    "Suitable for babies or infants",
    "Provides a clean and neutral appearance"
  ],
  "unique_details": {
    "desc1": [
      "Described as a white rectangular object with a textured surface",
      "Described as possibly a piece of furniture or decor",
      "Minimalist aesthetic"
    ],
    "desc2": [

In [4]:
# Agent 2: the JSON string returned by Agent 1 is passed into the rewrite
# prompt as-is (it is not parsed first).
rewrite_inputs = {"title": title, "analysis": result1}
rewrite_result = rewrite_chain.invoke(rewrite_inputs)

print("\n=== AGENT 2 (REWRITE) ===")
print(rewrite_result)



=== AGENT 2 (REWRITE) ===
Arm's Reach Original Co-sleeper 100% Cotton White Sheet is a soft, breathable, and durable sheet designed specifically for the Arm's Reach Original Co-sleeper. Made from 100% cotton in a clean white color, it provides a gentle, comfortable surface for babies and infants, making it a versatile and cozy choice for any nursery.


In [5]:
# Agent 3: verify the draft against the title and the three source descriptions.
reflection_inputs = {
    "title": title,
    "draft": rewrite_result,
    "desc1": desc1,
    "desc2": desc2,
    "desc3": desc3,
}
reflection_result = reflection_chain.invoke(reflection_inputs)

print("=== AGENT 3 (REFLECTION) ===")
print(reflection_result)


=== AGENT 3 (REFLECTION) ===
Arm's Reach Original Co-sleeper 100% Cotton White Sheet is a soft, breathable, and durable sheet made from 100% cotton in a clean white color. Designed specifically for the Arm's Reach Original Co-sleeper, it provides a gentle and comfortable surface for babies, making it a versatile choice for any nursery.


In [10]:
# Hand-written test item whose three descriptions disagree with each other and
# with the title (material, capacity, print, lid, cleaning instructions).
sample = {
    "asin": "TEST-HARD-001",
    "title": "EcoSip Kids Stainless Steel Water Bottle 12oz with Straw Lid - Dinosaur Print",
    "image_based_desc": "The product appears to be a colorful plastic tumbler with a glossy finish and a screw-on lid. It has a tall, slim shape and a wraparound space-themed pattern with rockets and stars. The lid looks solid without any visible straw, suggesting it is a regular screw-top cup rather than a bottle with a straw. The overall design seems more suitable for older kids or adults on the go.",
    "image_title_based_desc": "EcoSip Kids Stainless Steel Water Bottle 12oz with Straw Lid - Dinosaur Print is a compact, kid-friendly bottle designed for everyday use. Made from BPA-free stainless steel, it features double-wall vacuum insulation to keep drinks cold for up to 12 hours. The flip-up straw lid is easy for children to use, and the fun dinosaur print encourages kids to stay hydrated. The bottle is recommended for cold drinks only and is best cleaned by hand washing to preserve the print and insulation.",
    "llm_based_desc": "EcoSip Kids Stainless Steel Water Bottle 12oz with Straw Lid - Dinosaur Print is a versatile insulated bottle for children, offering an 18oz capacity that works well for both hot and cold beverages. It includes two interchangeable lids: a straw lid for everyday use and a wide-mouth chug lid for sports or quick drinking. The dishwasher-safe design and durable construction make it easy for parents to clean and maintain, and the playful unicorn and dinosaur pattern helps it stand out in school or daycare. Some versions of this bottle are sold as a set with matching lunchbox and sticker sheet.",
}

# Map the dataset field names onto the variable names the prompts expect.
title, desc1, desc2, desc3 = (
    sample[field]
    for field in (
        "title",
        "image_based_desc",
        "image_title_based_desc",
        "llm_based_desc",
    )
)


In [11]:
# Inputs shared by Agent 1 and Agent 3.
source_fields = {
    "title": title,
    "desc1": desc1,
    "desc2": desc2,
    "desc3": desc3,
}

# Agent 1: extract common / unique details and contradictions.
analysis_result = analysis_chain.invoke(source_fields)
print("=== AGENT 1 ===")
print(analysis_result)

# Agent 2: draft a fused description from the title and the analysis text.
rewrite_result = rewrite_chain.invoke(
    {"title": title, "analysis": analysis_result}
)
print("\n=== AGENT 2 ===")
print(rewrite_result)

# Agent 3: check the draft against the title and the original descriptions.
reflection_result = reflection_chain.invoke(
    {**source_fields, "draft": rewrite_result}
)
print("\n=== AGENT 3 ===")
print(reflection_result)


=== AGENT 1 ===
{
  "common_details": [
    "Product is a kids' water bottle",
    "Stainless steel construction",
    "12oz capacity",
    "Straw lid",
    "Dinosaur print",
    "Designed for children"
  ],
  "unique_details": {
    "desc1": [
      "Colorful plastic tumbler",
      "Glossy finish",
      "Screw-on lid without visible straw",
      "Tall, slim shape",
      "Space-themed pattern with rockets and stars",
      "Design suitable for older kids or adults"
    ],
    "desc2": [
      "BPA-free stainless steel",
      "Double-wall vacuum insulation",
      "Keeps drinks cold for up to 12 hours",
      "Flip-up straw lid",
      "Encourages kids to stay hydrated",
      "Recommended for cold drinks only",
      "Hand wash to preserve print and insulation"
    ],
    "desc3": [
      "18oz capacity",
      "Works for hot and cold beverages",
      "Two interchangeable lids: straw lid and wide-mouth chug lid",
      "Dishwasher-safe",
      "Durable construction",
      "Playf

In [12]:
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

# Parser and chat model used by the baseline chain below (same settings as the
# multi-agent chains: model "gpt-4.1", temperature 0).
parser = StrOutputParser()
llm = ChatOpenAI(
    model="gpt-4.1",
    temperature=0,
)

# Single-prompt baseline: one call that fuses the three descriptions directly,
# without the separate analysis / rewrite / reflection steps.
_BASELINE_TEMPLATE = """
You are a product description writer.

Write one short product description (2–3 sentences) for the product below.

Title:
{title}

Descriptions from different sources:
1. {desc1}
2. {desc2}
3. {desc3}

Write a single fused description that combines the information above naturally.

Do NOT explain. Return only the description.
"""

baseline_prompt = PromptTemplate(
    template=_BASELINE_TEMPLATE,
    input_variables=["title", "desc1", "desc2", "desc3"],
)

# prompt -> chat model -> plain string
baseline_chain = baseline_prompt | llm | parser

# Run the baseline on the same title / descriptions currently in scope.
baseline_inputs = {
    "title": title,
    "desc1": desc1,
    "desc2": desc2,
    "desc3": desc3,
}
baseline_output = baseline_chain.invoke(baseline_inputs)

print("=== BASELINE OUTPUT ===")
print(baseline_output)


=== BASELINE OUTPUT ===
Keep your child hydrated in style with the EcoSip Kids Stainless Steel Water Bottle 12oz with Straw Lid – Dinosaur Print. This BPA-free, double-wall insulated bottle keeps drinks cold for up to 12 hours and features a fun dinosaur design kids will love. The easy-to-use flip-up straw lid and compact, durable construction make it perfect for everyday adventures, while hand washing helps preserve its vibrant print.
