In [None]:
!pip install langchain==0.0.353 langchain-community ddgs
!pip install -U ddgs
!pip install -U bitsandbytes

In [None]:
!pip install -U gradio protobuf

### **Segmentation File**

In [3]:
%%writefile segmentation.py

import torch
import numpy as np
from PIL import Image
import cv2
from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation, AutoImageProcessor, AutoModelForObjectDetection

# -----------------------------
# Device
# -----------------------------
device = "cuda" if torch.cuda.is_available() else "cpu"

# -----------------------------
# Load SegFormer model
# -----------------------------
seg_processor = SegformerImageProcessor.from_pretrained("mattmdjaga/segformer_b2_clothes")
seg_model = SegformerForSemanticSegmentation.from_pretrained("mattmdjaga/segformer_b2_clothes").to(device).eval()

# -----------------------------
# Load YOLO model for garment detection
# -----------------------------
yolo_processor = AutoImageProcessor.from_pretrained("valentinafeve/yolos-fashionpedia")
yolo_model = AutoModelForObjectDetection.from_pretrained("valentinafeve/yolos-fashionpedia")

# -----------------------------
# Mapping & labels
# -----------------------------
segformer_map = {
    "shirt, blouse": 4,
    "top, t-shirt, sweatshirt": 4,
    "sweater": 4,
    "cardigan": 4,
    "jacket": 4,
    "vest": 4,
    "coat": 7,
    "dress": 7,
    "skirt": 5,
    "pants": 6,
    "shorts": 6,
}

garment_categories = list(segformer_map.keys())
arm_labels = [14, 15]  # left/right arm
sweatshirt_keywords = ["sweatshirt", "hoodie", "hooded", "pullover"]

# -----------------------------
# Functions
# -----------------------------
def detect_garment_label(garment_img_path, threshold=0.4):
    image = Image.open(garment_img_path).convert("RGB")
    inputs = yolo_processor(images=image, return_tensors="pt")
    with torch.no_grad():
        outputs = yolo_model(**inputs)
    target_sizes = torch.tensor([image.size[::-1]])
    results = yolo_processor.post_process_object_detection(outputs, threshold=threshold, target_sizes=target_sizes)[0]
    best_label, best_score = None, 0
    for score, label in zip(results["scores"], results["labels"]):
        class_name = yolo_model.config.id2label[label.item()]
        if class_name in garment_categories and score > best_score:
            best_label = class_name
            best_score = float(score)
    if best_label is None:
        return None, None
    return best_label, segformer_map[best_label]

def segment_user_image(user_img_path):
    img = Image.open(user_img_path).convert("RGB")
    inputs = seg_processor(images=img, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = seg_model(**inputs)
    logits = outputs.logits
    seg = torch.nn.functional.interpolate(
        logits, size=img.size[::-1], mode="bilinear", align_corners=False
    ).argmax(dim=1)[0].cpu().numpy()
    return img, seg

def refine_subcategory(yolo_label, garment_img_path):
    if yolo_label not in ["top, t-shirt, sweatshirt"]:
        return yolo_label
    name = garment_img_path.lower()
    for k in sweatshirt_keywords:
        if k in name:
            return "sweatshirt"
    if "tshirt" in name or "t-shirt" in name:
        return "t-shirt"
    return "top"

def get_cloth_mask(user_img_path, garment_img_path):
    # 1. التعرف على نوع القطعة
    garment_name_raw, seg_label = detect_garment_label(garment_img_path)
    if garment_name_raw is None:
        print("No garment detected.")
        return None, None, None
        
    garment_name = refine_subcategory(garment_name_raw, garment_img_path)
    print(f"Detected: {garment_name} | Label: {seg_label}")
    
    user_img, seg = segment_user_image(user_img_path)
    
    base_mask = (seg == seg_label).astype(np.uint8) * 255
    final_mask = base_mask.copy()
    
    
    long_sleeve_keywords = ["sweater", "cardigan", "jacket", "coat", "hoodie", "sweatshirt", "pullover"]
    
    is_long_sleeve = any(k in garment_name.lower() or k in garment_name_raw.lower() for k in long_sleeve_keywords)

    if seg_label == 4 and is_long_sleeve:
        print("   -> Long sleeve detected: Masking arms.")
        arms_mask = np.zeros_like(seg, dtype=np.uint8)
        for arm_label in arm_labels: # 14 & 15
            current_arm = (seg == arm_label).astype(np.uint8) * 255
            arms_mask = np.maximum(arms_mask, current_arm)
        
        kernel = np.ones((15, 15), np.uint8)
        dilated_arms = cv2.dilate(arms_mask, kernel, iterations=4)
        final_mask = np.maximum(final_mask, dilated_arms)
    
    else:
        print("   -> Short sleeve/Sleeveless: Keeping arms visible.")

    return user_img, Image.fromarray(final_mask), {"garment_name": garment_name}

def prepare_user_image_for_inpainting(original_image, mask_image):
    img_arr = np.array(original_image)
    mask_arr = np.array(mask_image.convert("L"))
    mask_area = mask_arr > 128
    noise = np.random.randint(0, 255, img_arr.shape, dtype=np.uint8)
    modified_img_arr = img_arr.copy()
    modified_img_arr[mask_area] = noise[mask_area]
    return Image.fromarray(modified_img_arr)

Overwriting segmentation.py


### **Inpainting File**

In [None]:
%%writefile inpainting.py
import torch
import gc
from diffusers import AutoPipelineForInpainting, AutoencoderKL
from PIL import Image

class Inpainter:
    def __init__(self, lora_path: str, device="cuda"):
        self.device = device
        self.lora_path = lora_path
        self.lora_scale = 0.4
        self.pipeline = None  

    def _load_pipeline(self):
        if self.pipeline is None:
            vae = AutoencoderKL.from_pretrained(
                "madebyollin/sdxl-vae-fp16-fix",
                torch_dtype=torch.float16
            )
            self.pipeline = AutoPipelineForInpainting.from_pretrained(
                "diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
                vae=vae,
                torch_dtype=torch.float16,
                variant="fp16",
                use_safetensors=True
            ).to(self.device)
            # Load IP-Adapter
            self.pipeline.load_ip_adapter(
                "h94/IP-Adapter",
                subfolder="sdxl_models",
                weight_name="ip-adapter_sdxl.bin",
                low_cpu_mem_usage=True
            )
            # Load LoRA weights
            self.pipeline.load_lora_weights(self.lora_path)
            self.pipeline.set_ip_adapter_scale(0.9)

    def _unload_pipeline(self):
        if self.pipeline is not None:
            del self.pipeline
            self.pipeline = None
        
        # clean memory 
        gc.collect()
        torch.cuda.empty_cache()

    def inpaint(self, input_image: Image.Image, mask_image: Image.Image, ip_image: Image.Image, garment_name: str) -> Image.Image:
        self._load_pipeline()
        
        prompt = (
            f"A realistic {garment_name}, worn naturally by the person. "
            "Match the exact garment style, length, and sleeve type from the reference garment image. "
            "High quality cloth texture, correct folds, natural shadows, photorealistic."
        )
        negative_prompt = (
            "different color, incorrect garment, white clothes, deformed body, extra limbs, "
            "artifacts, distortions, unrealistic texture, blurry"
        )
        
        # image_creation
        try:
            result = self.pipeline(
                prompt=prompt,
                negative_prompt=negative_prompt,
                image=input_image,
                mask_image=mask_image,
                ip_adapter_image=ip_image,
                strength=1.0,
                guidance_scale=7.5,
                num_inference_steps=70, 
                cross_attention_kwargs={"scale": self.lora_scale}
            ).images[0]
        except Exception as e:
            raise e
        finally:
            #clean Memory in any case
            self._unload_pipeline()
            
        return result

### Example for try inpainting

In [None]:
from inpainting import Inpainter
from segmentation import get_cloth_mask, prepare_user_image_for_inpainting
from PIL import Image

user_img_path = "/kaggle/input/vton-final/lady (6).jpg"
garment_img_path = "/kaggle/input/vton-final/purple_shirt (1) (1) (1).png"

# Get mask and user image
user_img, final_mask, meta = get_cloth_mask(user_img_path, garment_img_path)
mask_image = final_mask.convert("L")
input_image = prepare_user_image_for_inpainting(user_img, mask_image)
ip_image = Image.open(garment_img_path).convert("RGB")

# Inpaint
inpainter = Inpainter(lora_path="/kaggle/input/vton-final/pytorch_lora_weights (14).safetensors")
result_image = inpainter.inpaint(input_image, mask_image, ip_image, meta["garment_name"])
result_image.save("/kaggle/working/final_tryon.png")

### **Memory Module**

In [None]:
%%writefile memory.py
import os, json

MEMORY_FILE = "vto_memory.json"

def load_memory_data():
    if os.path.exists(MEMORY_FILE):
        with open(MEMORY_FILE, "r", encoding="utf-8") as f:
            return json.load(f)
    return {}

def save_memory_data(data):
    with open(MEMORY_FILE, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)

def add_to_memory(user_input, agent_response, images):
    memory = load_memory_data()
    memory.setdefault("conversations", []).append({
        "user_input": user_input,
        "agent_response": agent_response,
        "images": images
    })
    save_memory_data(memory)


### **Image Search Module**

In [None]:
%%writefile image_search.py
from langchain.tools import Tool
from ddgs import DDGS

def search_images(query: str, max_results: int = 5):
    results = []
    with DDGS() as ddgs:
        for r in ddgs.images(query):
            results.append({"title": r["title"], "url": r["image"]})
            if len(results) >= max_results:
                break
    return results

image_search_tool = Tool(
    name="Image Search",
    func=search_images,
    description="Search images on DuckDuckGo and return top 5 results"
)

### **LLM Module**

In [None]:
%%writefile llm.py
from langchain import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

def init_llm(model_name: str, token: str):
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=token)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        load_in_8bit=True,
        use_auth_token=token
    )
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=200,
        do_sample=True,
        temperature=0.7
    )
    return HuggingFacePipeline(pipeline=pipe)

### **Agent Runner Module**

In [None]:
%%writefile agent_runner.py
from langchain.agents import initialize_agent, AgentType
from langchain.memory import ConversationBufferMemory
from .image_search import image_search_tool
from .llm import llm_response
from .memory import add_to_memory

def build_agent(llm):
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    agent = initialize_agent(
        tools=[image_search_tool],
        llm=llm,
        agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        verbose=True,
        memory=memory,
        handle_parsing_errors=True
    )
    return agent

def run_agent(llm):
    agent = build_agent(llm)
    print("VTO Agent ready! Type 'exit' to quit.\n")

    while True:
        user_input = input("You: ")
        if user_input.lower() in ["exit", "quit"]:
            print("Goodbye!")
            break

        images = image_search_tool.func(user_input)
        try:
            agent_response = agent.run(user_input)
        except Exception as e:
            agent_response = f"Error: {e}"

        print("\nAgent:", agent_response)
        if images:
            print("\nImages found:")
            for i, img in enumerate(images, 1):
                print(f"{i}. {img['title']}: {img['url']}")

        add_to_memory(user_input, agent_response, images)
        print("\nMemory updated!\n")

### **vto Integration Module**

In [None]:
%%writefile vto_integration.py
from PIL import Image
from segmentation import get_cloth_mask, prepare_user_image_for_inpainting
from inpainting import Inpainter

class VTOAgentModule:
    def __init__(self, lora_path: str):
        # Initialize Inpainter
        self.inpainter = Inpainter(lora_path=lora_path)

    def process_tryon(self, user_img_path: str, garment_img_path: str):
        """
        Full VTO pipeline: segmentation → mask preparation → inpainting
        Returns inpainted PIL image
        """
        # get cloth mask and user image
        user_img, final_mask, meta = get_cloth_mask(user_img_path, garment_img_path)
        if user_img is None:
            raise ValueError("No garment detected in the reference image.")

        # Prepare mask and input image for inpainting
        mask_image = final_mask.convert("L")
        input_image = prepare_user_image_for_inpainting(user_img, mask_image)

        # Load IP adapter image
        ip_image = Image.open(garment_img_path).convert("RGB")

        #  Run inpainting
        result_image = self.inpainter.inpaint(
            input_image=input_image,
            mask_image=mask_image,
            ip_image=ip_image,
            garment_name=meta["garment_name"]
        )
        return result_image

### Example on try vto_integration

In [None]:
from vto_integration import VTOAgentModule

#paths
user_img_path = "/kaggle/input/vton-final/lady (6).jpg"
garment_img_path = "/kaggle/input/vton-final/purple_shirt (1) (1) (1).png"
lora_path = "/kaggle/input/vton-final/pytorch_lora_weights (14).safetensors"

# Initialize module
vto_module = VTOAgentModule(lora_path=lora_path)

# Run VTO
result_image = vto_module.process_tryon(user_img_path, garment_img_path)

# Save result
result_image.save("/kaggle/working/final_tryon.png")
print("Try-on result saved!")

### **Agent_vto Module**

In [None]:
%%writefile agent_vto.py
import os
import json
import torch
from PIL import Image
from langchain.memory import ConversationBufferMemory
from langchain.agents import Tool, AgentExecutor, AgentType, initialize_agent
from langchain import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from ddgs import DDGS
from vto_integration import VTOAgentModule

MEMORY_FILE = "vto_memory.json"

# ===== MEMORY =====
def load_memory_data():
    if os.path.exists(MEMORY_FILE):
        with open(MEMORY_FILE, "r", encoding="utf-8") as f:
            return json.load(f)
    return {}

def save_memory_data(data):
    with open(MEMORY_FILE, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)

def add_to_memory(user_input, agent_response, images):
    memory = load_memory_data()
    memory.setdefault("conversations", []).append({
        "user_input": user_input,
        "agent_response": agent_response,
        "images": images
    })
    save_memory_data(memory)

# ===== SEARCH TOOL =====
def search_images(query: str, max_results: int = 5):
    results = []
    try:
        with DDGS() as ddgs:
            for r in ddgs.images(query):
                results.append({"title": r["title"], "url": r["image"]})
                if len(results) >= max_results:
                    break
    except:
        pass
    return results

image_search_tool = Tool(
    name="Image Search",
    func=search_images,
    description="Search images on DuckDuckGo and return top 5 results"
)

# ===== LLM =====
def build_llm(model_name, token):
    tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)    
    try:
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map="auto", 
            torch_dtype=torch.float16, 
            token=token
        )
    except:
        # Fallback to CPU if GPU is somehow full (unlikely with lazy loading)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map="cpu",
            token=token
        )
    
    pipe = pipeline(
        "text-generation", 
        model=model, 
        tokenizer=tokenizer, 
        max_new_tokens=200, 
        do_sample=True, 
        temperature=0.7
    )
    return HuggingFacePipeline(pipeline=pipe)

# ===== AGENT =====
def build_agent(llm):
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    agent = initialize_agent(
        tools=[image_search_tool],
        llm=llm,
        agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        verbose=True,
        memory=memory,
        handle_parsing_errors=True
    )
    return agent

# ===== MAIN CLASS =====
class VTOAgent:
    def __init__(self, llm, lora_path: str):
        self.llm = llm
        self.agent = build_agent(llm)
        # Initialize VTO module (does NOT load heavy model yet)
        self.vto_module = VTOAgentModule(lora_path=lora_path)

    def handle_input(self, user_input: str, user_img_path: str = None, garment_img_path: str = None):
        images = search_images(user_input)
        agent_response = ""

        # --- Try-On Logic ---
        if user_img_path and garment_img_path:
            try:
                # Use the module to manage memory (Load -> Process -> Unload)
                inpainted_img = self.vto_module.process_tryon(user_img_path, garment_img_path)
                
                # Save output
                output_path = "/kaggle/working/final_tryon.png"
                inpainted_img.save(output_path)
                
                agent_response = f"VTO Success! Image saved at {output_path}"
                images.insert(0, {"title": "VTO Result", "url": output_path})
            except Exception as e:
                import traceback
                traceback.print_exc()
                agent_response = f"VTO Error: {e}"
        
        # --- Chat Logic ---
        else:
            try:
                agent_response = self.agent.run(user_input)
            except Exception as e:
                agent_response = f"Agent Error: {e}"
        # Save to memory
        add_to_memory(user_input, agent_response, images)
        return agent_response, images

In [None]:
#updated code of vto_agent
%%writefile vto_agent.py
import os
import json
from PIL import Image
from langchain.memory import ConversationBufferMemory
from langchain.agents import Tool, AgentExecutor, AgentType
from langchain import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from ddgs import DDGS
from segmentation import get_cloth_mask, prepare_user_image_for_inpainting
from inpainting import Inpainter

MEMORY_FILE = "vto_memory.json"

def load_memory_data():
    if os.path.exists(MEMORY_FILE):
        with open(MEMORY_FILE, "r", encoding="utf-8") as f:
            return json.load(f)
    return {}

def save_memory_data(data):
    with open(MEMORY_FILE, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)

def add_to_memory(user_input, agent_response, images):
    memory = load_memory_data()
    memory.setdefault("conversations", []).append({
        "user_input": user_input,
        "agent_response": agent_response,
        "images": images
    })
    save_memory_data(memory)

def search_images(query: str, max_results: int = 5):
    results = []
    with DDGS() as ddgs:
        for r in ddgs.images(query):
            results.append({"title": r["title"], "url": r["image"]})
            if len(results) >= max_results:
                break
    return results

image_search_tool = Tool(
    name="Image Search",
    func=search_images,
    description="Use this tool to search images on DuckDuckGo and return top 5 results"
)

def build_llm(model_name: str, token: str):
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=token)
    try:
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map="auto",
            load_in_8bit=True,
            use_auth_token=token
        )
    except Exception as e:
        print(f"Warning: 8-bit quantization failed ({e}), falling back to full precision.")
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map="auto",
            use_auth_token=token
        )

    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=200,
        do_sample=True,
        temperature=0.7
    )
    return HuggingFacePipeline(pipeline=pipe)

class VTOAgent:
    def __init__(self, llm, lora_path: str):
        self.llm = llm
        self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
        self.inpainter = Inpainter(lora_path=lora_path)
        self.agent = AgentExecutor.from_agent_and_tools(
            agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
            tools=[image_search_tool],
            llm=self.llm,
            memory=self.memory,
            verbose=True
        )

    def handle_input(self, user_input: str, user_img_path: str = None, garment_img_path: str = None):
        images = search_images(user_input)
        try:
            agent_response = self.llm(user_input)
        except Exception as e:
            agent_response = f"Error: {e}"

        if user_img_path and garment_img_path:
            user_img, mask, meta = get_cloth_mask(user_img_path, garment_img_path)
            if user_img and mask:
                mask_image = mask.convert("L")
                input_image = prepare_user_image_for_inpainting(user_img, mask_image)
                ip_image = Image.open(garment_img_path).convert("RGB")
                try:
                    inpainted_img = self.inpainter.inpaint(
                        input_image, mask_image, ip_image, meta["garment_name"]
                    )
                    inpainted_path = "vto_result.png"
                    inpainted_img.save(inpainted_path)
                    images.insert(0, {"title": "VTO Result", "url": inpainted_path})
                except Exception as e:
                    agent_response += f" | Inpainting error: {e}"

        add_to_memory(user_input, agent_response, images)
        return agent_response, images

### **Example Usage**

In [None]:
from agent_vto import VTOAgent, build_llm
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()

# Initialize LLM
model_name = "Qwen/Qwen2.5-0.5B-Instruct"
token = user_secrets.get_secret("segmentation_test") 
llm = build_llm(model_name, token)

# Initialize VTO agent
lora_path = "/kaggle/input/vton-final/pytorch_lora_weights (14).safetensors"
agent = VTOAgent(llm, lora_path)

# --- Chat loop ---
while True:
    user_input = input("You: ")
    if user_input.lower() in ["exit", "quit"]:
        print("Goodbye!")
        break

    # Optional: specify image paths if user wants a try-on
    user_img_path = input("Path to user image (or press enter to skip): ").strip() or None
    garment_img_path = input("Path to garment image (or press enter to skip): ").strip() or None

    response, images = agent.handle_input(user_input, user_img_path, garment_img_path)
    print("\nAgent:", response)

    if images:
        print("\nImages found:")
        for i, img in enumerate(images, 1):
            print(f"{i}. {img['title']}: {img['url']}")

In [None]:
import gradio as gr
import os
from PIL import Image
from agent_vto import VTOAgent, build_llm
from kaggle_secrets import UserSecretsClient

# ---------------------------------------------------
# 0) SETUP & INIT
# ---------------------------------------------------
user_secrets = UserSecretsClient()
token = user_secrets.get_secret("segmentation_test")

model_name = "Qwen/Qwen2.5-1.5B-Instruct"
lora_path = "/kaggle/input/vton-final/pytorch_lora_weights (14).safetensors"

# Initializing
llm = build_llm(model_name, token)
agent = VTOAgent(llm, lora_path)

# ---------------------------------------------------
# 1) TRY-ON FUNCTION
# ---------------------------------------------------
def tryon_api(person_img, cloth_img):
    if person_img is None or cloth_img is None:
        return "Please upload both images.", None
    
    output_path = "/kaggle/working/final_tryon.png"
    if os.path.exists(output_path):
        os.remove(output_path)

    user_path = "/kaggle/working/user.png"
    cloth_path = "/kaggle/working/cloth.png"
    person_img.save(user_path)
    cloth_img.save(cloth_path)

    try:
        response, _ = agent.handle_input(
            user_input="perform try-on",
            user_img_path=user_path,
            garment_img_path=cloth_path
        )
    except Exception as e:
        return f"Agent Error: {e}", None

    if os.path.exists(output_path):
        final_img = Image.open(output_path)
        return f"Done! {response}", final_img
    else:
        return f"Failed. Log: {response}", None

# ---------------------------------------------------
# 2) AGENT FUNCTION
# ---------------------------------------------------
def agent_api(user_text):
    if not user_text: return "Type something...", []
    try:
        response, images = agent.handle_input(user_input=user_text)
        gallery_urls = []
        if images:
            for img in images:
                if isinstance(img, dict) and 'url' in img:
                    gallery_urls.append(img['url'])
                elif isinstance(img, str):
                    gallery_urls.append(img)
        return response, gallery_urls
    except Exception as e:
        return f"Error: {e}", []

# ---------------------------------------------------
# 3) GRADIO UI (DESIGN)
# ---------------------------------------------------
my_style = """
<style>
body {background-color: #f9fafb;}
button.secondary {background-color: #ecfeff !important; color: #0e7490 !important; border: 1px solid #0e7490 !important;}
.styled-header {
    background-color: #0d9488; 
    color: white;              
    font-weight: bold;
    font-size: 1.2em;
    margin-bottom: 10px;
    text-align: center;
    padding: 10px;            
    border-radius: 8px;        
}</style>
"""

with gr.Blocks() as demo:    
    
    gr.HTML(my_style)

    # --- Header ---
    gr.HTML("""
        <div style='text-align:center; font-family:"Poppins", sans-serif; margin-bottom: 30px; padding: 20px; background-color: white; border-radius: 10px; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);'>
            <h1 style='color:#0d9488; font-size:2.5em; font-weight: bold; margin:0;'> Virtual Try-On + Smart Agent </h1>
            <p style='color:#666; font-size:1.1em; margin-top:10px;'>
                Upload your photo & a clothing item, or ask the AI Fashion Assistant!
            </p>
        </div>
    """)

    # --- SECTION 1: IMAGES (Side by Side) ---
    with gr.Row():
        with gr.Column():
            gr.HTML("<div class='styled-header'>Upload Person Image</div>")
            person_in = gr.Image(label="", type="pil", sources=["upload"], height=350)
        
        with gr.Column():
            gr.HTML("<div class='styled-header'>Upload Clothing Image</div>")
            cloth_in = gr.Image(label="", type="pil", sources=["upload"], height=350)

    # --- SECTION 2: BUTTON (Full Width) ---
    tryon_btn = gr.Button("Generate Try-On Image", variant="secondary")

    # --- SECTION 3: RESULT (Centered) ---
    with gr.Row():
        with gr.Column(): pass 
        with gr.Column(scale=2): 
            gr.HTML("<div class='styled-header' style='margin-top: 20px;'>Try-On Result</div>")
            result_out = gr.Image(label="", type="pil", interactive=False)
            status_out = gr.Textbox(label="Status", lines=1)
        with gr.Column(): pass 

    tryon_btn.click(
        fn=tryon_api,
        inputs=[person_in, cloth_in],
        outputs=[status_out, result_out]
    )

    # --- Divider ---
    gr.HTML("<hr style='margin: 40px 0; border: 0; border-top: 2px solid #e5e7eb;'>")

    # --- SECTION 4: AGENT ---
    gr.HTML("<div class='styled-header'>Ask the Smart Agent</div>")
    
    user_text = gr.Textbox(label="Enter your query", placeholder="Ask about fashion advice...")
    agent_btn = gr.Button("Ask Agent", variant="secondary")
    
    gr.HTML("<div class='styled-header' style='margin-top: 20px;'>Agent Response</div>")
    output_text = gr.Textbox(show_label=False, lines=4)
    image_gallery = gr.Gallery(label="Search Results", columns=5, height="auto")

    agent_btn.click(
        fn=agent_api,
        inputs=[user_text],
        outputs=[output_text, image_gallery]
    )

demo.launch(share=True, debug=True)


2025-11-30 18:36:28.482201: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1764527788.503002     304 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1764527788.509317     304 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

  return func(*args, **kwargs)
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
The `max_size` parameter is deprecated and will be removed in v4.26. Please specify in `size['longest_edge'] instead`.
Device set to use cuda:0


* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://5dbcff26252876c012.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Detected: top | Label: 4
   -> Long sleeve detected: Masking arms.


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

The config attributes {'decay': 0.9999, 'inv_gamma': 1.0, 'min_decay': 0.0, 'optimization_step': 37000, 'power': 0.6666666666666666, 'update_after_step': 0, 'use_ema_warmup': False} were passed to UNet2DConditionModel, but are not expected and will be ignored. Please verify your config.json configuration file.


  0%|          | 0/70 [00:00<?, ?it/s]

Detected: top | Label: 4
   -> Long sleeve detected: Masking arms.


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

The config attributes {'decay': 0.9999, 'inv_gamma': 1.0, 'min_decay': 0.0, 'optimization_step': 37000, 'power': 0.6666666666666666, 'update_after_step': 0, 'use_ema_warmup': False} were passed to UNet2DConditionModel, but are not expected and will be ignored. Please verify your config.json configuration file.


  0%|          | 0/70 [00:00<?, ?it/s]

Detected: top | Label: 4
   -> Long sleeve detected: Masking arms.


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

The config attributes {'decay': 0.9999, 'inv_gamma': 1.0, 'min_decay': 0.0, 'optimization_step': 37000, 'power': 0.6666666666666666, 'update_after_step': 0, 'use_ema_warmup': False} were passed to UNet2DConditionModel, but are not expected and will be ignored. Please verify your config.json configuration file.


  0%|          | 0/70 [00:00<?, ?it/s]

Detected: top | Label: 4
   -> Long sleeve detected: Masking arms.


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

The config attributes {'decay': 0.9999, 'inv_gamma': 1.0, 'min_decay': 0.0, 'optimization_step': 37000, 'power': 0.6666666666666666, 'update_after_step': 0, 'use_ema_warmup': False} were passed to UNet2DConditionModel, but are not expected and will be ignored. Please verify your config.json configuration file.


  0%|          | 0/70 [00:00<?, ?it/s]

Detected: top | Label: 4
   -> Long sleeve detected: Masking arms.


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

The config attributes {'decay': 0.9999, 'inv_gamma': 1.0, 'min_decay': 0.0, 'optimization_step': 37000, 'power': 0.6666666666666666, 'update_after_step': 0, 'use_ema_warmup': False} were passed to UNet2DConditionModel, but are not expected and will be ignored. Please verify your config.json configuration file.


  0%|          | 0/70 [00:00<?, ?it/s]



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I need to use Image Search tool.
Action: Image Search
Action Input: black shirts[0m
Observation: [36;1m[1;3m[{'title': "3 Pack of Black T-Shirts | Men's 3 Pack of Black Tees | True Classic", 'url': 'https://www.trueclassictees.com/cdn/shop/files/4000_BLACK_3_a9efe98c-519b-4217-bd61-97b3f3dd800f.jpg?v=1692653907&;width=1000&em-format=auto'}, {'title': "Gildan Black Shirts for Men Casual Black T-shirts Men's Shirts in Black ...", 'url': 'https://i5.walmartimages.com/asr/4cd83dd9-a773-4fe4-9151-400f0e96c470_1.7ffbc580bae09e03a36936422f7b0789.jpeg'}, {'title': 'Spykar Full Sleeve Solid Black Shirt For Men', 'url': 'https://spykar.com/cdn/shop/products/MSHSOS2BC153Black_1.jpg?v=1705316292'}, {'title': "Men's Black Slim Shirt - Mid-Collar | Hawes & Curtis", 'url': 'https://images.hawesandcurtis.com/tr:q-80/BF/BFPGA220-A01-181157-800px-1040px.jpg'}, {'title': "Men's Black Shirts | Black Dress Shirts for Men | Moss", 'url': 'http

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

The config attributes {'decay': 0.9999, 'inv_gamma': 1.0, 'min_decay': 0.0, 'optimization_step': 37000, 'power': 0.6666666666666666, 'update_after_step': 0, 'use_ema_warmup': False} were passed to UNet2DConditionModel, but are not expected and will be ignored. Please verify your config.json configuration file.


  0%|          | 0/70 [00:00<?, ?it/s]

Detected: top | Label: 4
   -> Long sleeve detected: Masking arms.


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

The config attributes {'decay': 0.9999, 'inv_gamma': 1.0, 'min_decay': 0.0, 'optimization_step': 37000, 'power': 0.6666666666666666, 'update_after_step': 0, 'use_ema_warmup': False} were passed to UNet2DConditionModel, but are not expected and will be ignored. Please verify your config.json configuration file.


  0%|          | 0/70 [00:00<?, ?it/s]