In [None]:
# !pip install transformers torch requests pillow

In [None]:
import json

# Load the product catalogue. The encoding is given explicitly so decoding does
# not depend on the platform default (JSON text is UTF-8, and the notebook
# writes its output as UTF-8 as well).
with open('/content/sample_data/Female.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# The message names the file that is actually opened above.
print(f"Successfully loaded Female.json. First 5 items: {list(data.items())[:5]}")

In [None]:
# ! pip install googletrans==4.0.0-rc1

In [None]:
from transformers import InstructBlipProcessor, InstructBlipForConditionalGeneration
import torch
from PIL import Image
import requests

# Checkpoint shared by the processor and the model so the two cannot drift apart.
MODEL_ID = "Salesforce/instructblip-flan-t5-xl"

processor = InstructBlipProcessor.from_pretrained(MODEL_ID)

# Load model (VERY important: fp16 + device_map="auto").
# device_map="auto" already places the weights on the available device(s),
# so no manual model.to(...) call is needed afterwards.
model = InstructBlipForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Quick sanity check of the pipeline on a single reference image.
url = "https://raw.githubusercontent.com/salesforce/LAVIS/main/docs/_static/Confusing-Pictures.jpg"
# A timeout keeps the cell from hanging forever; raise_for_status surfaces HTTP
# errors here instead of as a confusing image-decoding failure.
response = requests.get(url, stream=True, timeout=30)
response.raise_for_status()
image = Image.open(response.raw).convert("RGB")
prompt = "What is unusual about this image?"
# Follow the device the model was actually placed on rather than assuming CUDA.
inputs = processor(images=image, text=prompt, return_tensors="pt").to(model.device)

# Deterministic beam search: sampling-only options (top_p, temperature) are
# omitted because they have no effect when do_sample=False.
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        do_sample=False,
        num_beams=5,
        max_length=256,
        min_length=1,
        repetition_penalty=1.5,
        length_penalty=1.0,
    )
generated_text = processor.batch_decode(outputs, skip_special_tokens=True)[0].strip()
print(generated_text)

In [None]:
from googletrans import Translator
# Module-level googletrans client, reused by translate_vietnamese_to_english below.
translator = Translator()
def translate_vietnamese_to_english(text):
    """Translate Vietnamese text to English on a best-effort basis.

    Args:
        text: The value to translate. Only non-empty strings are sent to the
            translator.

    Returns:
        The translated string, or ``text`` itself when it is empty, not a
        string, or when the translation attempt raises.
    """
    try:
        # Empty values and non-strings are handed back untouched.
        if not (text and isinstance(text, str)):
            return text
        result = translator.translate(text, src='vi', dest='en')
        return result.text
    except Exception as err:
        # Best effort: report the problem and fall back to the original input.
        print(f"Translation error: {err}")
        return text

In [None]:
from io import BytesIO
from PIL import Image
import requests

def _build_caption_prompt(title_en, desc_en):
    """Return the captioning prompt for one product, filled with its English title and description."""
    return f'''
                            PRODUCT IMAGE ANALYSIS:
                            Product: {title_en}
                            Category: Apparel/Clothing
                            Description: {desc_en}

                            CRITICAL INSTRUCTIONS:
                            1. FOCUS STRICTLY ON THE GARMENT/CLOTHING ITEM:
                              - Describe ONLY the clothing item being sold
                              - Ignore ALL human models, animals, mannequins, or living beings
                              - Exclude background scenes, settings, and decorative elements
                              - Do not mention accessories, jewelry, or non-clothing items

                            2. VISUAL DESCRIPTION PRIORITIES:
                              - Garment type (shirt, dress, pants, etc.)
                              - Primary colors and color patterns
                              - Fabric texture and material appearance
                              - Design details (collars, sleeves, neckline, etc.)
                              - Style characteristics (formal, casual, traditional, etc.)
                              - Any visible patterns, prints, or embellishments

                            3. EXCLUSION LIST (IGNORE COMPLETELY):
                              - Human body parts, faces, or figures
                              - Animals and pets
                              - Background objects and scenery
                              - Other products not related to the main clothing item
                              - Lighting conditions and photographic effects

                            4. OUTPUT REQUIREMENTS:
                              - 2-3 concise, factual sentences
                              - Professional e-commerce tone
                              - Objective visual description only
                              - No subjective opinions or emotional language

                            CAPTION:
                            '''


def _load_rgb_image(image_url, timeout=30):
    """Download ``image_url`` and return it as an RGB PIL image; raises on HTTP or decoding errors."""
    # The timeout prevents one unresponsive host from stalling the whole run.
    response = requests.get(image_url, timeout=timeout)
    # Fail on 4xx/5xx here instead of letting PIL choke on an error page.
    response.raise_for_status()
    # response.content is the fully decoded body, unlike the undecoded .raw stream.
    return Image.open(BytesIO(response.content)).convert("RGB")


def generate_description(image_url, title_vi, desc_vi):
    """Generate an English caption for a product image.

    Args:
        image_url: URL of the product image to download.
        title_vi: Product title in Vietnamese.
        desc_vi: Product description in Vietnamese.

    Returns:
        The generated caption with surrounding whitespace stripped, or an empty
        string if any step fails. Failures are printed with a traceback and
        never raised to the caller.
    """
    try:
        # Step 1: translate the Vietnamese metadata to English.
        title_en = translate_vietnamese_to_english(title_vi)
        desc_en = translate_vietnamese_to_english(desc_vi)

        print(f"English Title: {title_en}")
        print(f"English Desc: {desc_en}")

        # Step 2: download the image and normalise it to RGB.
        image = _load_rgb_image(image_url)

        # Step 3: build the prompt (wording kept exactly as before).
        prompt = _build_caption_prompt(title_en, desc_en)

        # Step 4: preprocess image and text, then move the tensors to the device
        # the model actually lives on instead of assuming CUDA.
        inputs = processor(images=image, text=prompt, return_tensors="pt").to(model.device)

        # Step 5: deterministic beam search. The sampling-only `temperature`
        # option is omitted because it has no effect when do_sample=False.
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                do_sample=False,
                num_beams=5,
                max_length=256,
                min_length=1,
                repetition_penalty=1.5,
                length_penalty=1.0,
            )

        # Only one image is processed per call, so take the single decoded sequence.
        generated_text = processor.batch_decode(outputs, skip_special_tokens=True)[0].strip()
        return generated_text

    except Exception as e:
        # Best effort: one bad product must not abort the whole batch.
        print(f"Error processing {image_url}: {str(e)}")
        import traceback
        traceback.print_exc()
        return ""

# Caption every product and store the result on the product record itself.
products = data['products']
total = len(products)

for count, product in enumerate(products, start=1):
    print(f"\n--- Processing product {count}/{total} ---")

    caption = generate_description(
        image_url=product['pdp_image_url'],
        title_vi=product['pdp_title_value'],
        desc_vi=product['pdp_desc_value'],
    )
    product['detail_description'] = caption
    print(f"Generated Description: {caption}")

    # Progress marker after every tenth product.
    if count % 10 == 0:
        print(f"Completed {count} products...")

print("\nAll products processed!")

In [None]:
# Write the enriched catalogue back to disk. ensure_ascii=False keeps non-ASCII
# characters as-is in the file instead of \uXXXX escapes.
with open('Female_updated.json', mode='w', encoding='utf-8') as out_file:
    out_file.write(json.dumps(data, ensure_ascii=False, indent=2))

print("Updated JSON saved successfully!")