In [None]:
%%writefile app.py
import streamlit as st
import torch
import numpy as np
from PIL import Image
from transformers import AutoTokenizer, AutoModelForCausalLM
from qdrant_client import QdrantClient
from qdrant_client.models import PointStruct, VectorParams, Distance
import open_clip

# Qdrant
# The API key is read from the environment so that it never ends up in the
# notebook, in the generated app.py, or in version control.  The Streamlit
# process is started with subprocess.Popen from the notebook, so variables
# set in the notebook's os.environ are inherited by this script.
import os

client = QdrantClient(
    # The cluster URL is not a credential; it stays as the default but can be
    # overridden with QDRANT_URL.
    url=os.environ.get(
        "QDRANT_URL",
        "https://7e8950b7-f7cd-476b-9fe2-cfbabcc676d4.us-east4-0.gcp.cloud.qdrant.io:6333",
    ),
    # A KeyError here means QDRANT_API_KEY has not been exported.
    api_key=os.environ["QDRANT_API_KEY"],
)

# CLIP embedding
clip_model_name = "ViT-B-32"
clip_pretrained = "openai"
device = "cuda" if torch.cuda.is_available() else "cpu"


# Streamlit re-executes this script on every widget interaction.  Caching the
# loader keeps one CLIP instance per server process instead of rebuilding the
# model on each rerun.  show_spinner=False keeps the cache from drawing a UI
# element before st.set_page_config() is called further down the script.
@st.cache_resource(show_spinner=False)
def _load_clip(model_name, pretrained, target_device):
    """Build the CLIP model, its image preprocessing transform and its tokenizer.

    Returns a ``(model, preprocess, tokenizer)`` tuple; the model is switched
    to eval mode and moved to ``target_device``.
    """
    model, _, preprocess = open_clip.create_model_and_transforms(model_name, pretrained=pretrained)
    model.eval()
    model = model.to(target_device)
    tokenizer = open_clip.get_tokenizer(model_name)
    return model, preprocess, tokenizer


clip_model, clip_preprocess, clip_tokenizer = _load_clip(clip_model_name, clip_pretrained, device)

def embed_query(text):
    """Return the CLIP text embedding of ``text`` as a NumPy vector scaled to unit norm."""
    # The tokenizer works on a list of strings, so the query is sent as a batch of one.
    token_batch = clip_tokenizer([text]).to(device)
    with torch.no_grad():
        text_features = clip_model.encode_text(token_batch)
    vector = text_features[0].cpu().numpy()
    length = np.linalg.norm(vector)
    return vector / length

from PIL import Image
def embed_image(image_file):
    """Return the CLIP image embedding of ``image_file`` as a NumPy vector scaled to unit norm.

    ``image_file`` is handed straight to ``PIL.Image.open``; the image is
    converted to RGB before the CLIP preprocessing transform is applied.
    """
    rgb_image = Image.open(image_file).convert("RGB")
    # unsqueeze(0) adds the leading batch dimension before the tensor is moved to the device.
    pixel_batch = clip_preprocess(rgb_image).unsqueeze(0).to(device)
    with torch.no_grad():
        image_features = clip_model.encode_image(pixel_batch)
    vector = image_features[0].cpu().numpy()
    length = np.linalg.norm(vector)
    return vector / length

# load llm
import os

from transformers import AutoTokenizer, AutoModelForCausalLM
from huggingface_hub import login

llama_model_id = "meta-llama/Llama-2-7b-chat-hf"


# Streamlit re-executes this script on every widget interaction.  Without the
# cache the model would be reloaded on each rerun.  show_spinner=False keeps
# the cache from drawing a UI element before st.set_page_config() is called.
@st.cache_resource(show_spinner=False)
def _load_llama(model_id):
    """Log in to the Hugging Face Hub and load the tokenizer and model.

    The access token is read from the HF_TOKEN environment variable so that it
    is never stored in the notebook or in app.py; a KeyError means it is unset.
    Returns a ``(tokenizer, model)`` tuple with the model in eval mode.
    """
    login(token=os.environ["HF_TOKEN"])
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        device_map="auto",
    ).eval()
    return tokenizer, model


llama_tokenizer, llama_model = _load_llama(llama_model_id)


# generate answer function
def generate_llama_answer(prompt, max_tokens=512):
    """Sample a completion for ``prompt`` and return only the newly generated text.

    Args:
        prompt: Full prompt string handed to the tokenizer.
        max_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        The decoded continuation, stripped of surrounding whitespace and of a
        leading "Answer:" label if the model emitted one.
    """
    inputs = llama_tokenizer(prompt, return_tensors="pt").to(llama_model.device)
    prompt_length = inputs["input_ids"].shape[-1]
    with torch.no_grad():
        outputs = llama_model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
        )
    # The returned sequence starts with the prompt tokens.  Slicing them off is
    # more reliable than decoding everything and splitting on "Answer:": that
    # approach dropped any generated text preceding a later "Answer:" and
    # returned the whole prompt when the label was missing.
    continuation = outputs[0][prompt_length:]
    text = llama_tokenizer.decode(continuation, skip_special_tokens=True).strip()
    label = "Answer:"
    if text.startswith(label):
        text = text[len(label):].strip()
    return text


# prompt
def build_multimodal_prompt_with_image(user_query, hits, image_uploaded=True, max_products=3):
    """Assemble the LLM prompt from retrieved products and the user's question.

    Args:
        user_query: Text of the customer's question.
        hits: Sequence of search results; each item exposes a ``payload``
            mapping read with ``.get`` for the keys ``Product_Name``,
            ``Selling_Price``, ``About_Product`` and ``Image_URL``.
        image_uploaded: When true, a few-shot "could not identify the image"
            example is placed between the instructions and the context.
        max_products: Only the first ``max_products`` hits are included.

    Returns:
        The prompt string, ending with ``Answer:`` so the model continues from there.
    """
    guidelines = """You are a real shopping assistant for an e-commerce platform.

A customer has asked a question that may involve an uploaded image, a text query, or both. You have relevant product information below. Answer questions in a warm and friendly manner.

Instructions:
- Your justifications for your product choice should only include informations and reasonings that are most relevant to the user query.
- Use only the product details from the context. Do not guess or invent any information. If nothing relevant is found, apologize and acknowledge that you cannot provide an answer.
- The user has NOT seen the product list or context. Your answer should be fully self-contained. Do NOT refer to “Product 1”, “Product 2”, ”query”, or “the context”.
- If helpful, you should better include product name and image URL in your answer.
- Keep word counts of the answer under 100.
"""

    # Few-shot example showing the model how to decline an unrecognised image.
    image_fallback = """Example:
User Query: [Image of an iPhone]
Context: No Apple or iPhone product appears in the list.

Answer:
Sorry, I couldn’t identify the product in the uploaded image based on the current context. Please try uploading a clearer image or rephrasing your query.
"""

    def describe(position, hit):
        """Render one hit as a five-line product card, with defaults for missing fields."""
        payload = hit.payload
        card_lines = [
            f"Product {position}:",
            f"Name: {payload.get('Product_Name', 'Unknown Product')}",
            f"Price: ${payload.get('Selling_Price', 'N/A')}",
            f"Key Info: {payload.get('About_Product', 'No description provided.')}",
            f"Image: {payload.get('Image_URL', 'No image available.')}",
        ]
        return "\n".join(card_lines)

    product_cards = [
        describe(position, hit)
        for position, hit in enumerate(hits[:max_products], start=1)
    ]

    # Each list entry is one line of the final prompt; empty strings produce
    # the blank separator lines between sections.
    sections = [
        guidelines,
        "",
        image_fallback if image_uploaded else "",
        "",
        "Context:",
        "\n\n".join(product_cards),
        "",
        "User Query:",
        f"{user_query}",
        "",
        "Answer:",
    ]
    return "\n".join(sections)

# Streamlit App
st.set_page_config(page_title="🏍️ Product Chatbot")
st.title("🏍️ Shopping Assistant")
st.markdown("Not sure what you’re looking for? Just upload an image or ask a question!")

user_query = st.text_input("Ask a product question:", placeholder="e.g., Is this board good for cruising?")
uploaded_image = st.file_uploader("Upload a product image (optional):", type=["jpg", "jpeg", "png"])

# Input is validated inside the click branch.  When the "has input" test was
# part of the button condition, the error message below could never be
# reached and submitting an empty form silently did nothing.
if st.button("Submit"):
    has_text = bool(user_query)
    has_image = uploaded_image is not None

    if not (has_text or has_image):
        st.error("Please enter a question or upload an image.")
        st.stop()

    st.write("Generating response...")

    text_emb = embed_query(user_query) if has_text else None
    image_emb = embed_image(uploaded_image) if has_image else None

    # Select embedding and collection: each input combination is searched in
    # its own Qdrant collection.
    if has_text and has_image:
        query_vector = np.concatenate([text_emb, image_emb])
        collection_name = "combined_products"
    elif has_image:
        query_vector = image_emb
        collection_name = "image_products"
    else:
        query_vector = text_emb
        collection_name = "text_products"

    # Rescale to unit length; the concatenation of two unit vectors is not
    # itself unit length.
    query_vector = query_vector / np.linalg.norm(query_vector)

    raw_result = client.query_points(
        collection_name=collection_name,
        query=query_vector.tolist(),
        limit=5,
        with_payload=True
    )
    hits = raw_result.points

    # With an image-only request a generic question stands in for the missing text.
    prompt = build_multimodal_prompt_with_image(user_query or "What is this product?", hits, image_uploaded=has_image)
    answer = generate_llama_answer(prompt)

    st.markdown("### 🧠 Assistant Answer")
    st.write(answer)



Overwriting app.py


In [None]:
from pyngrok import ngrok, conf
import os
import subprocess
import time

# https://dashboard.ngrok.com/get-started/setup/windows
# https://dashboard.ngrok.com/agents

# The ngrok auth token is read from the environment so that it is not stored
# in the notebook; a KeyError means NGROK_AUTHTOKEN has not been set.
conf.get_default().auth_token = os.environ["NGROK_AUTHTOKEN"]

# Start the Streamlit server as a child process; it inherits this process's
# environment variables.
process = subprocess.Popen(["streamlit", "run", "app.py"])
# Presumably gives the server time to start listening before the tunnel opens.
time.sleep(8)
public_url = ngrok.connect(8501)
print("🌐 Your Streamlit app is live at:", public_url)

🌐 Your Streamlit app is live at: NgrokTunnel: "https://bd53-34-90-132-26.ngrok-free.app" -> "http://localhost:8501"


In [3]:
# pip install qdrant_client

Collecting qdrant_client
  Downloading qdrant_client-1.14.2-py3-none-any.whl.metadata (10 kB)
Collecting portalocker<3.0.0,>=2.7.0 (from qdrant_client)
  Downloading portalocker-2.10.1-py3-none-any.whl.metadata (8.5 kB)
Downloading qdrant_client-1.14.2-py3-none-any.whl (327 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m327.7/327.7 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading portalocker-2.10.1-py3-none-any.whl (18 kB)
Installing collected packages: portalocker, qdrant_client
Successfully installed portalocker-2.10.1 qdrant_client-1.14.2


In [5]:
# pip install open-clip-torch

Collecting open-clip-torch
  Downloading open_clip_torch-2.32.0-py3-none-any.whl.metadata (31 kB)
Collecting ftfy (from open-clip-torch)
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.9.0->open-clip-torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.9.0->open-clip-torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.9.0->open-clip-torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.9.0->open-clip-torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.9.0->open-clip-torch)
  Downloading nv

In [4]:
# Set to True to run the offline retrieval/generation experiments below.
test = False

if test:
  # Previous work
  import os

  import torch
  import numpy as np
  from PIL import Image
  from transformers import AutoTokenizer, AutoModelForCausalLM
  from qdrant_client import QdrantClient
  from qdrant_client.models import PointStruct, VectorParams, Distance
  import open_clip

  # Qdrant
  # The API key comes from the environment so it is not stored in the
  # notebook; a KeyError means QDRANT_API_KEY has not been set.
  client = QdrantClient(
      url=os.environ.get(
          "QDRANT_URL",
          "https://7e8950b7-f7cd-476b-9fe2-cfbabcc676d4.us-east4-0.gcp.cloud.qdrant.io:6333",
      ),
      api_key=os.environ["QDRANT_API_KEY"],
  )

  # CLIP embedding
  clip_model_name = "ViT-B-32"
  clip_pretrained = "openai"
  device = "cuda" if torch.cuda.is_available() else "cpu"

  clip_model, _, clip_preprocess = open_clip.create_model_and_transforms(
      clip_model_name,
      pretrained=clip_pretrained,
  )
  clip_tokenizer = open_clip.get_tokenizer(clip_model_name)

  # Inference only: switch to eval mode, then move the weights to the chosen device.
  clip_model.eval()
  clip_model = clip_model.to(device)

  def embed_query(text):
      """Return the CLIP text embedding of ``text`` as a NumPy vector scaled to unit norm."""
      # The tokenizer works on a list of strings, so the query is sent as a batch of one.
      token_batch = clip_tokenizer([text]).to(device)
      with torch.no_grad():
          text_features = clip_model.encode_text(token_batch)
      vector = text_features[0].cpu().numpy()
      length = np.linalg.norm(vector)
      return vector / length

  from PIL import Image
  def embed_image(image_file):
      """Return the CLIP image embedding of ``image_file`` as a NumPy vector scaled to unit norm.

      ``image_file`` is handed straight to ``PIL.Image.open``; the image is
      converted to RGB before the CLIP preprocessing transform is applied.
      """
      rgb_image = Image.open(image_file).convert("RGB")
      # unsqueeze(0) adds the leading batch dimension before the tensor is moved to the device.
      pixel_batch = clip_preprocess(rgb_image).unsqueeze(0).to(device)
      with torch.no_grad():
          image_features = clip_model.encode_image(pixel_batch)
      vector = image_features[0].cpu().numpy()
      length = np.linalg.norm(vector)
      return vector / length

  # load llm
  import os

  from transformers import AutoTokenizer, AutoModelForCausalLM
  from huggingface_hub import login

  # The Hugging Face access token is read from the environment so that it is
  # not stored in the notebook; a KeyError means HF_TOKEN has not been set.
  login(token=os.environ["HF_TOKEN"])

  llama_model_id = "meta-llama/Llama-2-7b-chat-hf"
  llama_tokenizer = AutoTokenizer.from_pretrained(llama_model_id)
  # Half-precision weights; device_map="auto" leaves device placement to the library.
  llama_model = AutoModelForCausalLM.from_pretrained(
      llama_model_id,
      torch_dtype=torch.float16,
      device_map="auto"
  ).eval()


  # generate answer function
  def generate_llama_answer(prompt, max_tokens=512):
      """Sample a completion for ``prompt`` and return only the newly generated text.

      Args:
          prompt: Full prompt string handed to the tokenizer.
          max_tokens: Upper bound on the number of newly generated tokens.

      Returns:
          The decoded continuation, stripped of surrounding whitespace and of
          a leading "Answer:" label if the model emitted one.
      """
      inputs = llama_tokenizer(prompt, return_tensors="pt").to(llama_model.device)
      prompt_length = inputs["input_ids"].shape[-1]
      with torch.no_grad():
          outputs = llama_model.generate(
              **inputs,
              max_new_tokens=max_tokens,
              do_sample=True,
              temperature=0.7,
              top_p=0.9,
          )
      # The returned sequence starts with the prompt tokens.  Slicing them off
      # is more reliable than decoding everything and splitting on "Answer:":
      # that approach dropped any generated text preceding a later "Answer:"
      # and returned the whole prompt when the label was missing.
      continuation = outputs[0][prompt_length:]
      text = llama_tokenizer.decode(continuation, skip_special_tokens=True).strip()
      label = "Answer:"
      if text.startswith(label):
          text = text[len(label):].strip()
      return text


  # prompt
  def build_multimodal_prompt_with_image(user_query, hits, image_uploaded=True, max_products=3):
      """Assemble the LLM prompt from retrieved products and the user's question.

      The prompt is built from explicit line lists rather than triple-quoted
      literals.  With this function nested inside an indented cell, the
      literals picked up the cell's indentation, so every prompt line carried
      stray leading spaces.

      Args:
          user_query: Text of the customer's question.
          hits: Sequence of search results; each item exposes a ``payload``
              mapping read with ``.get`` for the keys ``Product_Name``,
              ``Selling_Price``, ``About_Product`` and ``Image_URL``.
          image_uploaded: When true, a few-shot "could not identify the image"
              example is placed between the instructions and the context.
          max_products: Only the first ``max_products`` hits are included.

      Returns:
          The prompt string, ending with ``Answer:``.
      """
      context_blocks = []
      for i, h in enumerate(hits[:max_products], start=1):
          p = h.payload
          context_blocks.append("\n".join([
              f"Product {i}:",
              f"Name: {p.get('Product_Name', 'Unknown Product')}",
              f"Price: ${p.get('Selling_Price', 'N/A')}",
              f"Key Info: {p.get('About_Product', 'No description provided.')}",
              f"Image: {p.get('Image_URL', 'No image available.')}",
          ]))
      context = "\n\n".join(context_blocks)

      # Add fallback few-shot example for image queries.  The trailing ""
      # gives the text a final newline.
      fallback_example = "\n".join([
          "Example:",
          "User Query: [Image of an iPhone]",
          "Context: No Apple or iPhone product appears in the list.",
          "",
          "Answer:",
          "Sorry, I couldn’t identify the product in the uploaded image based on the current context. Please try uploading a clearer image or rephrasing your query.",
          "",
      ])

      instructions = "\n".join([
          "You are a real shopping assistant for an e-commerce platform.",
          "",
          "A customer has asked a question that may involve an uploaded image, a text query, or both. You have relevant product information below. Answer questions in a warm and friendly manner.",
          "",
          "Instructions:",
          "- Your justifications for your product choice should only include informations and reasonings that are most relevant to the user query.",
          "- Use only the product details from the context. Do not guess or invent any information. If nothing relevant is found, apologize and acknowledge that you cannot provide an answer.",
          "- The user has NOT seen the product list or context. Your answer should be fully self-contained. Do NOT refer to “Product 1”, “Product 2”, ”query”, or “the context”.",
          "- If helpful, you should better include product name and image URL in your answer.",
          "- Keep word counts of the answer under 100.",
          "",
      ])

      # Each entry is one line of the final prompt; empty strings are the
      # blank separator lines between sections.
      return "\n".join([
          instructions,
          "",
          fallback_example if image_uploaded else "",
          "",
          "Context:",
          context,
          "",
          "User Query:",
          f"{user_query}",
          "",
          "Answer:",
      ])

  # images and query
  from google.colab import drive
  drive.mount("/content/drive")

  image_path = "/content/drive/MyDrive/Gen_AI/LongBoards.jpg"
  # Alternative query: I want a lightweight longboard for carving and cruising.
  query_text = "Is this board good for cruising?"

  # embedding: image first, then text
  with open(image_path, "rb") as f:
    img_emb = embed_image(f)
  txt_emb = embed_query(query_text)

  # combine text and image embeddings (text part first) and rescale to unit length
  combined_emb = np.concatenate((txt_emb, img_emb))
  combined_emb = combined_emb / np.linalg.norm(combined_emb)

  # retrieve top context from the combined text+image collection
  raw_result = client.query_points(
      collection_name="combined_products",
      query=combined_emb.tolist(),
      limit=5,
      with_payload=True,
  )
  hits = raw_result.points

  # build the prompt, generate the answer and show it
  prompt = build_multimodal_prompt_with_image(query_text, hits, image_uploaded=True)
  answer = generate_llama_answer(prompt)
  print(answer)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Thank you for reaching out! Based on the product details provided, the DB Longboards Phase 38" Maple Drop Through Longboard may be suitable for cruising, given its cambered platform and drop-through mounting, which can provide a smoother ride and more comfortable pushing. However, without more context or information about your specific needs and preferences, I cannot make a definitive recommendation. Please feel free to provide additional details or ask further questions!


In [7]:
# Combined image + text retrieval: ask for products similar to the toy photo.
if test:
    from google.colab import drive
    drive.mount('/content/drive')
    image_path = "/content/drive/MyDrive/Gen_AI/toy.jpg"
    query_text = "Recommend similar products for me with images"
    with open(image_path, "rb") as f:
        img_emb = embed_image(f)

    txt_emb = embed_query(query_text)
    # Text part first, then image part; rescale the joined vector to unit length.
    combined_emb = np.concatenate([txt_emb, img_emb])
    combined_emb /= np.linalg.norm(combined_emb)
    raw_result = client.query_points(
        collection_name="combined_products",
        query=combined_emb.tolist(),
        limit=5,
        with_payload=True
    )
    hits = raw_result.points
    prompt = build_multimodal_prompt_with_image(query_text, hits, image_uploaded=True)
    answer = generate_llama_answer(prompt)
    print(answer)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Thank you for reaching out! Based on the product details provided, I'd recommend the Little Live Pets - My Kissing Puppy - Wrinkles. It's a cute and cuddly toy that makes realistic puppy sounds and has different actions, just like a real puppy! The image URL for this product is https://images-na.ssl-images-amazon.com/images/I/414mqi2RZLL.jpg. Would you like me to provide more recommendations or help you with anything else?


In [8]:
# Combined image + text retrieval: specification questions about the ladder photo.
if test:
    from google.colab import drive
    drive.mount('/content/drive')
    image_path = "/content/drive/MyDrive/Gen_AI/ladder.jpg"
    query_text = "What is the maximum weight capacity of this ladder? How tall is this ladder when fully opened? Does this ladder have anti-slip feet or other safety features?"
    with open(image_path, "rb") as f:
        img_emb = embed_image(f)

    txt_emb = embed_query(query_text)
    # Text part first, then image part; rescale the joined vector to unit length.
    combined_emb = np.concatenate([txt_emb, img_emb])
    combined_emb /= np.linalg.norm(combined_emb)
    raw_result = client.query_points(
        collection_name="combined_products",
        query=combined_emb.tolist(),
        limit=5,
        with_payload=True
    )
    hits = raw_result.points
    prompt = build_multimodal_prompt_with_image(query_text, hits, image_uploaded=True)
    answer = generate_llama_answer(prompt)
    print(answer)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Thank you for your question! Based on the product information provided, the Hasegawa Ladders Lucano Step Ladder has a maximum weight capacity of 225 lbs. The ladder's height when fully opened is 7.5 feet. As for anti-slip feet or other safety features, the product description mentions that the ladder is sturdy and safe, holding up to 225 lbs. weight. However, I couldn't find any specific mention of anti-slip feet or other safety features in the product details. Please let me know if you have any further questions!


In [9]:
# Combined image + text retrieval: educational-value question about the songbook photo.
if test:
    from google.colab import drive
    drive.mount('/content/drive')
    image_path = "/content/drive/MyDrive/Gen_AI/songbook.jpg"
    query_text = "What educational skills does this toy help develop? Also recommend me similar products with images"
    with open(image_path, "rb") as f:
        img_emb = embed_image(f)
    txt_emb = embed_query(query_text)
    # Text part first, then image part; rescale the joined vector to unit length.
    combined_emb = np.concatenate([txt_emb, img_emb])
    combined_emb /= np.linalg.norm(combined_emb)
    raw_result = client.query_points(
        collection_name="combined_products",
        query=combined_emb.tolist(),
        limit=5,
        with_payload=True,
    )
    hits = raw_result.points
    prompt = build_multimodal_prompt_with_image(query_text, hits, image_uploaded=True)
    answer = generate_llama_answer(prompt)
    print(answer)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Based on the product details provided, the toy "3-in-1 Around We Go Activity Center" helps develop various educational skills, including:

1. Motor Skills: As the child moves the toy around the activity center, they develop their hand-eye coordination and fine motor skills.
2. Problem-Solving: The toy encourages children to problem-solve and think critically as they navigate around the center.
3. Creativity: The toy fosters creativity and imagination as children explore and interact with the different activities and objects.

Recommended similar products with images:
1. "Learning Journey Activity Center" - This product provides a similar interactive experience with different activities and toys for children to explore. Image: https://images-na.ssl-images-amazon.com/images/I/61%2B-fA0ng2L.jpg
2. "Hape Activity Center" - This product offers a wooden activity ce

In [36]:
# Text-only retrieval: no image, so the text collection is searched and the
# image few-shot example is left out of the prompt.
if test:
    query_text = "My daughter wants a bedsheet with cartoon characters. Can you recommend some good options with urls?"
    txt_emb = embed_query(query_text)
    txt_emb /= np.linalg.norm(txt_emb)
    raw_result = client.query_points(
        collection_name="text_products",
        query=txt_emb.tolist(),
        limit=5,
        with_payload=True,
    )
    hits = raw_result.points
    prompt = build_multimodal_prompt_with_image(query_text, hits, image_uploaded=False)
    answer = generate_llama_answer(prompt)
    print(answer)

Of course, I'd be happy to help! 😊 There are some adorable cartoon character-themed bed sheets available on our platform. Here are a few options that caught my eye:

1. Unbranded Emoji, Cool Dude, 3-Piece Comforter Set Twin: This set features a fun and funny emoji-themed design with bright colors and cute illustrations. Your daughter will love the soft brushed fabric and the novelty pattern. Check out the image here: <https://images-na.ssl-images-amazon.com/images/I/51iVCRtMVrL.jpg>
2. Sweet Jojo Designs Ballet Dancer Ballerina Queen Kids Children's Bed Skirt: This bed skirt features a beautiful ballet-themed design with colorful illustrations of ballerinas. It's made of 100% cotton and is machine washable. Take a look at the image here: <https://images-na.ssl-images-amazon.com/images/I/311GVPNhFEL.jpg>
3. Urban Habitat Kids Lola Full/Queen Duvet Cover Set Girls Bedding - Purple, Aqua, Unicorns: This duvet cover set features a whimsical unicorn-themed design with bright colors and fun 