## **Generating Product Image from Customer Reviews**
### 94844 Gen AI Lab Final Project
#### April 2024

In [37]:
# Import Libraries
import os, io, json, transformers, pinecone, fitz, pypdf, faiss, sqlite3, langchain_community, langchain, openai, math, time 
import requests
from transformers import pipeline
import pandas as pd
import numpy as np
from io import StringIO
from dotenv import load_dotenv
from operator import itemgetter
import logging

from langchain import document_loaders, embeddings
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
#from llama_index.core.node_parser import SentenceSplitter
from langchain_experimental.text_splitter import SemanticChunker
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

from pinecone import Pinecone, ServerlessSpec, Pinecone         # vector store

from llama_index.core import (
    VectorStoreIndex,
    StorageContext,
    SimpleDirectoryReader
)

from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.vector_stores.pinecone import PineconeVectorStore
from llama_index.core.schema import TextNode
from llama_index.core.extractors import (
    QuestionsAnsweredExtractor,
    TitleExtractor,
)
from llama_index.core.ingestion import IngestionPipeline
from llama_index.llms.openai import OpenAI

# sentence transformers
from sentence_transformers import SentenceTransformer
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

In [22]:
load_dotenv()

access_endpoint_api_key = os.getenv('access_endpoint_api_key')
openai_key = os.getenv('OPENAI_API_KEY')
pinecone_api_key = os.getenv('PINECONE_API_KEY')
environment = os.getenv('PINECONE_ENV')

# configure Pinecone client
pinecone_api_key = '1b7fc13c-0ea1-4edc-a174-244bce3e0b47'
pc = Pinecone(api_key=pinecone_api_key)

In [46]:
# LOGGER
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s',
                    filename='log/test.log')

## **Part 1: Product Selection and Customer Review Data Collection**
- Select 3 different products from different categories on a digital marketplace (e.g., Amazon).
- Please take into consideration different factors (e.g., product categories, popularity levels) when making your selection.
- Explain the rationale of your choices.
- Collect the corresponding product descriptions (textual content) and customer reviews (textual content) for each product

In [8]:
from bs4 import BeautifulSoup
from urllib.request import Request, urlopen
from urllib.error import HTTPError, URLError
import time

def get_page(url, headers):
    """
    Get webpage results into a Beautiful Soup object.
    """
    try:
        req = Request(url, headers=headers)
        page = urlopen(req)
        soup = BeautifulSoup(page, "html.parser")
        return soup
    except HTTPError as e:
        print(f"HTTP error: {e.code} {e.reason}")
    except URLError as e:
        print(f"URL error: {e.reason}")
    except Exception as e:
        print(f"General error: {e}")
    return None

def scrape_product_details(base_url, product_id, headers):
    """
    Scrape product description and up to 200 most recent reviews.
    """
    url = f"{base_url}/dp/{product_id}/"
    print("page url:", url)
    soup = get_page(url, headers)
    if not soup:
        return None, []

    # Find product description
    try:
        #description = soup.find_all('div', id='productDescription', class_="a-section a-spacing-small").get_text(strip=True)
        description = soup.find('div', id='feature-bullets').get_text(strip=True)
    except AttributeError:
        description = "No description available"

    reviews = []
    # Adjust the URL for 'Most Recent' reviews and iterate through pages
    review_url = f"{base_url}/product-reviews/{product_id}/"
    print("review url:", review_url)
    for i in range(1, 21):  # Assume there are 10 reviews per page, fetch 20 pages
        url = f"{review_url}?pageNumber={i}&sortBy=helpful"
        soup = get_page(url, headers)
        if not soup:
            continue
        review_divs = soup.find_all('div', {'data-hook': 'review'})
        for div in review_divs:
            try:
                #title = div.find('a', {'data-hook': 'review-title'}).get_text(strip=True)
                body = div.find('span', {'data-hook': 'review-body'}).get_text(strip=True)
                reviews.append(body)
                if len(reviews) >= 200:
                    break
            except AttributeError:
                continue
        if len(reviews) >= 200:
            break
        time.sleep(1)  # Sleep to prevent too frequent requests

    return description, reviews

In [9]:
# Usage example
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}
urls = {"https://www.amazon.com/Samsonite-Omni-Hardside-Spinner-Teal": "B013WFNZRE",
        "https://www.amazon.com/Hurtle-3-Wheeled-Scooter-Kids-Graffiti": "B08BDGS58Q",
        "https://www.amazon.com/DeLonghi-ECAM29084SB-Magnifica-LatteCrema-Espresso": "B0B38KRTV6"
        }

descriptions = []
reviews = []
for base_url, product_id in urls.items():
    description, review = scrape_product_details(base_url, product_id, headers)
    descriptions.append(description)
    reviews.append(review)

products = ["Product A", "Product B", "Product C"]

# Create dictionary
product_data = {}
for i in range(len(products)):
    product_data[products[i]] = {
        "description": descriptions[i],
        "reviews": reviews[i]
    }
print(product_data)

page url: https://www.amazon.com/Samsonite-Omni-Hardside-Spinner-Teal/dp/B013WFNZRE/
review url: https://www.amazon.com/Samsonite-Omni-Hardside-Spinner-Teal/product-reviews/B013WFNZRE/
page url: https://www.amazon.com/Hurtle-3-Wheeled-Scooter-Kids-Graffiti/dp/B08BDGS58Q/
review url: https://www.amazon.com/Hurtle-3-Wheeled-Scooter-Kids-Graffiti/product-reviews/B08BDGS58Q/
page url: https://www.amazon.com/DeLonghi-ECAM29084SB-Magnifica-LatteCrema-Espresso/dp/B0B38KRTV6/
review url: https://www.amazon.com/DeLonghi-ECAM29084SB-Magnifica-LatteCrema-Espresso/product-reviews/B0B38KRTV6/


In [10]:
product_data

{'Product A': {'description': '24" SPINNER LUGGAGE maximizes your packing power and is the ideal checked bag for longer tripsPACKING Dimensions: 24” x 17.5” x 11.5”, Overall Dimensions: 26.5” x 17.75” x 11.75”, Weight: 8.34 lbs.10 YEAR LIMITED WARRANTY: Samsonite products are rigorously tested to ensure our products meet stringent standards. This bag comes with a 10-year warranty against defects in materials and workmanship.MICRO-DIAMOND POLYCARBONATE texture is extremely scratch-resistant, keeping cases beautiful trip after tripSIDE-MOUNTED TSA LOCKS act to deter theft, ensuring that only you or a TSA agent have easy access to your belongings when travelingFOUR, MULTI-DIRECTIONAL OVERSIZED SPINNER WHEELS for effortless mobility, re-engineered lightweight',
  'reviews': ['Perfect weight and very maneuverable.  Zippers work easily and has a convenient TSA approved lock.  Looks great and will be perfect size for longer trips.  Happy with the purchase / great value for the money.',
   'Wh

In [11]:
# Method 2: pull data from pdf
doc = fitz.open("coffeemaker_product.pdf")

## **Part 2: Analysis of Customer Reviews with LLM**
- Use OpenAI’s GPT 3.5 turbo API to conduct text analysis to extract valuable information from the textual data collected above and build a more wholistic understanding about the product.
- Some analyses that are relevant to consider include, but not limited to, for example, text summarization, extraction of particular product features (e.g., visual information), or sentiment analysis.
- You can do the analyses using prompt engineering (with different prompt strategies), RAG, or a combination of both.
- When doing the analysis, you may need to consider different documentation chunking strategies given the input token limit OpenAI API has.
- You could consider using a vector database to store your text embedding if necessary, but not mandatory.
- You also need to think about what is an effective output from this step, given that your goal next step is to send this output into the diffusion model for meaningful product image generation.

In [12]:
# Chunk text from pdf
text_splitter = SemanticChunker(OpenAIEmbeddings())
text_chunks = []
doc_idxs = []

for doc_idx, page in enumerate(doc):
    page_text = page.get_text("text")
    cur_text_chunks = text_splitter.split_text(page_text)
    text_chunks.extend(cur_text_chunks)
    doc_idxs.extend([doc_idx] * len(cur_text_chunks))

In [13]:
def create_textnode(usepdf=False):
    nodes = []
    # Use scraped chunks
    if usepdf == False:
        # Create a TextNode for the description
        desc_node = TextNode(text=product_data[i]['description'])
        nodes.append(desc_node)

        # Create a TextNode for each review
        for review in product_data[i]['reviews']:
            review_node = TextNode(text=review)
            nodes.append(review_node)
    # Use text from pdf, chunked
    else:
        for idx, text_chunk in enumerate(text_chunks):
            node = TextNode(text=text_chunk)
            src_doc_idx = doc_idxs[idx]
            src_page = doc[src_doc_idx]
            nodes.append(node)
    
    return nodes

In [47]:
### CHANGE HERE ###
# Select which product to analyze
# Madi: Product B and usepdf=False
# Jyoti: usepdf=True

i = "Product B"

nodes = create_textnode(usepdf=True)

In [48]:
gpt_3_5_turbo = "gpt-3.5-turbo"
llm = OpenAI(model=gpt_3_5_turbo)

extractors = [
    TitleExtractor(nodes=5, llm=llm),
    QuestionsAnsweredExtractor(questions=3, llm=llm),
]

pipeline = IngestionPipeline(
    transformations=extractors,
)

nodes = await pipeline.arun(nodes=nodes, in_place=False)

100%|██████████| 5/5 [00:01<00:00,  3.49it/s]
100%|██████████| 120/120 [00:57<00:00,  2.10it/s]


In [49]:
embed_model_3_small = "text-embedding-3-small"
embed_model = OpenAIEmbedding(model=embed_model_3_small, openai_api_key=openai_key)

# create embeddings for the text dataset
for node in nodes:
    node_embedding = embed_model.get_text_embedding(
        node.get_content(metadata_mode="all")
    )
    node.embedding = node_embedding

In [54]:
use_serverless = os.environ.get("USE_SERVERLESS", "False").lower() == "true"

if use_serverless:
    spec = pinecone.ServerlessSpec(cloud='aws', region='us-west-2')
else:
    spec = pinecone.PodSpec(environment=environment)

# Name our Pinecone Index
index_name = "hw03"

# If a Pinecone index of the same name already exists, delete it:
if index_name in pc.list_indexes().names():
    pc.delete_index(index_name)

In [55]:
print("Using serverless:",use_serverless)

Using serverless: False


In [56]:
# Name our Pinecone Index
index_name = "hw03"

dimensions = 1536 #768              # the dimensions of the index need to align with the LLM we are using for the RAG system. For example, if using openAI then dimenion = 1536. If using Llama2, then dimension = 384.

pc.create_index(
    name=index_name, 
    dimension=dimensions, 
    metric="cosine",          # we can use different distance metrics to measure the similarity between vector embeddings and user queries. this is where we define what similarity metric we are going to use for the vector store.
    spec=spec
)

# wait for index to be ready before connecting
while not pc.describe_index(index_name).status['ready']:
    time.sleep(1)

for index in pc.list_indexes():
    print(index['name'])

pc.describe_index("hw03")

pc_index = pc.Index(index_name)  # create an index to use in the vector store

vector_store = PineconeVectorStore(pinecone_index=pc_index)    # this function creates a vector store where we will add and store embeddings

hw03


In [57]:
# nothing so far - empty index
pc_index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

In [58]:
vector_store.add(nodes)

Upserted vectors: 100%|██████████| 120/120 [00:04<00:00, 28.51it/s]


['36c257bb-efd7-4623-9ff6-fc1bf0840143',
 'd62c0252-88eb-4dc4-acc9-6e639d0feab1',
 'bf709e74-1ee2-4fec-87a3-c0dd60802d99',
 '5fd44a20-11d9-466f-bbb3-ee4add1b8cd5',
 'b6e4a1f9-44e6-455a-b315-50c690a1683b',
 'cb57a363-f582-4010-9331-7459f8674935',
 'b2c72837-7ec6-4b55-8df5-379334945ef9',
 'cb9bc7ee-ead6-42d5-ab2a-ea37101d46d9',
 'e45ccb5e-5e3b-4ff3-bd25-09c87c3be5ff',
 'a7a7d923-0f84-465b-b8de-d849f9458103',
 'f198f981-945c-45b1-bfc2-0c1e3dbe76ae',
 '9912ef37-5483-41d0-937d-51fc0d1d0b52',
 'f4d08c74-dc7a-4334-a8e2-8efc49b2d566',
 'ea79517f-b876-48db-9781-21a499cb1aeb',
 '2309dd66-cd0b-4de8-a22b-1e57c66797e2',
 '51cacdec-1aa8-4bf4-a360-5d27693a463d',
 'fe216fcb-70e3-4b3c-a2d2-7b671789d312',
 '1d28abeb-633a-467c-a43c-b7d1b41ef6f7',
 '43f2d4df-08f6-491a-82bc-76af9345ae84',
 'b318af26-699b-41c1-9b90-d5784b26f872',
 'c6ebc6f3-adf5-4901-86b8-f1f4fde410d0',
 '2835b752-b3a2-48b5-8b76-7de9ce7ed1e1',
 'e62b7048-1ec1-4b0c-aa80-0c27fc17a36a',
 'd119da03-5369-4c73-ada8-8ed6571f699e',
 '795aeb0c-88f3-

In [59]:
pc_index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

In [60]:
print(nodes[0].metadata)
print(nodes[0])

{'document_title': 'Ultimate Guide to the Best Semi Automatic Espresso Machine with Burr Grinder: Enhance Your Espresso Experience at Home', 'questions_this_excerpt_can_answer': '1. What is included in the Model KES6551 semi automatic espresso machine with burr grinder?\n2. What features does the semi automatic espresso machine with burr grinder offer, such as durability and smart dosing technology?\n3. How long is the warranty for the semi automatic espresso machine with burr grinder, and what does it cover?'}
Node ID: 36c257bb-efd7-4623-9ff6-fc1bf0840143
Text: Overview  Product Description   Semi Automatic Espresso Machine
with Burr Grinder   Designed for easy hands-on espresso exploration,
take your espresso based drinks to the next  level - create wherever
your curiosity takes you - from Espresso, Americano, Latte, to
Cappucino  and much more. What's in the box   Model KES6551 Includes
  (1) semi ...


In [64]:
from openai import OpenAI
client = OpenAI()

def get_sys_message(q, k):
    # get embedding
    result = client.embeddings.create(
        input=[q],
        model=embed_model_3_small
    )

    # retrieve from Pinecone vector store
    xq = result.data[0].embedding

    # search
    res2 = pc_index.query(vector=xq, top_k=k, include_metadata=True)
    
    context = ""
    for match in res2['matches']:
        dict_obj = json.loads(match['metadata']['_node_content'])
        context += dict_obj['text'] + " "
    context = context.replace("\n", "")

    return context

In [65]:
queries = ["Describe in detail what this product is, based on the product description.",
           "What are some features of this product, especially focusing on visual features?",
           "What is the main purpose of this product?",
           "What is most loved about this product?",
           "What is most hated about this product?"
]   

In [66]:
# try a query
get_sys_message(q=queries[0], k=20)

"Great product sabah95 a month ago Great product,I received this product from Influenster for free. We love this ! My husband makes me a latte everyday . thank you Influenster !! Watch out, you will become spoiled! sarahz131 a month ago I received this for free from Influenster to review with my honest opinion. This is the most amazing drink I have ever made!! It was a little difficult at first to understand the instructions. Having a really clear page labeling the buttons would have been great! I started off with making  It is a luxury machine! 33. Great product! daddybear84 27 days ago This I love it!!!!!! Great product l, if you’re a coffee lover like me! The brewing quality is very decent, grinds well. I find they it’s easy to clean, as well! Overall I really love like this set. 42. Fantastic Machine glampalette a month ago This espresso machine rules. It grinds the beans, dispenses them, makes the espresso, and steams the milk. It does everything and I love it more than any applia

In [67]:
def rag_openAI_gpt(model, q, k, prompt):
    f=get_sys_message(q, k)
    
    response = client.chat.completions.create(
    model=model,
    messages=[
        {"role": "system", "content": "Instruction: use the information in" + f + "to answer the user's question."},
        {"role": "user", "content": prompt},
        {"role": "assistant", "content": f},
        {"role": "user", "content": "What is the answer?"}
    ]
    )
    return response.choices[0].message.content

In [74]:
# Run
analyses = {}
for i in range(len(queries)):
    response = rag_openAI_gpt(model=gpt_3_5_turbo, q=queries[i], k=20, prompt="Product Description")
    print(f"<Query: {queries[i]}>")
    print(response)
    analyses[i+1] = response
    print()
analyses

<Query: Describe in detail what this product is, based on the product description.>
Based on the user reviews, the product in question is a KitchenAid Espresso Machine that has received highly positive feedback. Users have praised its high-quality features, ease of use, customizable settings, built-in grinder, milk steamer, and overall performance in making espresso, lattes, and other coffee drinks. The machine has been described as a great investment for coffee lovers, offering cafe-quality results at home.

<Query: What are some features of this product, especially focusing on visual features?>
Based on the reviews provided, the KitchenAid Semi Automatic Espresso Machine with Burr Grinder is praised for its high quality construction, ease of use, and customization options for creating the perfect cup of coffee tailored to individual preferences. Users particularly appreciate features such as the built-in grinder, temperature controls, and milk frother. Despite a learning curve in und

{1: 'Based on the user reviews, the product in question is a KitchenAid Espresso Machine that has received highly positive feedback. Users have praised its high-quality features, ease of use, customizable settings, built-in grinder, milk steamer, and overall performance in making espresso, lattes, and other coffee drinks. The machine has been described as a great investment for coffee lovers, offering cafe-quality results at home.',
 2: 'Based on the reviews provided, the KitchenAid Semi Automatic Espresso Machine with Burr Grinder is praised for its high quality construction, ease of use, and customization options for creating the perfect cup of coffee tailored to individual preferences. Users particularly appreciate features such as the built-in grinder, temperature controls, and milk frother. Despite a learning curve in understanding all the settings and adjustments for optimal results, users have found the machine to be worth the investment for those who truly enjoy high-quality es

In [75]:
# Save responses to the log file
logging.info(analyses)

## **Part 3: Image Generation**
- For each product, based on the information extracted from the product description and customer reviews, craft prompts to guide the image generation process effectively.
- Use the OpenAI’s DALLE 2 to generate 3~5 images for each product based on your crafted prompts. Experiment with different prompts and settings to best visualize what you believe is a good illustration of the product based on product description and customer reviews.
- If necessary, iterate on your prompts based on initial results to refine the illustrations.
- Compare AI-generated product images with the actual product images posted in real world. Are they similar or different? In what dimensions? Do you think AI is able to illustrate the products well? Why or why not?
- Provide analyses and explanations of your findings.

In [76]:
# Change keys to dictionary
analyses = {
    "Product Description": analyses[1],
    "Product Features": analyses[2],
    "Product Usage": analyses[3],
    "Product Sentiment Good": analyses[4],
    "Product Sentiment Bad": analyses[5]
}

In [77]:
# Craft prompt
prompt = f"""   
        Generate an image of a product using the following information:
        Description: {analyses["Product Description"]}. 
        Features: {analyses["Product Features"]}.
        Usage: {analyses["Product Usage"]}. 
        Good Sentiment: {analyses["Product Sentiment Good"]}.
        Bad Sentiment: {analyses["Product Sentiment Bad"]}.
        """

In [79]:
# Generate images using DALL-E 3
response = openai.images.generate(
    model="dall-e-3",
    prompt=prompt,
    size="1024x1024",
    quality="standard",
    n=1,
)

import webbrowser
webbrowser.open(response.data[0].url)

BadRequestError: Error code: 400 - {'error': {'code': None, 'message': '"   \\n        Generate an image of a product using the following information:\\n        Description: Based on the user reviews, the product in question is a KitchenAid Espresso Machine that has received highly positive feedback. Users have praised its high-quality features, ease of use, customizable settings, built-in grinder, milk steamer, and overall performance in making espresso, lattes, and other coffee drinks. The machine has been described as a great investment for coffee lovers, offering cafe-quality results at home.. \\n        Features: Based on the reviews provided, the KitchenAid Semi Automatic Espresso Machine with Burr Grinder is praised for its high quality construction, ease of use, and customization options for creating the perfect cup of coffee tailored to individual preferences. Users particularly appreciate features such as the built-in grinder, temperature controls, and milk frother. Despite a learning curve in understanding all the settings and adjustments for optimal results, users have found the machine to be worth the investment for those who truly enjoy high-quality espresso beverages. The product comes with various accessories, recipe book, and detailed instructions for a great overall user experience..\\n        Usage: Based on the reviews provided, the KitchenAid Semi Automatic Espresso Machine with Burr Grinder seems to be a highly recommended and versatile product for coffee lovers. Here are some key points highlighted by the reviews:\\n\\n- The machine is praised for its ease of use, durability, and ability to produce consistent, high-quality espresso and lattes.\\n- Users appreciate the built-in burr grinder, Smart Dosing Technology, and customizable settings for grind size, brew strength, and temperature control.\\n- The semi-automatic features, along with the flat-base portafilter and multi-angle steam wand, make it a convenient and functional appliance for creating barista-quality drinks at home.\\n- The machine\'s aesthetics, compact size, and the included accessories such as multiple filter baskets, frothing cup, and tamper are mentioned as added bonuses.\\n- Reviewers note that the machine may have a learning curve for beginners, but with practice and following the detailed instructions provided, it becomes a staple in their morning routine.\\n- Many users emphasize the cost-saving benefits of making cafe-style drinks at home instead of frequenting coffee shops like Starbucks.\\n\\nOverall, the KitchenAid Semi Automatic Espresso Machine with Burr Grinder is highly recommended for coffee enthusiasts looking to elevate their at-home coffee experience with a reliable, versatile, and high-quality espresso machine.. \\n        Good Sentiment: Based on the reviews provided, the KitchenAid Semi Automatic Espresso Machine with Burr Grinder is highly praised for its quality, ease of use, and ability to produce delicious espresso drinks. Reviewers appreciate features such as the built-in grinder, customizable settings, sleek design, and the ability to make a variety of coffee drinks at home. Overall, feedback indicates that this espresso machine is a worthwhile investment for coffee lovers who enjoy making espresso-based beverages at home..\\n        Bad Sentiment: Based on the reviews provided for the KitchenAid Semi Automatic Espresso Machine with Burr Grinder, here is a summary of the product features and customer feedback:\\n\\n1. **High-Quality Espresso**: Customers rave about the quality of espresso produced by this machine, comparing it favorably to coffee shop purchases.\\n  \\n2. **Ease of Use**: Many users highlighted the ease of use once they became familiar with the machine. The built-in grinder and various settings allow for customization and control over the brewing process.\\n\\n3. **Design and Build**: The design of the machine was praised for being sleek, modern, and well-made. The stainless steel construction was particularly attractive to users.\\n\\n4. **Convenience**: The all-in-one feature of having a built-in grinder and espresso maker was a key selling point for many customers. They appreciated the compact size and functionality.\\n\\n5. **Learning Curve**: While there is a learning curve, especially for those new to espresso machines, users found the detailed instructions and included resources helpful in mastering the process.\\n\\n6. **Accessories and Maintenance**: Customers appreciated the accessories included with the machine, such as different filter baskets, tamper, and frothing cup. The maintenance and cleaning were considered relatively easy.\\n\\n7. **Value for Money**: Despite some initial complexities, customers found the machine to be worth the investment, considering the quality of espresso it produces and the convenience it offers.\\n\\nOverall, the KitchenAid Semi Automatic Espresso Machine with Burr Grinder seems to be a favorite among coffee lovers for its quality, convenience, and ability to create cafe-worthy espresso drinks at home..\\n        " is too long - \'prompt\'', 'param': None, 'type': 'invalid_request_error'}}