# LLM as the entrypoint

- Inspired by https://blog.apiad.net/p/building-a-perplexity-ai-clone and other write-ups on building Perplexity.ai clones

In [14]:
from core import init, Seed

# Run the project's setup routine from `core` before any other cell.
# NOTE(review): its effects are not visible in this notebook - presumably it loads
# API keys/config; confirm in `core`.
init()

In [26]:
# Inputs shared by the whole notebook.
# `context` is background text about the company; it is substituted for {context}
# in the human message of both prompt templates below.
context = """
Current, formerly known as GE Current, a Daintree Company, is a prominent player in the lighting industry, specializing in LED technology and intelligent lighting controls. The company was founded on January 1, 2015, following the merger of GE Current and Hubbell's commercial and industrial lighting business (Crunchbase, 2024). Current focuses on sustainability and energy efficiency, offering a wide range of lighting solutions for various sectors, including healthcare, hospitality, education, and industrial markets.

Current's product offerings include:

Tetra® LED Light Systems: High-performance signage lighting systems.
Architectural Area Lighting: Contemporary luminaires for diverse applications.
Lighting Controls Systems: Wireless, stand-alone, outdoor, and wired controls.
Healthcare Lighting: Products like Remedi and MODx™ for medical environments.
Hospitality Lighting: Solutions for hotels and restaurants.
Industrial Lighting: High-efficiency luminaires for manufacturing.
Outdoor Lighting: EXO brand quick-ship solutions for contractors.

Website: https://www.currentlighting.com/
"""

# The user's question; it is substituted for {query} in both prompt templates below.
query = "What do Current's customers say about them or their products?"

# Search-operator guidance that is concatenated onto the system message of both
# prompt templates below. Because it becomes part of a template string, any literal
# curly braces added here would need escaping as `{{` / `}}`.
# NOTE(review): Google's `related:` operator is documented as returning sites similar
# to the given domain rather than pages about it, so that tip may not improve
# precision - confirm which operator was intended.
search_tips = """
Tips for Google queries:
- It's good to start with a fairly general search, then refine it iteratively based on the results. In other words, start with a higher recall search and then refine it to increase precision
- If the company's website is known, you can add related:domain.com to improve precision
- If Reddit reviews are promising, we can add site:reddit.com to the query
- If results from a source are generally irrelevant, you can exclude that source with -site:source.com
- If the company name has multiple words, you can put the name in quotes to increase precision at the cost of recall
- Sometimes the informal name of the company may be more useful than the formal name for search queries, for example "Pomelo" instead of "Pomelo Care" will increase recall but may decrease precision. Likewise "S6" instead of "Singularity 6" may increase recall but decrease precision
"""

In [27]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate

from pydantic import BaseModel, Field
from typing import List

# One Google search proposed by the model: the query text plus a look-back window.
class Query(BaseModel):
    # Text handed to the search helper as the search string.
    query: str = Field(description="Google search query")
    # Free-form string chosen by the model; later cells forward it unchanged as the
    # `dateRestrict` argument of the search helper.
    time_period: str = Field(description="The time period for the search (all, 5y, 1y, 1m, 1w, 1d, etc.)")

# Structured reply for the first (expansion) step; passed to `with_structured_output`.
# NOTE(review): documented with `#` comments rather than a docstring because a class
# docstring would presumably be included in the schema sent to the model - confirm.
class QueryExpansion(BaseModel):
    # Declared before `queries`, matching the prompt's "first think, then generate" order.
    interpretation: str = Field(description="interpretation of the user's information need(s)")
    # The searches the model suggests running.
    queries: List[Query] = Field(description="list of relevant Google searches")

# Prompt for the first step: turn the user's question into Google searches.
# Template variables that must be supplied on invoke: {num_queries}, {query}, {context}.
_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            # Task instructions, followed by the shared search-operator tips.
            """
Given the following question, provide a set of {num_queries} relevant Google searches that would answer the question. 
First think about the user's question and provide your own interpretation of the information need(s).
Then generate the relevant queries including the time period for the search.
            """ + search_tips,
        ),
        (
            "human",
            # The user's question plus the background text about the company.
            """
User query: {query}

Additional context:
{context}

Now, please provide your interpretation, and suggest a set of relevant Google searches that would answer the user's question.
            """,
        ),
    ]
)



# Chat model (temperature 0) whose reply is parsed into a QueryExpansion object.
llm = ChatOpenAI(temperature=0, model="gpt-4o-mini")
runnable = _prompt | llm.with_structured_output(QueryExpansion)

# Supply the three template variables and run the chain once.
result = runnable.invoke(dict(query=query, context=context, num_queries=2))

# Bare expression so the notebook displays the parsed reply.
result

QueryExpansion(interpretation="The user is looking for customer reviews and feedback about Current's products and services, particularly in the lighting industry. They may be interested in insights from various sectors such as healthcare, hospitality, and industrial markets, as well as general customer satisfaction.", queries=[Query(query='Current lighting customer reviews', time_period='1y'), Query(query='Current GE lighting reviews site:reddit.com', time_period='1y')])

In [28]:
# Show each search the model proposed.
# The loop variable is deliberately not named `query`: a top-level `for` rebinds its
# target in the notebook namespace, which would replace the user's question string
# that later cells still pass to the prompts as {query}.
for suggested_query in result.queries:
    print(suggested_query)

query='Current lighting customer reviews' time_period='1y'
query='Current GE lighting reviews site:reddit.com' time_period='1y'


In [29]:
from utils.google_search import search

# Accumulates (Query, list of search hits) pairs; this list is what gets formatted
# and shown to the model when asking for better searches.
query_results_pairs = []

# Only the first two proposed queries are searched, ten hits each.
# The loop variable is deliberately not named `query`, so the user's question string
# bound to `query` is not overwritten by the last Query object.
# NOTE(review): Google's Custom Search JSON API documents `dateRestrict` values as
# d[number] / w[number] / m[number] / y[number], while the model is prompted for
# values such as "1y" or "all" - confirm that utils.google_search.search converts them.
for search_query in result.queries[:2]:
    query_results_pairs.append(
        (
            search_query,
            list(search(search_query.query, num=10, dateRestrict=search_query.time_period)),
        )
    )


In [30]:

def format_search(query, results):
    """Render one search and its hits as a plain-text section for a prompt.

    Args:
        query: Either a plain search string or an object exposing ``query`` and
            (optionally) ``time_period`` attributes, such as the ``Query`` model.
        results: Iterable of hits, each exposing ``title``, ``link`` and ``snippet``.

    Returns:
        A header line ``# Search query: <text>`` (followed by
        ``(time period: <period>)`` when one is available), then one numbered
        entry per hit: ``<n>. <title> - <link>`` with the snippet on the next line.
    """
    # Use the human-readable query text; interpolating a model object directly
    # would leak its repr (``query='...' time_period='...'``) into the prompt.
    query_text = getattr(query, "query", query)
    time_period = getattr(query, "time_period", None)

    header = f"# Search query: {query_text}"
    if time_period:
        header += f" (time period: {time_period})"

    result_str = "\n".join(
        f"{position}. {hit.title} - {hit.link}\n{hit.snippet}"
        for position, hit in enumerate(results, start=1)
    )
    return f"{header}\n{result_str}"

def format_searches(query_results_pairs):
    """Render every (query, results) pair and join the sections with a blank line."""
    sections = []
    for searched_query, hits in query_results_pairs:
        sections.append(format_search(searched_query, hits))
    return "\n\n".join(sections)

# Preview the text that is later passed to the revision prompt as {search_results}.
print(format_searches(query_results_pairs))

# Search query: query='Current lighting customer reviews' time_period='1y'
1. Working at Current Lighting | Glassdoor - https://www.glassdoor.com/Overview/Working-at-Current-Lighting-EI_IE8575783.11,27.htm
Customer Relations Jobs. Administrative Assistant Jobs ... All answers shown come directly from Current Lighting Reviews and are not edited or altered.
2. [Geekerwan]Intel Lunar Lake in-depth review: Thin and light laptops ... - https://www.reddit.com/r/hardware/comments/1fuk76p/geekerwanintel_lunar_lake_indepth_review_thin_and/
Oct 2, 2024 ... I would also argue that the current generation of Snapdragon is ... client mobile. Upvote 2. Downvote Reply reply. Award
3. Customer reviews: CURRENT USA Orbit R24 Reef ... - Amazon.com - https://www.amazon.com/Current-USA-Aquarium-Wireless-Controller/product-reviews/B09423ZLWS
Find helpful customer reviews and review ratings for CURRENT USA Orbit R24 Reef LED Aquarium Light with Flex Arm Tank Mount | Wireless Light and Pump ...
4. Working at 

In [31]:
# Structured reply for the revision step; passed to `with_structured_output`.
# NOTE(review): documented with `#` comments rather than a docstring because a class
# docstring would presumably be included in the schema sent to the model - confirm.
class QueryRevision(BaseModel):
    # Field order mirrors the prompt: interpret, then critique, then propose queries.
    interpretation: str = Field(description="interpretation of the user's information need(s)")
    # What is lacking in the searches tried so far.
    critique: str = Field(description="brief critique of the existing search results")
    # New searches to run in addition to the earlier ones.
    queries: List[Query] = Field(description="list of additional relevant Google searches")

# Prompt for the revision step: critique the searches tried so far and propose more.
# Rebinding `_prompt` replaces whatever template the name held before this cell.
# Template variables that must be supplied on invoke:
# {num_queries}, {query}, {search_results}, {context}.
_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            # Task instructions, followed by the shared search-operator tips.
            """
Given the following question and a list of search results, provide a set of {num_queries} relevant Google searches that would make the search results more comprehensive.
First think about the user's question and provide your own interpretation of the information need(s).
Then think about the previous queries and search results and briefly explain how they are limited.
Then generate the additional queries including the time period for the search.
            """ + search_tips,
        ),
        (
            "human",
            # The user's question, the formatted hits gathered so far, and the company background.
            """
User query: {query}

Existing search results:
{search_results}

Additional context:
{context}

Now, please provide your interpretation, and suggest a set of Google searches that are better than the ones we've tried so far.

            """,
        ),
    ]
)


# Rebuild the chain so the reply is parsed into a QueryRevision object.
llm = ChatOpenAI(temperature=0, model="gpt-4o-mini")
runnable = _prompt | llm.with_structured_output(QueryRevision)

# First revision round: show the model every search gathered so far.
first_refinement_result = runnable.invoke(
    dict(
        query=query,
        context=context,
        num_queries=2,
        search_results=format_searches(query_results_pairs),
    )
)

# Bare expression so the notebook displays the model's critique.
first_refinement_result.critique

'The existing search results are limited as they include a mix of unrelated topics and reviews from various sources, not specifically focused on Current GE lighting. Additionally, some results are outdated or pertain to different products or brands, which may not provide the relevant insights the user is looking for.'

In [32]:
# Show each search proposed in the first revision round.
# The loop variable is deliberately not named `query`: a top-level `for` rebinds its
# target in the notebook namespace, which would replace the user's question string
# that later cells still pass to the prompt as {query}.
for suggested_query in first_refinement_result.queries:
    print(suggested_query)

query='Current GE lighting product reviews site:reddit.com' time_period='1y'
query='Current GE lighting customer feedback site:reddit.com' time_period='1y'


In [33]:
# Run the first two revised queries (ten hits each) and add them to the running list.
# The loop variable is deliberately not named `query`, so the user's question string
# bound to `query` is not overwritten by the last Query object.
for search_query in first_refinement_result.queries[:2]:
    query_results_pairs.append(
        (
            search_query,
            list(search(search_query.query, num=10, dateRestrict=search_query.time_period)),
        )
    )


In [34]:
# Preview the accumulated searches, now including the first revision round's hits.
print(format_searches(query_results_pairs))

# Search query: query='Current lighting customer reviews' time_period='1y'
1. Working at Current Lighting | Glassdoor - https://www.glassdoor.com/Overview/Working-at-Current-Lighting-EI_IE8575783.11,27.htm
Customer Relations Jobs. Administrative Assistant Jobs ... All answers shown come directly from Current Lighting Reviews and are not edited or altered.
2. [Geekerwan]Intel Lunar Lake in-depth review: Thin and light laptops ... - https://www.reddit.com/r/hardware/comments/1fuk76p/geekerwanintel_lunar_lake_indepth_review_thin_and/
Oct 2, 2024 ... I would also argue that the current generation of Snapdragon is ... client mobile. Upvote 2. Downvote Reply reply. Award
3. Customer reviews: CURRENT USA Orbit R24 Reef ... - Amazon.com - https://www.amazon.com/Current-USA-Aquarium-Wireless-Controller/product-reviews/B09423ZLWS
Find helpful customer reviews and review ratings for CURRENT USA Orbit R24 Reef LED Aquarium Light with Flex Arm Tank Mount | Wireless Light and Pump ...
4. Working at 

In [35]:
# Second revision round: same inputs as the first, reusing the existing `runnable`
# chain, with all searches gathered so far as {search_results}.
second_refinement_result = runnable.invoke(
    dict(
        query=query,
        context=context,
        num_queries=2,
        search_results=format_searches(query_results_pairs),
    )
)

# Bare expression so the notebook displays the model's critique.
second_refinement_result.critique

'The existing search results are limited as they include a mix of unrelated topics and products, and many results do not directly address customer feedback on Current GE Lighting. Additionally, some results are outdated or pertain to other GE products rather than lighting specifically.'

In [36]:
# Bare expression so the notebook displays the queries proposed in the second round.
second_refinement_result.queries

[Query(query='Current GE Lighting customer reviews site:reddit.com', time_period='1y'),
 Query(query='Current GE Lighting user experiences site:reddit.com', time_period='1y')]