# LLM as the entrypoint

- Inspired by https://blog.apiad.net/p/building-a-perplexity-ai-clone and other write-ups on building Perplexity.ai clones

In [27]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages.ai import AIMessage

from pydantic import BaseModel, Field
from typing import List

# A single Google search to run: the search text plus a look-back window.
# Used as the element type of the `queries` lists in the structured-output
# schemas defined in this notebook. Documented with `#` comments rather than a
# docstring so the generated schema text stays unchanged.
class Query(BaseModel):
    # Text of the search to issue.
    query: str = Field(description="Google search query")
    # Look-back window label produced by the model (e.g. "1y", "5y", "all").
    # NOTE(review): this value is passed unchanged as `dateRestrict` to
    # `search(...)` later in this file — confirm that helper accepts labels in
    # this form (including "all").
    time_period: str = Field(description="The time period for the search (all, 5y, 1y, 1m, 1w, 1d, etc.)")

# Structured output of the query-expansion step: the model's reading of the
# user's question followed by the searches it proposes. Passed to
# `llm.with_structured_output(...)` below. Documented with `#` comments rather
# than a docstring so the generated schema text stays unchanged.
class QueryExpansion(BaseModel):
    # The model's own statement of what the user wants to find out.
    interpretation: str = Field(description="interpretation of the user's information need(s)")
    # Proposed searches; each item carries its text and a time period.
    queries: List[Query] = Field(description="list of relevant Google searches")

# Prompt for the initial query-expansion step.
# Template variables: {num_queries} (how many searches to ask for) and
# {query} (the user's question). The message strings are left exactly as
# written because their whitespace is part of what is sent to the model.
_prompt = ChatPromptTemplate.from_messages(
    [
        # System message: instructions (interpret first, then propose searches).
        (
            "system",
            """
Given the following question, provide a set of {num_queries} relevant Google searches that would answer the question. 
First think about the user's question and provide your own interpretation of the information need(s).
Then generate the relevant queries including the time period for the search.
            """,
        ),
        # Human message: carries the user's question.
        (
            "human",
            """
User query: {query}
            """,
        ),
    ]
)



# Query-expansion step: pipe the prompt into a chat model constrained to
# return a QueryExpansion object, then run it for one example question.
llm = ChatOpenAI(temperature=0, model="gpt-4o-mini")
runnable = _prompt | llm.with_structured_output(QueryExpansion)
result = runnable.invoke(
    dict(
        query="Is Pomelo Care a good company to work for?",
        num_queries=2,
    )
)

# Bare expression so the notebook displays the parsed result.
result

QueryExpansion(interpretation='The user is looking for reviews, employee experiences, and overall company culture at Pomelo Care to determine if it is a good workplace.', queries=[Query(query='Pomelo Care employee reviews 2023', time_period='1y'), Query(query='Pomelo Care company culture and work environment', time_period='1y')])

In [28]:
# Show each proposed search on its own line (uses the Query model's string form).
for query in result.queries:
    print(query)

query='Pomelo Care employee reviews 2023' time_period='1y'
query='Pomelo Care company culture and work environment' time_period='1y'


In [29]:
from src.utils.google_search import search

# Accumulates (Query, [hits]) tuples; later cells append to this same list.
query_results_pairs = []

# Run at most the first two proposed searches, asking for 10 hits each.
for query in result.queries[:2]:
    hits = search(query.query, num=10, dateRestrict=query.time_period)
    # Materialise the hits so they can be formatted more than once later.
    query_results_pairs.append((query, list(hits)))


[32m2024-08-27 15:18:37.361[0m | [34m[1mDEBUG   [0m | [36mgoogle_search[0m:[36msearch[0m:[36m58[0m - [34m[1mGoogle search results: {'kind': 'customsearch#search', 'url': {'type': 'application/json', 'template': 'https://www.googleapis.com/customsearch/v1?q={searchTerms}&num={count?}&start={startIndex?}&lr={language?}&safe={safe?}&cx={cx?}&sort={sort?}&filter={filter?}&gl={gl?}&cr={cr?}&googlehost={googleHost?}&c2coff={disableCnTwTranslation?}&hq={hq?}&hl={hl?}&siteSearch={siteSearch?}&siteSearchFilter={siteSearchFilter?}&exactTerms={exactTerms?}&excludeTerms={excludeTerms?}&linkSite={linkSite?}&orTerms={orTerms?}&dateRestrict={dateRestrict?}&lowRange={lowRange?}&highRange={highRange?}&searchType={searchType}&fileType={fileType?}&rights={rights?}&imgSize={imgSize?}&imgType={imgType?}&imgColorType={imgColorType?}&imgDominantColor={imgDominantColor?}&alt=json'}, 'queries': {'request': [{'title': 'Google Custom Search - Pomelo Care employee reviews 2023', 'totalResults': '1400

In [30]:

def format_search(query, results):
    """Render one search and its hits as a plain-text section for an LLM prompt.

    Args:
        query: The search that was run. Either a plain string, or an object
            exposing a ``query`` attribute (and optionally ``time_period``),
            such as the ``Query`` model defined in this notebook.
        results: Iterable of hits; each must expose ``title``, ``link`` and
            ``snippet`` attributes.

    Returns:
        A ``# Search query: ...`` heading followed by one numbered entry per
        hit (title and link on one line, snippet on the next), or the marker
        ``(no results)`` when there are no hits.
    """
    # Build the heading from the individual fields instead of interpolating
    # the object itself, which would leak its repr
    # (``query='...' time_period='1y'``) into the prompt text.
    query_text = getattr(query, "query", query)
    time_period = getattr(query, "time_period", None)
    heading = f"# Search query: {query_text}"
    if time_period:
        heading += f" (time period: {time_period})"

    entries = [
        f"{rank}. {hit.title} - {hit.link}\n{hit.snippet}"
        for rank, hit in enumerate(results, start=1)
    ]
    # An explicit marker is clearer to the reader (human or model) than an
    # empty section under the heading.
    body = "\n".join(entries) if entries else "(no results)"
    return f"{heading}\n{body}"

def format_searches(query_results_pairs):
    """Render every (query, results) pair and join the sections with blank lines.

    Args:
        query_results_pairs: Iterable of ``(query, results)`` tuples; each pair
            is rendered by ``format_search``.

    Returns:
        The rendered sections separated by one empty line, or ``""`` when
        there are no pairs.
    """
    sections = [format_search(searched, hits) for searched, hits in query_results_pairs]
    return "\n\n".join(sections)

# Preview the text block that will be passed to the model as "Existing search results".
print(format_searches(query_results_pairs))

# Search query: query='Pomelo Care employee reviews 2023' time_period='1y'
1. Pomelo Care : r/dietetics - https://www.reddit.com/r/dietetics/comments/18lkni7/pomelo_care/
Dec 18, 2023 ... Seems like a good job but when I interviewed they didn't really have lots of answers for my questions! But like I said, start ups are iffy ...
2. Pomelo Reviews: What Is It Like to Work At Pomelo?<!-- --> - https://www.glassdoor.com/Reviews/Pomelo-Reviews-E4407541.htm
Pomelo has an employee rating of 4.4 out of 5 stars, based on 18 company reviews on Glassdoor which indicates that most employees have an excellent working ...
3. Pomelo Care | LinkedIn - https://www.linkedin.com/company/pomelo-care
Jul 20, 2024 ... Employees at Pomelo Care. Click here to view Ron Shah's profile. Ron ... If you're at #HLTH2023, come say hi to Marta Bralic Kerns today ...
4. Pomelo Health Reviews: What Is It Like to Work At Pomelo Health ... - https://www.glassdoor.com/Reviews/Pomelo-Health-Reviews-E4602616.htm
Okay. Devo

In [32]:
# Structured output of the refinement step: interpretation of the question,
# a critique of the searches run so far, and additional searches to run.
# Passed to `llm.with_structured_output(...)` below. Documented with `#`
# comments rather than a docstring so the generated schema text stays unchanged.
class QueryRevision(BaseModel):
    # The model's own statement of what the user wants to find out.
    interpretation: str = Field(description="interpretation of the user's information need(s)")
    # Short assessment of what the existing results are missing.
    critique: str = Field(description="brief critique of the existing search results")
    # Follow-up searches intended to fill the gaps named in the critique.
    queries: List[Query] = Field(description="list of additional relevant Google searches")

# Prompt for the refinement step. This rebinds `_prompt`, replacing the
# query-expansion prompt defined earlier in the notebook.
# Template variables: {num_queries}, {query} (the user's question) and
# {search_results} (formatted text of the searches already run). The message
# strings are left exactly as written because their whitespace is part of
# what is sent to the model.
_prompt = ChatPromptTemplate.from_messages(
    [
        # System message: interpret, critique the existing results, then propose more searches.
        (
            "system",
            """
Given the following question and a list of search results, provide a set of {num_queries} relevant Google searches that would make the search results more comprehensive.
First think about the user's question and provide your own interpretation of the information need(s).
Then think about the previous queries and search results and briefly explain how they are limited.
Then generate the additional queries including the time period for the search.
            """,
        ),
        # Human message: the question plus the results gathered so far.
        (
            "human",
            """
User query: {query}

Existing search results:
{search_results}
            """,
        ),
    ]
)


# Refinement step: pipe the refinement prompt into a chat model constrained to
# return a QueryRevision object, and feed it the searches gathered so far.
llm = ChatOpenAI(temperature=0, model="gpt-4o-mini")
runnable = _prompt | llm.with_structured_output(QueryRevision)
first_refinement_result = runnable.invoke(
    dict(
        query="Is Pomelo Care a good company to work for?",
        num_queries=2,
        search_results=format_searches(query_results_pairs),
    )
)

# Bare expression so the notebook displays the model's critique.
first_refinement_result.critique

'The existing search results primarily focus on employee reviews and company culture but are limited in scope. They do not include broader perspectives such as comparisons with similar companies, long-term employee satisfaction trends, or insights from former employees who may have left the company. Additionally, the time frame of the searches is limited to the past year, which may not capture longer-term trends or changes in the company.'

In [33]:
# Show each follow-up search proposed by the refinement step, one per line.
for query in first_refinement_result.queries:
    print(query)

query='Pomelo Care employee reviews and ratings 2022' time_period='2y'
query='Pomelo Care company culture and employee satisfaction over the years' time_period='5y'


In [34]:
# Run at most the first two follow-up searches (10 hits each) and add their
# (Query, [hits]) tuples to the list built by the initial searches.
query_results_pairs.extend(
    (query, list(search(query.query, num=10, dateRestrict=query.time_period)))
    for query in first_refinement_result.queries[:2]
)


[32m2024-08-27 15:20:31.128[0m | [34m[1mDEBUG   [0m | [36mgoogle_search[0m:[36msearch[0m:[36m58[0m - [34m[1mGoogle search results: {'kind': 'customsearch#search', 'url': {'type': 'application/json', 'template': 'https://www.googleapis.com/customsearch/v1?q={searchTerms}&num={count?}&start={startIndex?}&lr={language?}&safe={safe?}&cx={cx?}&sort={sort?}&filter={filter?}&gl={gl?}&cr={cr?}&googlehost={googleHost?}&c2coff={disableCnTwTranslation?}&hq={hq?}&hl={hl?}&siteSearch={siteSearch?}&siteSearchFilter={siteSearchFilter?}&exactTerms={exactTerms?}&excludeTerms={excludeTerms?}&linkSite={linkSite?}&orTerms={orTerms?}&dateRestrict={dateRestrict?}&lowRange={lowRange?}&highRange={highRange?}&searchType={searchType}&fileType={fileType?}&rights={rights?}&imgSize={imgSize?}&imgType={imgType?}&imgColorType={imgColorType?}&imgDominantColor={imgDominantColor?}&alt=json'}, 'queries': {'request': [{'title': 'Google Custom Search - Pomelo Care employee reviews and ratings 2022', 'totalRes

In [35]:
# Print the combined text for every search run so far (initial plus follow-up).
print(format_searches(query_results_pairs))

# Search query: query='Pomelo Care employee reviews 2023' time_period='1y'
1. Pomelo Care : r/dietetics - https://www.reddit.com/r/dietetics/comments/18lkni7/pomelo_care/
Dec 18, 2023 ... Seems like a good job but when I interviewed they didn't really have lots of answers for my questions! But like I said, start ups are iffy ...
2. Pomelo Reviews: What Is It Like to Work At Pomelo?<!-- --> - https://www.glassdoor.com/Reviews/Pomelo-Reviews-E4407541.htm
Pomelo has an employee rating of 4.4 out of 5 stars, based on 18 company reviews on Glassdoor which indicates that most employees have an excellent working ...
3. Pomelo Care | LinkedIn - https://www.linkedin.com/company/pomelo-care
Jul 20, 2024 ... Employees at Pomelo Care. Click here to view Ron Shah's profile. Ron ... If you're at #HLTH2023, come say hi to Marta Bralic Kerns today ...
4. Pomelo Health Reviews: What Is It Like to Work At Pomelo Health ... - https://www.glassdoor.com/Reviews/Pomelo-Health-Reviews-E4602616.htm
Okay. Devo

In [25]:
# Second refinement pass: re-run the refinement chain over everything in
# `query_results_pairs`.
# NOTE(review): the question here is about 98point6, while the cells above
# fill `query_results_pairs` with Pomelo Care searches, and this cell's
# execution count (25) is lower than theirs — it looks like a leftover from an
# earlier run. Confirm which question is intended before re-running the
# notebook top to bottom.
second_refinement_result = runnable.invoke(
    {
        "query": "Is 98point6 a good company to work for?",
        "num_queries": 2,
        "search_results": format_searches(query_results_pairs)
    }
)

# Bare expression so the notebook displays the model's critique.
second_refinement_result.critique

'The existing search results primarily focus on employee reviews and ratings from platforms like Glassdoor and Indeed, but they lack in-depth analysis of specific aspects such as employee benefits, work-life balance, and comparisons with other companies. Additionally, the time frames of the searches are limited, which may not capture the full scope of employee experiences over time.'

In [26]:
# Bare expression so the notebook displays the searches proposed by the second pass.
second_refinement_result.queries

[Query(query='98point6 employee benefits and work-life balance reviews', time_period='5y'),
 Query(query='98point6 company culture and employee satisfaction comparison with similar companies', time_period='5y')]