In [1]:
import os
from dotenv import load_dotenv
load_dotenv()
from metaphor_python import Metaphor
import openai
from IPython.display import Markdown, display

openai.api_key = os.getenv("OPENAI_API_KEY")
metaphor = Metaphor(os.getenv("METAPHOR_API_KEY"))

In [2]:
def get_llm_response(system='You are a helpful assistant.', user = '', temperature = 1, model = 'gpt-3.5-turbo'):
    completion = openai.chat.completions.create(
        model=model,
        temperature=temperature,
        messages=[
            {'role': 'system', 'content': system},
            {'role': 'user', 'content': user},
        ]
    )
    return completion.choices[0].message.content

In [3]:
PROPERTY_TOPIC = 'Vintage Clark Kent figurine, possibly from Kenner 1986 "Super Powers Collection."'
STRUCTURE_TOPIC = 'Purchase roof shingles'

In [4]:
def create_keyword_query_generation_prompt(topic, n):
    return f"""I'm writing a research report on {topic} and need help coming up with Google keyword search queries.
Google keyword searches should just be a few words long. It should not be a complete sentence.
Please generate a diverse list of {n} Google keyword search queries that would be useful for writing a research report on ${topic}. Do not add any formatting or numbering to the queries."""

print(get_llm_response(
    system='The user will ask you to help generate some search queries. Respond with only the suggested queries in plain text with no extra formatting, each on it\'s own line.',
    user=create_keyword_query_generation_prompt(PROPERTY_TOPIC, 3),
))


Vintage Clark Kent figurine
Kenner 1986 Clark Kent figurine
Super Powers Collection Clark Kent figurine


In [5]:
def generate_search_queries(topic, n):
    user_prompt = create_keyword_query_generation_prompt(topic, n)
    completion = get_llm_response(
        system='The user will ask you to help generate some search queries. Respond with only the suggested queries in plain text with no extra formatting, each on it\'s own line.',
        user=user_prompt,
        temperature=1
    )
    queries = [s for s in completion.split('\n') if s.strip()][:n]
    return queries

In [6]:
prop_queries = generate_search_queries(PROPERTY_TOPIC, 3)
struct_queries = generate_search_queries(STRUCTURE_TOPIC, 3)

In [7]:
print(prop_queries)
print(struct_queries)

['vintage Clark Kent figurine', 'Kenner 1986 Clark Kent figurine', 'Super Powers Collection Clark Kent figurine']
['purchase roof shingles', 'buy roof shingles', 'roof shingles for sale']


In [8]:
def get_search_results(queries, type, linksPerQuery=1):
    results = []
    for query in queries:
        search_response = metaphor.search(query, type=type, num_results=linksPerQuery, use_autoprompt=False)
        results.extend(search_response.results)
    return results

In [13]:
struct_links = get_search_results(struct_queries, 'neural')

In [10]:
def display_base_models(base_models):
    """
    Display the title, url, and published_date of each BaseModel object in a list.

    Args:
        base_models (List[BaseModel]): List of BaseModel objects.

    """
    for model in base_models:
        print(f"Title: {model.title}")
        print(f"URL: {model.url}")
        print(f"Published Date: {model.published_date}")

In [14]:
display_base_models(struct_links)

Title: Roofing Selection - American Roofing & Renovation
URL: https://www.americanroofingandrenovation.com/roofing-selection
Published Date: 2023-02-05
Title: Discount Roofing Supplies – Smyrna, GA | Preferred Roofing Supply Supply
URL: https://www.preferredroofingsupply.com/smyrna-ga
Published Date: 2021-09-21
Title: Roofing | Premium Business Directories | Free Directory Submission Website
URL: https://www.blimey.us/roofing
Published Date: 2023-01-01


In [15]:
for link in struct_links:
    print(link)

Title: Roofing Selection - American Roofing & Renovation
URL: https://www.americanroofingandrenovation.com/roofing-selection
ID: S8nua4b2t1gi2P81QQJ81g
Score: 0.16696617007255554
Published Date: 2023-02-05
Author: None
Extract: None
Title: Discount Roofing Supplies – Smyrna, GA | Preferred Roofing Supply Supply
URL: https://www.preferredroofingsupply.com/smyrna-ga
ID: Pp4IS-Mcs_g24O_NTllo9w
Score: 0.15565849840641022
Published Date: 2021-09-21
Author: None
Extract: None
Title: Roofing | Premium Business Directories | Free Directory Submission Website
URL: https://www.blimey.us/roofing
ID: XNlXU9JJvs0uji4GwgeQLw
Score: 0.18237528204917908
Published Date: 2023-01-01
Author: None
Extract: None


In [11]:
prop_links = get_search_results(prop_queries, 'keyword')

In [12]:
for link in prop_links:
    print(link)

Title: Clark Kent Action Figure for sale - eBay
URL: https://www.ebay.com/b/Clark-Kent-Action-Figure/246/bn_7023242372
ID: 78ec10a3-cdaa-47c8-9d95-85f757098c63
Score: None
Published Date: None
Author: None
Extract: None


In [16]:
from bs4 import BeautifulSoup
import re

def clean_html_content(content: str) -> str:
    """
    Clean the HTML content using BeautifulSoup.

    Args:
        content (str): HTML content.

    Returns:
        str: Cleaned text content.
    """
    soup = BeautifulSoup(content, "html.parser")

    # Extract header and paragraph tags
    header_tags = soup.find_all(re.compile(r"^h\d$"))
    paragraph_tags = soup.find_all("p")

    # Strip HTML tags and collect text content
    stripped_content = ""
    for tag in header_tags + paragraph_tags:
        stripped_content += " " + tag.get_text().strip() + " "

    return ' '.join(stripped_content.split())

In [17]:
def get_page_contents(search_results):
    contents_response = metaphor.get_contents(search_results)
    return contents_response.contents

In [18]:
struct_content = get_page_contents([link.id for link in struct_links])

In [19]:
print(f"{struct_content[1].extract}")

<div><div>
<article>
	<div><div><p></p><h2>Roofing SuppliesSmyrna, GA</h2><p></p></div><div><div><p></p>
	</div><div><div><p></p><h2>TRUSTED ROOFING SUPPLY COMPANY</h2><p></p></div><div><p>Do you want to complete your roofing job efficiently and within budget? While it may seem like an impossible task, by choosing the best roofing supply company, having a seamless experience on your next project can actually be simple.  At Preferred Roofing Smyrna, Georgia, we understand roofing and siding projects aren't always expected. That's why we do everything we can to offer our customers the best prices on top brand roofing materials.</p></div></div></div><div><div><p>Whether you are a contractor, homeowner, builder, or local roofer, saving money is one of the most important factors when working on a construction project. Here are a few ways that Preferred Roofing Supply Smyrna helps our customers save money:</p></div><div><div><div><p></p><h2>01</h2><p></p></div>
<div><p>We offer a price match

In [23]:
struct_content[2]

DocumentContent(id='XNlXU9JJvs0uji4GwgeQLw', url='https://www.blimey.us/roofing', title='Roofing | Premium Business Directories | Free Directory Submission Website', extract='<div>\n<section>\n <header>\n <div>\n <p><a href="https://www.blimey.us/index.php?route=common/home"></a></p>\n </div>\n </header>\n</section>\n<div>\n <div>\n \n <p>\n Show: \n Sort By: \n </p>\n \n <div> \n <div>\n <p>\n \uf1ba\n \n \n \n </p>\n </div>\n <nav>\n <ul>\n <li><h2> All American....</h2></li> \n <li><i>\uf0ac </i>Composition shingles are affordable, durable....</li>\n \n <li> <a href="https://www.blimey.us/detail/6602/all-american-roofing---shingle-roofing-johnson-county.html"> VIEW BUSSINESS </a></li>\n </ul>\n </nav>\n </div> \n \n <div> \n <div>\n <p>\n \uf1ba\n \n \n \n </p>\n </div>\n <nav>\n <ul>\n <li><h2> Are You Look....</h2></li> \n <li><i>\uf0ac </i>When wind, weather or simple time has made y....</li>\n \n <li> <a href="https://www.blimey.us/detail/7078/are-you-looking-for-roof-maintenanc

In [24]:
def create_web_content_string(search_contents: list, char_limit: int = 30000) -> str:
    """
    Synthesize a report from search contents.

    Args:
        search_contents (list): List of search contents.
        char_limit (int, optional): Total character limit. Defaults to 30000.

    Returns:
        str: Synthesized report.
    """
    total_chars = sum([len(clean_html_content(item.extract)) for item in search_contents])
    inputData = ''

    for item in search_contents:
        cleaned_content = clean_html_content(item.extract)
        item_chars = len(cleaned_content)
        slice_ratio = item_chars / total_chars
        slice_limit = int(char_limit * slice_ratio)
        sliced_content = cleaned_content[:slice_limit]

        inputData += f'--START ITEM--\nURL: {item.url}\nTITLE: {item.title}\nCONTENT: {sliced_content}\n--END ITEM--\n'

    return inputData
    # return get_llm_response(
    #     system='You are a helpful research assistant. Write a report according to the user\'s instructions.',
    #     user='Input Data:\n' + inputData + f'Write a two paragraph research report about {topic} based on the provided information. Include as many sources as possible. Provide citations in the text using footnote notation ([#]). First provide the report, followed by a single "References" section that lists all the URLs used, in the format [#] .',
    #     model='gpt-4' # want a better report? use gpt-4
    # )

In [28]:
prop_report = create_web_content_string(struct_content)

In [41]:
def format_for_markdown(text: str) -> str:
    """
    Formats the given text for markdown.

    Args:
        text (str): The text to be formatted.

    Returns:
        str: The formatted text.
    """
    # Split the text into items
    items = text.split("--END ITEM--")
    
    # Process each item
    formatted_items = []
    for item in items:
        if item.strip() == "":
            continue

        # Remove START ITEM tag and split into lines
        lines = item.replace("--START ITEM--", "").strip().split(" ")

        # Initialize formatted item
        formatted_item = "\n\n"

        # Add each line with a newline at the end
        for line in lines:
            if "URL:" in line or "TITLE:" in line:
                formatted_item += "<br/>" + line
            elif "CONTENT:" in line:
                formatted_item += "<br/>" + line + "<br/>"
            else:
                formatted_item += " " + line

        formatted_items.append(formatted_item.strip())

    return "<br/><br/>".join(formatted_items)

# Your provided text
text = """
--START ITEM-- URL: https://www.stanley12volt.com/ TITLE: HOME | stanley12volt CONTENT: WATCH & LEARN Watch and Learn Learn more about your favorite products! Check out the Watch & Learn Section to see more videos Subscribe for Updates Congrats! You’re subscribed --END ITEM-- --START ITEM-- URL: https://www.woodcraft.com/categories/planes TITLE: Hand Planes for Sale from WoodRiver, Stanley, Veritas & More | Woodcraft CONTENT: Woodworking Hand Planes This Simple Yet Versatile Tool Deserves a Place In Your Workshop Hand Plane Resources & Videos Hand Planes are deceptively simple tools that when used properly can deliver speed and precision not possible with power tools or abrasives. Prior to the introduction of motorized power tools, hand planes bridged the gap between rough lumber and finished projects. A properly tuned hand plane can give you superior results when it comes to smoothing and other fine work. Learn more about this versatile tool through the articles and videos below, and be sure to check out our Woodcraft's Hand Plane Buying Guide! Top of Page --END ITEM-- --START ITEM-- URL: https://www.k-bid.com/auction/44829/item/203 TITLE: Vintage Stanley Woodworking Planer No. 602 CONTENT: Pick up is located at the back entrance of the warehouse.
"""

# Format the text for markdown
formatted_text = format_for_markdown(text)
Markdown(f"{formatted_text}")

URL: https://www.stanley12volt.com/
 TITLE: HOME | stanley12volt
 CONTENT:
 WATCH & LEARN Watch and Learn Learn more about your favorite products! Check out the Watch & Learn Section to see more videos Subscribe for Updates Congrats! You’re subscribed

URL: https://www.woodcraft.com/categories/planes
 TITLE: Hand Planes for Sale from WoodRiver, Stanley, Veritas & More | Woodcraft
 CONTENT:
 Woodworking Hand Planes This Simple Yet Versatile Tool Deserves a Place In Your Workshop Hand Plane Resources & Videos Hand Planes are deceptively simple tools that when used properly can deliver speed and precision not possible with power tools or abrasives. Prior to the introduction of motorized power tools, hand planes bridged the gap between rough lumber and finished projects. A properly tuned hand plane can give you superior results when it comes to smoothing and other fine work. Learn more about this versatile tool through the articles and videos below, and be sure to check out our Woodcraft's Hand Plane Buying Guide! Top of Page

URL: https://www.k-bid.com/auction/44829/item/203
 TITLE: Vintage Stanley Woodworking Planer No. 602
 CONTENT:
 Pick up is located at the back entrance of the warehouse.

In [42]:
formatted_text = format_for_markdown(prop_report)
Markdown(f"{formatted_text}")

URL:
 https://www.americanroofingandrenovation.com/roofing-selection
TITLE: Roofing Selection - American Roofing &
 Renovation
CONTENT:
 Roofing Material Selection Landmark BestBuy Series The CertainTeed Landmark series is a durable, beautiful color-blended line of shingles. The widest array of colors to suit any trim, stucco or siding color. Manufactured with self-sealing adhesive strips and a wide nailing zone, larger than competitors for ease of installation so your roofing job is complete in no time. Specifications & Technical Data CertainTeed XT25 The CertainTeed XT25 shingles are lightweight and easy to handle. Affordable with a beautiful design and many color choices, the CertainTeed XT25 Series might be just what you need for your home. Talk to your American Roofing & Renovation specialist to see which shingles are right for your home. If you are looking for Architectural Shingles with staying power, your search stops here. When it comes to weathering the elements, XT 25 is at the top of its class. Specifications & Technical Data CertainTeed Belmont® Luxury Shingles When it comes to replicating the authentic appearance of natural slate, Belmont® truly hits the mark. This luxury shingle’s layered construction and blended coloration – which includes rich hues and high contrast shadow lines – artfully emulate the classic slate roof look. Made from top grade roofing asphalt that is reinforced with a strong fiber glass base mat, Belmont is built to endure, and comes backed by a lifetime-limited manufacturer warranty. If you are looking for Luxury Shingles with staying power, your search stops here. When it comes to weathering the elements, Belmont is at the top of its class. Specifications & Technical Data Smith Built Metal Roofs Smith Built stands behind the products we sell. We differentiate ourselves by only selling roofing panels that are painted domestically and that we warranty for 40 years. We offer a variety of colors and sizes. If you would like to see a sample of any of our products, please contact American Roofing & Renovation and we’ll

URL:
 https://www.preferredroofingsupply.com/smyrna-ga
TITLE: Discount Roofing Supplies – Smyrna, GA | Preferred Roofing Supply
 Supply
CONTENT:
 Roofing SuppliesSmyrna, GA TRUSTED ROOFING SUPPLY COMPANY 01 02 03 04 Roofing Products Do you want to complete your roofing job efficiently and within budget? While it may seem like an impossible task, by choosing the best roofing supply company, having a seamless experience on your next project can actually be simple. At Preferred Roofing Smyrna, Georgia, we understand roofing and siding projects aren't always expected. That's why we do everything we can to offer our customers the best prices on top brand roofing materials. Whether you are a contractor, homeowner, builder, or local roofer, saving money is one of the most important factors when working on a construction project. Here are a few ways that Preferred Roofing Supply Smyrna helps our customers save money: We offer a price match guarantee, so you don't have to waste time shopping around for the best prices; you already know we match competitive prices on roofing supplies and materials. We pass along our savings to customers. Through the years, we have developed trusted relationships with our distributors. This enables us to stock a large inventory of roofing materials and supplies at low prices. Any savings we receive, we pass along to you. One-stop-shop for roofing materials and supplies. Time is money. By stocking roofing tools, siding, asphalt shingles, metal roofing materials, dumpsters, and more, you can find everything you need at Preferred Roofing Supply Smyrna, which means you aren't driving to various stores looking for materials. We truly are a one-stop-shop. Preferred Roofing Supply Smyrna provides a free estimate so you can budget accordingly. Whether you are a contractor, homeowner, builder, or local roofer, saving money is one of the most important factors when working on a construction project. Here are a few ways that Preferred Roofing Supply Smyrna helps our customers save money: Choosing the best materials for your project is the best way to save money in the long run. Investing in quality tools and supplies preserves your investment. At Preferred Roofing Supply Smyrna, we have over 12 years of experience in the construction industry. Whether you need advice on what siding materials to select or what tool is best for the job, we can help. Our knowledge of Georgia construction codes and laws means we can be of even greater assistance to you. In addition, Preferred Roofing Supply Smyrna stocks large quantities of roofing supplies because we know emergencies happen. We don't want our customers to be caught off guard. Our trusted advantage is part of what sets us apart from other roofing supply companies. More than just a roofing supply company, our roofing experts are available to help you find the best materials for your project or offer any needed advice, and help you stay within budget.We value our relationships with our suppliers, local roofers, homeowners, and contractors. Keeping your roofing project on time and budget is our goal. At Preferred Roofing Supply Smyrna, we take the extra steps to so you don’t have to. When you need the best roofing products at the best prices, guaranteed, turn to Preferred Roofing Supply Smyrna, Georgia. We carry

URL:
 https://www.blimey.us/roofing
TITLE: Roofing | Premium Business Directories | Free Directory Submission
 Website
CONTENT:
 All American.... Are You Look.... Century Roof.... Commercial R.... Commercial R.... Commercial R.... Show: Sort By:      

In [29]:
print(f"{prop_report}")

--START ITEM--
URL: https://www.americanroofingandrenovation.com/roofing-selection
TITLE: Roofing Selection - American Roofing & Renovation
CONTENT: Roofing Material Selection Landmark BestBuy Series The CertainTeed Landmark series is a durable, beautiful color-blended line of shingles. The widest array of colors to suit any trim, stucco or siding color. Manufactured with self-sealing adhesive strips and a wide nailing zone, larger than competitors for ease of installation so your roofing job is complete in no time. Specifications & Technical Data CertainTeed XT25 The CertainTeed XT25 shingles are lightweight and easy to handle. Affordable with a beautiful design and many color choices, the CertainTeed XT25 Series might be just what you need for your home. Talk to your American Roofing & Renovation specialist to see which shingles are right for your home. If you are looking for Architectural Shingles with staying power, your search stops here. When it comes to weathering the elements, 

In [234]:
def researcher(topic, n_queries, n_links_per_query):
    search_queries = generate_search_queries(topic, n_queries)
    print(search_queries)
    search_results = get_search_results(search_queries, 'keyword', n_links_per_query)
    search_contents = get_page_contents([link.id for link in search_results])
    report = synthesize_report(topic, search_contents)
    return report

In [235]:
research = researcher(PROPERTY_TOPIC, 2, 2)

['vintage Clark Kent figurine', 'Kenner 1986 Super Powers Collection']


In [236]:
print(f"{research}")

The Vintage Clark Kent action figure is potentially part of the 'Super Powers Collection,' a line of action figures produced and made available by Kenner Products, an American toy company, between the years 1984-1986[^2^]. The collection was known to feature characters from DC Comics[^2^], indicating a strong possibility that characters such as Clark Kent, the civilian identity of Superman, would have been part of the product line. With the figures compounded of high quality plastic material[^2^], the popularity and collectibility of these figures have not diminished over the years. 

On online platforms such as eBay, various iconic action figure collectibles including the Clark Kent figure can be found[^1^]. Fans and collectors alike have taken to such platforms to buy, sell or trade these items. Therefore, it is fair to conclude that even after decades of their initial release, these Kenner action figures, inclusive of the Clark Kent model, continue to sustain appeal in today's marke

In [241]:
response = metaphor.search("ceiling drywall",
    num_results=5,
    type='keyword',
    start_published_date="2023-06-12"
)

In [242]:
print(response)

Title: How to Hang Drywall Like a Pro (DIY) - Family Handyman
URL: https://www.familyhandyman.com/project/how-to-hang-drywall-like-a-pro/
ID: cfbd3af3-52c9-4457-b897-30805375d47f
Score: None
Published Date: 2023-09-26T12:00:00.000Z
Author: None
Extract: None

Title: USG Sheetrock Brand 1/2 in. x 4 ft. x 8 ft. UltraLight Drywall 14113411708
URL: https://www.homedepot.com/p/USG-Sheetrock-Brand-1-2-in-x-4-ft-x-8-ft-UltraLight-Drywall-14113411708/202530243
ID: 03c5ea62-5489-449d-bfef-d954951799d4
Score: None
Published Date: 2023-11-21T12:00:00.000Z
Author: None
Extract: None

Title: Sheetrock vs. Drywall: What's the Difference? - Angie's List
URL: https://www.angi.com/articles/sheetrock-vs-drywall.htm
ID: ae79c4e8-94fc-4d88-954b-39fab16a13ce
Score: None
Published Date: 2023-10-11T12:00:00.000Z
Author: None
Extract: None

Title: Drywall Ceiling Cost to Install or Replace (2023 Update) - HomeGuide
URL: https://homeguide.com/costs/drywall-ceiling-cost
ID: 346338ca-b367-468e-9daa-08a45dacd97c
