In [2]:
from typing import NamedTuple, Mapping, List
from pprint import pprint

from core import CompanyProduct
from search import SearchResult
import news.search
import news.scrape
import news.summarize

class NewsSummary(NamedTuple):
    """Record of one news-summarization run for a single target.

    Keeps the input, the intermediate artifacts, and the final output together
    so each stage of the run can be inspected afterwards.
    """

    # input
    # The company/product the news was looked up for.
    target: CompanyProduct

    # intermediates
    # Search results found for the target.
    search_results: List[SearchResult]
    # Scraped article markdown keyed by article URL (process_news uses SearchResult.link).
    # NOTE(review): values are presumably empty/None when scraping yields nothing,
    # since process_news filters out falsy entries before summarizing — confirm.
    article_markdowns: Mapping[str, str]

    # output
    # Summary text; process_news fills this from the summarizer result's `.content`.
    summary_markdown: str

# Let's try another company.
# NOTE(review): `CompanyProduct.same` presumably builds a target whose company and
# product share the same name — confirm against `core`.
target = CompanyProduct.same("98point6")

def process_news(target: CompanyProduct) -> NewsSummary:
    """Run the search -> scrape -> summarize pipeline for one target.

    Args:
        target: The company/product to look up news for.

    Returns:
        A NewsSummary holding the target, the search results, the scraped
        markdown for every result link (including links whose scrape came back
        empty), and the summary text taken from the summarizer result's
        ``content`` attribute.
    """
    search_results = news.search.find_news_articles(target, num_results=30)
    print(f"{len(search_results)} URLs found: {search_results}")

    # Scrape every hit, keyed by URL. A link that appears more than once is
    # fetched each time and the later fetch overwrites the earlier one.
    scraped_by_url = {}
    for hit in search_results:
        scraped_by_url[hit.link] = news.scrape.get_article_markdown(hit.link)

    # Only truthy (non-empty) articles are handed to the summarizer.
    usable_articles = list(filter(None, scraped_by_url.values()))
    llm_result = news.summarize.summarize(target, usable_articles)

    return NewsSummary(
        target=target,
        search_results=search_results,
        article_markdowns=scraped_by_url,
        summary_markdown=llm_result.content,
    )

# Run the whole pipeline for the chosen target, then pretty-print every field of
# the result (target, search results, scraped articles, summary text).
summary = process_news(target)
pprint(summary)

30 URLs found: [SearchResult(title='98point6 Technologies Announces the Acquisition of Bright.md to ...', link='https://www.prnewswire.com/news-releases/98point6-technologies-announces-the-acquisition-of-brightmd-to-accelerate-the-launch-of-its-asynchronous-care-module-302034295.html', snippet='Jan 16, 2024 ... SEATTLE, Jan. 16, 2024 /PRNewswire/ -- 98point6 Technologies, a leader in licensed on-demand virtual care software, announced the addition of a new asynchronous\xa0...', formattedUrl='https://www.prnewswire.com/news.../98point6-technologies-announces-the-...'), SearchResult(title='98point6 hit by new layoffs in latest change at health tech startup ...', link='https://www.geekwire.com/2024/98point6-hit-by-new-layoffs-in-latest-change-at-health-tech-startup/', snippet='Apr 23, 2024 ... In March of last year, 98point6 announced that it was selling its virtual care platform and primary care business to Transcarent for $100 million in cash and\xa0...', formattedUrl='https://www.geekw

In [5]:
from markdown import markdown
import re

def write_summary(summary: "NewsSummary"):
    """Render a news summary to ``<company>_news.html`` in the working directory.

    Before conversion the markdown is lightly repaired: when a ``-`` line
    directly follows a line of ordinary text, a blank line is inserted between
    them (presumably because the markdown converter would otherwise fold the
    bullets into the preceding paragraph instead of starting a list).

    Args:
        summary: Result whose ``summary_markdown`` is rendered and whose
            ``target.company`` is used to name the output file.
    """
    # Insert a blank line between a non-list line and a "-" line right after it.
    # The negative lookahead skips lines that are themselves "-" items (indented
    # or not), and `.+` cannot match an empty line, so neither a blank line nor a
    # nested item is mistaken for "ordinary text" and existing lists stay intact.
    cleaned_markdown = re.sub(
        r"^(?![ \t]*-)(.+)\n-",
        r"\1\n\n-",
        summary.summary_markdown,
        flags=re.MULTILINE,
    )

    # NOTE(review): the company name is used verbatim in the file name; names
    # containing path separators would need sanitizing — confirm expected inputs.
    # Explicit UTF-8 so non-ASCII text in the summary is written the same way on
    # every platform instead of depending on the locale's default encoding.
    with open(f"{summary.target.company}_news.html", "w", encoding="utf-8") as f:
        f.write(markdown(cleaned_markdown))

# Write the summary as HTML to "<company>_news.html" in the current working directory.
write_summary(summary)