In [23]:
from typing import List
from langchain.prompts import PromptTemplate
from langchain_ollama import ChatOllama
from langchain.schema import StrOutputParser
import requests

from bs4 import BeautifulSoup


class HackerNewsFetcher:
    """A class to fetch articles from Hacker News."""

    def __init__(self):
        self.base_url = "https://hacker-news.firebaseio.com/v0"
		

    def _fetch_article(self, article_id: int) -> dict:
        """Fetch an article from the Hacker News API.

        Args:
            article_id (int): The ID of the article to fetch.

        Returns:
            dict: A dictionary containing the article's data.
        """
        try:
            post = requests.get(f"{self.base_url}/item/{article_id}.json").json()
            return post
        except requests.RequestException as e:
            print(f"Failed to fetch article {article_id}: {e}")
            return None

    def _fetch_content(self, url: str) -> str:
        """Fetch the content of a web page.

        Args:
            url (str): The URL of the web page.

        Returns:
            str: The content of the web page as text.
        """
        try:
            response = requests.get(url)
            soup = BeautifulSoup(response.content, "html.parser")
            return soup.get_text()
        except requests.RequestException as e:
            print(f"Failed to fetch content from {url}: {e}")
            return ""
        

    def fetch_articles(self, top_k: int) -> List[dict]:
        """Fetch the top K articles from Hacker News.

        Args:
            top_k (int): The number of articles to fetch.

        Returns:
            List[dict]: A list of dictionaries containing the article's data.
        """
        response = requests.get(f"{self.base_url}/topstories.json")
        trending_list = response.json()

        articles = []
        for id in trending_list[:top_k]:
            post = self._fetch_article(id)
            if post and "url" in post:
                content = self._fetch_content(post["url"])
                article_data = {
                    "content": content,
                    "title": post["title"],
                    "url": post["url"],
                }
                articles.append(article_data)

        return articles


class ArticleSummarizer:
    """A class to summarize articles fetched from Hacker News."""

    def __init__(self):
        self.prompt_template = """
            Summarize the following article from HackerNews. Provide a brief summary in the following format:
            URL:
            Summary:
            
			The full post can be found at.
            
            Article:
            Title: {title}
            Content: {content}
            URL: {url}

            Summary:
            """

    def summarize_articles(self, articles: List[dict]) -> str:
        """Summarize a list of articles fetched from Hacker News.

        Args:
            articles (List[dict]): A list of dictionaries containing the article's data.

        Returns:
            str: A string summarizing each article.
        """
        llm = ChatOllama(model="llama3.1:8b", temperature=0)
        prompt_template = PromptTemplate(
            input_variables=["title", "content", "url"], template=self.prompt_template
        )

        chain = (prompt_template | llm | StrOutputParser()).invoke

        summaries = []
        for article in articles:
            summary = chain(
                {
                    "title": article["title"],
                    "content": article["content"],
                    "url": article["url"],
                }
            )
            summaries.append(summary)

        return "\n-------------\n".join(summaries)

In [24]:
# Main execution
fetcher = HackerNewsFetcher()
articles = fetcher.fetch_articles(top_k=4)

In [25]:
articles

[{'content': 'Create Calendar Entries with Anthropic Claude 3.5Greg’s RamblingsFollowGreg’s RamblingsFollowStupid but useful AI tricks: Creating calendar entries from an image using Anthropic Claude 3.5Greg Wilson·Aug 25, 2024·4 min readA few days ago, my jazz piano teacher sent me the new fall/winter schedule for my private jazz piano lessons -- 13 different dates -- as a JPG (mine are outlined in green marker):\n\nI was too lazy to go make 13 entries in Google Calendar, so I decided to see if Claude could help me out:\nI first uploaded the jpg to Claude 3.5 Sonnet)...\n\nMy prompt: List the dates that are outlined in green\nThe dates outlined in green in the image are:\nFriday, 13-Sep-24 Friday, 27-Sep-24 Friday, 11-Oct-24 Friday, 8-Nov-24 Friday, 6-Dec-24 Friday, 20-Dec-24 Friday, 17-Jan-25 Friday, 31-Jan-25 Friday, 28-Feb-25 Friday, 14-Mar-25 Friday, 11-Apr-25 Friday, 25-Apr-25 Friday, 23-May-25\nThese dates correspond to the Friday column for students who have their first every-ot

In [26]:

summarizer = ArticleSummarizer()
summaries = summarizer.summarize_articles(articles)
print(summaries)

https://gregsramblings.com/stupid-but-useful-ai-tricks-creating-calendar-entries-from-an-image-using-anthropic-claude-35
Summary: A user used Anthropic Claude 3.5 to create an iCalendar file (.ics) from a JPG image of their jazz piano lesson schedule, saving them the hassle of manually entering each appointment into Google Calendar.
-------------
The text discusses the importance of thinking in graphs rather than lists when it comes to network security. It highlights how attackers can use visualizations and connections between systems to compromise a High Value Asset (HVA) by targeting dependent elements, such as terminal servers, admin accounts, and certificate authorities.

To protect against this, defenders should:

1. Visualize their network as a graph.
2. Implement controls to prune the graph, such as:
	* Reducing unwanted edges that create huge connectivity bursts.
	* Minimizing the number of admins using Just-In-Time/Just Enough techniques.
	* Using two-factor authentication.
	*

In [14]:
articles

[{'content': 'Create Calendar Entries with Anthropic Claude 3.5Greg’s RamblingsFollowGreg’s RamblingsFollowStupid but useful AI tricks: Creating calendar entries from an image using Anthropic Claude 3.5Greg Wilson·Aug 25, 2024·4 min readA few days ago, my jazz piano teacher sent me the new fall/winter schedule for my private jazz piano lessons -- 13 different dates -- as a JPG (mine are outlined in green marker):\n\nI was too lazy to go make 13 entries in Google Calendar, so I decided to see if Claude could help me out:\nI first uploaded the jpg to Claude 3.5 Sonnet)...\n\nMy prompt: List the dates that are outlined in green\nThe dates outlined in green in the image are:\nFriday, 13-Sep-24 Friday, 27-Sep-24 Friday, 11-Oct-24 Friday, 8-Nov-24 Friday, 6-Dec-24 Friday, 20-Dec-24 Friday, 17-Jan-25 Friday, 31-Jan-25 Friday, 28-Feb-25 Friday, 14-Mar-25 Friday, 11-Apr-25 Friday, 25-Apr-25 Friday, 23-May-25\nThese dates correspond to the Friday column for students who have their first every-ot

In [None]:
base_url = "https://hacker-news.firebaseio.com/v0"
response = requests.get(f"{base_url}/topstories.json")

In [None]:
trending_list = response.json()
top_k = 2

In [None]:
chain = (prompt_template | llm | StrOutputParser()).invoke

In [1]:
from typing import List
import requests

from bs4 import BeautifulSoup
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_ollama import ChatOllama
from langchain.schema import StrOutputParser


class HackernewsNewestFetcher:
    def __init__(self):
        self.base_url = "https://hacker-news.firebaseio.com/v0"

    def fetch_articles(self, top_k: int) -> List[dict]:
        response = requests.get(f"{self.base_url}/topstories.json?print=pretty")
        trending_list = response.json()
        articles = []

        for id in trending_list[:top_k]:
            post = requests.get(f"{self.base_url}/item/{id}.json?print=pretty").json()
            if "url" in post:
                try:
                    content = self.fetch_content(post["url"])
                    articles.append(
                        {"content": content, "title": post["title"], "url": post["url"]}
                    )

                except requests.RequestException as e:
                    print(f"Can't download {post}, skipped. Error: {e}")
                # except:
                # 	print(f"Can't download {post}, skipped")
            elif "text" in post:
                articles.append(
                    {
                        "content": post["text"],
                        "title": post["title"],
                        "url": f"https://news.ycombinator.com/item?id={id}",
                    }
                )

        return articles

    def fetch_content(self, url: str) -> str:
        response = requests.get(url)
        soup = BeautifulSoup(response.content, "html.parser")
        return soup.get_text()


def summarize_articles(articles: List[dict]) -> str:
    prompt_template = """
	You will be provided a few of the top posts in HackerNews, followed by their URL.
	For each post, provide a brief summary followed by the URL the full post can be found at.

	Posts:
	{article_summaries}
	"""

    # Prepare the article summaries
    article_summaries = "\n\n".join(
        [
            f"Content: {article['content']}\nURL: {article['url']}"
            for article in articles
        ]
    )

    prompt = PromptTemplate(
        input_variables=["article_summaries"], template=prompt_template
    )

    llm = ChatOllama(
        model="llama3.1:8b",
        temperature=0,
    )

    output_parser = StrOutputParser()

    chain = prompt | llm | output_parser

    output = chain.invoke(
        {
            "article_summaries": article_summaries,
        }
    )
    return output

In [2]:
# Main execution
fetcher = HackernewsNewestFetcher()
articles = fetcher.fetch_articles(top_k=2)
summaries = summarize_articles(articles)
print(summaries)

Here's a brief summary of each post followed by the URL where the full content can be found:

**Post 1:**
Summary: The author uses Anthropic Claude 3.5 to create calendar entries from an image of their jazz piano lesson schedule. They upload the image, ask Claude to identify the dates outlined in green, and then use those dates to create a calendar ICS file with appointments for each date at 2pm Pacific Time.
URL: https://gregsramblings.com/stupid-but-useful-ai-tricks-creating-calendar-entries-from-an-image-using-anthropic-claude-35

**Post 2:**
Summary: The author asks the community what they're working on and if they have any new ideas. This is a general question with no specific content.
URL: https://news.ycombinator.com/item?id=41342017
