In [1]:
import requests
import xmltodict
from pydantic import BaseModel

In [2]:
class Article(BaseModel):
    """A Hacker News RSS entry selected by `find_articles`."""

    # Headline of the entry (`find_articles` stores it lowercased)
    title: str
    # URL the entry points to
    link: str

In [3]:
# Hacker News RSS
URL = "https://news.ycombinator.com/rss"


def find_articles() -> list[Article]:
    """Finds AI-related articles in the Hacker News RSS feed.

    Downloads the feed at ``URL``, drops entries that link back to Hacker News
    itself, "Show HN" posts and job postings, and keeps the entries whose
    title contains at least one of the keywords as a standalone,
    space-separated word.

    Returns:
        The matching articles in feed order. Titles are lowercased.

    Raises:
        requests.RequestException: If the feed cannot be downloaded or the
            server answers with an error status.
    """
    # A timeout keeps the call from hanging forever on a stalled connection,
    # and an HTTP error is reported as such instead of as an XML parse error.
    rss_response = requests.get(URL, timeout=10)
    rss_response.raise_for_status()
    data = xmltodict.parse(rss_response.content)

    # Find all articles with these keywords
    keywords = {
        "ai",
        "genai",
        "lightning",
        "pytorch",
        "llm",
        "llms",
        "ml",
        "rag",
        "nlp",
        "openai",
        "gemma",
        "anthropic",
    }

    # xmltodict yields a single dict (not a list) when the channel holds
    # exactly one <item>, and no "item" key at all when it holds none.
    items = data["rss"]["channel"].get("item") or []
    if isinstance(items, dict):
        items = [items]

    articles = []
    for item in items:
        title = item["title"].lower()
        link = item["link"]

        # Skip articles from Hacker News, Show HN, and job postings.
        # The title is already lowercased, so the prefixes must be too.
        if (
            link.startswith("https://news.ycombinator.com")
            or title.startswith("show hn")
            or "is hiring" in title
        ):
            continue

        # Keep the entry if any keyword appears as a whole word in the title
        if not keywords.isdisjoint(title.split(" ")):
            articles.append(Article(title=title, link=link))

    return articles

In [4]:
# Run the feed filter once and list every hit as "title - link".
articles = find_articles()
for article in articles:
    print(article.title, "-", article.link)

llm.c – llm training in simple, pure c/cuda - https://github.com/karpathy/llm.c
hello olmo: a truly open llm - https://blog.allenai.org/hello-olmo-a-truly-open-llm-43f7e7359222?gi=760105621962
after ai beat them, professional go players got better and more creative - https://www.henrikkarlsson.xyz/p/go


In [5]:
class ArticlePage(BaseModel):
    """Text scraped from a rendered article page by `fetch_article_page`."""

    # Text of the page's <title> element
    title: str
    # Content of the description meta tag; empty string when none is found
    description: str
    # Visible text of the page's <body>
    content: str

In [6]:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup

In [7]:
def fetch_article_page(url: str) -> ArticlePage:
    """Fetches the article page with headless Chrome and extracts its text.

    Args:
        url: Address of the article to load.

    Returns:
        An ``ArticlePage`` holding the page's <title> text, its description
        meta tag (empty string when the page has none) and the visible text
        of <body>.
    """
    options = Options()
    options.add_argument("--headless=new")
    driver = webdriver.Chrome(options=options)

    # The browser process must be shut down even if loading or scraping
    # raises, otherwise every failure leaks a Chrome instance.
    try:
        driver.get(url)

        # wait a few seconds for ajax content to load
        time.sleep(5)

        # grabs all the visible content of the page
        content = driver.find_element(by=By.CSS_SELECTOR, value="body").text

        # metadata
        head_html = driver.execute_script("return document.head.innerHTML;")
    finally:
        driver.quit()

    head_soup = BeautifulSoup(head_html or "", "html.parser")

    # grab seo metadata for title and description
    title_tag = head_soup.title
    title = title_tag.text if title_tag is not None else ""

    # Standard description first, then Open Graph. Open Graph tags use the
    # `property` attribute; `name="og:description"` is kept as a last resort
    # for pages that declare it that way.
    desc_tag = None
    for attrs in (
        {"name": "description"},
        {"property": "og:description"},
        {"name": "og:description"},
    ):
        desc_tag = head_soup.find("meta", attrs=attrs)
        if desc_tag is not None:
            break

    description = ""

    if desc_tag is not None:
        # A meta tag without a `content` attribute yields an empty description
        description = desc_tag.get("content", "")

    return ArticlePage(title=title, description=description, content=content)

In [28]:
import re
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

In [39]:
# Prompt sent to the model; `{content}` is replaced with the article text.
prompt_template = """
The content of an article is:

{content}

Write some commentary about a key point of the article's contents and encourage the reader to check it out in a professional tone for a Twitter post.
The entire post should be two sentences. Use a couple emojis too.
Respond with just the post, no additional commentary, no notes, no link.
"""

# The OpenAI chat client is pointed at Together's API through `base_url`.
# NOTE(review): no API key is passed here — presumably it is picked up from
# the environment; confirm which variable must hold the Together key.
model = ChatOpenAI(
    base_url="https://api.together.xyz/v1",
    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
)
prompt = PromptTemplate.from_template(prompt_template)
parser = StrOutputParser()

# prompt -> model -> plain string
chain = prompt | model | parser


def generate_post(article: ArticlePage, max_content_length: int = 7000) -> str:
    """Generates the text of a social media post about an article.

    Args:
        article: The scraped page; only its ``content`` is sent to the model.
        max_content_length: Maximum number of characters of the article text
            to put in the prompt. Longer text is cut and suffixed with "...".

    Returns:
        The model's answer with surrounding quotes and hashtags removed and
        all whitespace runs collapsed to single spaces.
    """
    content = article.content
    if len(content) > max_content_length:
        content = content[:max_content_length] + "..."

    # Send the truncated text, not `article.content`, so the cap above
    # actually limits the prompt size.
    output = chain.invoke({"content": content})
    output = output.strip().strip('"')
    # Drop hashtags, then collapse the whitespace they leave behind
    output = re.sub(r"#\w+", "", output)
    output = re.sub(r"\s+", " ", output)
    # Removing a trailing hashtag can leave a dangling space at the end
    return output.strip()

In [10]:
import os
from requests_oauthlib import OAuth1Session

In [11]:
def post_twitter(post_content: str, url: str):
    """Publishes a tweet made of the post text followed by the link.

    Credentials are read from the X_CONSUMER_KEY, X_CONSUMER_SECRET, X_TOKEN
    and X_SECRET environment variables. Any response other than HTTP 201 is
    printed to stdout instead of being raised.
    """
    env = os.environ

    # "Consumer Keys" under "Keys and Tokens" in the developer console
    consumer_key = env.get("X_CONSUMER_KEY")
    consumer_secret = env.get("X_CONSUMER_SECRET")
    # "Access Token and Secret" under "Keys and Tokens" in the developer console
    access_token = env.get("X_TOKEN")
    access_secret = env.get("X_SECRET")

    session = OAuth1Session(
        consumer_key,
        client_secret=consumer_secret,
        resource_owner_key=access_token,
        resource_owner_secret=access_secret,
    )

    # The link goes on its own line below the post text
    tweet = {"text": f"{post_content}\n{url}"}
    reply = session.post("https://api.twitter.com/2/tweets", json=tweet)

    # 201 Created is the only status treated as success
    if reply.status_code == 201:
        return

    print("Request returned an error: {} {}".format(reply.status_code, reply.text))

## Testing


In [42]:
# Manual smoke test: take the third match from the feed run above
# (hard-coded index, so the feed must have produced at least three matches)
# and scrape its page.
article = articles[2]
article_page = fetch_article_page(article.link)

In [43]:
# Generate the post text, then show its length and the text itself.
post_content = generate_post(article_page)
print(len(post_content))
print(post_content)

246
Fascinating article on the impact of AI on professional Go players' improvement 🎮✏️. Over 60% of the improvement in human moves was attributed to moves that deviated from the AI's suggestions, indicating increased human creativity. Check it out! 


In [44]:
# Sends the generated text plus the article link to the Twitter API.
post_twitter(post_content, article.link)