In [1]:
import requests
import xmltodict
from pydantic import BaseModel

In [2]:
class Article(BaseModel):
    title: str
    link: str

In [32]:
# Hacker News RSS
URL = "https://news.ycombinator.com/rss"


def find_articles() -> list[Article]:
    """Finds a top AI-related article from HackerNews"""
    rss_response = requests.get(URL)
    data = xmltodict.parse(rss_response.content)

    # Find all articles with these keywords
    keywords = {
        "ai",
        "genai",
        "lightning",
        "pytorch",
        "llm",
        "llms",
        "ml",
        "rag",
        "nlp",
        "openai",
        "gemma",
        "anthropic",
        "google",
        "apple",
        "microsoft"
        "meta"
        "ollama"
    }

    articles = []
    for item in data["rss"]["channel"]["item"]:
        title = item["title"].lower()
        link = item["link"]

        # Skip articles from Hacker News, Show HN, and job postings
        if (
            link.startswith("https://news.ycombinator.com")
            or title.startswith("Show HN")
            or "is hiring" in title
        ):
            continue

        # Check if any of the keywords are in the title
        if any(word in title.split(" ") for word in keywords):
            articles.append(Article(title=title, link=link))

    return articles

In [4]:
class ArticlePage(BaseModel):
    title: str
    description: str
    content: str

In [5]:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup

In [6]:
def fetch_article_page(url: str):
    """Fetches the article page"""
    options = Options()
    options.add_argument("--headless=new")
    driver = webdriver.Chrome(options=options)
    driver.get(url)

    # wait a few seconds for ajax content to load
    time.sleep(5)

    # grabs all the visible content of the page
    content = driver.find_element(by=By.CSS_SELECTOR, value="body").text

    # metadata
    head_html = driver.execute_script("return document.head.innerHTML;")
    head_soup = BeautifulSoup(head_html, "html.parser")

    driver.quit()

    # grab seo metadata for title and description
    title = head_soup.title.text

    desc_tag = head_soup.find("meta", attrs={"name": "description"})

    if desc_tag is None:
        # Fall back to open graph tags
        desc_tag = head_soup.find("meta", attrs={"name": "og:description"})

    description = ""

    if desc_tag is not None:
        description = desc_tag["content"]

    return ArticlePage(title=title, description=description, content=content)

In [7]:
import re
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

In [8]:
prompt_template = """
The content of an article is:

{content}

Write some commentary about about a key point of the article's contents and encourage the reader to check it out in a professinal tone for a Twitter post.
The entire post should be two sentences. Use a couple emojis too.
Respond with just the post, no additional commentary, no notes, no link.
"""

model = ChatOpenAI(
    base_url="https://api.together.xyz/v1", model="mistralai/Mixtral-8x7B-Instruct-v0.1"
)
prompt = PromptTemplate.from_template(prompt_template)
parser = StrOutputParser()

chain = prompt | model | parser


def generate_post(article: ArticlePage):
    max_content_length = 7000
    content = (
        len(article.content) > max_content_length
        and article.content[:max_content_length] + "..."
        or article.content
    )
    output = chain.invoke({"content": article.content})
    output = output.strip().strip('"')
    output = re.sub(r"#\w+", "", output)
    output = re.sub(r"\s+", " ", output)
    return output

In [9]:
import os
from requests_oauthlib import OAuth1Session

In [26]:
def post_twitter(post_content: str, url: str):
    """Posts the content to Twitter"""
    oauth = OAuth1Session(
        # "Consumer Keys" under "Keys and Tokens" in the developer console
        os.environ.get("X_CONSUMER_KEY"),
        client_secret=os.environ.get("X_CONSUMER_SECRET"),
        # "Access Token and Secret" under "Keys and Tokens" in the developer console
        resource_owner_key=os.environ.get("X_TOKEN"),
        resource_owner_secret=os.environ.get("X_SECRET"),
    )

    response = oauth.post(
        "https://api.twitter.com/2/tweets",
        json={"text": f"{post_content}\n{url}"},
    )

    if response.status_code != 201:
        print(
            "Request returned an error: {} {}".format(
                response.status_code, response.text
            )
        )

In [11]:
from linkedin_api.clients.restli.client import RestliClient

In [24]:
def post_linkedin(post_content: str, url: str):
    restli_client = RestliClient()
    linkedin_token = os.getenv("LINKEDIN_TOKEN")

    # Need to call this to get the current user's linked-in user id
    me_response = restli_client.get(resource_path="/me", access_token=linkedin_token)
    print(me_response.entity)
    user_uri = f"urn:li:person:{me_response.response}"
    # print(user_uri)

## Testing


In [33]:
articles = find_articles()
for article in articles:
    print(f"{article.title} - {article.link}")

intel gaudi 3 ai accelerator - https://www.intel.com/content/www/us/en/newsroom/news/vision-2024-gaudi-3-ai-accelerator.html


In [16]:
article = articles[0]
article_page = fetch_article_page(article.link)

In [17]:
post_content = generate_post(article_page)
post_content
print(len(post_content))
print(post_content)

355
Intel's new Gaudi 3 AI accelerator offers 4x AI compute for BF16 and 1.5x increase in memory bandwidth, providing a powerful solution for businesses seeking efficient, cost-effective, and more energy-efficient AI solutions. Check it out for versatile open community-based software, industry-standard Ethernet networking, and impressive performance gains. 


In [None]:
post_twitter(post_content, article.link)

In [30]:
url = "https://developers.googleblog.com/2024/04/gemini-15-pro-in-public-preview-with-new-features.html"

article_page = fetch_article_page(url)
post_content = generate_post(article_page)
print(len(f'{post_content}'))
print(post_content)

307
Exciting news for developers! Google's Gemini 1.5 Pro now available in 180+ countries with audio understanding capabilities & new system instructions. Check it out and unlock new use cases for your projects. 🤖🚀


In [31]:
post_twitter(post_content, url)