# Making a news processor with LLMs and vector search
## The problem: out-of-date training data

In [2]:
import getpass

import openai

# Read the key with getpass rather than input() so the secret is not echoed
# back into the cell output when it is typed.
openai.api_key = getpass.getpass("Please provide your OpenAI API key: ")

In [3]:
# The question used throughout the notebook; it refers to events of "this week".
query = "What's been going on in mexicos alien hearings this week??"
# Baseline: send the question to gpt-4 with only a generic system prompt and
# no news articles supplied as context.
response = openai.ChatCompletion.create(
    model="gpt-4",
    messages=[
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": query}
  ]
)
# Bare last expression so the notebook displays the assistant's reply message.
response.choices[0].message

<OpenAIObject at 0x1325ae270> JSON: {
  "role": "assistant",
  "content": "I'm an AI and I don't have real-time capabilities, but as of the last update, no details have been shared regarding any alien hearings in Mexico. Please, check the latest news from reliable sources for the most accurate information."
}

In [4]:
import gnews

# Fetch top news through the gnews client configured with period='7d' and
# max_results=100 (presumably: the last seven days, capped at 100 results --
# confirm against the gnews documentation).
articles = gnews.GNews(period='7d', max_results=100).get_top_news()
# Bare last expression: displays the full list of fetched article dicts.
articles

[{'title': 'Hunter Biden indicted by special counsel on felony gun charges - ABC News',
  'description': "Hunter Biden indicted by special counsel on felony gun charges  ABC NewsRep. Raskin reacts to Hunter Biden's federal indictment  MSNBCHunter Biden indicted on federal gun charges  Fox NewsHunter Biden indicted on federal firearms charges in long-running probe weeks after plea deal failed  Yahoo NewsBREAKING: Hunter Biden indicted on federal gun charges  MSNBCView Full Coverage on Google News",
  'published date': 'Thu, 14 Sep 2023 18:27:31 GMT',
  'url': 'https://news.google.com/rss/articles/CBMiZWh0dHBzOi8vYWJjbmV3cy5nby5jb20vVVMvaHVudGVyLWJpZGVuLWluZGljdGVkLXNwZWNpYWwtY291bnNlbC1mZWxvbnktZ3VuLWNoYXJnZXMvc3Rvcnk_aWQ9MTAzMTY4MzEy0gEA?oc=5&hl=en-US&gl=US&ceid=US:en',
  'publisher': {'href': 'https://abcnews.go.com', 'title': 'ABC News'}},
  'published date': 'Thu, 14 Sep 2023 17:53:00 GMT',
  'url': 'https://news.google.com/rss/articles/CBMiYGh0dHBzOi8vd3d3LmNubi5jb20vMjAyMy8wOS8xNC

In [5]:
from dataclasses import dataclass, field
from typing import List, Dict

import numpy as np

@dataclass
class EmbeddedNewsArticle:
    """A news article paired with the embedding vector computed for it.

    Two instances compare equal when their ``title``, ``description`` and
    ``url`` match. The embedding is excluded from the comparison: ``==`` on
    NumPy arrays is element-wise, so including it would make the
    dataclass-generated ``__eq__`` raise ``ValueError`` instead of returning
    a bool.
    """

    title: str
    description: str
    url: str
    # Embedding vector for the article; deliberately not part of ==.
    embedding: np.ndarray = field(compare=False)

def generate_embedded_articles(
    gnews_response: List[Dict], batch_size: int = 100
) -> "List[EmbeddedNewsArticle]":
    """Embed each article's description and wrap the results.

    Descriptions are sent to the embeddings endpoint in batches instead of
    one request per article, so the number of network round-trips is
    ``ceil(len(gnews_response) / batch_size)``.

    Args:
        gnews_response: Article dicts; each must provide the "title",
            "description" and "url" keys.
        batch_size: Maximum number of descriptions sent in a single
            embeddings request.

    Returns:
        One EmbeddedNewsArticle per input article, in input order. An empty
        input yields an empty list without any request being made.

    Raises:
        ValueError: If ``batch_size`` is less than 1.
        KeyError: If an article lacks one of the required keys.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    # Materialise once so any iterable is accepted and can be sliced.
    articles = list(gnews_response)
    embedded_articles = []
    for start in range(0, len(articles), batch_size):
        batch = articles[start:start + batch_size]
        # The text that actually gets embedded; might want to do some more here.
        chunks_to_embed = [article["description"] for article in batch]
        response = openai.Embedding.create(
            model="text-embedding-ada-002",
            input=chunks_to_embed,
        )
        # Each returned item is tagged with the index of the input it belongs
        # to; order by that index rather than relying on response order.
        ordered_items = sorted(response["data"], key=lambda item: item["index"])
        for article, item in zip(batch, ordered_items):
            embedded_articles.append(
                EmbeddedNewsArticle(
                    title=article["title"],
                    description=article["description"],
                    url=article["url"],
                    embedding=np.array(item["embedding"]),
                )
            )
    return embedded_articles

# Embed the fetched articles once, up front, and keep the results for later cells.
embedded_articles = generate_embedded_articles(articles)


In [11]:
def rank(query: str, docs: "List[EmbeddedNewsArticle]", k: int = 10) -> "List[EmbeddedNewsArticle]":
    """Return the ``k`` articles whose embeddings score highest against ``query``.

    The query is embedded with the same model as the articles, and each
    article is scored by the dot product of its embedding with the query
    embedding.

    Args:
        query: Free-text question to rank the articles against.
        docs: Articles to rank; each must expose an ``embedding`` array.
        k: Number of top-scoring articles to return.

    Returns:
        Up to ``k`` articles ordered from highest to lowest score. Articles
        with equal scores keep their relative order from ``docs``.
    """
    embedded_query_list = openai.Embedding.create(
        model="text-embedding-ada-002",
        input=query
    )["data"][0]["embedding"]
    embedded_query = np.array(embedded_query_list)
    # naive ranking using ada
    scored_articles = [(doc.embedding.dot(embedded_query), doc) for doc in docs]
    # Sort on the score only. Sorting the bare (score, article) tuples makes
    # Python compare the articles themselves whenever two scores tie, and
    # articles define no ordering, so a tie would raise.
    scored_articles.sort(key=lambda record: record[0], reverse=True)
    return [doc for _, doc in scored_articles[:k]]


# Keep only the 3 highest-ranked articles for the question.
search_results  = rank(query, embedded_articles, 3)

In [12]:
def to_prompt(articles: List[EmbeddedNewsArticle]):
    """Build the prompt context: each article's description, one per line."""
    return "\n".join(article.description for article in articles)

# Ask gpt-4 the same question again, this time preceded by the descriptions of
# the retrieved articles. Rebinds `response`.
response = openai.ChatCompletion.create(
    model="gpt-4",
    messages=[
    {"role": "system", "content": "Help me understand these news articles based on my question"},
    # First user message: the retrieved article descriptions, newline-joined.
    {"role": "user", "content": to_prompt(search_results)},
    # Second user message: the question itself.
    {"role": "user", "content": query}
  ]
)
# Bare last expression so the notebook displays the assistant's reply message.
response.choices[0].message

<OpenAIObject at 0x137430230> JSON: {
  "role": "assistant",
  "content": "The information provided does not contain details about any alien hearings in Mexico this week. However, it does note that NASA has announced the findings of a study on Unidentified Aerial Phenomena (UAP), commonly known as UFOs, and plans to appoint a chief for ongoing UFO research. Currently, no evidence of aliens has been found. This is based on the report's headlines from various sources such as Reuters, BBC, LiveNOW from FOX, and Yahoo News."
}