# Making a news processor with LLMs and vector search
## The problem: out-of-date training sets

In [1]:
import getpass
import os

import openai

# Prefer the OPENAI_API_KEY environment variable so the notebook can be re-run
# unattended; otherwise prompt with getpass so the key is not echoed into the
# cell output (input() would show the typed key in plain text).
openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass.getpass(
    "Please provide your OpenAI API key: "
)

In [9]:
query = "What's been going on in mexicos alien hearings this week??"

# Ask the chat model directly, with no retrieved context, to see how it
# answers a question about very recent events.
chat_messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": query},
]
response = openai.ChatCompletion.create(model="gpt-4", messages=chat_messages)
response.choices[0].message

<OpenAIObject at 0x10955d910> JSON: {
  "role": "assistant",
  "content": "I'm sorry, but as an AI, I don't have real-time access to current affairs or news. However, I suggest checking the latest news from reliable sources for any updates about the topic mentioned. You might find updates on websites of major news organizations, in their international affairs or technology section."
}

In [6]:
import gnews

# Build the client first, then fetch the top stories; the result is kept in
# `articles` for the cells below.
news_client = gnews.GNews(period='7d', max_results=100)
articles = news_client.get_top_news()

In [7]:
# Inspect the raw response: a list of dicts with 'title', 'description',
# 'published date', 'url' and 'publisher' keys (see the output below).
articles

[{'title': 'Hunter Biden indicted by special counsel on felony gun charges - ABC News',
  'description': "Hunter Biden indicted by special counsel on felony gun charges  ABC NewsHunter Biden indicted by federal prosecutors on gun charges  News 5 Cleveland WEWSRep. Raskin reacts to Hunter Biden's federal indictment  MSNBCHunter Biden indicted on federal gun charges  Fox NewsHunter Biden indicted on federal firearms charges in long-running probe weeks after plea deal failed  Yahoo NewsView Full Coverage on Google News",
  'published date': 'Thu, 14 Sep 2023 18:27:31 GMT',
  'url': 'https://news.google.com/rss/articles/CBMiZWh0dHBzOi8vYWJjbmV3cy5nby5jb20vVVMvaHVudGVyLWJpZGVuLWluZGljdGVkLXNwZWNpYWwtY291bnNlbC1mZWxvbnktZ3VuLWNoYXJnZXMvc3Rvcnk_aWQ9MTAzMTY4MzEy0gFpaHR0cHM6Ly9hYmNuZXdzLmdvLmNvbS9hbXAvVVMvaHVudGVyLWJpZGVuLWluZGljdGVkLXNwZWNpYWwtY291bnNlbC1mZWxvbnktZ3VuLWNoYXJnZXMvc3Rvcnk_aWQ9MTAzMTY4MzEy?oc=5&hl=en-US&gl=US&ceid=US:en',
  'publisher': {'href': 'https://abcnews.go.com', 'title':

In [23]:
from dataclasses import dataclass, field
from typing import List, Dict

import numpy as np

@dataclass
class EmbeddedNewsArticle:
    """A news article's text fields bundled with an embedding vector.

    Equality is decided by ``title``, ``description`` and ``url`` only; the
    embedding is deliberately left out of comparisons (see the field comment).
    """
    title: str
    description: str
    url: str
    # Excluded from ==: ndarray comparison is element-wise, so the generated
    # __eq__ would raise "The truth value of an array with more than one
    # element is ambiguous" whenever two articles with matching text fields
    # were compared. The field is still required and still shown in repr().
    embedding: np.ndarray = field(compare=False)

def generate_embedded_articles(
    gnews_response: List[Dict],
    model: str = "text-embedding-ada-002",
) -> List["EmbeddedNewsArticle"]:
    """Embed each article's description and wrap it as an EmbeddedNewsArticle.

    Args:
        gnews_response: Article dicts; each must provide the keys "title",
            "description" and "url".
        model: Name of the OpenAI embedding model to call. Queries that are
            later compared against these embeddings should use the same model.

    Returns:
        A concrete list (not a lazy iterator) with one EmbeddedNewsArticle per
        input dict, in input order. An empty input yields an empty list and
        makes no API calls.

    Raises:
        KeyError: If an article dict is missing one of the required keys.
    """
    def embedded_article_factory(article: Dict) -> "EmbeddedNewsArticle":
        title = article["title"]
        description = article["description"]
        url = article["url"]
        # Single place to change what text gets embedded.
        chunk_to_embed = description  # might want to do some more here
        # One embeddings request per article; the vector is read from the
        # first entry of the response's "data" list.
        embedding_list = openai.Embedding.create(
            model=model,
            input=chunk_to_embed,
        )["data"][0]["embedding"]
        return EmbeddedNewsArticle(
            title=title,
            description=description,
            url=url,
            embedding=np.array(embedding_list),
        )

    # Build a real list so the result can be iterated more than once.
    return [embedded_article_factory(article) for article in gnews_response]

# Embed all fetched articles up front so ranking only has to embed the query.
embedded_articles = generate_embedded_articles(articles)


In [24]:
def rank(
    query: str,
    docs: List["EmbeddedNewsArticle"],
    k: int = 10,
    model: str = "text-embedding-ada-002",
) -> List[tuple]:
    """Return the ``k`` articles whose embeddings score highest against ``query``.

    Args:
        query: Free-text question to embed and match against the articles.
        docs: Articles that each carry a precomputed ``embedding`` array.
        k: Maximum number of results to return.
        model: Name of the OpenAI embedding model used for the query. It
            should be the model that produced the article embeddings,
            otherwise the scores are not comparable.

    Returns:
        A list of ``(score, article)`` tuples sorted by descending score,
        where ``score`` is the dot product of the article embedding with the
        query embedding. Articles with equal scores keep their input order.
    """
    embedded_query_list = openai.Embedding.create(
        model=model,
        input=query,
    )["data"][0]["embedding"]
    embedded_query = np.array(embedded_query_list)
    # naive ranking using ada: score each article by a raw dot product
    scored_articles = [(doc.embedding.dot(embedded_query), doc) for doc in docs]
    # Sort on the score alone. Sorting the bare tuples would fall through to
    # comparing the article objects whenever two scores tie, and articles
    # define no ordering, so that raised TypeError.
    scored_articles.sort(key=lambda scored: scored[0], reverse=True)
    return scored_articles[:k]


# Show the best matches for the earlier question as (score, article) pairs;
# k is left at its default of 10.
rank(query, embedded_articles)

[(0.8025254021210677,
  EmbeddedNewsArticle(title='NASA names chief of UFO research; panel sees no alien evidence - Reuters', description="NASA names chief of UFO research; panel sees no alien evidence  ReutersNasa's UFO report: What we learned from UAP study  BBCAncient Aliens Live at the Moore Theatre tonight  FOX 13 SeattleNASA Names UFO Chief to Investigate Sightings of Mystery Craft  BloombergNASA says it will appoint secret UFO chief  Yahoo NewsView Full Coverage on Google News", url='https://news.google.com/rss/articles/CBMiYmh0dHBzOi8vd3d3LnJldXRlcnMuY29tL3NjaWVuY2UvbmFzYS1wYW5lbC1jYWxscy1hZ2VuY3ktcGxheS1sYXJnZXItcm9sZS1zdHVkeWluZy11Zm9zLTIwMjMtMDktMTQv0gEA?oc=5&hl=en-US&gl=US&ceid=US:en', embedding=array([ 0.01178558, -0.00706467,  0.00550884, ...,  0.00690441,
         -0.00302485, -0.00620997]))),
 (0.7681939306000528,
  EmbeddedNewsArticle(title="House Republicans push to condemn New Mexico gov for 'blatantly violating' Second Amendment - Fox News", description="House Repub