### Setup

In [1]:
# Dependencies
from datetime import date, timedelta  # date handling for fetching recent news
from IPython import display  # for pretty printing
import json  # for parsing the JSON api responses and model outputs
from numpy import dot  # for cosine similarity
import openai  # for using GPT and getting embeddings
import os  # for loading environment variables
import requests  # for making the API requests
from tqdm.notebook import tqdm  # for printing progress bars

# Load API credentials from environment variables instead of hardcoding them:
# a key written into a notebook is exposed to everyone who can read the file
# (any key that was ever committed this way should be revoked and rotated).
# os.environ[...] raises KeyError right here when a variable is unset, rather
# than failing later with an opaque authentication error.
news_api_key = os.environ["NEWS_API_KEY"]
openai.api_key = os.environ["OPENAI_API_KEY"]

# Chat model used for every completion request in this notebook.
GPT_MODEL = "gpt-3.5-turbo"


# Helper functions
def json_gpt(input: str):
    """Send `input` to the chat model and return its reply parsed as JSON.

    A system message tells the model to output only valid JSON. The content
    of the first returned choice is passed to `json.loads`, so a reply that
    is not valid JSON raises `json.JSONDecodeError`.
    """
    chat_messages = [
        {"role": "system", "content": "Output only valid JSON"},
        {"role": "user", "content": input},
    ]
    response = openai.ChatCompletion.create(
        model=GPT_MODEL,
        messages=chat_messages,
        temperature=0.5,
    )
    return json.loads(response.choices[0].message.content)


def embeddings(input: list[str]) -> list[list[float]]:
    """Embed texts with the `text-embedding-ada-002` model.

    Args:
        input: The texts to embed, forwarded unchanged to
            `openai.Embedding.create`.

    Returns:
        The `embedding` attribute of every item in the response's `data`,
        in the order the response lists them. Each embedding is a vector of
        floats (not strings, as the previous annotation claimed).
    """
    response = openai.Embedding.create(model="text-embedding-ada-002", input=input)
    return [data.embedding for data in response.data]



### Search

In [2]:
# User asks a question.
# This text is interpolated into the prompts built further down and is also
# appended to the generated search queries.
USER_QUESTION = "Who won the NBA championship? And who was the MVP? Tell me a bit about the last game."

In [3]:
# Prompt asking the model to expand the user's question into many search
# queries. The doubled braces in the "Format" line are f-string escapes, so the
# model sees a literal JSON example with a top-level "queries" array.
QUERIES_INPUT = f"""
You have access to a search API that returns recent news articles.
Generate an array of search queries that are relevant to this question.
Use a variation of related keywords for the queries, trying to be as general as possible.
Include as many queries as you can think of, including and excluding terms.
For example, include queries like ['keyword_1 keyword_2', 'keyword_1', 'keyword_2'].
Be creative. The more queries you include, the more likely you are to find relevant results.

User question: {USER_QUESTION}

Format: {{"queries": ["query_1", "query_2", "query_3"]}}
"""

# Ask the model for search queries and tack the original question onto the
# end for good measure, so the user's literal phrasing is searched as well.
queries = json_gpt(QUERIES_INPUT)["queries"] + [USER_QUESTION]

queries

['NBA championship winner',
 'MVP of NBA championship',
 'last game NBA championship',
 'NBA finals winner',
 'NBA finals MVP',
 'last game NBA finals',
 'NBA champion team',
 'NBA MVP',
 'last game of NBA season',
 'NBA playoffs winner',
 'MVP of NBA playoffs',
 'last game of NBA playoffs',
 'NBA finals result',
 'NBA championship game outcome',
 'who won NBA championship',
 'who was the NBA MVP',
 'last game of NBA finals',
 'Who won the NBA championship? And who was the MVP? Tell me a bit about the last game.']

In [5]:
def search_news(
    query: str,
    news_api_key: str = news_api_key,
    num_articles: int = 50,
    # NOTE: the 2023 NBA finals were played in June 2023, which is *before* this
    # default window; pass an earlier from_datetime to cover the finals themselves.
    from_datetime: str = "2023-07-18",
    to_datetime: str = "2023-07-30",
    timeout: float = 30.0,
) -> dict:
    """Query the NewsAPI `/v2/everything` endpoint and return the parsed JSON body.

    Args:
        query: Search string sent as the `q` parameter.
        news_api_key: API key sent as `apiKey`. The default is bound to the
            module-level `news_api_key` at the time this function is defined.
        num_articles: Sent as `pageSize`.
        from_datetime: Sent as `from` (start of the date window).
        to_datetime: Sent as `to` (end of the date window).
        timeout: Seconds `requests` waits on the server before giving up.
            Without a timeout a stalled connection would block the notebook
            indefinitely.

    Returns:
        The decoded JSON response. No status check is done here; callers
        inspect the `"status"` field themselves.

    Raises:
        requests.exceptions.Timeout: If the server does not respond in time.
    """
    response = requests.get(
        "https://newsapi.org/v2/everything",
        params={
            "q": query,
            "apiKey": news_api_key,
            "pageSize": num_articles,
            "sortBy": "relevancy",
            "from": from_datetime,
            "to": to_datetime,
        },
        timeout=timeout,
    )

    return response.json()


# Run every query and pool the returned articles; the first response whose
# status is not "ok" aborts the run with the API's error message.
articles = []

for query in queries:
    result = search_news(query)
    if result["status"] != "ok":
        raise Exception(result["message"])
    # extend in place instead of rebuilding the whole list on each iteration
    articles.extend(result["articles"])

# remove duplicates: keying by URL keeps one article per URL, in order of
# first appearance
articles = list({article["url"]: article for article in articles}.values())

print("Total number of articles:", len(articles))
print("Top 5 articles of query 1:", "\n")

for article in articles[0:5]:
    print("Title:", article["title"])
    print("Description:", article["description"])
    # the API may return a null `content`; treat it as empty text so the
    # slice below cannot raise TypeError on None
    print("Content:", (article["content"] or "")[0:100] + "...")
    print()

Total number of articles: 372
Top 5 articles of query 1: 

Title: The Open Championship 2023: TV Schedule Today, How to Watch, Stream All the Golf From Anywhere - CNET
Description: Can Rory McIlroy follow up his Scottish Open triumph with a win at Royal Liverpool?
Content: It's the final major of the season as the world's best golfers descend on the Royal Liverpool course...

Title: Pro-Am notebook: Lake Taylor’s Dereon Seabron makes debut at Pro-Am, NSU lands former Norview star Jaylani Darden
Description: NORFOLK — The Hampton Roads 7 Cities Pro-Am Summer Basketball League got a couple of surprise guests on Sunday when Dereon Seabron and Keyontae Johnson stopped by Norview High. Seabron, a Lake Taylor High grad, wasn’t selected in the 2022 NBA draft, but signe…
Content: NORFOLK The Hampton Roads 7 Cities Pro-Am Summer Basketball League got a couple of surprise guests o...

Title: Ranking: The highest-paid players in Los Angeles Lakers history
Description: HoopsHype breaks down the 12

### Re-rank

In [6]:
# Prompt for a "hypothetical answer": the model is asked to write what an
# answer would look like so that its embedding can be compared against the
# article embeddings when ranking search results. The doubled braces in the
# "Format" line are f-string escapes for a literal JSON example.
HA_INPUT = f"""
Generate a hypothetical answer to the user's question. This answer will be used to rank search results. 
Pretend you have all the information you need to answer, but don't use any actual facts. Instead, use placeholders
like NAME did something, or NAME said something at PLACE. 

User question: {USER_QUESTION}

Format: {{"hypotheticalAnswer": "hypothetical answer text"}}
"""

# json_gpt returns the parsed JSON reply; keep only the answer text.
hypothetical_answer = json_gpt(HA_INPUT)["hypotheticalAnswer"]

hypothetical_answer

'The Los Angeles Lakers won the NBA championship. LeBron James was named the MVP. In the last game, the Lakers defeated the Miami Heat with a final score of 110-102. LeBron James had an outstanding performance, scoring 35 points and leading his team to victory.'

In [7]:
# embeddings() takes a list of texts, so wrap the single answer in a list and
# unwrap the single resulting vector.
hypothetical_answer_embedding = embeddings([hypothetical_answer])[0]

# Embed a short summary of every article: title, description and the first
# 100 characters of the content.
article_embeddings = embeddings(
    [
        # `content` may be null in the API response; fall back to "" so the
        # slice cannot raise TypeError on None
        f"{article['title']} {article['description']} {(article['content'] or '')[0:100]}"
        for article in articles
    ]
)

# Calculate cosine similarity
# A plain dot product equals cosine similarity only for unit-length vectors;
# OpenAI documents its embeddings as normalized to length 1, which is what
# makes skipping the norm division valid here.
cosine_similarities = [
    dot(hypothetical_answer_embedding, article_embedding)
    for article_embedding in article_embeddings
]

cosine_similarities[0:10]

[0.7240690124617075,
 0.7231468186989247,
 0.8425722553161002,
 0.7907360971148554,
 0.7087313190622762,
 0.7382384822107005,
 0.7470723959215663,
 0.7380265554456951,
 0.8676547699152144,
 0.7851392821572015]

In [8]:
# Pair each article with its similarity score. Materialize the pairs as a
# list: a bare zip() is a one-shot iterator that sorted() would leave
# exhausted, so any later use of `scored_articles` would silently see nothing.
scored_articles = list(zip(articles, cosine_similarities))

# Sort articles by cosine similarity, most similar first
sorted_articles = sorted(scored_articles, key=lambda x: x[1], reverse=True)

# Print top 5 articles
print("Top 5 articles:", "\n")

for article, score in sorted_articles[0:5]:
    print("Title:", article["title"])
    print("Description:", article["description"])
    # the API may return a null `content`; treat it as empty text so the
    # slice cannot raise TypeError on None
    print("Content:", (article["content"] or "")[0:100] + "...")
    print("Score:", score)
    print()

Top 5 articles: 

Title: NBA offseason winners and losers: Lakers, Mavs did things right, but we can’t say the same for the champs
Description: There are unrestricted free agents still unsigned, restricted types still waiting on that offer and a couple of superstars still having staring contests, but by and large, the major business of the association’s 2023 offseason has concluded. So let’s get our …
Content: Los Angeles Lakers forward LeBron James (6) gets past Denver Nuggets center Nikola Joki (15) during ...
Score: 0.8676547699152144

Title: Everything you need to know about LeBron James
Description: This year, Los Angeles Lakers standout LeBron James beat the all-time career scoring record. Here’s everything to know about the
The post Everything you need to know about LeBron James appeared first on TheGrio.
Content: This year, Los Angeles Lakers standout LeBron James beat the all-time career scoring record. Here's ...
Score: 0.8586704074984006

Title: LeBron James says he’s not re

### Answer

In [9]:
# Keep only the fields the model needs in order to answer and cite sources,
# for the five highest-ranked articles.
formatted_top_results = [
    {field: article[field] for field in ("title", "description", "url")}
    for article, _score in sorted_articles[:5]
]

# Final prompt: the trimmed search results plus the original question.
ANSWER_INPUT = f"""
Generate an answer to the user's question based on the given search results. 
TOP_RESULTS: {formatted_top_results}
USER_QUESTION: {USER_QUESTION}

Include as much information as possible in the answer. Reference the relevant search result urls as markdown links.
"""

# Request a streamed completion so the answer can be rendered while it arrives.
completion = openai.ChatCompletion.create(
    stream=True,
    temperature=0.5,
    model=GPT_MODEL,
    messages=[{"role": "user", "content": ANSWER_INPUT}],
)

# Accumulate the streamed pieces and re-render the whole answer after each
# one; chunks whose delta has no "content" key contribute an empty string.
text = ""
for chunk in completion:
    text = text + chunk.choices[0].delta.get("content", "")
    display.clear_output(wait=True)
    display.display(display.Markdown(text))

Based on the search results, I couldn't find any information about the winner of the NBA championship or the MVP. However, I can provide you with some information about LeBron James, who is a standout player for the Los Angeles Lakers. He recently beat the all-time career scoring record and has stated that he is not retiring yet. You can find more information about LeBron James in this article: [Everything you need to know about LeBron James](http://thegrio.com/2023/07/24/everything-you-need-to-know-about-lebron-james/).

Unfortunately, I couldn't find any specific details about the last game or the NBA championship.