In [None]:
# !pip install feedparser
# !pip install -U weaviate-client[agents]
# !pip install langchain langchain-text-splitters langchain-community
# !pip install --upgrade huggingface-hub
# !pip install -U "langchain[huggingface]"
# !pip install --upgrade transformers

In [None]:
# os.environ["WEAVIATE_URL"] =
# os.environ["WEAVIATE_API_KEY"] =
# os.environ["HUGGINGFACEHUB_API_TOKEN"] =

## Set up Weaviate

In [None]:
import os
import weaviate
from weaviate.classes.config import Configure

In [None]:
def get_weaviate_client():
  """Connect to the Weaviate Cloud cluster described by the environment.

  The cluster URL is read from ``WEAVIATE_URL`` and the API key from
  ``WEAVIATE_API_KEY``; a missing variable raises ``KeyError``.

  Returns:
    The client object returned by ``weaviate.connect_to_weaviate_cloud``.
    Callers in this notebook use it as a context manager
    (``with get_weaviate_client() as client: ...``).
  """
  return weaviate.connect_to_weaviate_cloud(
    cluster_url=os.environ["WEAVIATE_URL"],
    auth_credentials=os.environ["WEAVIATE_API_KEY"],
  )


In [None]:
# create collection
# NOTE: this cell is destructive. If an "Article" collection already exists it
# is deleted before being created again, so anything previously stored in it
# is discarded whenever the cell is re-run.
with get_weaviate_client() as client:
  if client.collections.exists("Article"):
    client.collections.delete("Article")

  # Vector configuration comes from Configure.Vectors.text2vec_weaviate();
  # no properties are declared here.
  articles_collection = client.collections.create(
      name="Article",
      vector_config=Configure.Vectors.text2vec_weaviate()
  )

## Data Pipeline - Download and Load Articles into Weaviate

In [None]:
import newspaper

In [None]:
# Build the source from the feed URL; memoize_articles=False is passed so the
# build is not affected by newspaper's article memoization.
site = newspaper.build(
    'https://rss.app/feeds/t59b2yLZDBBihPaf.xml',
    memoize_articles=False,
)

# Keep only the articles that download, parse, and yield non-empty text.
articles = []
for article in site.articles:
    try:
        article.download()
        article.parse()
        if not article.text:
            continue  # nothing was extracted for this article; skip it
        articles.append(article)
    except Exception as e:
        # Best effort: report the failure and move on to the next article.
        print(f"Could not download or parse article from {article.url}: {e}")

In [None]:
def article_to_object(article):
  """Convert a parsed article into the property dict stored in Weaviate.

  Args:
    article: Object exposing ``title``, ``authors``, ``publish_date``,
      ``text`` and a ``meta_data`` mapping.

  Returns:
    A dict with the keys ``title``, ``author``, ``publish_date``, ``text``
    and ``section``. ``section`` is taken from ``meta_data["meta-section"]``
    and falls back to an empty string when that key is absent.
  """
  properties = {}
  properties["title"] = article.title
  properties["author"] = article.authors
  properties["publish_date"] = article.publish_date
  properties["text"] = article.text
  properties["section"] = article.meta_data.get("meta-section", "")
  return properties

In [None]:
# Import every downloaded article into the "Article" collection in batches.
with get_weaviate_client() as client:
  articles_collection = client.collections.get("Article")
  with articles_collection.batch.fixed_size(batch_size=200) as batch:
    for article in articles:
      data = article_to_object(article)
      batch.add_object(properties=data)

  # Batch imports do not raise for individual objects that fail to insert;
  # failures are collected on the batch instead, so surface them here rather
  # than silently ending up with a partially populated collection.
  failed_objects = articles_collection.batch.failed_objects
  if failed_objects:
    print(f"Number of failed imports: {len(failed_objects)} of {len(articles)}")
    print(f"First failed object: {failed_objects[0]}")

## Query Database

In [None]:
# Sanity check: fetch the single article closest to the query text.
with get_weaviate_client() as client:
  articles_collection = client.collections.get("Article")
  response = articles_collection.query.near_text(
      # The original query string started with a stray "=" ("=Tottenham Hotspur").
      query="Tottenham Hotspur",
      limit=1
  )

# The response object is read after the `with` block has closed the connection.
for obj in response.objects:
  print(obj.properties['text'])

If you purchase an independently reviewed product or service through a link on our website, Variety may receive an affiliate commission.

In one of the most fierce and heated rivalries in all of sports, Tottenham Hotspur takes on Arsenal in a battle of North London Derby. These two clubs have been at odds since the early 1910s when Arsenal moved from Woolwich in the south to Highbury the north at the Spurs doorstep — just four miles away. Ever since, Spurs and Arsenal have been rivals, as Arsenal has dominated matchups.

On Sunday, Feb. 22, you can watch Tottenham Hotspur vs. Arsenal at Tottenham Hotspur Stadium in London, England. Kickoff is at 11:30 a.m. ET/8:30 a.m. PT, while the Premiere League match broadcasts on USA Network.

Want to watch the match online? USA Network is a network available via Sling Blue.

Right now, Sling Blue goes for $54.99 per month, while the streaming package comes with USA Network for the Texas vs. Georgia game. Sling Blue also comes with ABC, NBC and FO

## Build the Retriever and Prompt Constructor


In [None]:
def retrieve_context(query: str, k: int = 1) -> list[str]:
  """Return the text of the articles nearest to ``query``.

  Opens a fresh Weaviate connection, runs a ``near_text`` search against the
  "Article" collection and collects the ``text`` property of each hit.

  Args:
    query: Natural-language search text.
    k: Maximum number of articles to return (passed as ``limit``).

  Returns:
    A list of article texts, in the order the search returned them.
  """
  with get_weaviate_client() as weaviate_client:
    collection = weaviate_client.collections.get("Article")
    hits = collection.query.near_text(query=query, limit=k).objects
    texts = []
    for hit in hits:
      texts.append(hit.properties['text'])
    return texts

In [None]:
def construct_prompt(input: str, k: int = 1):
    """Build the chat messages (system + user) for a retrieval-augmented question.

    The retrieved documents are rendered as numbered sections ("Source 1",
    "Source 2", ...) so the model can follow the system prompt's instruction
    to cite source numbers. Previously the raw Python list repr was
    interpolated into the prompt, which gave the model nothing to cite.

    Args:
        input: The user's question; also used as the retrieval query.
        k: Number of context documents to retrieve. Defaults to 1, which
            matches the previous behaviour.

    Returns:
        A list of two message dicts (``role``/``content``): the system
        instructions followed by the user message holding context and question.
    """
    # timezone.utc is equivalent to datetime.UTC but also exists before Python 3.11.
    from datetime import datetime, timezone

    today = datetime.now(timezone.utc).strftime("%B %d, %Y")

    documents = retrieve_context(input, k)
    if documents:
        context = "\n\n".join(
            f"Source {number}:\n{text}"
            for number, text in enumerate(documents, start=1)
        )
    else:
        # Make the empty case explicit so the model falls back to rule 6.
        context = "No context documents were retrieved."

    prompt = [
        {
            "role": "system",
            "content": f"""
              You are a question-answering assistant.

              Answer the question using the provided context documents.

              Rules:
              1. Base your answer only on the provided context.
              2. Today's date is {today}. Use this when reasoning about time.
              3. If the context includes future or past dates, reason about them relative to today's date.
              4. If multiple documents are provided, synthesize the information clearly.
              5. Cite the source document numbers when relevant (e.g., "Source 2").
              6. If the answer cannot be determined from the context, respond exactly with:
                I don't know.

              Do not fabricate information.
              Be concise and directly answer the question.
              """
        },
        {
            "role": "user",
            "content": f"Context:\n{context}\n\nQuestion:\n{input}"
        },
    ]
    return prompt

## Model

In [None]:
from huggingface_hub import InferenceClient

# Deliberately NOT named `client`: the Weaviate cells in this notebook bind the
# global name `client` via `with get_weaviate_client() as client:`. Re-running
# any of them would replace a global `client` with a (closed) Weaviate client
# and break `chat`. A dedicated name keeps the cells independent of run order.
hf_client = InferenceClient(
    model="mistralai/Mistral-7B-Instruct-v0.2",
    token=os.environ["HUGGINGFACEHUB_API_TOKEN"],
)

def chat(question: str) -> str:
  """Answer ``question`` using retrieved article context.

  Builds the messages with ``construct_prompt`` (which performs the
  retrieval) and sends them to the Hugging Face chat-completion endpoint.

  Args:
    question: The user's natural-language question.

  Returns:
    The content of the first choice in the model's response.
  """
  messages = construct_prompt(question)
  response = hf_client.chat.completions.create(
      messages=messages,
      max_tokens=512,
      temperature=0.2,
  )
  return response.choices[0].message.content

In [None]:
# End-to-end check: retrieve context for the question and display the model's answer.
chat(question="What is the latest news about Tottenham Hotspur?")

'The latest news about Tottenham Hotspur provided in the context is that they are playing against Arsenal in a **North London Derby** on **Sunday, February 22, 2026**, at their **Tottenham Hotspur Stadium** in London, England. The match is part of the **Premiere League** (likely referring to the English Premier League) and will be broadcast on **USA Network** at **11:30 a.m. ET / 8:30 a.m. PT**. You can watch it online via **Sling Blue** (Source: all context documents).'