In [1]:
import os
import sys

# Make the parent of the current working directory importable. The membership
# check keeps the cell idempotent: re-running it no longer appends the same
# path to sys.path again and again.
PARENT_DIR = os.path.abspath(os.path.join(os.getcwd(), ".."))
if PARENT_DIR not in sys.path:
    sys.path.append(PARENT_DIR)

# API credentials are read from the environment; os.getenv yields None for any
# variable that is not set.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")

In [17]:
from tavily import TavilyClient

def tavily_search_context(query: str, max_results: int=5, tavily_client=None) -> str:
    """Run a Tavily web search and format the hits as a bulleted context string.

    Args:
        query: Search text passed to the client's ``search`` method.
        max_results: Forwarded to ``search`` as ``max_results``.
        tavily_client: Optional object exposing ``search(query, max_results=...)``.
            When omitted, the notebook-level ``tavily`` client is used, so the
            cell that creates it must have been run beforehand.

    Returns:
        One ``- title: snippet (Source: url)`` line per result, joined with
        newlines, or ``"No web results found."`` when the response has no
        ``"results"`` entry or the list is empty.
    """
    # The global is only looked up when no client is injected.
    search_client = tavily if tavily_client is None else tavily_client
    resp = search_client.search(query, max_results=max_results)

    # Treat a missing key and an empty list alike: there is nothing to cite.
    if not resp or "results" not in resp or not resp["results"]:
        return "No web results found."

    snippets = []
    for r in resp["results"]:
        title = r.get("title", "Untitled")
        # Tavily returns the page text under "content"; "snippet" is kept as a
        # fallback so responses shaped the old way still produce text.
        snippet = r.get("content") or r.get("snippet") or ""
        url = r.get("url", "")
        snippets.append(f"- {title}: {snippet} (Source: {url})")
    return "\n".join(snippets)

def choose_index(query, client, index_descriptions):
    """Ask the chat model which knowledge base should serve ``query``.

    Args:
        query: The user's question.
        client: Object exposing ``chat.completions.create(...)`` (OpenAI-style).
        index_descriptions: Mapping of index name -> human-readable description;
            its keys are the only names this function returns when it is
            non-empty.

    Returns:
        A key of ``index_descriptions``. If the model's reply is not one of the
        keys, falls back to ``"weekly_stats"`` (or the legacy ``"weekly-stats"``
        spelling) when present, otherwise to the first key. Only when the
        mapping is empty is the bare default ``"weekly_stats"`` returned.
    """
    catalogue = "\n".join(f"{name}: {description}" for name, description in index_descriptions.items())
    system_prompt = "You are a router that decides which knowledge base to use for a query. Only respond with the index name from the list below:\n\n" + catalogue

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": f"Query: {query}"}
        ],
        temperature=0
    )

    # message.content may be None; normalise it to a string before stripping.
    raw_choice = (response.choices[0].message.content or "").strip()
    if raw_choice in index_descriptions:
        return raw_choice

    # Models sometimes wrap the name in quotes/backticks or add a full stop.
    cleaned_choice = raw_choice.strip("`'\". \n\t")
    if cleaned_choice in index_descriptions:
        return cleaned_choice

    # The fallback must be a real key: the previous hard-coded "weekly-stats"
    # did not match the underscore-style keys and caused a KeyError downstream.
    for fallback in ("weekly_stats", "weekly-stats"):
        if fallback in index_descriptions:
            return fallback
    return next(iter(index_descriptions), "weekly_stats")

def retrieve_context(query, embedder, indices, index_descriptions, client, top_k=5, research_mode=False):
    """Fetch vector-search matches for ``query`` and tag each with its index name.

    In research mode every index in ``indices`` is queried and the ``top_k``
    highest-scoring matches overall are returned. Otherwise ``choose_index``
    picks a single index and its matches are returned in the order received.

    Each match has its ``'source'`` entry set to the name of the index it came
    from before being returned.
    """
    # Work out which indices to hit. Routing happens before embedding, as the
    # single-index path needs the router's answer first.
    if research_mode:
        target_names = list(indices)
    else:
        target_names = [choose_index(query, client, index_descriptions)]

    vector = embedder.encode(query).tolist()

    hits = []
    for index_name in target_names:
        response = indices[index_name].query(vector=vector, top_k=top_k, include_metadata=True)
        for hit in response['matches']:
            hit['source'] = index_name
            hits.append(hit)

    if not research_mode:
        return hits

    # Merge across indices: best score first, then keep only the top_k.
    hits.sort(key=lambda hit: hit['score'], reverse=True)
    return hits[:top_k]
    

def build_context(results):
    """Render retrieved matches as a prompt-ready context string.

    Each match contributes a two-line section (its ``'source'`` and the
    ``'text'`` field of its ``'metadata'``); sections are separated by a blank
    line. An empty ``results`` yields an empty string.
    """
    sections = []
    for hit in results:
        sections.append(f"Source: {hit['source']}\nText: {hit['metadata']['text']}")
    return "\n\n".join(sections)

def answer_question(query, embedder, indices, index_descriptions, client, top_k=5, research_mode=False, score_threshold=0.5):
    """Answer ``query`` from the vector indices, falling back to a web search.

    Args:
        query: The user's question.
        embedder: Passed through to ``retrieve_context`` to embed the query.
        indices: Mapping of index name -> queryable index, passed through.
        index_descriptions: Mapping of index name -> description, passed through.
        client: Object exposing ``chat.completions.create(...)``; used for the
            final answer and passed through to ``retrieve_context``.
        top_k: Number of matches requested from retrieval.
        research_mode: When true, retrieval searches every index and the
            research-style system prompt is used.
        score_threshold: If no match scores at least this value, the context
            comes from ``tavily_search_context`` instead of the indices.

    Returns:
        ``(answer_text, results, source)``. ``results`` is always the list of
        index matches, even when the web fallback supplied the context;
        ``source`` is ``"web (Tavily)"`` or ``"index (<name>[, <name>...])"``.
    """
    # research_mode must be forwarded, otherwise retrieval always takes the
    # single-index route regardless of what the caller asked for.
    results = retrieve_context(
        query, embedder, indices, index_descriptions, client,
        top_k=top_k, research_mode=research_mode,
    )

    if not results or all(r["score"] < score_threshold for r in results):
        context = tavily_search_context(query)
        source = "web (Tavily)"
    else:
        context = build_context(results)
        # Research mode can mix several indices; list each one once, in rank
        # order. Single quotes inside the f-string keep this valid on Python
        # versions older than 3.12.
        index_names = ", ".join(str(name) for name in dict.fromkeys(r["source"] for r in results))
        source = f"index ({index_names})"

    if research_mode:
        prompt = "You are an NFL research assistant. Retrieve and summarize the most relevant information. Always cite snippets from context."
    else:
        prompt = "You are an NFL Q&A assistant. Answer clearly and concisely using the provided context."

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": prompt},
            {"role": "user", "content": f"Question: {query}\n\nContext:\n{context}"}
        ],
        temperature=0.2
    )

    return response.choices[0].message.content, results, source

In [19]:
from sentence_transformers import SentenceTransformer
from openai import OpenAI
from pinecone import Pinecone, ServerlessSpec

# Shared service objects used by the retrieval helpers.
embedder = SentenceTransformer("all-MiniLM-L6-v2")
client = OpenAI(api_key=OPENAI_API_KEY)
pc = Pinecone(api_key=PINECONE_API_KEY)

# Logical knowledge-base name -> handle returned by pc.Index for the named
# Pinecone index. Note the logical keys use underscores while some Pinecone
# names use hyphens or abbreviations.
indices = {
    logical_name: pc.Index(pinecone_name)
    for logical_name, pinecone_name in (
        ("weekly_stats", "weekly-stats"),
        ("play_by_play", "pbp"),
        ("profiles", "player-profiles"),
        ("injuries", "injuries"),
        ("schedules", "schedules"),
    )
}

# Descriptions shown to the router model; keys must match those of `indices`.
index_descriptions = dict(
    profiles="Player biographies, colleges, draft info, positions, and general background.",
    weekly_stats="Player weekly performance stats (yards, touchdowns, completions, etc.).",
    injuries="Injury reports, player status, and health updates.",
    schedules="Game schedules, opponents, outcomes, and dates.",
    play_by_play="Detailed play-by-play data including drives, quarters, and play outcomes.",
)

question = "Who did the Bills play in the AFC championship?"

# The third return value (the source label) is discarded here, so the output
# below does not show whether the context came from an index or the web.
answer, refs, _ = answer_question(
    question, embedder, indices, index_descriptions, client,
    top_k=8, research_mode=False,
)

print("Answer:", answer)
print("\nReferences:")
for r in refs:
    # One header line plus the first 150 characters of the match text.
    print(f"Index: {r['source']} | Score: {r['score']:.4f}")
    print(f"Snippet: {r['metadata']['text'][:150]}...\n")

Answer: The Buffalo Bills played against the Kansas City Chiefs in the AFC Championship Game.

References:
Index: schedules | Score: 0.4185
Snippet: game_id: 2019_11_CIN_OAK | season: 2019 | game_type: REG | week: 11 | gameday: 2019-11-17 | weekday: Sunday | gametime: 16:25 | away_team: CIN | away_...

Index: schedules | Score: 0.4097
Snippet: game_id: 2006_17_STL_MIN | season: 2006 | game_type: REG | week: 17 | gameday: 2006-12-31 | weekday: Sunday | gametime: 13:00 | away_team: STL | away_...

Index: schedules | Score: 0.4087
Snippet: game_id: 2019_04_OAK_IND | season: 2019 | game_type: REG | week: 4 | gameday: 2019-09-29 | weekday: Sunday | gametime: 13:00 | away_team: OAK | away_s...

Index: schedules | Score: 0.4085
Snippet: game_id: 2018_02_LAC_BUF | season: 2018 | game_type: REG | week: 2 | gameday: 2018-09-16 | weekday: Sunday | gametime: 13:00 | away_team: LAC | away_s...

Index: schedules | Score: 0.4067
Snippet: game_id: 2019_04_TEN_ATL | season: 2019 | game_type: REG | week

In [12]:
from tavily import TavilyClient

# Notebook-level Tavily client. tavily_search_context reads this global, so this
# cell has to be executed before that function (or answer_question's web
# fallback, which calls it) is invoked.
tavily = TavilyClient(api_key=TAVILY_API_KEY)

In [13]:
# Ad-hoc probe of the Tavily client: run one search and display the raw
# "results" list as the cell output, to inspect which fields each hit carries.
resp = tavily.search("Who did the Bills play week 17 2024")
resp["results"]

[{'url': 'https://www.buffalobills.com/video/bills-vs-jets-game-highlights-week-17',
  'title': 'Bills vs. Jets game highlights | Week 17',
  'content': 'Watch highlights from the Week 17 matchup between the New York Jets and the Buffalo Bills during the 2024 NFL season.',
  'score': 0.8415267,
  'raw_content': None},
 {'url': 'https://www.newyorkjets.com/video/jets-vs-bills-game-highlights-week-17-2024',
  'title': 'Full Game Highlights | Jets at Bills | Week 17',
  'content': 'Full Game Highlights | Jets at Bills | Week 17. Dec 29, 2024. Watch highlights from the Week 17 game between the Jets and Bills. NOW PLAYING.',
  'score': 0.82425016,
  'raw_content': None},
 {'url': 'https://www.nfl.com/videos/jets-vs-bills-highlights-week-17-x0252',
  'title': 'New York Jets vs. Buffalo Bills highlights | Week 17',
  'content': 'Watch highlights from the Week 17 matchup between the New York Jets and the Buffalo Bills during the 2024 NFL season.',
  'score': 0.80844593,
  'raw_content': None},