In [198]:
from openai import OpenAI
import numpy as np
import json
import faiss
import os
import re

from fuzzywuzzy import process
import jellyfish
from rapidfuzz import fuzz, process
from unidecode import unidecode

In [199]:
# from langchain_openai import ChatOpenAI
# from dotenv import load_dotenv
# load_dotenv()

# # print("Key exists:", "OPENAI_API_KEY" in os.environ)  # Should be True
# # print("Key length:", len(os.getenv("OPENAI_API_KEY")))  # Should be ~51 chars

# llm = ChatOpenAI()
# llm.invoke("Hello, world!")

In [200]:
def get_unique_players(json_file_path, position):
    """Collect the distinct player names listed for one position across all weeks.

    Args:
        json_file_path: Path to a season-stats JSON file whose top level is an
            object with a "Weeks" list; each week holds
            ``["Positions"][position]["Players"]`` keyed by player name.
        position: Position key to look up in every week (e.g. 'Quarterback').

    Returns:
        Alphabetically sorted list of unique player names. An empty list is
        returned (after printing a message) when the file cannot be read,
        cannot be parsed, or does not have the expected top-level shape.
    """
    try:
        # Explicit UTF-8 keeps this consistent with load_documents() and avoids
        # platform-default encodings mangling non-ASCII player names.
        with open(json_file_path, encoding="utf-8") as f:
            data = json.load(f)
    except Exception as e:
        print(f"Error loading JSON file: {e}")
        return []

    # A JSON array/scalar at the top level has no .get(); treat it like any
    # other unusable file instead of crashing with AttributeError.
    if not isinstance(data, dict):
        print("Error loading JSON file: top-level JSON value is not an object")
        return []

    unique_players = set()

    # Iterate through each week in the "Weeks" array ("Weeks": null -> no weeks)
    for week_data in data.get("Weeks") or []:
        try:
            # Access the players for the specified position
            position_data = week_data["Positions"][position]["Players"]
            unique_players.update(position_data.keys())
        except KeyError as e:
            print(f"Warning: Missing expected key {e} in week {week_data.get('week', 'unknown')}")
            continue
        except (TypeError, AttributeError) as e:
            # Week entry (or its "Players" value) is not a mapping; skip it
            # rather than aborting the whole scan.
            print(f"Warning: Malformed week entry skipped ({e})")
            continue

    return sorted(unique_players)

In [201]:
# Build the sorted list of quarterback names; it is passed to
# extract_player_names() further down to detect players mentioned in a query.
# NOTE(review): this is an absolute, machine-specific path to the *full* season
# file, while the retrieval index below is built from the relative
# "nfl_season_stats_partial.json" — confirm both files are meant to be used.
players_qb = get_unique_players('C:/Users/rshua/fantasy-football-chatbot/nfl_season_stats_full.json', 'Quarterback')
print(players_qb)

['Aaron Rodgers', "Aidan O'Connell", 'Andy Dalton', 'Anthony Richardson', 'Bailey Zappe', 'Baker Mayfield', 'Bo Nix', 'Brandon Allen', 'Brock Purdy', 'Bryce Young', 'C.J. Stroud', 'Caleb Williams', 'Carson Wentz', 'Chris Oladokun', 'Clayton Tune', 'Cooper Rush', 'Dak Prescott', 'Daniel Jones', 'Davis Mills', 'Derek Carr', 'Deshaun Watson', 'Desmond Ridder', 'Dorian Thompson-Robinson', 'Drake Maye', 'Drew Lock', 'Gardner Minshew', 'Geno Smith', 'Hendon Hooker', 'Jacoby Brissett', 'Jake Browning', 'Jake Haener', 'Jalen Hurts', 'Jameis Winston', 'Jared Goff', 'Jarrett Stidham', 'Jayden Daniels', 'Jeff Driskel', 'Jimmy Garoppolo', 'Joe Burrow', 'Joe Flacco', 'Joe Milton III', 'Jordan Love', 'Josh Allen', 'Josh Johnson', 'Joshua Dobbs', 'Justin Fields', 'Justin Herbert', 'Kenny Pickett', 'Kirk Cousins', 'Kyle Allen', 'Kyle Trask', 'Kyler Murray', 'Lamar Jackson', 'Mac Jones', 'Malik Willis', 'Marcus Mariota', 'Mason Rudolph', 'Matthew Stafford', 'Michael Penix Jr.', 'Mike White', 'Mitch Tru

In [202]:
# Load JSON files
def load_documents(json_file):
    """Parse the UTF-8 encoded JSON file at ``json_file`` and return its content."""
    with open(json_file, mode="r", encoding="utf-8") as handle:
        return json.load(handle)

# Generate an embedding vector for a piece of text
def get_embedding(text, model="text-embedding-ada-002"):
    """Embed ``text`` with the OpenAI embeddings endpoint.

    Uses the notebook-level ``client`` (created in a later cell), so that cell
    must have run before this function is called.

    Args:
        text: The string to embed.
        model: Embedding model name; defaults to the model this notebook has
            always used, so existing callers are unaffected.

    Returns:
        1-D ``numpy.ndarray`` of dtype float32 holding the embedding.
    """
    response = client.embeddings.create(model=model, input=text)
    # FAISS indexes operate on float32 data; converting here means every vector
    # handed to index.add()/index.search() already has the dtype FAISS expects,
    # regardless of how the installed faiss wrapper treats float64 input.
    return np.asarray(response.data[0].embedding, dtype=np.float32)


In [203]:
# Read the OpenAI API key from a local JSON secrets file (expects an
# "OPENAI_API_KEY" entry) and create the shared client used by get_embedding()
# and the chat completion call at the end of the notebook.
# NOTE(review): the path is absolute and machine-specific; consider reading the
# key from an environment variable so the notebook runs elsewhere.
with open("C:/Users/rshua/keys/secret_key.json", "r") as file:
    secrets = json.load(file)
    api_key = secrets["OPENAI_API_KEY"]

client = OpenAI(api_key = api_key)

In [204]:
# data = load_documents("C:/Users/rshua/fantasy-football-chatbot/nfl_season_stats_partial.json")
# Load the partial-season stats that the document/embedding cells below are
# built from. The relative path is resolved against the kernel's current
# working directory.
data = load_documents("nfl_season_stats_partial.json")

In [205]:
# Convert the nested season JSON into a flat list of text documents, one per
# (week, position, player) entry, and record which weeks the data covers.
# NOTE(review): the stat keys read below ('dropbacks', 'att', 'comp', ...) look
# quarterback-specific; a position whose players lack them raises KeyError —
# confirm the input file only contains positions with these fields.
documents = []
weeks_seen = []

for week_data in data["Weeks"]:
    week = week_data["week"]
    weeks_seen.append(week)
    for position, position_data in week_data["Positions"].items():
        for player_name, player_stats in position_data["Players"].items():
            text = (
                f"Week: {week}, Position: {position}, Player: {player_name}, Team: {player_stats['team']}, "
                f"Dropbacks: {player_stats['dropbacks']}, Attempts: {player_stats['att']}, Completions: {player_stats['comp']}, "
                f"Yards: {player_stats['yds']}, TDs: {player_stats['tds']}, INTs: {player_stats['ints']}, "
                f"Sacks: {player_stats['sks']}, Rush Carries: {player_stats['rushCarries']}, Rush Yards: {player_stats['rushYds']}, "
                f"Rush TDs: {player_stats['rushTds']}, Fumbles: {player_stats['fumbles']}, Fantasy Points: {player_stats['fantasyPts']}"
            )
            documents.append({"text": text})

# Derive the week bounds from the data itself instead of relying on whatever
# value the loop variable happened to hold last: an empty "Weeks" list used to
# surface as a NameError, and unsorted weeks produced the wrong "last week".
if not weeks_seen:
    raise ValueError("No weeks found in the loaded data; cannot determine last_week/next_week.")

last_week = max(weeks_seen)
next_week = last_week + 1


In [206]:
# Collect the raw text of every document. Row i of `embeddings` (and entry i of
# the FAISS index) corresponds to texts[i], which retrieve_relevant_docs relies
# on when mapping search results back to text.
texts = [doc["text"] for doc in documents]

if not texts:
    # Without this guard an empty corpus fails later with an opaque
    # "tuple index out of range" on embeddings.shape[1].
    raise ValueError("No documents to embed; check that the stats file was loaded correctly.")

# One embeddings request per document. The matrix is built as float32 because
# that is the dtype FAISS indexes store and search.
embeddings = np.array([get_embedding(text) for text in texts], dtype="float32")

# Store embeddings in FAISS (exact L2 search, no training step required)
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# dimension = embeddings.shape[1]
# index = faiss.IndexFlatL2(dimension)
# index.add(np.array([embeddings]))
# faiss.write_index(index, "vector_store.index")



In [207]:
# print(texts)

In [208]:
def clean_text(text):
    """Normalize text for name matching.

    Transliterates to ASCII, drops every character that is neither a letter
    nor whitespace, then lowercases and trims the result.
    """
    ascii_text = unidecode(text)
    letters_only = re.sub(r"[^a-zA-Z\s]", "", ascii_text)
    lowered = letters_only.lower()
    return lowered.strip()

def extract_player_names(query, player_data, threshold=85):
    """Return the players from ``player_data`` that appear to be mentioned in ``query``.

    Both the query and each candidate name are normalized with clean_text()
    and compared with rapidfuzz's partial_ratio, which tolerates the name being
    a substring of a longer query. Names scoring at least ``threshold`` are
    returned best match first; ties keep their order from ``player_data``.
    """
    normalized_query = clean_text(query)

    # Score every candidate against the normalized query.
    scored = [
        (name, fuzz.partial_ratio(clean_text(name), normalized_query))
        for name in player_data
    ]

    # Keep only sufficiently strong matches, then rank by score (stable sort).
    hits = [pair for pair in scored if pair[1] >= threshold]
    ranked = sorted(hits, key=lambda pair: pair[1], reverse=True)

    return [name for name, _ in ranked]

In [209]:
# Retrieval helpers: map a query (or detected player names) to stored documents
def _search_texts(query_text, top_k):
    """Return up to ``top_k`` stored texts nearest to ``query_text`` in the FAISS index."""
    # Asking FAISS for more neighbours than the index holds pads the result
    # with -1, which would silently alias texts[-1]; clamp k and filter.
    k = min(top_k, index.ntotal)
    if k <= 0:
        return []

    # FAISS searches float32 vectors shaped (n_queries, dimension).
    query_embedding = np.asarray(get_embedding(query_text), dtype="float32").reshape(1, -1)
    _, indices = index.search(query_embedding, k)
    return [texts[i] for i in indices[0] if 0 <= i < len(texts)]


def retrieve_relevant_docs(query, player_names, top_k=None):
    """Fetch the documents to use as context for a user query.

    Args:
        query: The raw user question; embedded directly only when no player
            names were detected.
        player_names: Player names detected in the query. When non-empty, one
            search is run per player using the text "Player: <name>".
        top_k: Number of neighbours requested per search. Defaults to the
            notebook-level ``last_week`` value.

    Returns:
        List of document texts. With several players the per-player results
        are concatenated in the order of ``player_names``; a document retrieved
        for more than one player is included only once.
    """
    if top_k is None:
        top_k = last_week  # Use the last_week variable as default top_k

    if not player_names:
        # No player detected: fall back to plain semantic search on the query.
        return _search_texts(query, top_k)

    relevant_docs = []
    seen = set()

    for player_name in player_names:
        # Query focused on the specific player, mirroring the "Player: ..."
        # field of the stored document text.
        for doc in _search_texts(f"Player: {player_name}", top_k):
            if doc not in seen:
                seen.add(doc)
                relevant_docs.append(doc)

    return relevant_docs







# def retrieve_relevant_docs(query, top_k = 6):
#     query_embedding = get_embedding(query)
#     query_embedding = query_embedding.reshape(1, -1)

#     distances, indices = index.search(query_embedding, top_k)

#     relevant_docs = []

#     for i in indices[0]:
#         relevant_docs.append(texts[i])

#     return relevant_docs





    # query_embedding = get_embedding(query)
    # distances, indices = index.search(query_embedding.reshape(1, -1), top_k)
    
    # return [texts[i] for i in indices[0]]



In [210]:
# System prompt template. It is a plain (non-f) string on purpose: tokens such
# as {X} are literal format hints for the model, so only the two week
# placeholders are substituted below.
system_message_template = """
### Role & Purpose
You are an expert Fantasy Football AI assistant designed to provide data-driven insights, strategic advice, and statistical analysis for NFL player performance. Your responses must be:
- **Concise** but informative.
- **Factual** (grounded in retrieved data).
- **Actionable** (offer clear recommendations when asked).

---

### Core Responsibilities
1. **Statistical Summaries**
   - Always specify the week number when referencing data (e.g., "In Week 7, Player X had...").
   - If no data exists for a requested week/player, state: "No records found for [Player] in Week [X]."

2. **Lineup Advice (Start/Sit)**
   - Compare players using:
     - Recent performance (last 2-3 weeks)
     - Matchup difficulty (opposing defense strength)
     - Volume trends (targets/carries)
   - Example: "Start Player A over Player B due to consistent red-zone targets."

   - **Response Format:**
    [Player A] vs [Player B] (Week {X}):
    ▸ **Recent**: [A-PTS] vs [B-PTS] (Last 3 avg)
    ▸ **Matchup**: [A-DEF] (Rank) vs [B-DEF] (Rank)
    ▸ **Volume**: [A-TGT]/[A-CAR] vs [B-TGT]/[B-CAR]
    ---
    [Verdict]: "Start [Player] because [reason]."

3. **Trade Recommendations**
   - Use terms like "Buy low" or "Sell high" based on value trends.
   - Highlight under/overperforming players with reasoning.

4. **Next-Week Projections (Week {next_week})**
   - Base predictions on:
     - Recent form (last 3 weeks)
     - Matchup context
     - Role changes (injuries, depth chart)
   - Include estimates for:
     - Passing: Yards, TDs, INTs
     - Rushing: Yards, TDs
     - Fantasy points (specify scoring format)

---

### Key Rules
- **Temporal Context:**
  - "Last week" = Week {last_week}, "Next week" = Week {next_week}.
  - Reject requests beyond Week {next_week}: "I can only project for Week {next_week}."

- **Data Integrity:**
  - Never guess stats. If unsure: "Insufficient data to answer."
  - For rookies/injured players: "Limited sample size; monitor practice reports."

- **Response Format:**
[Week X] [Player Name]:

Passing: [YDS]/[TDs]/[INTs]

Rushing: [YDS]/[TDs]

Fantasy: [PTS] (PPR/Standard)

[Insight]: "[Trend/Matchup Analysis]."
[Advice]: "[Start/Sit/Trade] because [reason]."
"""

# Fill in the week placeholders from the values computed when the documents
# were built. Previously the raw "{last_week}" / "{next_week}" text was sent to
# the model because the string was never formatted. str.replace is used rather
# than str.format / an f-string so the literal "{X}" hint is left untouched.
system_message = (
    system_message_template
    .replace("{last_week}", str(last_week))
    .replace("{next_week}", str(next_week))
)

In [211]:
# Example question; it drives player detection, retrieval and the final prompt.
user_query = "Who should I start: Jordan Love or Bo Nix"

In [212]:
# Fuzzy-match the query against the known quarterback names (default
# threshold of 85) to find which players the user is asking about.
detected_names = extract_player_names(user_query, players_qb)
print(detected_names)

['Bo Nix', 'Jordan Love']


In [213]:
# Retrieve the stat documents for the detected players (top_k is left at its
# default) and join them, one per line, into the context block for the prompt.
retrieved_docs = retrieve_relevant_docs(user_query, detected_names)
context = "\n".join(retrieved_docs)


In [214]:
# print(retrieved_docs)

In [215]:
# Assemble the user prompt: retrieved context first, then the question.
prompt = f"Use the following information to answer the query:\n\n{context}\n\nUser Query: {user_query}\n\nAnswer:"

# Single chat completion: system_message carries the assistant's rules and
# response format, the user message carries the context plus the question.
completion = client.chat.completions.create(
    model="gpt-4-turbo",
    messages=[
        {"role": "system", "content": system_message},
        {"role": "user", "content": prompt}
    ]
)

# Print the text of the first (only requested) choice.
print(completion.choices[0].message.content)


Bo Nix vs Jordan Love (Week 9):

▸ **Recent**: Nix-PTS (23.1, 14.1, 29.8) vs Love-PTS (26.6, 18.8, 6.7)
▸ **Matchup**: DEN-DEF vs GB-DEF (No specific ranks provided, consider recent opposing defensive challenges)
▸ **Volume**: Nix-TGT/169, Nix-CAR/19 vs Love-TGT/128, Love-CAR/9

---
[Verdict]: "Start Bo Nix because of his high and consistent fantasy scoring in recent weeks and greater involvement in rushing attempts, which adds to his scoring potential."

