In [1]:
from langchain_community.document_loaders import JSONLoader

# oyunc datalarƒ±
player_loader = JSONLoader(
    file_path='../data/nba_fantasy_players.json',
    jq_schema='.[]',
    text_content=False
)
player_data = player_loader.load()

# takƒ±m datalarƒ±
team_loader = JSONLoader(
    file_path='../data/nba_league_summary.json',
    jq_schema='.[]',
    text_content=False
)
team_data = team_loader.load()

# tek liste haline getirme
data = player_data + team_data

In [2]:
print(f'toplam veri sayƒ±sƒ±: {len(data)}, oyuncu veri sayƒ±sƒ±: {len(player_data)}, takƒ±m veri sayƒ±sƒ±: {len(team_data)}')

toplam veri sayƒ±sƒ±: 452, oyuncu veri sayƒ±sƒ±: 436, takƒ±m veri sayƒ±sƒ±: 16


In [3]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, 
    chunk_overlap=0
)

docs = text_splitter.split_documents(data)

In [4]:
print(f"Number of documents after chunking: {len(docs)}")

Number of documents after chunking: 452


In [5]:
docs[12]

Document(metadata={'source': 'C:\\Users\\kaann\\Projects\\nba_fantasy_chatbot\\data\\nba_fantasy_players.json', 'seq_num': 13}, page_content='{"player_id": "6707", "name": "Tari Eason", "current_team": "Haramball", "position": "F/PF/SF", "AVG_PTS": 11.5, "AVG_REB": 5.1, "AVG_AST": 1.5, "AVG_ST": 1.1, "AVG_BLK": 0.6, "AVG_3PTM": 2.2, "AVG_TO": 1.5, "FG%": 0.507, "FT%": 0.6, "TOTAL_PTS": 184, "TOTAL_REB": 82, "TOTAL_AST": 24, "TOTAL_ST": 17, "TOTAL_BLK": 10, "TOTAL_3PTM": 35, "TOTAL_TO": 24, "FGM/A": "70/138", "FTM/A": "9/15"}')

In [6]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from dotenv import load_dotenv

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
load_dotenv("../.env") 

True

In [8]:
embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")
vector = embeddings.embed_query("hello, world!")
vector[:5]

[-0.023955047130584717,
 0.011876456439495087,
 -0.003361367853358388,
 -0.058413900434970856,
 0.0015592977870255709]

In [9]:
from langchain_chroma import Chroma

In [10]:
import time
from langchain_chroma import Chroma

vector_store = Chroma(
    embedding_function=embeddings,
    persist_directory="../nba_fantasy_db"
)

# 2. √áok daha g√ºvenli bir tempo ile y√ºkleme (Batch: 10, Bekleme: 15sn)
batch_size = 10 
print(f"üöÄ Toplam {len(docs)} d√∂k√ºman √ßok g√ºvenli modda y√ºkleniyor...")

for i in range(0, 10, batch_size):
    batch = docs[i : i + batch_size]
    try:
        vector_store.add_documents(documents=batch)
        print(f"‚úÖ {i + len(batch)} / {len(docs)} tamamlandƒ±...")
        
        # √úcretsiz plan dakikada 15 istek sƒ±nƒ±rƒ± olabilir. 
        # 10'arlƒ± gruplar ve 15 saniye bekleme ile dakikada 40 d√∂k√ºman i≈üleriz.
        time.sleep(15) 
        
    except Exception as e:
        if "429" in str(e):
            print(f"‚ö†Ô∏è Kota doldu! 60 saniye dinleniyoruz...")
            time.sleep(60) # 429 hatasƒ±nda 1 tam dakika bekle
            vector_store.add_documents(documents=batch) # Tekrar dene
            print(f"‚úÖ Tekrar deneme ba≈üarƒ±lƒ±: {i + len(batch)} / {len(docs)}")
        else:
            print(f"‚ùå Beklenmedik hata: {e}")

print("\n‚ú® T√úM VERƒ∞LER BA≈ûARIYLA Y√úKLENDƒ∞!")

üöÄ Toplam 452 d√∂k√ºman √ßok g√ºvenli modda y√ºkleniyor...
‚úÖ 10 / 452 tamamlandƒ±...

‚ú® T√úM VERƒ∞LER BA≈ûARIYLA Y√úKLENDƒ∞!


In [11]:
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k":10})

In [12]:
retrieved_docs = retriever.invoke("Haramball takƒ±mƒ±nƒ±n oyuncularƒ± kimler?")

In [13]:
len(retrieved_docs)

10

In [14]:
print(retrieved_docs[9].page_content)

{"player_id": "10468", "name": "Cooper Flagg", "current_team": "Haramball", "position": "F/G/PG/SF/SG", "AVG_PTS": 19.4, "AVG_REB": 6.4, "AVG_AST": 4, "AVG_ST": 1.2, "AVG_BLK": 0.8, "AVG_3PTM": 1, "AVG_TO": 2.2, "FG%": 0.486, "FT%": 0.805, "TOTAL_PTS": 640, "TOTAL_REB": 211, "TOTAL_AST": 132, "TOTAL_ST": 40, "TOTAL_BLK": 27, "TOTAL_3PTM": 32, "TOTAL_TO": 74, "FGM/A": "244/502", "FTM/A": "120/149"}


In [15]:
from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash-lite", temperature=0.3,max_tokens=500)

In [16]:
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

In [18]:
from langchain_core.output_parsers import StrOutputParser
intent_system_prompt = """Analyze the question and return ONLY ONE word: 
TRADE, STATS, GREETING, or GENERAL.
Question: {query}"""
intent_prompt = ChatPromptTemplate.from_template(intent_system_prompt)
intent_chain = intent_prompt | llm | StrOutputParser()

In [19]:
stats_prompt_str = """You are an NBA Data Analyst. Your goal is to provide precise statistical rankings.

Follow these steps:
1. Extract all players and their relevant numeric values (e.g., AVG_PTS, TOTAL_REB) from the provided Context.
2. Convert these text-based numbers into a mental list and SORT them numerically (Descending/Ascending as requested).
3. If the user asks for "top" or "highest", provide the top results based on your sorted list.
4. Always cite the exact numbers for each player mentioned.
5. If the data for a specific player is not in the context, state that you don't have that information.

Context:
{context}

Question: {input}"""

In [20]:
trade_prompt_str = """You are a professional NBA Fantasy Trade Consultant. 
Use the provided Context to analyze trades and team needs.

CORE INSTRUCTIONS:
1. IF TWO PLAYERS ARE PROVIDED: Compare their statistics (AVG_PTS, AVG_REB, AVG_AST, AVG_ST, AVG_BLK, FG_PCT, etc.). Analyze who wins the trade based on which categories they improve. Say 'Accept' or 'Decline' at the end.
2. IF USER ASKS FOR A 'FAIR TRADE': Look through the Context for players who have similar statistical profiles (e.g., similar AVG_PTS and similar roles). Suggest 2-3 names that would be a fair swap based on their overall contribution.
3. IF USER WANTS TO IMPROVE A SPECIFIC STAT (e.g., "I need more blocks"): 
   - Identify players in the Context who have high values in that specific category (e.g., high AVG_BLK).
   - Suggest a strategic swap: "Trade a player with high AVG_AST for a player with high AVG_BLK if you need defensive stats."

CONSTRAINTS:
- Use ONLY the provided Context data. No external NBA knowledge.
- If you don't have enough players in the Context to make a suggestion, say so.
- Be concise and strategic.

Context:
{context}

Question: {input}"""

In [21]:
general_prompt_str = "You are a professional NBA Fantasy expert.\nContext:\n{context}"

In [23]:

query = input("Sorgunuzu yazƒ±n: ")

if query:

    intent = intent_chain.invoke({"query": query}).strip().upper()
    print(f"--- Belirlenen Niyet: {intent} ---")
    
    if "GREETING" in intent:
        print("Merhaba! Ben NBA Fantasy asistanƒ±yƒ±m. ƒ∞statistik veya takas sorabilirsin.")
    else:
        
        if "TRADE" in intent:
            sys_prompt = trade_prompt_str
        elif "STATS" in intent:
            sys_prompt = stats_prompt_str
        else:
            sys_prompt = general_prompt_str
            
        qa_prompt = ChatPromptTemplate.from_messages([("system", sys_prompt), ("user", "{input}")])
        qa_chain = create_stuff_documents_chain(llm, qa_prompt)
        rag_chain = create_retrieval_chain(retriever, qa_chain)
        
        response = rag_chain.invoke({"input": query})
        
        print("\nBotun Cevabƒ±:\n")
        print(response["answer"])

--- Belirlenen Niyet: STATS ---

Botun Cevabƒ±:

Here are Luka Donƒçiƒá's stats:

*   **AVG_PTS**: 33.7
*   **AVG_REB**: 8.3
*   **AVG_AST**: 8.6
*   **AVG_ST**: 1.6
*   **AVG_BLK**: 0.6
*   **AVG_3PTM**: 3.4
*   **AVG_TO**: 4.3
*   **FG%**: 0.462
*   **FT%**: 0.803
*   **TOTAL_PTS**: 775
*   **TOTAL_REB**: 191
*   **TOTAL_AST**: 198
*   **TOTAL_ST**: 37
*   **TOTAL_BLK**: 14
*   **TOTAL_3PTM**: 79
*   **TOTAL_TO**: 99
