In [1]:
import os
from dotenv import load_dotenv

from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent
from langchain_core.tools import tool

from langchain_community.document_loaders.pdf import PyPDFLoader
import requests
import tempfile
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter


_ = load_dotenv() # Loads the .env file - e.g. the OPENAI_API_KEY

In [2]:
def load_pdf_from_url(url: str):
    """
    Load a PDF from a URL and extract its text content.
    
    Args:
        url: The URL of the PDF file
    
    Returns:
        List of documents with page content
    """
    try:
        # Download the PDF content
        response = requests.get(url)
        response.raise_for_status()  # Raise an exception for bad status codes
        
        # Create a temporary file to store the PDF
        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_file:
            temp_file.write(response.content)
            temp_file_path = temp_file.name
        
        # Load the PDF using PyPDFLoader
        loader = PyPDFLoader(temp_file_path)
        documents = loader.load()
        
        # Clean up the temporary file
        os.unlink(temp_file_path)
        
        return documents
    
    except Exception as e:
        print(f"Error loading PDF from URL: {str(e)}")
        return None

comp_rules = load_pdf_from_url("https://en.onepiece-cardgame.com/pdf/rule_comprehensive.pdf?20250221")
tourney_rules = load_pdf_from_url("https://en.onepiece-cardgame.com/pdf/tournament_rules_manual.pdf?20250613")

embeddings = OpenAIEmbeddings(
    model="text-embedding-3-large",
)

# Create a persistent directory for your vector store
CACHE_DIRECTORY = os.path.join(os.path.expanduser("~"), ".cache", "onepiece_vectorstore")
os.makedirs(CACHE_DIRECTORY, exist_ok=True)
PERSIST_DIRECTORY = os.path.join(CACHE_DIRECTORY, "onepiece_vectorstore")


In [3]:
def create_or_load_vectorstore():
    """Create or load the persistent vector store"""
    
    # Check if vector store already exists
    if os.path.exists(PERSIST_DIRECTORY) and os.listdir(PERSIST_DIRECTORY):
        print("Loading existing vector store...")
        vectorstore = Chroma(
            persist_directory=PERSIST_DIRECTORY,
            embedding_function=embeddings
        )
        return vectorstore
    
    else:
        print("Creating new vector store...")
        
        # Split documents into chunks
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            separators=["\n\n", "\n", " ", ""]
        )
        
        # Combine and split documents
        all_docs = []
        if comp_rules:
            all_docs.extend(comp_rules)
        if tourney_rules:
            all_docs.extend(tourney_rules)
        
        if not all_docs:
            print("No documents loaded. Please check PDF URLs.")
            return None
            
        doc_chunks = text_splitter.split_documents(all_docs)
        print(f"Split documents into {len(doc_chunks)} chunks")
        
        # Create vector store with persistence
        vectorstore = Chroma.from_documents(
            documents=doc_chunks,
            embedding=embeddings,
            persist_directory=PERSIST_DIRECTORY
        )
        
        print(f"Vector store created and saved to {PERSIST_DIRECTORY}")
        return vectorstore

# Create or load the vector store
vectorstore = create_or_load_vectorstore()

Creating new vector store...
Split documents into 221 chunks
Vector store created and saved to C:\Users\tyson\.cache\onepiece_vectorstore\onepiece_vectorstore
Vector store created and saved to C:\Users\tyson\.cache\onepiece_vectorstore\onepiece_vectorstore


In [15]:
# RAG For Docs
vectorstore_rag = vectorstore.as_retriever(
    search_kwargs={"k": 5}  # Adjust k based on your needs
    )
vectorstore_rag.search_type = "similarity"
vectorstore_rag.invoke("similarity_search")

[Document(metadata={'source': 'C:\\Users\\tyson\\AppData\\Local\\Temp\\tmpxyyd9j_g.pdf', 'moddate': '2025-06-11T20:02:39+09:00', 'page': 23, 'producer': 'Microsoft® Word 2021', 'total_pages': 41, 'page_label': '24', 'title': 'Tournament Rules Manual', 'creator': 'Microsoft® Word 2021', 'creationdate': '2025-02-22T22:45:09-05:00'}, page_content='reporting match results. \nMatch results cannot be altered once they have been submitted. Players should verify the match \nresult slip carefully before submitting it. \n5.2 End of Round Procedures \nSometimes a match may not finish prior to time being called for the round. Both players are \nresponsible for noting who is the active player when time is called. \nPlayers who need assistance with End of Round procedures should raise their hand and call for a \njudge. The judge should be notified the game is ongoing and whose turn it is. The judge should \nensure both players understand the End of Round procedures, and may stay to supervise. \nIf t

In [5]:
@tool
def rulebook_lookup(query: str) -> str:
    """Looks up a rule in the One Piece TCG rulebook."""
    # In a real implementation, this would query a database or API.
    rules = {
        "What happens if two characters with the same name are played on the same team?": "Characters with the same name cannot be played on the same team. If you already have a character in play, you cannot play another character with the same name.",
        "How does the Don!! system work?": "Don!! cards are used to pay costs and activate abilities. You can attach Don!! cards to characters to increase their power or use them to pay for events and character abilities.",
        "What is the difference between active and rest positions?": "Active position means the card is upright and can attack or use abilities. Rest position means the card is turned sideways and cannot attack until it becomes active again during your next turn.",
        "How do you win the game?": "You win by reducing your opponent's life to 0. Life is reduced when your opponent takes damage and has no cards left in their life area to trash.",
        "What is a counter ability?": "Counter abilities can be activated during your opponent's turn when specific conditions are met, usually when one of your characters is being attacked."
    }
    return rules.get(query, "Rule not found.")

In [6]:
tools = [rulebook_lookup]
agent = create_react_agent(
    model=ChatOpenAI(model="gpt-4.1-mini", temperature=0),
    name="RulebookAgent",
    tools=tools, 
    prompt="You are a helpful assistant that helps people find information in the Rulebook for One Piece TCG. You have access to the following tools: {tools}. Use them to find the information the user is looking for. If you don't know the answer, just say you don't know. Do not try to make up an answer.",
)

In [7]:
response = agent.invoke(
    {"messages": [{
        "role": "user", 
        "content": "What happens if two characters with the same name are played on the same team?"
    }]}
)
for m in response["messages"]:
    m.pretty_print()


What happens if two characters with the same name are played on the same team?
Name: RulebookAgent
Tool Calls:
  rulebook_lookup (call_MxGwQYFqGpHKF6jDwXiNbwL0)
 Call ID: call_MxGwQYFqGpHKF6jDwXiNbwL0
  Args:
    query: two characters with the same name on the same team
Name: rulebook_lookup

Rule not found.
Name: RulebookAgent

I couldn't find a specific rule in the One Piece TCG rulebook about what happens if two characters with the same name are played on the same team. If you have any other questions or need information on a related topic, feel free to ask!


In [8]:
rules = {
        "What happens if two characters with the same name are played on the same team?": "Characters with the same name cannot be played on the same team. If you already have a character in play, you cannot play another character with the same name."}
rules.get("What happens if two characters with the same name are played on the same team?", "Rule not found.")

'Characters with the same name cannot be played on the same team. If you already have a character in play, you cannot play another character with the same name.'

In [9]:
# Example: Load a PDF from a URL
# Replace this with your actual PDF URL
pdf_url = "https://example.com/your-pdf-file.pdf"

# Uncomment the lines below to test with a real PDF URL
# documents = load_pdf_from_url(pdf_url)
# if documents:
#     print(f"Successfully loaded {len(documents)} pages from the PDF")
#     
#     # Show first page content (first 500 characters)
#     if len(documents) > 0:
#         print(f"\nFirst page content preview:")
#         print(documents[0].page_content[:500] + "...")
#         
#         # Show metadata
#         print(f"\nPage metadata:")
#         print(documents[0].metadata)
# else:
#     print("Failed to load PDF")

In [10]:
# Debug cell - check Path object
print(f"Path type: {type(Path)}")
print(f"Path.home() type: {type(Path.home())}")
print(f"Path.home() value: {Path.home()}")

# Test the problematic line step by step
home = Path.home()
print(f"home: {home}, type: {type(home)}")

cache = home / ".cache"
print(f"cache: {cache}, type: {type(cache)}")

final_path = cache / "onepiece_vectorstore"
print(f"final_path: {final_path}, type: {type(final_path)}")

NameError: name 'Path' is not defined