In [8]:
import os
from dotenv import load_dotenv

from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent
from langchain_core.tools import tool

from langchain_community.document_loaders.pdf import PyPDFLoader
import requests
import tempfile
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter

from pathlib import Path
import shutil 

_ = load_dotenv() # Loads the .env file - e.g. the OPENAI_API_KEY

In [None]:
# Document loader function to load PDF files from a URL

def load_pdf_from_url(url: str):
    """
    Load a PDF from a URL and extract its text content.
    
    Args:
        url: The URL of the PDF file
    
    Returns:
        List of documents with page content
    """
    try:
        # Download the PDF content
        response = requests.get(url)
        response.raise_for_status()  # Raise an exception for bad status codes
        
        # Create a temporary file to store the PDF
        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_file:
            temp_file.write(response.content)
            temp_file_path = temp_file.name
        
        # Load the PDF using PyPDFLoader
        loader = PyPDFLoader(temp_file_path)
        documents = loader.load()
        
        # Clean up the temporary file
        os.unlink(temp_file_path)
        
        return documents
    
    except Exception as e:
        print(f"Error loading PDF from URL: {str(e)}")
        return None

## Dummy PDF URL for testing purposes to reduce embedding costs with OpenAI
# docs = load_pdf_from_url("https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf")

def loader_optcg_rulebooks():
    """
    Load the One Piece Card Game rules from the official website.
    
    Returns:
        List of documents with page content
    """
    comp_rules = load_pdf_from_url("https://en.onepiece-cardgame.com/pdf/rule_comprehensive.pdf?20250221")
    tourney_rules = load_pdf_from_url("https://en.onepiece-cardgame.com/pdf/tournament_rules_manual.pdf?20250613")
    
    # Verifies BOTH comprehensive and tournament rules are loaded
    # If either is None, it means loading failed
    # Ensures we have both sets of rules before proceeding
    if comp_rules is None or tourney_rules is None:
        print("Failed to load One Piece Card Game rules.")
        print("Please check the URLs or your internet connection.")
        return None # Exit early if loading fails
    
    # Tag each page/chunk with its source
    for page in comp_rules:
        page.metadata["source"] = "comprehensive_rules"
    for page in tourney_rules:
        page.metadata["source"] = "tournament_rules"

    return comp_rules + tourney_rules



# Test the loader function
x = loader_optcg_rulebooks()
print("First and last 5 pages of the loaded documents:")
x[:5], x[-5:]

First and last 5 pages of the loaded documents:


([Document(metadata={'producer': 'Microsoft® Word for Microsoft 365', 'creator': 'Microsoft® Word for Microsoft 365', 'creationdate': '2025-02-20T12:30:45+09:00', 'moddate': '2025-02-20T14:31:46+09:00', 'title': 'Comprehensive Rules', 'source': 'comprehensive_rules', 'total_pages': 26, 'page': 0, 'page_label': '1'}, page_content='1 \n \nONE PIECE CARD GAME Comprehensive Rules \nVersion 1.1.8 \n \nLast updated: 2/21/2025 \n \nTable of Contents \nContents of Comprehensive Rules ............................................................................................ \n1. Game Overview .................................................................................................................. 1 \n2. Card Information ................................................................................................................ 3 \n3. Game Areas ........................................................................................................................ 6 \n4. Basic Gam

In [None]:
def create_or_load_vectorstore_optcg_rulebooks():
    """Create or load the persistent vector store"""
    
    # Define the cache directory and persistent directory
    CACHE_DIRECTORY = Path.home() / ".cache"
    CACHE_DIRECTORY.mkdir(parents=True, exist_ok=True)
    PERSIST_DIRECTORY = CACHE_DIRECTORY / "optcg_rulebooks_vectorstore"

    # Define the embedding model
    embeddings = OpenAIEmbeddings(
        model="text-embedding-3-large"
        )

    # Check if vector store already exists and load it
    if os.path.exists(PERSIST_DIRECTORY) and os.listdir(PERSIST_DIRECTORY):
        print("Loading existing vector store...")
        vectorstore = Chroma(
            persist_directory=str(PERSIST_DIRECTORY),
            embedding_function=embeddings
        )
        return vectorstore
    
    # If vector store does not exist, create it
    else: 
        print("Creating new vector store...")
        
        # Define the text splitter for document chunking
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            separators=["\n\n", "\n", ". "," ", ""]
        )
        
        # Load and split documents
        docs = loader_optcg_rulebooks()
        if not docs:
            print("No documents loaded. Please check PDF URLs.")
            return None # Exit if no documents are loaded. Will not create a vector store.
            
        doc_chunks = text_splitter.split_documents(docs)
        print(f"Split documents into {len(doc_chunks)} chunks")
        
        # Create vector store with persistence
        vectorstore = Chroma.from_documents(
            documents=doc_chunks,
            embedding=embeddings,
            persist_directory=str(PERSIST_DIRECTORY)

        )
        
        print(f"Vector store created and saved to {PERSIST_DIRECTORY}")
        return vectorstore

# Create or load the vector store
vectorstore = create_or_load_vectorstore_optcg_rulebooks()

Loading existing vector store...


  vectorstore = Chroma(


Testing vector store with a sample query...


AttributeError: 'list' object has no attribute 'page_content'

In [None]:
# ## Test the vector store
# print("Testing vector store with a sample query...")
# sample_query = "What is the maximum number of cards in a player's hand at the end of their turn?"
# results = vectorstore.similarity_search(sample_query, k=3)
# results_text, results_source = [(results.page_content, results.metadata.source) for result in results]

In [None]:
[result.metadata for result in results]

[Document(metadata={'page_label': '22', 'creator': 'Microsoft® Word 2021', 'total_pages': 41, 'page': 21, 'source': 'tournament_rules', 'creationdate': '2025-02-22T22:45:09-05:00', 'title': 'Tournament Rules Manual', 'moddate': '2025-06-11T20:02:39+09:00', 'producer': 'Microsoft® Word 2021'}, page_content='• Which cards are in a player’s hand; \n• Which cards are in an opponent’s hand; \n• Which cards were played or activated during the previous turn, unless there is an active \ngameplay effect.'),
 Document(metadata={'title': 'Comprehensive Rules', 'page_label': '12', 'moddate': '2025-02-20T14:31:46+09:00', 'creationdate': '2025-02-20T12:30:45+09:00', 'creator': 'Microsoft® Word for Microsoft 365', 'producer': 'Microsoft® Word for Microsoft 365', 'total_pages': 26, 'page': 11, 'source': 'comprehensive_rules'}, page_content='5-2-1-6. Each player draws 5 cards from their deck  as their opening hand. Then, \nbeginning with the player going first, each player may redraw their hand once \n

In [5]:
### NOTE: The following function is for deleting the vector store from disk.
## This should only be used if you need to update the vector store with new documents or changes. Otherwise, only if you absoutely need to delete the vector store from the disk. 

## The implentation of storing the vector store on disk is to ensure that it persists across sessions and does not need to be recreated every time you run the code. Limiting the need to re-embed the documents every time you run the code. Thus, reducing costs and improving performance.


def delete_vectorstore_optcg_rulebooks():
    """Delete the persistent vector store for One Piece Card Game rules"""
    
    # Rudimentary confirmation prompt
    confirmation = input("Are you sure you want to delete the vector store? This action cannot be undone. Type 'yes' to confirm: ")
    if confirmation.strip().lower() != 'yes':
        print("Deletion cancelled.")
        return
    
    # Delete the vector store directory if it exists
    PERSIST_DIRECTORY = Path.home() / ".cache" / "optcg_rulebooks_vectorstore"
    if PERSIST_DIRECTORY.exists():
        try:
            # Use shutil.rmtree to recursively delete the entire directory tree
            shutil.rmtree(PERSIST_DIRECTORY)
            print(f"Deleted vector store at {PERSIST_DIRECTORY}")
        except PermissionError as e:
            print(f"Permission error: {e}")
            print("Please ensure no files are open in the vector store directory.")
            print("You may need to close any applications using the vector store before deleting it. Try restarting your Jupyter kernel and running delete again.")
        except Exception as e:
            print(f"Error deleting vector store: {e}")
    else:
        print("No vector store found to delete.")

In [5]:
@tool
def rulebook_lookup(query: str) -> str:
    """Looks up a rule in the One Piece TCG rulebook."""
    # In a real implementation, this would query a database or API.
    rules = {
        "What happens if two characters with the same name are played on the same team?": "Characters with the same name cannot be played on the same team. If you already have a character in play, you cannot play another character with the same name.",
        "How does the Don!! system work?": "Don!! cards are used to pay costs and activate abilities. You can attach Don!! cards to characters to increase their power or use them to pay for events and character abilities.",
        "What is the difference between active and rest positions?": "Active position means the card is upright and can attack or use abilities. Rest position means the card is turned sideways and cannot attack until it becomes active again during your next turn.",
        "How do you win the game?": "You win by reducing your opponent's life to 0. Life is reduced when your opponent takes damage and has no cards left in their life area to trash.",
        "What is a counter ability?": "Counter abilities can be activated during your opponent's turn when specific conditions are met, usually when one of your characters is being attacked."
    }
    return rules.get(query, "Rule not found.")

In [6]:
tools = [rulebook_lookup]
agent = create_react_agent(
    model=ChatOpenAI(model="gpt-4.1-mini", temperature=0),
    name="RulebookAgent",
    tools=tools, 
    prompt="You are a helpful assistant that helps people find information in the Rulebook for One Piece TCG. You have access to the following tools: {tools}. Use them to find the information the user is looking for. If you don't know the answer, just say you don't know. Do not try to make up an answer.",
)

In [7]:
response = agent.invoke(
    {"messages": [{
        "role": "user", 
        "content": "What happens if two characters with the same name are played on the same team?"
    }]}
)
for m in response["messages"]:
    m.pretty_print()


What happens if two characters with the same name are played on the same team?
Name: RulebookAgent
Tool Calls:
  rulebook_lookup (call_PKkircNK3uQSxN1NV6lDwEsx)
 Call ID: call_PKkircNK3uQSxN1NV6lDwEsx
  Args:
    query: two characters with the same name on the same team
Name: rulebook_lookup

Rule not found.
Name: RulebookAgent

I couldn't find a specific rule in the One Piece TCG rulebook about what happens if two characters with the same name are played on the same team. If you have any other questions or need information on a related topic, feel free to ask!


In [8]:
rules = {
        "What happens if two characters with the same name are played on the same team?": "Characters with the same name cannot be played on the same team. If you already have a character in play, you cannot play another character with the same name."}
rules.get("What happens if two characters with the same name are played on the same team?", "Rule not found.")

'Characters with the same name cannot be played on the same team. If you already have a character in play, you cannot play another character with the same name.'

In [9]:
# Example: Load a PDF from a URL
# Replace this with your actual PDF URL
pdf_url = "https://example.com/your-pdf-file.pdf"

# Uncomment the lines below to test with a real PDF URL
# documents = load_pdf_from_url(pdf_url)
# if documents:
#     print(f"Successfully loaded {len(documents)} pages from the PDF")
#     
#     # Show first page content (first 500 characters)
#     if len(documents) > 0:
#         print(f"\nFirst page content preview:")
#         print(documents[0].page_content[:500] + "...")
#         
#         # Show metadata
#         print(f"\nPage metadata:")
#         print(documents[0].metadata)
# else:
#     print("Failed to load PDF")

In [10]:
# Debug cell - check Path object
print(f"Path type: {type(Path)}")
print(f"Path.home() type: {type(Path.home())}")
print(f"Path.home() value: {Path.home()}")

# Test the problematic line step by step
home = Path.home()
print(f"home: {home}, type: {type(home)}")

cache = home / ".cache"
print(f"cache: {cache}, type: {type(cache)}")

final_path = cache / "onepiece_vectorstore"
print(f"final_path: {final_path}, type: {type(final_path)}")

Path type: <class 'type'>
Path.home() type: <class 'pathlib._local.WindowsPath'>
Path.home() value: C:\Users\tyson
home: C:\Users\tyson, type: <class 'pathlib._local.WindowsPath'>
cache: C:\Users\tyson\.cache, type: <class 'pathlib._local.WindowsPath'>
final_path: C:\Users\tyson\.cache\onepiece_vectorstore, type: <class 'pathlib._local.WindowsPath'>
