MTG Judge RAG
---------------------------------------------------
This is a simple Python script for building an AI-powered MTG rules assistant
using Retrieval-Augmented Generation (RAG) with OpenAI + FAISS.

- Loads the Comprehensive Rules from a text file.
- Splits rules into chunks.
- Creates embeddings with OpenAI.
- Stores them in ChromaDB for fast search (not using FAISS due to py versioning)
- Lets you ask questions, retrieves relevant rules, and asks the LLM to answer.

In [45]:
# -------- IMPORTS --------
import os
import re
import json
import chromadb

from openai import OpenAI
client = OpenAI()   # don’t pass api_key explicitly

from dotenv import load_dotenv
load_dotenv()

True

In [46]:
# -------- CONFIG --------
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
EMBED_MODEL = "text-embedding-3-large"
CHAT_MODEL = "gpt-4o-mini"
CHROMA_DB_DIR = "./chroma_db"
os.makedirs(CHROMA_DB_DIR, exist_ok=True) # to create folder if it doesn't exist
RULES_FILE = "./ComprehensiveRules.txt"
CHUNK_SIZE = 700 # words approximation
TOP_K = 6

In [48]:
def load_rules(path):
    """Load the MTG comprehensive rules from a text file."""
    if not os.path.exists(path):
        print(f"Rules file not found at {path}")
        return []

    docs = []
    with open(path, "r", encoding="utf-8", errors="ignore") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            # Rules usually like: 603.1. Some text
            match = re.match(r"^(\d{1,3}(?:\.\d+)+)\s+(.*)$", line)
            if match:
                rule_id, body = match.groups()
                docs.append({
                    "id": f"CR:{rule_id}",
                    "text": f"{rule_id} {body}",
                    "rule_id": rule_id,
                    "source": "Comprehensive Rules"
                })
    return docs

def chunk_text(text, chunk_size=CHUNK_SIZE):
    """Split text into smaller chunks so embeddings don't get too big."""
    sentences = re.split(r'(?<=[.!?]) +', text)
    chunks = []
    current = []
    length = 0

    for s in sentences:
        tokens = len(s.split())
        if length + tokens > chunk_size:
            chunks.append(" ".join(current))
            current = [s]
            length = tokens
        else:
            current.append(s)
            length += tokens
    if current:
        chunks.append(" ".join(current))

    return chunks

def build_index():
    """Create ChromaDB collection from the rules and generate embeddings."""
    client = OpenAI()  # new SDK picks up API key from environment
    chroma_client = chromadb.PersistentClient(path=CHROMA_DB_DIR)
    collection = chroma_client.get_or_create_collection(name="mtg_rules")

    # Clear previous data
    chroma_client.delete_collection("mtg_rules")  # remove old collection
    collection = chroma_client.get_or_create_collection(name="mtg_rules")  # recreate empty collection

    print("Loading rules...")
    rules = load_rules(RULES_FILE)

    texts = []
    metas = []
    ids = []

    for r in rules:
        chunks = chunk_text(r["text"])
        for i, ch in enumerate(chunks):
            ch = ch.strip()
            if ch:  # only non-empty
                texts.append(ch)
                metas.append(r)
                ids.append(f"{r['id']}_{i}")

    if not texts:
        raise ValueError("No valid rule chunks found to embed.")

    print(f"Total chunks: {len(texts)}")

    print("Creating embeddings...")
    embeddings = client.embeddings.create(model=EMBED_MODEL, input=texts)
    vecs = [d.embedding for d in embeddings.data]

    # Add to ChromaDB
    collection.add(
        ids=ids,
        embeddings=vecs,
        documents=texts,
        metadatas=metas
    )

    print("Index built and saved with ChromaDB!")

def search_index(query, top_k=TOP_K):
    """Search ChromaDB for relevant rule chunks."""
    query = query.strip()
    if not query:
        raise ValueError("Empty query provided.")

    client = OpenAI()
    emb = client.embeddings.create(model=EMBED_MODEL, input=[query])
    vec = emb.data[0].embedding

    chroma_client = chromadb.PersistentClient(path=CHROMA_DB_DIR)
    collection = chroma_client.get_or_create_collection(name="mtg_rules")

    results = collection.query(query_embeddings=[vec], n_results=top_k)

    docs = []
    for i, doc in enumerate(results["documents"][0]):
        docs.append({
            "text": doc,
            "meta": results["metadatas"][0][i]
        })
    return docs

def answer_question(query):
    """Retrieve context and ask the LLM for an answer."""
    results = search_index(query)
    context_blocks = []

    for i, r in enumerate(results, 1):
        text = r["text"]
        meta = r["meta"]
        context_blocks.append(f"[{i}] {text}")

    context = "\n\n".join(context_blocks)

    user_prompt = f"Question: {query}\n\nUse these sources:\n{context}\n\nAnswer:"

    client = OpenAI()
    resp = client.chat.completions.create(
        model=CHAT_MODEL,
        messages=[
            {"role": "system", "content": "You are an expert MTG judge assistant. Cite rule numbers when possible."},
            {"role": "user", "content": user_prompt}
        ]
    )
    return resp.choices[0].message.content

In [49]:
# -------- TESTING --------
build_index()  # only first time

question = "If I imprint Time Walk on Panoptic Mirror, do I get infinite turns?"
print(answer_question(question))

Loading rules...
Total chunks: 1
Creating embeddings...
Index built and saved with ChromaDB!
To determine if imprinting *Time Walk* on *Panoptic Mirror* grants you infinite turns, we need to analyze how both cards interact according to the rules.

*Panoptic Mirror* has the ability to imprint a sorcery card and allows you to pay 2 colorless mana and tap it to cast the imprinted card, which is *Time Walk* in this case.

When you cast *Time Walk* from *Panoptic Mirror*, it allows you to take an extra turn after the current one. The important detail here is that after that extra turn is completed, *Time Walk* is put into the graveyard (as per the card's rules), and you cannot activate *Panoptic Mirror* again without imprinted a new spell (or the same card again).

To clarify the situation:

1. When you cast *Time Walk* via *Panoptic Mirror*, you take an extra turn.
2. After the extra turn ends, *Time Walk* goes to the graveyard.
3. You cannot use *Panoptic Mirror* to cast *Time Walk* again