# Data Loading

In [1]:
import os
import dotenv
import openai
import langchain
from langchain_openai import ChatOpenAI
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain_chroma.vectorstores import Chroma
from langchain_community.document_loaders import PyPDFium2Loader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

dotenv.load_dotenv()

True

In [2]:
llm_model_name = "gpt-4o-mini"

In [3]:
from langchain_community.document_loaders import WebBaseLoader

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [4]:
loader = WebBaseLoader([
    "https://thronesdb.com/rulesreference", 
    "https://thronesdb.com/faq"
    ])

In [5]:
docs = loader.load()

In [6]:
docs

[Document(metadata={'source': 'https://thronesdb.com/rulesreference', 'title': 'Rules · ThronesDB', 'description': 'Rules Reference', 'language': 'en'}, page_content='\n\n\nRules · ThronesDB\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n ThronesDB\n                    \n\n\n\nMy decks\nDecklists\nCards\nReviews\n\n\nRules \n\nRules Reference\nF.A.Q.\n\nRestricted and Banned Cards\n\n\nTournament Regulations\n\n\n\n\n\n\n\n\n\n\n\n\n\nSyntax\n\n\n\nSyntax\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nSyntax\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nThe Only Game That Matters\nToo many strange faces, Tyrion thought, too many new players. The game changed while I lay rotting\n            in my bed, and no one will tell me the rules.\n        — George R.R. Martin, A Song of Ice and Fire\nThis document is intended as the definitive source for rules information, but does not teach players how to play\n        the game. Players should first read the Learn to Play book in its entire

In [235]:
file_path = 'agot_rag/GOT_FAQ_v4.1.pdf'
loader = PyPDFium2Loader(file_path)
data = loader.load()

'agot_rag/GOT_FAQ_v4.1.pdf'

In [5]:
# Load PDF
loaders = [
    PyPDFium2Loader("./agot_rag/GOT_FAQ_v4.1.pdf"),
    PyPDFium2Loader("./agot_rag/Rules · ThronesDB.pdf")
]
docs = []
for loader in loaders:
    docs.extend(loader.load())



In [6]:
docs

[Document(metadata={'source': './agot_rag/GOT_FAQ_v4.1.pdf', 'page': 0}, page_content='0\n'),
 Document(metadata={'source': './agot_rag/GOT_FAQ_v4.1.pdf', 'page': 1}, page_content='NOTES AND ERRATA\r\nThis section contains notes and errata pertaining to specific\r\ncards or sections of the rulebook. The document version\r\nnumber in which an entry first appeared is listed with that entry\r\nin order to establish a history of when each change was made.\r\nErrata overrides the originally printed information on the card it\r\napplies to. Unless errata for a card appears below, the original\r\nEnglish non-draft product printing of that card and all of its\r\ninformation is considered accurate, and overrides all other\r\nprintings. This includes translated cards, promotional or\r\norganized play cards, and printings which may appear in\r\nalternate products.\r\nRULEBOOK ERRATA\r\n(v1.0) Rules Reference page 26, column 1, paragraph 9\r\nThe phrase "all relevant reserve modifiers" should read

In [7]:
chunk_size =1000    
chunk_overlap = 400

In [8]:
r_splitter = RecursiveCharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap
)

rec_splits = r_splitter.split_documents(docs)

In [9]:
print(rec_splits[1].page_content)

NOTES AND ERRATA
This section contains notes and errata pertaining to specific
cards or sections of the rulebook. The document version
number in which an entry first appeared is listed with that entry
in order to establish a history of when each change was made.
Errata overrides the originally printed information on the card it
applies to. Unless errata for a card appears below, the original
English non-draft product printing of that card and all of its
information is considered accurate, and overrides all other
printings. This includes translated cards, promotional or
organized play cards, and printings which may appear in
alternate products.
RULEBOOK ERRATA
(v1.0) Rules Reference page 26, column 1, paragraph 9
The phrase "all relevant reserve modifiers" should read: "all
relevant initiative modifiers."
CARD ERRATA
(v1.0) Ser Horas Redwyne (Westeros, 63)
Should read: "...choose and stand a Lady character."
(v1.4) Driftwood Cudgel (Flight of Crows, 112)


In [10]:
embedding = OpenAIEmbeddings()

In [11]:
simple_doc_dir = './agot_rag/embeddings/chroma/'

In [12]:
!rm -rf ./agot_rag/embeddings/chroma  # remove old database files if any

In [13]:
vectordb = Chroma.from_documents(
    documents=rec_splits,
    embedding=embedding,
    persist_directory=simple_doc_dir
)

In [11]:
# for loading: 
vectordb = Chroma(
    persist_directory=simple_doc_dir, 
    embedding_function=embedding
)

In [14]:
print(vectordb._collection.count())

257


In [92]:
question = "Can player marschal a unique card if another copy is in his or her dead pile?"

In [15]:
question = "Can a player marshal a unique card if opponent already marshaled copy of that card and is under control of his or her control?"

## For Full Answer

In [16]:
max_relev_docs = vectordb.max_marginal_relevance_search(
    query=question,
    k=4,
    fetch_k=20
    )

max_relev_docs

[Document(metadata={'page': 2, 'source': './agot_rag/GOT_FAQ_v4.1.pdf'}, page_content='considered to be “marshaling a duplicate.” While marshaling\r\na duplicate, that card is not subject to restrictions based on\r\nthe card’s printed characteristics, including cardtype,\r\nfaction, keywords, etc.\r\n(1.3) Unique cards in play and in the dead pile\r\nThe following defines the control and ownership rules\r\nsurrounding unique cards:\r\n✦A player cannot marshal, put into play, or take/gain\r\ncontrol of a unique card if another copy of that card is\r\nin his or her dead pile.\r\n✦A player cannot marshal or put into play a copy of an\r\nopponent’s unique card if another copy of that unique\r\ncard (owned either by the player attempting to bring\r\nthe card into play, or by the opponent who owns the\r\nunique card that is attempting to enter play) is already\r\nin play or is in its owner’s dead pile.\r\n✦An ability that puts a unique card into play from a\r\nplayer’s dead pile functions on

In [17]:
print(max_relev_docs[1].page_content)

If upon completion of attaching cards during setup one
attachment has created a game state in which another
attachment is illegally attached, immediately discard the
illegal attachment.
If I win a challenge in which I control two attacking Knight
characters and one of them is targeted by Ghaston Grey (Core,
116), returning it to my hand and leaving me with one attacking
Knight character, may I subsequently play Lady Sansa’s Rose
(Westeros, 24) by virtue of that single Knight who is now attacking
alone?
Yes, you may. Lady Sansa’s Rose reads “...in which you
control a Knight character that is attacking alone,” and the
check on “is attacking alone” is made at the time the event
would be played.
If I play a limited card as a duplicate, does it count as my limited
card for the round?
No. When you marshal a card as a duplicate, it is not
considered to have any of its printed keywords, including the
limited keyword.


## For short answer only

In [261]:
compressor_chat = ChatOpenAI(temperature=0, model="gpt-4o-mini")
compressor = LLMChainExtractor.from_llm(compressor_chat)

compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=vectordb.as_retriever(search_type = "mmr", search_kwargs={"k": 5})
)

In [267]:
compressed_docs = compression_retriever.invoke(question)

In [264]:
compressed_docs

[Document(metadata={'page': 2, 'source': './agot_rag/GOT_FAQ_v4.1.pdf'}, page_content='✦A player cannot marshal, put into play, or take/gain control of a unique card if another copy of that card is in his or her dead pile.'),
 Document(metadata={'page': 2, 'source': './agot_rag/GOT_FAQ_v4.1.pdf'}, page_content='An ability that puts a unique card into play from a player’s dead pile functions only if there would be no other copies of that card in that dead pile upon resolution of the ability.'),
 Document(metadata={'page': 1, 'source': './agot_rag/Rules · ThronesDB.pdf'}, page_content='A card that has been put into play is not considered to have been "marshaled." If an additional copy of a unique card a player already owns and controls is put into play under his or her control, it enters play as a duplicate.'),
 Document(metadata={'page': 0, 'source': './agot_rag/Rules · ThronesDB.pdf'}, page_content="A single instance of a unique card in a player's dead pile does not prevent an efect th

## Odpoved od LLM

In [18]:
llm = ChatOpenAI(model_name=llm_model_name, temperature=0)

  llm = ChatOpenAI(model_name=llm_model_name, temperature=0)


In [19]:
template = """Use the following pieces of context to answer the question at the end and explain why. Also, return the all numbers of all chapters you found the answer in. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer. 
{context}
Question: {question}
Helpful Answer:"""
qa_chain_prompt = PromptTemplate.from_template(template)

In [20]:
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectordb.as_retriever(),
    return_source_documents=True,
    chain_type_kwargs={"prompt": qa_chain_prompt}
)

In [21]:
result = qa_chain({"query": question})

  result = qa_chain({"query": question})


In [22]:
result

{'query': 'Can a player marshal a unique card if opponent already marshaled copy of that card and is under control of his or her control?',
 'result': 'No, a player cannot marshal a unique card if an opponent already has a copy of that card in play or in their dead pile. This is explicitly stated in the rules regarding unique cards. \n\nChapters: 1.3\n\nThanks for asking!',
 'source_documents': [Document(metadata={'page': 2, 'source': './agot_rag/GOT_FAQ_v4.1.pdf'}, page_content='considered to be “marshaling a duplicate.” While marshaling\r\na duplicate, that card is not subject to restrictions based on\r\nthe card’s printed characteristics, including cardtype,\r\nfaction, keywords, etc.\r\n(1.3) Unique cards in play and in the dead pile\r\nThe following defines the control and ownership rules\r\nsurrounding unique cards:\r\n✦A player cannot marshal, put into play, or take/gain\r\ncontrol of a unique card if another copy of that card is\r\nin his or her dead pile.\r\n✦A player cannot m

In [23]:
print(result["result"])

No, a player cannot marshal a unique card if an opponent already has a copy of that card in play or in their dead pile. This is explicitly stated in the rules regarding unique cards. 

Chapters: 1.3

Thanks for asking!
