In [1]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from transformers import AutoTokenizer, AutoModelForQuestionAnswering, pipeline
from langchain import HuggingFacePipeline
from langchain.chains import RetrievalQA
import os
import sys

# Document Loading

In [2]:
# === 1. Create a loader for every PDF in the "dataset/" directory ===
pdf_dir = "dataset"
# os.listdir() yields entries in arbitrary order, so the names are sorted to
# keep the document order reproducible from one run to the next. The extension
# test is case-insensitive so that files such as "RULES.PDF" are not skipped.
loaders = [
    PyPDFLoader(os.path.join(pdf_dir, filename))
    for filename in sorted(os.listdir(pdf_dir))
    if filename.lower().endswith(".pdf")
]

In [3]:
# Gather everything each PDF loader returns into one flat list
docs = [document for pdf_loader in loaders for document in pdf_loader.load()]

In [4]:
# === 2. Split the documents ===
# The splitter is configured with a chunk size of 1000 and an overlap of 150,
# then applied to every loaded document.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=150,
    chunk_size=1000,
)
split_docs = text_splitter.split_documents(docs)

In [5]:
# === 3. Embeddings and FAISS vector store ===
# Settings handed to the embedding wrapper: model identifier, device, and
# encoding options (normalization is switched off).
modelPath = "sentence-transformers/all-MiniLM-l6-v2"
model_kwargs = dict(device="cpu")
encode_kwargs = dict(normalize_embeddings=False)

embeddings = HuggingFaceEmbeddings(
    encode_kwargs=encode_kwargs,
    model_kwargs=model_kwargs,
    model_name=modelPath,
)

# Build the FAISS store from the split chunks using the embeddings above.
db = FAISS.from_documents(split_docs, embeddings)

  embeddings = HuggingFaceEmbeddings(


In [6]:
# === 4. Load the question-answering model ===
model_name = "Intel/dynamic_tinybert"
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Assemble the transformers "question-answering" pipeline from the model and
# tokenizer loaded above.
# NOTE(review): confirm that return_tensors is a meaningful option for this
# pipeline task.
qa_pipeline_options = {
    "model": model,
    "tokenizer": tokenizer,
    "return_tensors": "pt",
}
question_answerer = pipeline("question-answering", **qa_pipeline_options)

# Wrap the pipeline so it can be handed to LangChain as an LLM.
# NOTE(review): temperature / max_length look like text-generation settings;
# verify they have any effect with a question-answering pipeline.
llm_model_kwargs = {"temperature": 0.7, "max_length": 512}
llm = HuggingFacePipeline(
    pipeline=question_answerer,
    model_kwargs=llm_model_kwargs,
)


Invalid model-index. Not loading eval results into CardData.
Device set to use cuda:0
  llm = HuggingFacePipeline(


In [7]:
# === 5. Create the retriever and the QA chain ===
# k=4 is forwarded as a search argument (presumably the number of chunks
# fetched per query — confirm against the vector store documentation).
retriever = db.as_retriever(search_kwargs=dict(k=4))

# "refine" chain over the retriever; source documents are not returned.
chain_options = dict(
    chain_type="refine",
    retriever=retriever,
    return_source_documents=False,
)
qa = RetrievalQA.from_chain_type(llm=llm, **chain_options)

In [8]:
# === 6. Test question ===
# Query string that is passed to the QA chain under the "query" key.
question = "What is the name of the game?"

def err_remove(er):
    """Extract the text enclosed by two "------------" delimiter lines.

    The argument is converted with ``str()``, so an exception instance can be
    passed directly. The text between the first and the last occurrence of the
    delimiter is returned with surrounding whitespace removed.

    If the message does not contain two separate delimiters (none at all, a
    single one, or overlapping dash runs) there is no enclosed section to
    extract; the whole message is returned stripped instead, so an unrelated
    error is shown intact rather than as a truncated slice or an empty string.

    Args:
        er: An exception or any object whose ``str()`` is the message to scan.

    Returns:
        str: The delimited section, or the full stripped message when no
        delimited section exists.
    """
    delimiter = "------------"
    message = str(er)

    first = message.find(delimiter)
    last = message.rfind(delimiter)

    # find() returns -1 when the delimiter is absent; `last` falling inside the
    # first delimiter means there is no second, separate delimiter.
    if first == -1 or last < first + len(delimiter):
        return message.strip()

    return message[first + len(delimiter):last].strip()

# Run the chain and print its result. If the chain raises, fall back to
# printing the text that err_remove() extracts from the exception message.
# NOTE(review): the recorded output of this cell looks like a retrieved
# passage rather than a short answer, which suggests the fallback branch is
# the one that runs — confirm that the chain itself succeeds with this LLM.
try:
    result = qa.invoke({"query": question})
    print(result["result"])
except Exception as exc:
    # Catch Exception rather than using a bare `except:` so that
    # KeyboardInterrupt and SystemExit still propagate.
    error = exc  # keep the exception available after the handler exits
    answer = err_remove(error)
    print(answer)

The 56 RISK@ Cards: 42 marked with a territory and a picture of Infantry, Cavalry, or 
Artillery l 2 “wild” cards marked with all three pictures, but no territory l 12 Secret 
Mission cards used only in Secret Mission Risk, page 13. 
OBJECT OF THE GAME To conquer the world by occupying every territory on the board, 
thus eliminating all your opponents. 
SETUP Unlike most games, RISK demands careful planning before you actually start to 
play. This Initial Army Placement sets the stage for the battles you’ll fight later on. 
INITIAL ARMY PLACEMENT consists of these steps: 1. 2. 3. 4. Select a color and, 
depending on the number of players, count out the “armies” you’ll need to start the 
game. If 2 are playing, see instructions on page 11. If 3 are playing, each player counts 
out 35 Infantry. If 4 are playing, each player counts out 30 Infantry. If 5 are playing, each 
player counts out 25 Infantry. If 6 are playing, each player counts out 20 Infantry. Roll


